Skip to content

Commit b52ff9c

Browse files
committed
Add record keeping for debugging
This PR has the enclave runner keep track about all of the usercalls that enclaves do over their lifetimes. This includes: - The total number of each async usercall done - How many of each sync usercall each enclave TCS has done - For `wait`s and `send`s, additional grouping by signal type
1 parent b6f0262 commit b52ff9c

File tree

4 files changed

+254
-18
lines changed

4 files changed

+254
-18
lines changed

intel-sgx/enclave-runner/src/lib.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
* License, v. 2.0. If a copy of the MPL was not distributed with this
55
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
66

7-
#![allow(non_local_definitions)] // Required by failure
8-
#![deny(warnings)]
7+
#![feature(macro_metavar_expr)]
8+
#![feature(thread_id_value)]
9+
#![feature(local_key_cell_methods)]
910
#![doc(
1011
html_logo_url = "https://edp.fortanix.com/img/docs/edp-logo.svg",
1112
html_favicon_url = "https://edp.fortanix.com/favicon.ico",
@@ -16,6 +17,8 @@ mod command;
1617
mod library;
1718
mod loader;
1819
mod tcs;
20+
pub mod stats;
21+
1922
pub mod usercalls;
2023

2124
pub use crate::command::Command;

intel-sgx/enclave-runner/src/stats.rs

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
use std::collections::HashMap;
2+
use std::convert::TryInto;
3+
use std::fmt::{Write, Error as FmtError};
4+
use std::result::Result as StdResult;
5+
use std::sync::atomic::{AtomicUsize, Ordering};
6+
use std::sync::{Arc, Mutex};
7+
8+
use crate::usercalls::*;
9+
10+
use fortanix_sgx_abi::*;
11+
12+
use lazy_static::lazy_static;
13+
14+
lazy_static! {
15+
static ref USERCALL_COUNTERS: [AtomicUsize; 17] = [
16+
AtomicUsize::default(),
17+
AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(),
18+
AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(),
19+
AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(),
20+
AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(),
21+
];
22+
static ref TCS_MAPPINGS: Arc<Mutex<HashMap<usize, TcsStats>>> = Arc::new(Mutex::new(HashMap::new()));
23+
}
24+
25+
#[derive(Clone, Default, Debug)]
26+
pub struct TcsStats {
27+
// Each index corresponds to that usercall number
28+
sync_calls: [usize; 17],
29+
// WAIT_NO, WAIT_INDEFINITE, other
30+
// There are 16 because there are 4 possible events to wait for
31+
waits: [(usize, usize, usize); 16],
32+
// targeted, not targeted
33+
sends: [usize; 16],
34+
}
35+
36+
pub struct RunnerStats {
37+
pub sync_calls: HashMap<usize, TcsStats>,
38+
pub async_calls: [usize; 17],
39+
}
40+
41+
impl RunnerStats {
42+
/// The total number of sync usercalls that have been handled as of this snapshot
43+
pub fn total_sync_calls(&self) -> usize {
44+
self.sync_calls.iter()
45+
.map(|(_, stats)| stats.sync_calls.iter().sum::<usize>())
46+
.sum()
47+
}
48+
49+
/// The total number of async usercalls that have been handled as of this snapshot
50+
pub fn total_async_calls(&self) -> usize {
51+
self.async_calls.iter().sum::<usize>()
52+
}
53+
54+
/// The total number of usercalls that have been handled as of this snapshot
55+
pub fn total_calls(&self) -> usize {
56+
self.total_sync_calls() + self.total_async_calls()
57+
}
58+
59+
// A "stock" formatting for this information
60+
pub fn pretty_format(&self) -> StdResult<String, FmtError> {
61+
let mut out = String::new();
62+
let mut counts = USERCALL_COUNTERS.iter()
63+
.enumerate()
64+
.map(|(i, counter)| (i, counter.load(Ordering::Relaxed)))
65+
.filter(|(_, counter)| *counter > 0)
66+
.map(|(i, counter)| format!("{:?}: {}", abi::UsercallList::from_u64(i as _), counter))
67+
.collect::<Vec<_>>()
68+
.join(", ");
69+
70+
if counts.is_empty() {
71+
counts = "None".to_owned();
72+
}
73+
74+
writeln!(out, "Async usercall counts: {}", counts)?;
75+
writeln!(out, "Sync usercall count mappings:")?;
76+
for (addr, stats) in TCS_MAPPINGS.lock().map_err(|_| FmtError)?.iter() {
77+
if stats.should_print() {
78+
writeln!(out, "Address: 0x{:0>16x}", addr)?;
79+
write!(out, "{}", stats.format()?)?;
80+
}
81+
}
82+
83+
Ok(out)
84+
}
85+
}
86+
87+
fn mask_to_str(ev: usize) -> String {
88+
let mut events = vec!();
89+
let ev = ev as u64;
90+
if ev & EV_CANCELQ_NOT_FULL != 0 {
91+
events.push("CANCELQ_NOT_FULL");
92+
}
93+
if ev & EV_RETURNQ_NOT_EMPTY != 0 {
94+
events.push("RETURNQ_NOT_EMPTY");
95+
}
96+
if ev & EV_USERCALLQ_NOT_FULL != 0 {
97+
events.push("USERCALLQ_NOT_FULL");
98+
}
99+
if ev & EV_UNPARK != 0 {
100+
events.push("UNPARK");
101+
}
102+
if events.is_empty() {
103+
events.push("NONE");
104+
}
105+
events.join(" | ")
106+
}
107+
108+
pub(crate) fn record_usercall(
109+
tcs_address: Option<TcsAddress>,
110+
p1: u64,
111+
p2: u64,
112+
p3: u64
113+
) {
114+
// Map sync usercalls to the TCS that made them
115+
if let Some(tcs_address) = tcs_address {
116+
let mut mappings = TCS_MAPPINGS.lock().expect("poisoned mutex");
117+
let entry = mappings.entry(tcs_address.0).or_default();
118+
// type
119+
entry.sync_calls[p1 as usize] += 1;
120+
if p1 == 11 {
121+
// waits
122+
let mask = &mut entry.waits[p2 as usize];
123+
match p3 {
124+
WAIT_NO => mask.0 += 1,
125+
WAIT_INDEFINITE => mask.1 += 1,
126+
_ => mask.2 += 1,
127+
}
128+
} else if p1 == 12 {
129+
// sends
130+
entry.sends[p2 as usize] += 1; // event mask
131+
}
132+
} else {
133+
// For async calls where we don't know the TCS, just store aggregates
134+
USERCALL_COUNTERS[p1 as usize].fetch_add(1, Ordering::Relaxed);
135+
}
136+
}
137+
138+
impl TcsStats {
139+
fn should_print(&self) -> bool {
140+
self.sync_calls.iter().sum::<usize>() > 10
141+
}
142+
143+
pub fn format(&self) -> StdResult<String, FmtError> {
144+
let mut out = String::new();
145+
writeln!(out,
146+
" Sync Totals: {}",
147+
self.sync_calls.iter()
148+
.enumerate()
149+
.filter(|(_, cnt)| **cnt > 0)
150+
.map(|(idx, cnt)| {
151+
format!("{:?}: {}", abi::UsercallList::from_u64(idx as u64), cnt)
152+
})
153+
.collect::<Vec<_>>()
154+
.join(", ")
155+
)?;
156+
writeln!(out,
157+
" Wait Totals: {}",
158+
self.waits.iter()
159+
.enumerate()
160+
.filter(|(_, (a, b, c))| a + b + c > 0)
161+
.map(|(idx, cnt)| {
162+
format!("{}: WAIT_NO: {} WAIT_INDEFINITE: {} OTHER: {}", mask_to_str(idx), cnt.0, cnt.1, cnt.2)
163+
})
164+
.collect::<Vec<_>>()
165+
.join("\n ")
166+
)?;
167+
writeln!(out,
168+
" Send Totals: {}",
169+
self.sends.iter()
170+
.enumerate()
171+
.filter(|(_, cnt)| **cnt > 0)
172+
.map(|(idx, cnt)| {
173+
format!("{}: {}", mask_to_str(idx), cnt)
174+
})
175+
.collect::<Vec<_>>()
176+
.join(", ")
177+
)?;
178+
Ok(out)
179+
}
180+
}
181+
182+
pub fn get_stats() -> RunnerStats {
183+
let async_calls: [usize; 17] = USERCALL_COUNTERS.iter()
184+
.map(|c| c.load(Ordering::Relaxed))
185+
.collect::<Vec<_>>()
186+
.try_into()
187+
.unwrap();
188+
189+
assert!(async_calls.len() == 17);
190+
191+
let sync_calls = {
192+
TCS_MAPPINGS.lock().expect("poison error").clone()
193+
};
194+
195+
RunnerStats {
196+
sync_calls,
197+
async_calls,
198+
}
199+
}

intel-sgx/enclave-runner/src/usercalls/abi.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use std::sync::atomic::{AtomicUsize, Ordering};
1717

1818
use futures::future::Future;
1919

20-
type Register = u64;
20+
pub(crate) type Register = u64;
2121

2222
trait RegisterArgument {
2323
fn from_register(_: Register) -> Self;
@@ -36,13 +36,23 @@ pub(super) trait ReturnValue {
3636
macro_rules! define_usercalls {
3737
// Using `$r:tt` because `$r:ty` doesn't match ! in `dispatch_return_type`
3838
($(fn $f:ident($($n:ident: $t:ty),*) $(-> $r:tt)*; )*) => {
39+
#[derive(Debug)]
3940
#[repr(C)]
4041
#[allow(non_camel_case_types)]
4142
pub(crate) enum UsercallList {
4243
__enclave_usercalls_invalid,
4344
$($f,)*
4445
}
4546

47+
impl UsercallList {
48+
pub(crate) fn from_u64(a: u64) -> Self {
49+
match a - 1 {
50+
$( ${index()} => UsercallList::$f, )*
51+
_ => Self::__enclave_usercalls_invalid,
52+
}
53+
}
54+
}
55+
4656
pub(super) trait Usercalls <'future>: Sized {
4757
$(fn $f (self, $($n: $t),*) -> dispatch_return_type!($(-> $r )* 'future);)*
4858
fn other(self, n: u64, a1: u64, a2: u64, a3: u64, a4: u64) -> (Self, DispatchResult) {

intel-sgx/enclave-runner/src/usercalls/mod.rs

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ use sgxs::loader::Tcs as SgxsTcs;
3737

3838
use crate::loader::{EnclavePanic, ErasedTcs};
3939
use crate::tcs::{self, CoResult, ThreadResult};
40+
use crate::stats::record_usercall;
4041
use self::abi::dispatch;
4142
use self::abi::ReturnValue;
4243
use self::abi::UsercallList;
@@ -50,15 +51,16 @@ lazy_static! {
5051
static ref DEBUGGER_TOGGLE_SYNC: Mutex<()> = Mutex::new(());
5152
}
5253

54+
5355
// This is not an event in the sense that it could be passed to `send()` or
5456
// `wait()` usercalls in enclave code. However, it's easier for the enclave
5557
// runner implementation to lump it in with events. Also note that this constant
5658
// is not public.
5759
const EV_ABORT: u64 = 0b0000_0000_0001_0000;
5860

59-
const USERCALL_QUEUE_SIZE: usize = 16;
60-
const RETURN_QUEUE_SIZE: usize = 1024;
61-
const CANCEL_QUEUE_SIZE: usize = USERCALL_QUEUE_SIZE * 2;
61+
const USERCALL_QUEUE_SIZE: usize = 32768;
62+
const RETURN_QUEUE_SIZE: usize = 32768;
63+
const CANCEL_QUEUE_SIZE: usize = USERCALL_QUEUE_SIZE;
6264

6365
enum UsercallSendData {
6466
Sync(ThreadResult<ErasedTcs>, RunningTcs, RefCell<[u8; 1024]>),
@@ -470,7 +472,7 @@ impl<T> EnclaveAbort<T> {
470472
}
471473

472474
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Ord, PartialOrd)]
473-
struct TcsAddress(usize);
475+
pub(crate) struct TcsAddress(pub usize);
474476

475477
impl ErasedTcs {
476478
fn address(&self) -> TcsAddress {
@@ -551,8 +553,16 @@ impl PendingEvents {
551553
}
552554

553555
let it = std::iter::once((EV_ABORT, &self.abort))
554-
.chain(self.counts.iter().enumerate().map(|(ev, sem)| (ev as u64, sem)).filter(|&(ev, _)| ev & event_mask != 0))
555-
.map(|(ev, sem)| sem.acquire().map(move |permit| (ev, permit)).boxed());
556+
.chain(
557+
self.counts.iter()
558+
.enumerate()
559+
.map(|(ev, sem)| (ev as u64, sem))
560+
.filter(|&(ev, _)| ev & event_mask != 0)
561+
).map(|(ev, sem)| {
562+
sem.acquire()
563+
.map(move |permit| (ev, permit))
564+
.boxed()
565+
});
556566

557567
let ((ev, permit), _, _) = futures::future::select_all(it).await;
558568

@@ -773,14 +783,16 @@ impl EnclaveState {
773783
UsercallHandleData::Async(_, ref mut notifier_rx, _) => notifier_rx.take(),
774784
_ => None,
775785
};
776-
let (parameters, mode, tcs) = match handle_data {
786+
let ((p1, p2, p3, p4, p5), mode, tcs) = match handle_data {
777787
UsercallHandleData::Sync(ref usercall, ref mut tcs, _) => (usercall.parameters(), tcs.mode.into(), Some(tcs)),
778788
UsercallHandleData::Async(ref usercall, _, _) => (usercall.data.into(), ReturnSource::AsyncUsercall, None),
779789
};
790+
791+
record_usercall(tcs.as_ref().map(|tcs| tcs.tcs_address), p1, p2, p3);
792+
780793
let mut input = IOHandlerInput { enclave: enclave.clone(), tcs, work_sender: &work_sender };
781794
let handler = Handler(&mut input);
782795
let result = {
783-
let (p1, p2, p3, p4, p5) = parameters;
784796
match notifier_rx {
785797
None => dispatch(handler, p1, p2, p3, p4, p5).await.1,
786798
Some(notifier_rx) => {
@@ -932,7 +944,10 @@ impl EnclaveState {
932944
None
933945
} else {
934946
let (notifier_tx, notifier_rx) = oneshot::channel();
935-
usercall_event_tx_clone.send(UsercallEvent::Started(usercall.id, notifier_tx)).ok().expect("failed to send usercall event");
947+
usercall_event_tx_clone
948+
.send(UsercallEvent::Started(usercall.id, notifier_tx))
949+
.ok()
950+
.expect("failed to send usercall event");
936951
Some(notifier_rx)
937952
};
938953
let _ = io_queue_send.send(UsercallSendData::Async(usercall, notifier_rx));
@@ -1068,15 +1083,24 @@ impl EnclaveState {
10681083
.send(start_work)
10691084
.expect("Work sender couldn't send data to receiver");
10701085

1071-
let join_handlers =
1072-
create_worker_threads(num_of_worker_threads, work_receiver, io_queue_send.clone());
1086+
let join_handles = create_worker_threads(
1087+
num_of_worker_threads,
1088+
work_receiver,
1089+
io_queue_send.clone()
1090+
);
1091+
10731092
// main syscall polling loop
1074-
let main_result =
1075-
EnclaveState::syscall_loop(enclave.clone(), io_queue_receive, io_queue_send, work_sender);
1093+
let main_result = EnclaveState::syscall_loop(
1094+
enclave.clone(),
1095+
io_queue_receive,
1096+
io_queue_send,
1097+
work_sender
1098+
);
10761099

1077-
for handler in join_handlers {
1078-
let _ = handler.join();
1100+
for handle in join_handles {
1101+
let _ = handle.join();
10791102
}
1103+
10801104
return main_result;
10811105
}
10821106

0 commit comments

Comments
 (0)