Skip to content

Commit c5758a9

Browse files
committed
Add record keeping for debugging
This PR has the enclave runner keep track about all of the usercalls that enclaves do over their lifetimes. This includes: - The total number of each async usercall done - How many of each sync usercall each enclave TCS has done - For `wait`s and `send`s, additional grouping by signal type
1 parent b6f0262 commit c5758a9

File tree

4 files changed

+262
-17
lines changed

4 files changed

+262
-17
lines changed

intel-sgx/enclave-runner/src/lib.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
* License, v. 2.0. If a copy of the MPL was not distributed with this
55
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
66

7-
#![allow(non_local_definitions)] // Required by failure
8-
#![deny(warnings)]
7+
#![feature(macro_metavar_expr)]
98
#![doc(
109
html_logo_url = "https://edp.fortanix.com/img/docs/edp-logo.svg",
1110
html_favicon_url = "https://edp.fortanix.com/favicon.ico",
@@ -16,6 +15,8 @@ mod command;
1615
mod library;
1716
mod loader;
1817
mod tcs;
18+
pub mod stats;
19+
1920
pub mod usercalls;
2021

2122
pub use crate::command::Command;

intel-sgx/enclave-runner/src/stats.rs

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
use std::collections::HashMap;
2+
use std::convert::TryInto;
3+
use std::fmt::{Write, Error as FmtError};
4+
use std::result::Result as StdResult;
5+
use std::sync::atomic::{AtomicUsize, Ordering};
6+
use std::sync::{Arc, Mutex};
7+
8+
use crate::usercalls::*;
9+
10+
use fortanix_sgx_abi::*;
11+
12+
use lazy_static::lazy_static;
13+
14+
lazy_static! {
15+
static ref USERCALL_COUNTERS: [AtomicUsize; 17] = [
16+
AtomicUsize::default(),
17+
AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(),
18+
AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(),
19+
AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(),
20+
AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(), AtomicUsize::default(),
21+
];
22+
static ref TCS_MAPPINGS: Arc<Mutex<HashMap<usize, TcsStats>>> = Arc::new(Mutex::new(HashMap::new()));
23+
}
24+
25+
#[derive(Clone, Default, Debug)]
26+
pub struct TcsStats {
27+
// Each index corresponds to that usercall number
28+
sync_calls: [usize; 17],
29+
// WAIT_NO, WAIT_INDEFINITE, other
30+
// There are 16 because there are 4 possible events to wait for
31+
waits: [(usize, usize, usize); 16],
32+
// targeted, not targeted
33+
sends: [usize; 16],
34+
}
35+
36+
pub struct RunnerStats {
37+
pub sync_calls: HashMap<usize, TcsStats>,
38+
pub async_calls: [usize; 17],
39+
}
40+
41+
impl RunnerStats {
42+
/// The total number of sync usercalls that have been handled as of this snapshot
43+
pub fn total_sync_calls(&self) -> usize {
44+
self.sync_calls.iter()
45+
.map(|(_, stats)| stats.sync_calls.iter().sum::<usize>())
46+
.sum()
47+
}
48+
49+
/// The total number of async usercalls that have been handled as of this snapshot
50+
pub fn total_async_calls(&self) -> usize {
51+
self.async_calls.iter().sum::<usize>()
52+
}
53+
54+
/// The total number of usercalls that have been handled as of this snapshot
55+
pub fn total_calls(&self) -> usize {
56+
self.total_sync_calls() + self.total_async_calls()
57+
}
58+
59+
// A "stock" formatting for this information
60+
pub fn pretty_format(&self) -> StdResult<String, FmtError> {
61+
let mut out = String::new();
62+
let mut counts = USERCALL_COUNTERS.iter()
63+
.enumerate()
64+
.map(|(i, counter)| (i, counter.load(Ordering::Relaxed)))
65+
.filter(|(_, counter)| *counter > 0)
66+
.map(|(i, counter)| format!("{:?}: {}", abi::UsercallList::from_u64(i as _), counter))
67+
.collect::<Vec<_>>()
68+
.join(", ");
69+
70+
if counts.is_empty() {
71+
counts = "None".to_owned();
72+
}
73+
74+
writeln!(out, "Async usercall counts: {}", counts)?;
75+
writeln!(out, "Sync usercall count mappings:")?;
76+
for (addr, stats) in TCS_MAPPINGS.lock().map_err(|_| FmtError)?.iter() {
77+
if stats.should_print() {
78+
writeln!(out, "Address: 0x{:0>16x}", addr)?;
79+
write!(out, "{}", stats.format()?)?;
80+
}
81+
}
82+
83+
Ok(out)
84+
}
85+
}
86+
87+
fn mask_to_str(ev: usize) -> String {
88+
let mut events = vec!();
89+
let ev = ev as u64;
90+
if ev & EV_CANCELQ_NOT_FULL != 0 {
91+
events.push("CANCELQ_NOT_FULL");
92+
}
93+
if ev & EV_RETURNQ_NOT_EMPTY != 0 {
94+
events.push("RETURNQ_NOT_EMPTY");
95+
}
96+
if ev & EV_USERCALLQ_NOT_FULL != 0 {
97+
events.push("USERCALLQ_NOT_FULL");
98+
}
99+
if ev & EV_UNPARK != 0 {
100+
events.push("UNPARK");
101+
}
102+
if events.is_empty() {
103+
events.push("NONE");
104+
}
105+
events.join(" | ")
106+
}
107+
108+
pub(crate) fn record_usercall(
109+
tcs_address: Option<TcsAddress>,
110+
p1: u64,
111+
p2: u64,
112+
p3: u64
113+
) {
114+
// Map sync usercalls to the TCS that made them
115+
if let Some(tcs_address) = tcs_address {
116+
let mut mappings = TCS_MAPPINGS.lock().expect("poisoned mutex");
117+
let entry = mappings.entry(tcs_address.0).or_default();
118+
// type
119+
entry.sync_calls[p1 as usize] += 1;
120+
if p1 == 11 {
121+
// waits
122+
let mask = &mut entry.waits[p2 as usize];
123+
match p3 {
124+
WAIT_NO => mask.0 += 1,
125+
WAIT_INDEFINITE => mask.1 += 1,
126+
_ => mask.2 += 1,
127+
}
128+
} else if p1 == 12 {
129+
// sends
130+
entry.sends[p2 as usize] += 1; // event mask
131+
}
132+
} else {
133+
// For async calls where we don't know the TCS, just store aggregates
134+
USERCALL_COUNTERS[p1 as usize].fetch_add(1, Ordering::Relaxed);
135+
}
136+
}
137+
138+
impl TcsStats {
139+
fn should_print(&self) -> bool {
140+
self.sync_calls.iter().sum::<usize>() > 10
141+
}
142+
143+
pub fn format(&self) -> StdResult<String, FmtError> {
144+
let mut out = String::new();
145+
writeln!(out,
146+
" Sync Totals: {}",
147+
self.sync_calls.iter()
148+
.enumerate()
149+
.filter(|(_, cnt)| **cnt > 0)
150+
.map(|(idx, cnt)| {
151+
format!("{:?}: {}", abi::UsercallList::from_u64(idx as u64), cnt)
152+
})
153+
.collect::<Vec<_>>()
154+
.join(", ")
155+
)?;
156+
writeln!(out,
157+
" Wait Totals: {}",
158+
self.waits.iter()
159+
.enumerate()
160+
.filter(|(_, (a, b, c))| a + b + c > 0)
161+
.map(|(idx, cnt)| {
162+
let mut out = format!("{}: ", mask_to_str(idx));
163+
let mut masks = Vec::new();
164+
if cnt.0 > 0 {
165+
masks.push(format!("WAIT_NO: {}", cnt.0))
166+
}
167+
if cnt.1 > 0 {
168+
masks.push(format!("WAIT_INDEFINITE: {}", cnt.1));
169+
}
170+
if cnt.2 > 0 {
171+
masks.push(format!("OTHER: {}", cnt.2));
172+
}
173+
out.push_str(&masks.join(", "));
174+
out
175+
})
176+
.collect::<Vec<_>>()
177+
.join("\n ")
178+
)?;
179+
writeln!(out,
180+
" Send Totals: {}",
181+
self.sends.iter()
182+
.enumerate()
183+
.filter(|(_, cnt)| **cnt > 0)
184+
.map(|(idx, cnt)| {
185+
format!("{}: {}", mask_to_str(idx), cnt)
186+
})
187+
.collect::<Vec<_>>()
188+
.join(", ")
189+
)?;
190+
Ok(out)
191+
}
192+
}
193+
194+
pub fn get_stats() -> RunnerStats {
195+
let async_calls: [usize; 17] = USERCALL_COUNTERS.iter()
196+
.map(|c| c.load(Ordering::Relaxed))
197+
.collect::<Vec<_>>()
198+
.try_into()
199+
.unwrap();
200+
201+
assert!(async_calls.len() == 17);
202+
203+
let sync_calls = {
204+
TCS_MAPPINGS.lock().expect("poison error").clone()
205+
};
206+
207+
RunnerStats {
208+
sync_calls,
209+
async_calls,
210+
}
211+
}

intel-sgx/enclave-runner/src/usercalls/abi.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,23 @@ pub(super) trait ReturnValue {
3636
macro_rules! define_usercalls {
3737
// Using `$r:tt` because `$r:ty` doesn't match ! in `dispatch_return_type`
3838
($(fn $f:ident($($n:ident: $t:ty),*) $(-> $r:tt)*; )*) => {
39+
#[derive(Debug)]
3940
#[repr(C)]
4041
#[allow(non_camel_case_types)]
4142
pub(crate) enum UsercallList {
4243
__enclave_usercalls_invalid,
4344
$($f,)*
4445
}
4546

47+
impl UsercallList {
48+
pub(crate) fn from_u64(a: u64) -> Self {
49+
match a - 1 {
50+
$( ${index()} => UsercallList::$f, )*
51+
_ => Self::__enclave_usercalls_invalid,
52+
}
53+
}
54+
}
55+
4656
pub(super) trait Usercalls <'future>: Sized {
4757
$(fn $f (self, $($n: $t),*) -> dispatch_return_type!($(-> $r )* 'future);)*
4858
fn other(self, n: u64, a1: u64, a2: u64, a3: u64, a4: u64) -> (Self, DispatchResult) {

intel-sgx/enclave-runner/src/usercalls/mod.rs

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ use sgxs::loader::Tcs as SgxsTcs;
3737

3838
use crate::loader::{EnclavePanic, ErasedTcs};
3939
use crate::tcs::{self, CoResult, ThreadResult};
40+
use crate::stats::record_usercall;
4041
use self::abi::dispatch;
4142
use self::abi::ReturnValue;
4243
use self::abi::UsercallList;
@@ -56,9 +57,9 @@ lazy_static! {
5657
// is not public.
5758
const EV_ABORT: u64 = 0b0000_0000_0001_0000;
5859

59-
const USERCALL_QUEUE_SIZE: usize = 16;
60-
const RETURN_QUEUE_SIZE: usize = 1024;
61-
const CANCEL_QUEUE_SIZE: usize = USERCALL_QUEUE_SIZE * 2;
60+
const USERCALL_QUEUE_SIZE: usize = 32768;
61+
const RETURN_QUEUE_SIZE: usize = 32768;
62+
const CANCEL_QUEUE_SIZE: usize = USERCALL_QUEUE_SIZE;
6263

6364
enum UsercallSendData {
6465
Sync(ThreadResult<ErasedTcs>, RunningTcs, RefCell<[u8; 1024]>),
@@ -470,7 +471,7 @@ impl<T> EnclaveAbort<T> {
470471
}
471472

472473
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Ord, PartialOrd)]
473-
struct TcsAddress(usize);
474+
pub(crate) struct TcsAddress(pub usize);
474475

475476
impl ErasedTcs {
476477
fn address(&self) -> TcsAddress {
@@ -551,8 +552,16 @@ impl PendingEvents {
551552
}
552553

553554
let it = std::iter::once((EV_ABORT, &self.abort))
554-
.chain(self.counts.iter().enumerate().map(|(ev, sem)| (ev as u64, sem)).filter(|&(ev, _)| ev & event_mask != 0))
555-
.map(|(ev, sem)| sem.acquire().map(move |permit| (ev, permit)).boxed());
555+
.chain(
556+
self.counts.iter()
557+
.enumerate()
558+
.map(|(ev, sem)| (ev as u64, sem))
559+
.filter(|&(ev, _)| ev & event_mask != 0)
560+
).map(|(ev, sem)| {
561+
sem.acquire()
562+
.map(move |permit| (ev, permit))
563+
.boxed()
564+
});
556565

557566
let ((ev, permit), _, _) = futures::future::select_all(it).await;
558567

@@ -773,14 +782,16 @@ impl EnclaveState {
773782
UsercallHandleData::Async(_, ref mut notifier_rx, _) => notifier_rx.take(),
774783
_ => None,
775784
};
776-
let (parameters, mode, tcs) = match handle_data {
785+
let ((p1, p2, p3, p4, p5), mode, tcs) = match handle_data {
777786
UsercallHandleData::Sync(ref usercall, ref mut tcs, _) => (usercall.parameters(), tcs.mode.into(), Some(tcs)),
778787
UsercallHandleData::Async(ref usercall, _, _) => (usercall.data.into(), ReturnSource::AsyncUsercall, None),
779788
};
789+
790+
record_usercall(tcs.as_ref().map(|tcs| tcs.tcs_address), p1, p2, p3);
791+
780792
let mut input = IOHandlerInput { enclave: enclave.clone(), tcs, work_sender: &work_sender };
781793
let handler = Handler(&mut input);
782794
let result = {
783-
let (p1, p2, p3, p4, p5) = parameters;
784795
match notifier_rx {
785796
None => dispatch(handler, p1, p2, p3, p4, p5).await.1,
786797
Some(notifier_rx) => {
@@ -932,7 +943,10 @@ impl EnclaveState {
932943
None
933944
} else {
934945
let (notifier_tx, notifier_rx) = oneshot::channel();
935-
usercall_event_tx_clone.send(UsercallEvent::Started(usercall.id, notifier_tx)).ok().expect("failed to send usercall event");
946+
usercall_event_tx_clone
947+
.send(UsercallEvent::Started(usercall.id, notifier_tx))
948+
.ok()
949+
.expect("failed to send usercall event");
936950
Some(notifier_rx)
937951
};
938952
let _ = io_queue_send.send(UsercallSendData::Async(usercall, notifier_rx));
@@ -1068,15 +1082,24 @@ impl EnclaveState {
10681082
.send(start_work)
10691083
.expect("Work sender couldn't send data to receiver");
10701084

1071-
let join_handlers =
1072-
create_worker_threads(num_of_worker_threads, work_receiver, io_queue_send.clone());
1085+
let join_handles = create_worker_threads(
1086+
num_of_worker_threads,
1087+
work_receiver,
1088+
io_queue_send.clone()
1089+
);
1090+
10731091
// main syscall polling loop
1074-
let main_result =
1075-
EnclaveState::syscall_loop(enclave.clone(), io_queue_receive, io_queue_send, work_sender);
1092+
let main_result = EnclaveState::syscall_loop(
1093+
enclave.clone(),
1094+
io_queue_receive,
1095+
io_queue_send,
1096+
work_sender
1097+
);
10761098

1077-
for handler in join_handlers {
1078-
let _ = handler.join();
1099+
for handle in join_handles {
1100+
let _ = handle.join();
10791101
}
1102+
10801103
return main_result;
10811104
}
10821105

0 commit comments

Comments
 (0)