@@ -26,10 +26,10 @@ def main():
26
26
name = path .stem
27
27
metrics = evaluate (ground_truth , load_json (path ), args .n_bootstrap )
28
28
print ('{name:<20} '
29
- 'precision={precision:.3f} ± {precision_ci :.3f} '
30
- 'recall={recall:.3f} ± {recall_ci :.3f} '
31
- 'F1={f1:.3f} ± {f1_ci :.3f} '
32
- 'accuracy={accuracy:.3f} ± {accuracy_ci :.3f} '
29
+ 'precision={precision:.3f} ± {precision_std :.3f} '
30
+ 'recall={recall:.3f} ± {recall_std :.3f} '
31
+ 'F1={f1:.3f} ± {f1_std :.3f} '
32
+ 'accuracy={accuracy:.3f} ± {accuracy_std :.3f} '
33
33
.format (name = name , ** metrics ))
34
34
metrics_by_name [name ] = metrics
35
35
@@ -78,7 +78,7 @@ def evaluate(
78
78
b_values .setdefault ('accuracy' , []).append (
79
79
statistics .mean ([accuracies [i ] for i in indices ]))
80
80
for key , values in sorted (b_values .items ()):
81
- metrics [f'{ key } _ci ' ] = 1.96 * statistics .stdev (values )
81
+ metrics [f'{ key } _std ' ] = statistics .stdev (values )
82
82
83
83
return metrics
84
84
@@ -95,8 +95,8 @@ def print_metrics_diff(tp_fp_fns, other_tp_fp_fns, n_bootstrap):
95
95
diffs .setdefault (key , []).append (metrics [key ] - other_metrics [key ])
96
96
for key , values in sorted (diffs .items ()):
97
97
mean = statistics .mean (values )
98
- confidence_interval = 1.96 * statistics .stdev (values )
99
- print (f'{ key :<10} { mean :.3f} ± { confidence_interval :.3f} ' )
98
+ std = statistics .stdev (values )
99
+ print (f'{ key :<10} { mean :.3f} ± { std :.3f} ' )
100
100
101
101
102
102
# Type alias for a 3-tuple of floats.
# NOTE(review): judging by the name, presumably (true positives, false
# positives, false negatives) -- confirm against the code that builds it.
TP_FP_FN = Tuple [float , float , float ]
0 commit comments