Skip to content

Commit 3cca8c3

Browse files
committed
Report std as it's more common, clarify
1 parent 4283a02 commit 3cca8c3

File tree

2 files changed

+10
-7
lines changed

2 files changed

+10
-7
lines changed

README.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ For evaluation, run::
6565

6666
python3 evaluate.py
6767

68+
We report precision, recall, F1 and accuracy, each with its standard deviation estimated via bootstrap.
69+
Please refer to the technical report for more details.
70+
6871
License
6972
-------
7073

evaluate.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ def main():
2626
name = path.stem
2727
metrics = evaluate(ground_truth, load_json(path), args.n_bootstrap)
2828
print('{name:<20} '
29-
'precision={precision:.3f} ± {precision_ci:.3f} '
30-
'recall={recall:.3f} ± {recall_ci:.3f} '
31-
'F1={f1:.3f} ± {f1_ci:.3f} '
32-
'accuracy={accuracy:.3f} ± {accuracy_ci:.3f} '
29+
'precision={precision:.3f} ± {precision_std:.3f} '
30+
'recall={recall:.3f} ± {recall_std:.3f} '
31+
'F1={f1:.3f} ± {f1_std:.3f} '
32+
'accuracy={accuracy:.3f} ± {accuracy_std:.3f} '
3333
.format(name=name, **metrics))
3434
metrics_by_name[name] = metrics
3535

@@ -78,7 +78,7 @@ def evaluate(
7878
b_values.setdefault('accuracy', []).append(
7979
statistics.mean([accuracies[i] for i in indices]))
8080
for key, values in sorted(b_values.items()):
81-
metrics[f'{key}_ci'] = 1.96 * statistics.stdev(values)
81+
metrics[f'{key}_std'] = statistics.stdev(values)
8282

8383
return metrics
8484

@@ -95,8 +95,8 @@ def print_metrics_diff(tp_fp_fns, other_tp_fp_fns, n_bootstrap):
9595
diffs.setdefault(key, []).append(metrics[key] - other_metrics[key])
9696
for key, values in sorted(diffs.items()):
9797
mean = statistics.mean(values)
98-
confidence_interval = 1.96 * statistics.stdev(values)
99-
print(f'{key:<10} {mean:.3f} ± {confidence_interval:.3f}')
98+
std = statistics.stdev(values)
99+
print(f'{key:<10} {mean:.3f} ± {std:.3f}')
100100

101101

102102
TP_FP_FN = Tuple[float, float, float]

0 commit comments

Comments
 (0)