Skip to content

Commit cd0e001

Browse files
committed
Update Label_Stats to avoid creating unnecessary diffs in CSV output
1 parent bc04a0c commit cd0e001

File tree

1 file changed

+18
-6
lines changed

1 file changed

+18
-6
lines changed

scripts/Label_Stats.ipynb

Lines changed: 18 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,8 @@
5151
"_CONLL_4_NOT_IN_GOLD_FILE = os.path.join(data_dir, \"CoNLL_4_not_in_gold.csv\")\n",
5252
"_CONLL_4_TRAIN_NOT_IN_GOLD_FILE = os.path.join(data_dir, \"CoNLL_4_train_not_in_gold.csv\")\n",
5353
"\n",
54-
"# Optionally output figures generated\n",
55-
"save_figures = True\n",
54+
"# Change this constant to True to regenerate PDF/EPS/PNG files with figures.\n",
55+
"save_figures = False\n",
5656
"figure_dir = os.path.join(data_dir, \"label_stats_images\")\n",
5757
"if save_figures and not os.path.exists(figure_dir):\n",
5858
" os.mkdir(figure_dir)"
@@ -8655,10 +8655,22 @@
86558655
"write_file.loc[:, \"hand_labelled\"] = write_file[\"agreeing_models\"].isna()\n",
86568656
"write_file = write_file[write_file.error_type != \"None\"]\n",
86578657
"\n",
8658-
"write_file.rename(columns={\"conll_2\": \"Original entrants ensemble\", \"conll_3\": \"custom models ensemble\", \"conll_4\": \"cross validation ensemble\"}, inplace=True)\n",
8658+
"write_file = write_file.rename(columns={\"conll_2\": \"Original entrants ensemble\", \n",
8659+
" \"conll_3\": \"Custom models ensemble\", \n",
8660+
" \"conll_4\": \"Cross validation ensemble\"})\n",
86598661
"\n",
8660-
"write_file.to_csv(ALL_LABELS_OUTPUT_FILE_NAME)\n",
8661-
"print(\"Done\")"
8662+
"# Drop \"agreeing_models\" and boolean columns to make diffs smaller.\n",
8663+
"write_file = write_file.drop(columns=[\"agreeing_models\",\n",
8664+
" \"hand_labelled\",\n",
8665+
" \"Original entrants ensemble\",\n",
8666+
" \"Custom models ensemble\",\n",
8667+
" \"Cross validation ensemble\"])\n",
8668+
"\n",
8669+
"# TODO: Sort so that order is consistent across Python versions.\n",
8670+
"#write_file = write_file.sort_values([\"fold\", \"doc_offset\", \"corpus_span\", \"corpus_ent_type\", \"error_type\"])\n",
8671+
"\n",
8672+
"write_file.to_csv(ALL_LABELS_OUTPUT_FILE_NAME, index=False)\n",
8673+
"print(f\"Wrote merged labels to {ALL_LABELS_OUTPUT_FILE_NAME}\")"
86628674
]
86638675
},
86648676
{
@@ -10189,7 +10201,7 @@
1018910201
"name": "python",
1019010202
"nbconvert_exporter": "python",
1019110203
"pygments_lexer": "ipython3",
10192-
"version": "3.8.6"
10204+
"version": "3.8.5"
1019310205
}
1019410206
},
1019510207
"nbformat": 4,

0 commit comments

Comments
 (0)