Benchmark script fixes (#1301)

zphang · jeswan · web-flow · commit 65888b4b3658 · 2021-04-08T11:49:57.000-04:00
* fixes recommended by eritain

Co-authored-by: jeswan <57466294+jeswan@users.noreply.github.com>
diff --git a/guides/benchmarks/glue.md b/guides/benchmarks/glue.md
@@ -8,7 +8,7 @@
 python benchmark_submission_formatter.py \
     --benchmark GLUE \
     --input_base_path $INPUT_BASE_PATH \
-    --output_path $OUTPUT_BASE PATH
+    --output_path $OUTPUT_BASE_PATH
 ```
 
 where `$INPUT_BASE_PATH` contains the task folder(s) output by [runscript.py](https://github.com/jiant-dev/jiant/blob/master/jiant/proj/main/runscript.py). Alternatively, a subset of tasks can be formatted using:
@@ -18,5 +18,5 @@ python benchmark_submission_formatter.py \
     --benchmark GLUE \
     --tasks cola mrpc \
     --input_base_path $INPUT_BASE_PATH \
-    --output_path $OUTPUT_BASE PATH
+    --output_path $OUTPUT_BASE_PATH
 ```
diff --git a/guides/benchmarks/superglue.md b/guides/benchmarks/superglue.md
@@ -6,7 +6,7 @@
 python benchmark_submission_formatter.py \
     --benchmark SUPERGLUE \
     --input_base_path $INPUT_BASE_PATH \
-    --output_path $OUTPUT_BASE PATH
+    --output_path $OUTPUT_BASE_PATH
 ```
 
 where `$INPUT_BASE_PATH` contains the task folder(s) output by [runscript.py](https://github.com/nyu-mll/jiant/blob/master/jiant/proj/main/runscript.py). Alternatively, a subset of tasks can be formatted using:
@@ -16,5 +16,5 @@ python benchmark_submission_formatter.py \
     --benchmark SUPERGLUE \
     --tasks cola mrpc \
     --input_base_path $INPUT_BASE_PATH \
-    --output_path $OUTPUT_BASE PATH
+    --output_path $OUTPUT_BASE_PATH
 ```
diff --git a/jiant/scripts/benchmarks/benchmark_submission_formatter.py b/jiant/scripts/benchmarks/benchmark_submission_formatter.py
@@ -4,7 +4,7 @@
 import os
 import argparse
 
-from jiant.scripts.postproc.benchmarks import GlueBenchmark, SuperglueBenchmark
+from jiant.scripts.benchmarks.benchmarks import GlueBenchmark, SuperglueBenchmark
 
 
 SUPPORTED_BENCHMARKS = {"GLUE": GlueBenchmark, "SUPERGLUE": SuperglueBenchmark}
@@ -17,7 +17,7 @@ def main():
     parser.add_argument(
         "--input_base_path",
         required=True,
-        help="base input path of benchmark task predictions (contains the benchmark task folders)",
+        help="base path where per-task folders contain raw prediction files",
     )
     parser.add_argument("--output_path", required=True, help="output path for formatted files")
     parser.add_argument(
@@ -31,15 +31,15 @@ def main():
     benchmark = SUPPORTED_BENCHMARKS[args.benchmark]
 
     if args.tasks:
-        assert args.tasks in benchmark.TASKS
+        assert set(args.tasks) <= benchmark.TASKS
         task_names = args.tasks
     else:
         task_names = benchmark.TASKS
 
     for task_name in task_names:
         input_filepath = os.path.join(args.input_base_path, task_name, "test_preds.p")
         output_filepath = os.path.join(
-            args.output_path, benchmark.BENCHMARK_SUBMISSION_FILENAMES[task_name]
+            os.path.abspath(args.output_path), benchmark.BENCHMARK_SUBMISSION_FILENAMES[task_name]
         )
         benchmark.write_predictions(task_name, input_filepath, output_filepath)