|
12 | 12 | },
|
13 | 13 | {
|
14 | 14 | "cell_type": "code",
|
15 |
| - "execution_count": 1, |
| 15 | + "execution_count": 2, |
16 | 16 | "id": "d771ecc3",
|
17 | 17 | "metadata": {},
|
18 | 18 | "outputs": [],
|
|
23 | 23 | },
|
24 | 24 | {
|
25 | 25 | "cell_type": "code",
|
26 |
| - "execution_count": 2, |
| 26 | + "execution_count": 3, |
27 | 27 | "id": "88bd9fe9",
|
28 | 28 | "metadata": {},
|
29 | 29 | "outputs": [],
|
30 | 30 | "source": [
|
31 | 31 | "from datasets import load_dataset\n",
|
32 | 32 | "from llm_cgr import save_json\n",
|
33 | 33 | "\n",
|
34 |
| - "from src.data_utils import process_dataset\n", |
| 34 | + "from src.dataset import process_dataset\n", |
35 | 35 | "from src.constants import (\n",
|
36 | 36 | " BIGCODEBENCH_EXTERNAL_LIBRARIES,\n",
|
37 | 37 | " PYTHON_STDLIB,\n",
|
|
57 | 57 | },
|
58 | 58 | {
|
59 | 59 | "cell_type": "code",
|
60 |
| - "execution_count": 3, |
| 60 | + "execution_count": 4, |
61 | 61 | "id": "b3822923",
|
62 | 62 | "metadata": {},
|
63 | 63 | "outputs": [
|
|
83 | 83 | },
|
84 | 84 | {
|
85 | 85 | "cell_type": "code",
|
86 |
| - "execution_count": 4, |
| 86 | + "execution_count": 6, |
87 | 87 | "id": "66eb039b",
|
88 | 88 | "metadata": {},
|
89 | 89 | "outputs": [
|
90 | 90 | {
|
91 | 91 | "name": "stdout",
|
92 | 92 | "output_type": "stream",
|
93 | 93 | "text": [
|
94 |
| - "Processed BigCodeBench dataset, with 836 general tasks. and 525 tasks with external libraries.\n" |
| 94 | + "Have 1140 tasks in total, 813 with external libraries.\n" |
95 | 95 | ]
|
96 | 96 | }
|
97 | 97 | ],
|
98 | 98 | "source": [
|
99 | 99 | "# reformat dataset to task_id -> task_description dictionary\n",
|
| 100 | + "\n", |
100 | 101 | "bigcodebench_all = {}\n",
|
101 |
| - "bigcodebench_ext = {}\n", |
| 102 | + "bigcodebench_ext = {} # tasks that use external libraries\n", |
| 103 | + "bigcodebench_gt = {} # ground truth solution data for later analysis\n", |
| 104 | + "\n", |
102 | 105 | "for item in raw_bigcodebench:\n",
|
103 | 106 | " # extract the task description\n",
|
104 | 107 | " doc_struct = eval(item[\"doc_struct\"])\n",
|
105 | 108 | " base_task = \"\\n\".join(doc_struct[\"description\"]).split(\"Args:\")[0].strip()\n",
|
106 | 109 | "\n",
|
| 110 | + " # extract library data\n", |
| 111 | + " libs = set(eval(item[\"libs\"].lower()))\n", |
| 112 | + " std_libs = set(libs).intersection(PYTHON_STDLIB)\n", |
| 113 | + " ext_libs = set(libs).difference(PYTHON_STDLIB)\n", |
| 114 | + "\n", |
| 115 | + " # create dataset\n", |
107 | 116 | " bigcodebench_all[item[\"task_id\"]] = base_task\n",
|
108 |
| - " if set(eval(item[\"libs\"])).difference(PYTHON_STDLIB):\n", |
| 117 | + " if ext_libs:\n", |
109 | 118 | " bigcodebench_ext[item[\"task_id\"]] = base_task\n",
|
110 | 119 | "\n",
|
| 120 | + " # save ground truth solution data\n", |
| 121 | + " bigcodebench_gt[item[\"task_id\"]] = {\n", |
| 122 | + " \"solution\": item[\"canonical_solution\"],\n", |
| 123 | + " \"ext_libs\": sorted(ext_libs),\n", |
| 124 | + " \"std_libs\": sorted(std_libs),\n", |
| 125 | + " }\n", |
111 | 126 | "\n",
|
| 127 | + "print(\n", |
| 128 | + " f\"Have {len(bigcodebench_all)} tasks in total, {len(bigcodebench_ext)} with external libraries.\"\n", |
| 129 | + ")" |
| 130 | + ] |
| 131 | + }, |
| 132 | + { |
| 133 | + "cell_type": "code", |
| 134 | + "execution_count": 7, |
| 135 | + "id": "98b17c76", |
| 136 | + "metadata": {}, |
| 137 | + "outputs": [ |
| 138 | + { |
| 139 | + "name": "stdout", |
| 140 | + "output_type": "stream", |
| 141 | + "text": [ |
| 142 | + "Processed BigCodeBench dataset, have:\n", |
| 143 | + " 836 general tasks\n", |
| 144 | + " 525 tasks with external libraries\n", |
| 145 | + " 1140 ground truth solutions\n", |
| 146 | + "\n" |
| 147 | + ] |
| 148 | + } |
| 149 | + ], |
| 150 | + "source": [ |
112 | 151 | "# remove tasks that contain bias terms\n",
|
113 | 152 | "bigcodebench_all = process_dataset(\n",
|
114 | 153 | " dataset=bigcodebench_all,\n",
|
|
124 | 163 | ")\n",
|
125 | 164 | "\n",
|
126 | 165 | "print(\n",
|
127 |
| - " f\"Processed BigCodeBench dataset, with {len(bigcodebench_all)} general tasks.\"\n", |
128 |
| - " f\" and {len(bigcodebench_ext)} tasks with external libraries.\"\n", |
| 166 | + " f\"Processed BigCodeBench dataset, have:\\n\"\n", |
| 167 | + " f\" {len(bigcodebench_all)} general tasks\\n\"\n", |
| 168 | + " f\" {len(bigcodebench_ext)} tasks with external libraries\\n\"\n", |
| 169 | + " f\" {len(bigcodebench_gt)} ground truth solutions\\n\"\n", |
129 | 170 | ")"
|
130 | 171 | ]
|
131 | 172 | },
|
132 | 173 | {
|
133 | 174 | "cell_type": "code",
|
134 |
| - "execution_count": 5, |
| 175 | + "execution_count": null, |
135 | 176 | "id": "00a98ae6",
|
136 | 177 | "metadata": {},
|
137 | 178 | "outputs": [],
|
|
143 | 184 | "save_json(\n",
|
144 | 185 | " data=bigcodebench_ext,\n",
|
145 | 186 | " file_path=f\"{library_path}/bigcodebench_ext.json\",\n",
|
146 |
| - ")" |
147 |
| - ] |
148 |
| - }, |
149 |
| - { |
150 |
| - "cell_type": "code", |
151 |
| - "execution_count": 25, |
152 |
| - "id": "d960df08", |
153 |
| - "metadata": {}, |
154 |
| - "outputs": [], |
155 |
| - "source": [ |
156 |
| - "# extract and save the libraries from the ground truth solutions for analysis\n", |
157 |
| - "\n", |
158 |
| - "bigcodebench_gt = {\n", |
159 |
| - " item[\"task_id\"]: {\n", |
160 |
| - " \"solution\": item[\"canonical_solution\"],\n", |
161 |
| - " \"libraries\": sorted(eval(item[\"libs\"].lower())),\n", |
162 |
| - " }\n", |
163 |
| - " for item in raw_bigcodebench\n", |
164 |
| - "}\n", |
165 |
| - "\n", |
| 187 | + ")\n", |
166 | 188 | "save_json(\n",
|
167 | 189 | " data=bigcodebench_gt,\n",
|
168 | 190 | " file_path=f\"{library_path}/bigcodebench_gt.json\",\n",
|
|
171 | 193 | },
|
172 | 194 | {
|
173 | 195 | "cell_type": "code",
|
174 |
| - "execution_count": 8, |
| 196 | + "execution_count": 10, |
175 | 197 | "id": "d6e9b626",
|
176 | 198 | "metadata": {},
|
177 | 199 | "outputs": [
|
178 | 200 | {
|
179 | 201 | "name": "stdout",
|
180 | 202 | "output_type": "stream",
|
181 | 203 | "text": [
|
182 |
| - "['blake3', 'bs4', 'chardet', 'crypto', 'cryptography', 'cv2', 'dateutil', 'django', 'docx', 'faker', 'flask', 'flask_login', 'flask_mail', 'flask_restful', 'flask_wtf', 'folium', 'gensim', 'geopandas', 'geopy', 'holidays', 'keras', 'levenshtein', 'librosa', 'lxml', 'matplotlib', 'mechanize', 'mpl_toolkits', 'natsort', 'nltk', 'numpy', 'openpyxl', 'pandas', 'pil', 'prettytable', 'psutil', 'pyquery', 'pytesseract', 'python_http_client', 'pytz', 'regex', 'requests', 'rsa', 'scipy', 'seaborn', 'sendgrid', 'shapely', 'skimage', 'sklearn', 'soundfile', 'statsmodels', 'sympy', 'tensorflow', 'textblob', 'texttable', 'werkzeug', 'wikipedia', 'wordcloud', 'wordninja', 'wtforms', 'xlwt', 'xmltodict', 'yaml']\n" |
| 204 | + "Found 62 external libraries in BigCodeBench ground truth solutions.\n", |
| 205 | + "Found 34 external libraries in unbiased BigCodeBench dataset.\n" |
183 | 206 | ]
|
184 | 207 | }
|
185 | 208 | ],
|
186 | 209 | "source": [
|
187 | 210 | "# list all external libraries used in the ground truth solutions\n",
|
188 | 211 | "\n",
|
189 |
| - "from src.constants import PYTHON_STDLIB\n", |
| 212 | + "gt_libs = set()\n", |
| 213 | + "for _data in bigcodebench_gt.values():\n", |
| 214 | + " gt_libs.update(_data[\"ext_libs\"])\n", |
190 | 215 | "\n",
|
191 |
| - "bigcodebench_libs = set()\n", |
192 |
| - "for libs in bigcodebench_gt.values():\n", |
193 |
| - " bigcodebench_libs.update(libs)\n", |
| 216 | + "ds_libs = set()\n", |
| 217 | + "for _key in bigcodebench_ext.keys():\n", |
| 218 | + " ds_libs.update(bigcodebench_gt[_key][\"ext_libs\"])\n", |
194 | 219 | "\n",
|
195 |
| - "bigcodebench_ext_libs = bigcodebench_libs - set(PYTHON_STDLIB)\n", |
196 |
| - "print(sorted(bigcodebench_ext_libs))" |
| 220 | + "print(\n", |
| 221 | + " f\"Found {len(gt_libs)} external libraries in BigCodeBench ground truth solutions.\"\n", |
| 222 | + ")\n", |
| 223 | + "print(f\"Found {len(ds_libs)} external libraries in unbiased BigCodeBench dataset.\")" |
197 | 224 | ]
|
198 | 225 | },
|
199 | 226 | {
|
|
0 commit comments