Skip to content

Commit 10819d0

Browse files
authored
Merge pull request #358 from ASUS-AICS/revert-347-concurent
Revert "Speed Up Tokenization Through Multiprocessing"
2 parents df52157 + 115ac65 commit 10819d0

File tree

1 file changed

+1
-4
lines changed

1 file changed

+1
-4
lines changed

libmultilabel/nn/data_utils.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import gc
33
import logging
44
import warnings
5-
from concurrent.futures import ProcessPoolExecutor
65

76
import pandas as pd
87
import torch
@@ -176,9 +175,7 @@ def _load_raw_data(data, is_test=False, tokenize_text=True, remove_no_label_data
176175

177176
data["label"] = data["label"].astype(str).map(lambda s: s.split())
178177
if tokenize_text:
179-
# multiprocessing requires serializable objects
180-
with ProcessPoolExecutor() as executor:
181-
data["text"] = pd.Series(tqdm(executor.map(tokenize, data["text"]), total=len(data["text"])))
178+
data["text"] = data["text"].map(tokenize)
182179
data = data.to_dict("records")
183180
if not is_test:
184181
num_no_label_data = sum(1 for d in data if len(d["label"]) == 0)

0 commit comments

Comments (0)