Skip to content

Commit f812e52

Browse files
Myle Ott authored and facebook-github-bot committed
Rename data.transforms -> data.encoders
Summary: Pull Request resolved: fairinternal/fairseq-py#747 Differential Revision: D16403464 Pulled By: myleott fbshipit-source-id: ee3b4184f129a02be833c7bdc00685978b4de883
1 parent 69d0f7f commit f812e52

File tree

9 files changed

+14
-14
lines changed

9 files changed

+14
-14
lines changed

fairseq/data/transforms/__init__.py renamed to fairseq/data/encoders/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
)
2525

2626

27-
# automatically import any Python files in the transforms/ directory
27+
# automatically import any Python files in the encoders/ directory
2828
for file in os.listdir(os.path.dirname(__file__)):
2929
if file.endswith('.py') and not file.startswith('_'):
3030
module = file[:file.find('.py')]
31-
importlib.import_module('fairseq.data.transforms.' + module)
31+
importlib.import_module('fairseq.data.encoders.' + module)

fairseq/data/transforms/gpt2_bpe.py renamed to fairseq/data/encoders/gpt2_bpe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# can be found in the PATENTS file in the same directory.
77

88
from fairseq import file_utils
9-
from fairseq.data.transforms import register_bpe
9+
from fairseq.data.encoders import register_bpe
1010

1111

1212
@register_bpe('gpt2')

fairseq/data/transforms/moses_tokenizer.py renamed to fairseq/data/encoders/moses_tokenizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# the root directory of this source tree. An additional grant of patent rights
66
# can be found in the PATENTS file in the same directory.
77

8-
from fairseq.data.transforms import register_tokenizer
8+
from fairseq.data.encoders import register_tokenizer
99

1010

1111
@register_tokenizer('moses')

fairseq/data/transforms/nltk_tokenizer.py renamed to fairseq/data/encoders/nltk_tokenizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# the root directory of this source tree. An additional grant of patent rights
66
# can be found in the PATENTS file in the same directory.
77

8-
from fairseq.data.transforms import register_tokenizer
8+
from fairseq.data.encoders import register_tokenizer
99

1010

1111
@register_tokenizer('nltk')

fairseq/data/transforms/sentencepiece_bpe.py renamed to fairseq/data/encoders/sentencepiece_bpe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# can be found in the PATENTS file in the same directory.
77

88
from fairseq import file_utils
9-
from fairseq.data.transforms import register_bpe
9+
from fairseq.data.encoders import register_bpe
1010

1111

1212
@register_bpe('sentencepiece')

fairseq/data/transforms/space_tokenizer.py renamed to fairseq/data/encoders/space_tokenizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import re
99

10-
from fairseq.data.transforms import register_tokenizer
10+
from fairseq.data.encoders import register_tokenizer
1111

1212

1313
@register_tokenizer('space')

fairseq/data/transforms/subword_nmt_bpe.py renamed to fairseq/data/encoders/subword_nmt_bpe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# can be found in the PATENTS file in the same directory.
77

88
from fairseq import file_utils
9-
from fairseq.data.transforms import register_bpe
9+
from fairseq.data.encoders import register_bpe
1010

1111

1212
@register_bpe('subword_nmt')

fairseq/hub_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import torch
1010

1111
from fairseq import utils
12-
from fairseq.data import transforms
12+
from fairseq.data import encoders
1313

1414

1515
class Generator(object):
@@ -44,8 +44,8 @@ def __init__(self, args, task, models):
4444
# (None if no unknown word replacement, empty if no path to align dictionary)
4545
self.align_dict = utils.load_align_dict(getattr(args, 'replace_unk', None))
4646

47-
self.tokenizer = transforms.build_tokenizer(args)
48-
self.bpe = transforms.build_bpe(args)
47+
self.tokenizer = encoders.build_tokenizer(args)
48+
self.bpe = encoders.build_bpe(args)
4949

5050
def generate(self, src_str, verbose=False):
5151

interactive.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import torch
1616

1717
from fairseq import checkpoint_utils, options, tasks, utils
18-
from fairseq.data import transforms
18+
from fairseq.data import encoders
1919

2020

2121
Batch = namedtuple('Batch', 'ids src_tokens src_lengths')
@@ -103,8 +103,8 @@ def main(args):
103103
generator = task.build_generator(args)
104104

105105
# Handle tokenization and BPE
106-
tokenizer = transforms.build_tokenizer(args)
107-
bpe = transforms.build_bpe(args)
106+
tokenizer = encoders.build_tokenizer(args)
107+
bpe = encoders.build_bpe(args)
108108

109109
def encode_fn(x):
110110
if tokenizer is not None:

0 commit comments

Comments (0)