pytorch · pmeier · May 23, 2022 · May 23, 2022 · May 23, 2022 · May 23, 2022
diff --git a/.github/workflows/prototype-tests.yml b/.github/workflows/prototype-tests.yml
@@ -7,10 +7,18 @@ jobs:
   prototype:
     strategy:
       matrix:
+        python-version:
+          - "3.7"
+          - "3.8"
+          - "3.9"
+          - "3.10"
         os:
           - ubuntu-latest
-          - windows-latest
-          - macos-latest
+        include:
+          - python-version: "3.7"
+            os: windows-latest
+          - python-version: "3.7"
+            os: macos-latest
       fail-fast: false
 
     runs-on: ${{ matrix.os }}
@@ -19,7 +27,7 @@ jobs:
       - name: Set up python
         uses: actions/setup-python@v3
         with:
-          python-version: 3.7
+          python-version: ${{ matrix.python-version }}
 
       - name: Upgrade system packages
         run: python -m pip install --upgrade pip setuptools wheel

diff --git a/torchvision/prototype/datasets/_builtin/cub200.py b/torchvision/prototype/datasets/_builtin/cub200.py
@@ -107,10 +107,8 @@ def _2011_classify_archive(self, data: Tuple[str, Any]) -> Optional[int]:
             return 0
         elif path.name == "train_test_split.txt":
             return 1
-        elif path.name == "images.txt":
-            return 2
         elif path.name == "bounding_boxes.txt":
-            return 3
+            return 2
         else:
             return None
 
@@ -180,15 +178,17 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str,
         prepare_ann_fn: Callable
         if self._year == "2011":
             archive_dp, segmentations_dp = resource_dps
-            images_dp, split_dp, image_files_dp, bounding_boxes_dp = Demultiplexer(
-                archive_dp, 4, self._2011_classify_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
-            )
 
+            image_files_dp = Filter(archive_dp, path_comparator("name", "images.txt"))
             image_files_dp = CSVParser(image_files_dp, dialect="cub200")
             image_files_map = dict(
                 (image_id, rel_posix_path.rsplit("/", maxsplit=1)[1]) for image_id, rel_posix_path in image_files_dp
             )
 
+            images_dp, split_dp, bounding_boxes_dp = Demultiplexer(
+                archive_dp, 3, self._2011_classify_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
+            )
+
             split_dp = CSVParser(split_dp, dialect="cub200")
             split_dp = Filter(split_dp, self._2011_filter_split)
             split_dp = Mapper(split_dp, getitem(0))

diff --git a/torchvision/prototype/datasets/_builtin/imagenet.py b/torchvision/prototype/datasets/_builtin/imagenet.py
@@ -1,4 +1,3 @@
-import enum
 import functools
 import pathlib
 import re
@@ -10,7 +9,6 @@
     IterKeyZipper,
     Mapper,
     Filter,
-    Demultiplexer,
     TarArchiveLoader,
     Enumerator,
 )
@@ -27,6 +25,7 @@
     hint_shuffling,
     read_categories_file,
     path_accessor,
+    path_comparator,
 )
 from torchvision.prototype.features import Label, EncodedImage
 
@@ -46,9 +45,9 @@ def __init__(self, **kwargs: Any) -> None:
         super().__init__("Register on https://image-net.org/ and follow the instructions there.", **kwargs)
 
 
-class ImageNetDemux(enum.IntEnum):
-    META = 0
-    LABEL = 1
+# class ImageNetDemux(enum.IntEnum):
+#     META = 0
+#     LABEL = 1
 
 
 @register_dataset(NAME)
@@ -108,12 +107,6 @@ def _prepare_train_data(self, data: Tuple[str, BinaryIO]) -> Tuple[Tuple[Label,
     def _prepare_test_data(self, data: Tuple[str, BinaryIO]) -> Tuple[None, Tuple[str, BinaryIO]]:
         return None, data
 
-    def _classifiy_devkit(self, data: Tuple[str, BinaryIO]) -> Optional[int]:
-        return {
-            "meta.mat": ImageNetDemux.META,
-            "ILSVRC2012_validation_ground_truth.txt": ImageNetDemux.LABEL,
-        }.get(pathlib.Path(data[0]).name)
-
     # Although the WordNet IDs (wnids) are unique, the corresponding categories are not. For example, both n02012849
     # and n03126707 are labeled 'crane' while the first means the bird and the latter means the construction equipment
     _WNID_MAP = {
@@ -172,13 +165,11 @@ def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str,
         else:  # config.split == "val":
             images_dp, devkit_dp = resource_dps
 
-            meta_dp, label_dp = Demultiplexer(
-                devkit_dp, 2, self._classifiy_devkit, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
-            )
-
+            meta_dp = Filter(devkit_dp, path_comparator("name", "meta.mat"))
             meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids)
-            _, wnids = zip(*next(iter(meta_dp)))
+            _, wnids = zip(*list(meta_dp)[0])
 
+            label_dp = Filter(devkit_dp, path_comparator("name", "ILSVRC2012_validation_ground_truth.txt"))
             label_dp = LineReader(label_dp, decode=True, return_path=False)
             # We cannot use self._wnids here, since we use a different order than the dataset
             label_dp = Mapper(label_dp, functools.partial(self._imagenet_label_to_wnid, wnids=wnids))
@@ -204,15 +195,12 @@ def __len__(self) -> int:
             "test": 100_000,
         }[self._split]
 
-    def _filter_meta(self, data: Tuple[str, Any]) -> bool:
-        return self._classifiy_devkit(data) == ImageNetDemux.META
-
     def _generate_categories(self) -> List[Tuple[str, ...]]:
         self._split = "val"
         resources = self._resources()
 
         devkit_dp = resources[1].load(self._root)
-        meta_dp = Filter(devkit_dp, self._filter_meta)
+        meta_dp = Filter(devkit_dp, path_comparator("name", "meta.mat"))
         meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids)
 
         categories_and_wnids = cast(List[Tuple[str, ...]], next(iter(meta_dp)))