Skip to content

Commit 42ccc00

Browse files
committed
fix cache_file_name docstring to make it explicit that it is a path
1 parent e36d466 commit 42ccc00

File tree

2 files changed

+14
-14
lines changed

2 files changed

+14
-14
lines changed

src/datasets/arrow_dataset.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,7 +1163,7 @@ def map(
11631163
keep_in_memory (`bool`, defaults to `False`): Keep the dataset in memory instead of writing it to a cache file.
11641164
load_from_cache_file (`bool`, defaults to `True`): If a cache file storing the current computation from `function`
11651165
can be identified, use it instead of recomputing.
1166-
cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a cache file to use to store the
1166+
cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a path for the cache file. It is used to store the
11671167
results of the computation instead of the automatically generated cache file name.
11681168
writer_batch_size (`int`, defaults to `1000`): Number of rows per write operation for the cache file writer.
11691169
Higher values give smaller cache files; lower values consume less temporary memory while running `.map()`.
@@ -1369,7 +1369,7 @@ def _map_single(
13691369
keep_in_memory (`bool`, defaults to `False`): Keep the dataset in memory instead of writing it to a cache file.
13701370
load_from_cache_file (`bool`, defaults to `True`): If a cache file storing the current computation from `function`
13711371
can be identified, use it instead of recomputing.
1372-
cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a cache file to use to store the
1372+
cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a path for the cache file. It is used to store the
13731373
results of the computation instead of the automatically generated cache file name.
13741374
writer_batch_size (`int`, defaults to `1000`): Number of rows per write operation for the cache file writer.
13751375
Higher values give smaller cache files; lower values consume less temporary memory while running `.map()`.
@@ -1590,7 +1590,7 @@ def filter(
15901590
keep_in_memory (`bool`, defaults to `False`): Keep the dataset in memory instead of writing it to a cache file.
15911591
load_from_cache_file (`bool`, defaults to `True`): If a cache file storing the current computation from `function`
15921592
can be identified, use it instead of recomputing.
1593-
cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a cache file to use to store the
1593+
cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a path for the cache file. It is used to store the
15941594
results of the computation instead of the automatically generated cache file name.
15951595
writer_batch_size (`int`, defaults to `1000`): Number of rows per write operation for the cache file writer.
15961596
Higher values give smaller cache files; lower values consume less temporary memory while running `.map()`.
@@ -1660,7 +1660,7 @@ def flatten_indices(
16601660
16611661
Args:
16621662
keep_in_memory (`bool`, default: `False`): Keep the dataset in memory instead of writing it to a cache file.
1663-
cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a cache file to use to store the
1663+
cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a path for the cache file. It is used to store the
16641664
results of the computation instead of the automatically generated cache file name.
16651665
writer_batch_size (`int`, defaults to `1000`): Number of rows per write operation for the cache file writer.
16661666
Higher values give smaller cache files; lower values consume less temporary memory while running `.map()`.
@@ -1736,7 +1736,7 @@ def select(
17361736
Args:
17371737
`indices` (sequence, iterable, ndarray or Series): List or 1D-array of integer indices for indexing.
17381738
`keep_in_memory` (`bool`, default: `False`): Keep the indices mapping in memory instead of writing it to a cache file.
1739-
`indices_cache_file_name` (`Optional[str]`, default: `None`): Provide the name of a cache file to use to store the
1739+
`indices_cache_file_name` (`Optional[str]`, default: `None`): Provide the name of a path for the cache file. It is used to store the
17401740
indices mapping instead of the automatically generated cache file name.
17411741
`writer_batch_size` (`int`, default: `1000`): Number of rows per write operation for the cache file writer.
17421742
Higher values give smaller cache files; lower values consume less temporary memory while running `.map()`.
@@ -1830,7 +1830,7 @@ def sort(
18301830
keep_in_memory (`bool`, defaults to `False`): Keep the sorted indices in memory instead of writing it to a cache file.
18311831
load_from_cache_file (`bool`, defaults to `True`): If a cache file storing the sorted indices
18321832
can be identified, use it instead of recomputing.
1833-
indices_cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a cache file to use to store the
1833+
indices_cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a path for the cache file. It is used to store the
18341834
sorted indices instead of the automatically generated cache file name.
18351835
writer_batch_size (`int`, defaults to `1000`): Number of rows per write operation for the cache file writer.
18361836
Higher values give smaller cache files; lower values consume less temporary memory.
@@ -1906,7 +1906,7 @@ def shuffle(
19061906
keep_in_memory (`bool`, defaults to `False`): Keep the shuffled indices in memory instead of writing it to a cache file.
19071907
load_from_cache_file (`bool`, defaults to `True`): If a cache file storing the shuffled indices
19081908
can be identified, use it instead of recomputing.
1909-
indices_cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a cache file to use to store the
1909+
indices_cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a path for the cache file. It is used to store the
19101910
shuffled indices instead of the automatically generated cache file name.
19111911
writer_batch_size (`int`, defaults to `1000`): Number of rows per write operation for the cache file writer.
19121912
Higher values give smaller cache files; lower values consume less temporary memory while running `.map()`.
@@ -1998,9 +1998,9 @@ def train_test_split(
19981998
keep_in_memory (`bool`, defaults to `False`): Keep the splits indices in memory instead of writing it to a cache file.
19991999
load_from_cache_file (`bool`, defaults to `True`): If a cache file storing the splits indices
20002000
can be identified, use it instead of recomputing.
2001-
train_cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a cache file to use to store the
2001+
train_cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a path for the cache file. It is used to store the
20022002
train split indices instead of the automatically generated cache file name.
2003-
test_cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a cache file to use to store the
2003+
test_cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a path for the cache file. It is used to store the
20042004
test split indices instead of the automatically generated cache file name.
20052005
writer_batch_size (`int`, defaults to `1000`): Number of rows per write operation for the cache file writer.
20062006
Higher values give smaller cache files; lower values consume less temporary memory while running `.map()`.
@@ -2183,7 +2183,7 @@ def shard(
21832183
keep_in_memory (`bool`, defaults to `False`): Keep the dataset in memory instead of writing it to a cache file.
21842184
load_from_cache_file (`bool`, defaults to `True`): If a cache file storing the current computation from `function`
21852185
can be identified, use it instead of recomputing.
2186-
indices_cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a cache file to use to store the
2186+
indices_cache_file_name (`Optional[str]`, defaults to `None`): Provide the name of a path for the cache file. It is used to store the
21872187
indices of each shard instead of the automatically generated cache file name.
21882188
writer_batch_size (`int`, defaults to `1000`): Number of rows per write operation for the cache file writer.
21892189
Higher values give smaller cache files; lower values consume less temporary memory while running `.map()`.

src/datasets/dataset_dict.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ def map(
267267
keep_in_memory (`bool`, defaults to `False`): Keep the dataset in memory instead of writing it to a cache file.
268268
load_from_cache_file (`bool`, defaults to `True`): If a cache file storing the current computation from `function`
269269
can be identified, use it instead of recomputing.
270-
cache_file_names (`Optional[Dict[str, str]]`, defaults to `None`): Provide the name of a cache file to use to store the
270+
cache_file_names (`Optional[Dict[str, str]]`, defaults to `None`): Provide the name of a path for the cache file. It is used to store the
271271
results of the computation instead of the automatically generated cache file name.
272272
You have to provide one :obj:`cache_file_name` per dataset in the dataset dictionary.
273273
writer_batch_size (`int`, defaults to `1000`): Number of rows per write operation for the cache file writer.
@@ -337,7 +337,7 @@ def filter(
337337
keep_in_memory (`bool`, defaults to `False`): Keep the dataset in memory instead of writing it to a cache file.
338338
load_from_cache_file (`bool`, defaults to `True`): If a cache file storing the current computation from `function`
339339
can be identified, use it instead of recomputing.
340-
cache_file_names (`Optional[Dict[str, str]]`, defaults to `None`): Provide the name of a cache file to use to store the
340+
cache_file_names (`Optional[Dict[str, str]]`, defaults to `None`): Provide the name of a path for the cache file. It is used to store the
341341
results of the computation instead of the automatically generated cache file name.
342342
You have to provide one :obj:`cache_file_name` per dataset in the dataset dictionary.
343343
writer_batch_size (`int`, defaults to `1000`): Number of rows per write operation for the cache file writer.
@@ -394,7 +394,7 @@ def sort(
394394
keep_in_memory (`bool`, defaults to `False`): Keep the dataset in memory instead of writing it to a cache file.
395395
load_from_cache_file (`bool`, defaults to `True`): If a cache file storing the current computation from `function`
396396
can be identified, use it instead of recomputing.
397-
indices_cache_file_names (`Optional[Dict[str, str]]`, defaults to `None`): Provide the name of a cache file to use to store the
397+
indices_cache_file_names (`Optional[Dict[str, str]]`, defaults to `None`): Provide the name of a path for the cache file. It is used to store the
398398
indices mapping instead of the automatically generated cache file name.
399399
You have to provide one :obj:`cache_file_name` per dataset in the dataset dictionary.
400400
writer_batch_size (`int`, defaults to `1000`): Number of rows per write operation for the cache file writer.
@@ -446,7 +446,7 @@ def shuffle(
446446
keep_in_memory (`bool`, defaults to `False`): Keep the dataset in memory instead of writing it to a cache file.
447447
load_from_cache_file (`bool`, defaults to `True`): If a cache file storing the current computation from `function`
448448
can be identified, use it instead of recomputing.
449-
indices_cache_file_names (`Optional[Dict[str, str]]`, default: `None`): Provide the name of a cache file to use to store the
449+
indices_cache_file_names (`Optional[Dict[str, str]]`, default: `None`): Provide the name of a path for the cache file. It is used to store the
450450
indices mappings instead of the automatically generated cache file name.
451451
You have to provide one :obj:`cache_file_name` per dataset in the dataset dictionary.
452452
writer_batch_size (`int`, defaults to `1000`): Number of rows per write operation for the cache file writer.

0 commit comments

Comments
 (0)