support chunks in open_groups and open_datatree #9660
@@ -41,7 +41,9 @@
 )
 from xarray.core.dataarray import DataArray
 from xarray.core.dataset import Dataset, _get_chunk, _maybe_chunk
+from xarray.core.datatree import DataTree
 from xarray.core.indexes import Index
+from xarray.core.treenode import group_subtrees
 from xarray.core.types import NetcdfWriteModes, ZarrWriteModes
 from xarray.core.utils import is_remote_uri
 from xarray.namedarray.daskmanager import DaskManager
@@ -74,7 +76,6 @@
     T_NetcdfTypes = Literal[
         "NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_64BIT", "NETCDF3_CLASSIC"
     ]
-    from xarray.core.datatree import DataTree

 DATAARRAY_NAME = "__xarray_dataarray_name__"
 DATAARRAY_VARIABLE = "__xarray_dataarray_variable__"
@@ -414,6 +415,56 @@ def _dataset_from_backend_dataset(
     return ds


+def _datatree_from_backend_datatree(
+    backend_tree,
+    filename_or_obj,
+    engine,
+    chunks,
+    cache,
+    overwrite_encoded_chunks,
+    inline_array,
+    chunked_array_type,
+    from_array_kwargs,
+    **extra_tokens,
+):
+    if not isinstance(chunks, int | dict) and chunks not in {None, "auto"}:
+        raise ValueError(
+            f"chunks must be an int, dict, 'auto', or None. Instead found {chunks}."
+        )
+
+    # _protect_datatree_variables_inplace(backend_tree, cache)
+    if chunks is None:
+        tree = backend_tree
+    else:
+        tree = DataTree.from_dict(
+            {
+                path: _chunk_ds(
+                    node.dataset,
+                    filename_or_obj,
+                    engine,
+                    chunks,
+                    overwrite_encoded_chunks,
+                    inline_array,
+                    chunked_array_type,
+                    from_array_kwargs,
+                    **extra_tokens,
+                )
+                for path, [node] in group_subtrees(backend_tree)
+            }
+        )
+
+    # ds.set_close(backend_ds._close)

Review thread on the commented-out set_close line (marked resolved by keewis):

> the question is, do we even need that here? I copied this from [...]. The only reason why I kept the commented-out line is to discuss whether the shift in paradigm (have the backend set [...])

> I agree it would be nice to remove this, I'm just worried that mapping over the each [...]

> It does not (I think), so I'm explicitly copying it over. So far that doesn't appear to cause anything to break.
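For context, a minimal sketch of the Dataset-side pattern being discussed, assuming the public Dataset.set_close and the private _close attribute behave as they do in _dataset_from_backend_dataset; the helper name is hypothetical and not part of this PR:

from xarray import Dataset

def _rechunk_keeping_close(backend_ds: Dataset, chunks: int | dict) -> Dataset:
    # Hypothetical helper showing the pattern under discussion:
    # .chunk() returns a new Dataset that does not carry the
    # file-closing callback, so it is re-attached explicitly.
    ds = backend_ds.chunk(chunks)
    ds.set_close(backend_ds._close)  # _close is private xarray API
    return ds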
+
+    # Ensure source filename always stored in dataset object
+    if "source" not in tree.encoding:
+        path = getattr(filename_or_obj, "path", filename_or_obj)
+
+        if isinstance(path, str | os.PathLike):
+            tree.encoding["source"] = _normalize_path(path)
+
+    return tree
+
+
 def open_dataset(
     filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
     *,
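The rebuilding pattern above can be exercised with public API alone. A minimal sketch, with toy group names and sizes (requires dask for .chunk, and a recent xarray exposing DataTree at the top level):

import numpy as np
import xarray as xr

# A toy two-group tree standing in for what a backend would return.
tree = xr.DataTree.from_dict(
    {
        "/": xr.Dataset({"a": ("x", np.arange(10))}),
        "/child": xr.Dataset({"b": ("y", np.arange(6))}),
    }
)

# Mirror _datatree_from_backend_datatree: map a per-node transformation
# over the subtree and reassemble the result with DataTree.from_dict.
chunked = xr.DataTree.from_dict(
    {node.path: node.dataset.chunk(3) for node in tree.subtree}
)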
@@ -838,7 +889,22 @@ def open_dataarray(

 def open_datatree(
     filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
+    *,
     engine: T_Engine = None,
+    chunks: T_Chunks = None,
+    cache: bool | None = None,
+    decode_cf: bool | None = None,
+    mask_and_scale: bool | Mapping[str, bool] | None = None,
+    decode_times: bool | Mapping[str, bool] | None = None,
+    decode_timedelta: bool | Mapping[str, bool] | None = None,
+    use_cftime: bool | Mapping[str, bool] | None = None,
+    concat_characters: bool | Mapping[str, bool] | None = None,
+    decode_coords: Literal["coordinates", "all"] | bool | None = None,
+    drop_variables: str | Iterable[str] | None = None,
+    inline_array: bool = False,
+    chunked_array_type: str | None = None,
+    from_array_kwargs: dict[str, Any] | None = None,
+    backend_kwargs: dict[str, Any] | None = None,
     **kwargs,
 ) -> DataTree:
     """
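The new keyword-only parameters mirror open_dataset; in particular, chunks accepts the same forms validated in _datatree_from_backend_datatree above. A sketch (the file name and dimension name are made up):

import xarray as xr

xr.open_datatree("example.nc")                       # chunks=None: no dask, lazy backend arrays
xr.open_datatree("example.nc", chunks={})            # dask, engine-preferred (on-disk) chunks
xr.open_datatree("example.nc", chunks="auto")        # dask "auto" chunking
xr.open_datatree("example.nc", chunks={"time": 12})  # explicit per-dimension chunk size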
@@ -856,17 +922,75 @@ def open_datatree( | |
------- | ||
xarray.DataTree | ||
""" | ||
if cache is None: | ||
cache = chunks is None | ||
|
||
if backend_kwargs is not None: | ||
kwargs.update(backend_kwargs) | ||
|
||
if engine is None: | ||
engine = plugins.guess_engine(filename_or_obj) | ||
|
||
if from_array_kwargs is None: | ||
from_array_kwargs = {} | ||
|
||
backend = plugins.get_backend(engine) | ||
|
||
return backend.open_datatree(filename_or_obj, **kwargs) | ||
decoders = _resolve_decoders_kwargs( | ||
decode_cf, | ||
open_backend_dataset_parameters=(), | ||
mask_and_scale=mask_and_scale, | ||
decode_times=decode_times, | ||
decode_timedelta=decode_timedelta, | ||
concat_characters=concat_characters, | ||
use_cftime=use_cftime, | ||
decode_coords=decode_coords, | ||
) | ||
overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None) | ||
|
||
backend_tree = backend.open_datatree( | ||
filename_or_obj, | ||
drop_variables=drop_variables, | ||
**decoders, | ||
**kwargs, | ||
) | ||
|
||
tree = _datatree_from_backend_datatree( | ||
backend_tree, | ||
filename_or_obj, | ||
engine, | ||
chunks, | ||
cache, | ||
overwrite_encoded_chunks, | ||
inline_array, | ||
chunked_array_type, | ||
from_array_kwargs, | ||
drop_variables=drop_variables, | ||
**decoders, | ||
**kwargs, | ||
) | ||
|
||
return tree | ||
|
||
|
||
def open_groups( | ||
filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, | ||
*, | ||
engine: T_Engine = None, | ||
chunks: T_Chunks = None, | ||
cache: bool | None = None, | ||
decode_cf: bool | None = None, | ||
mask_and_scale: bool | Mapping[str, bool] | None = None, | ||
decode_times: bool | Mapping[str, bool] | None = None, | ||
decode_timedelta: bool | Mapping[str, bool] | None = None, | ||
use_cftime: bool | Mapping[str, bool] | None = None, | ||
concat_characters: bool | Mapping[str, bool] | None = None, | ||
decode_coords: Literal["coordinates", "all"] | bool | None = None, | ||
drop_variables: str | Iterable[str] | None = None, | ||
inline_array: bool = False, | ||
chunked_array_type: str | None = None, | ||
from_array_kwargs: dict[str, Any] | None = None, | ||
backend_kwargs: dict[str, Any] | None = None, | ||
**kwargs, | ||
) -> dict[str, Dataset]: | ||
""" | ||
|
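Putting the pieces together, decoding options and chunking can now be passed in one open_datatree call. A sketch; the file, dimension, and variable names are hypothetical:

import xarray as xr

tree = xr.open_datatree(
    "example.nc",
    chunks={"time": 24},     # each group's dataset is chunked on open
    decode_times=True,       # decoder kwargs are now forwarded to the backend
    drop_variables="unused",
)
for node in tree.subtree:
    print(node.path, dict(node.dataset.chunksizes))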
@@ -893,12 +1017,58 @@ def open_groups(
     open_datatree()
     DataTree.from_dict()
     """
+    if cache is None:
+        cache = chunks is None
+
+    if backend_kwargs is not None:
+        kwargs.update(backend_kwargs)
+
     if engine is None:
         engine = plugins.guess_engine(filename_or_obj)

+    if from_array_kwargs is None:
+        from_array_kwargs = {}
+
     backend = plugins.get_backend(engine)

-    return backend.open_groups_as_dict(filename_or_obj, **kwargs)
+    decoders = _resolve_decoders_kwargs(
+        decode_cf,
+        open_backend_dataset_parameters=(),
+        mask_and_scale=mask_and_scale,
+        decode_times=decode_times,
+        decode_timedelta=decode_timedelta,
+        concat_characters=concat_characters,
+        use_cftime=use_cftime,
+        decode_coords=decode_coords,
+    )
+    overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
+
+    backend_groups = backend.open_groups_as_dict(
+        filename_or_obj,
+        drop_variables=drop_variables,
+        **decoders,
+        **kwargs,
+    )
+
+    groups = {
+        name: _dataset_from_backend_dataset(
+            backend_ds,
+            filename_or_obj,
+            engine,
+            chunks,
+            cache,
+            overwrite_encoded_chunks,
+            inline_array,
+            chunked_array_type,
+            from_array_kwargs,
+            drop_variables=drop_variables,
+            **decoders,
+            **kwargs,
+        )
+        for name, backend_ds in backend_groups.items()
+    }
+
+    return groups


 def open_mfdataset(
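The same options now apply to open_groups, which returns a flat dict of Datasets rather than a tree. A sketch; "example.nc" and the dimension name are hypothetical:

import xarray as xr

groups = xr.open_groups("example.nc", chunks={"time": 24})
for name, ds in groups.items():
    print(name, dict(ds.chunksizes))

# As the See Also section notes, the dict can still be
# assembled into a tree by hand:
tree = xr.DataTree.from_dict(groups)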