From fe49f5fed28a046b250920ae4a38b5124fb29023 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sat, 14 Sep 2024 15:40:08 -0700 Subject: [PATCH 01/15] fix: zarr v2 compatability fixes - port normalize_chunks from v2 - add array.store property - default to append in create --- src/zarr/api/asynchronous.py | 2 +- src/zarr/core/array.py | 35 ++++++++++++++++----------- src/zarr/core/chunk_grids.py | 47 +++++++++++++++++++++++++++++++++++- src/zarr/core/group.py | 4 ++- src/zarr/store/common.py | 8 +++--- 5 files changed, 75 insertions(+), 21 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 8a1b0c5f36..fe9bc824ec 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -724,7 +724,7 @@ async def create( if meta_array is not None: warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) - mode = kwargs.pop("mode", cast(AccessModeLiteral, "r" if read_only else "w")) + mode = kwargs.pop("mode", cast(AccessModeLiteral, "r" if read_only else "a")) store_path = await make_store_path(store, mode=mode) if path is not None: store_path = store_path / path diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 7311b6eec2..18ff09c75d 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -8,12 +8,12 @@ import numpy as np import numpy.typing as npt -from zarr.abc.store import set_or_delete +from zarr.abc.store import Store, set_or_delete from zarr.codecs import BytesCodec from zarr.codecs._v2 import V2Compressor, V2Filters from zarr.core.attributes import Attributes from zarr.core.buffer import BufferPrototype, NDArrayLike, NDBuffer, default_buffer_prototype -from zarr.core.chunk_grids import RegularChunkGrid, _guess_chunks +from zarr.core.chunk_grids import RegularChunkGrid, normalize_chunks from zarr.core.chunk_key_encodings import ( ChunkKeyEncoding, DefaultChunkKeyEncoding, @@ -129,7 +129,7 @@ async def create( fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, # v3 only - chunk_shape: ChunkCoords | None = None, + chunk_shape: ChunkCoords | None = None, # TODO: handle bool and iterable of iterable types chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] @@ -139,7 +139,7 @@ async def create( codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: Iterable[str] | None = None, # v2 only - chunks: ShapeLike | None = None, + chunks: ShapeLike | None = None, # TODO: handle bool and iterable of iterable types dimension_separator: Literal[".", "/"] | None = None, order: Literal["C", "F"] | None = None, filters: list[dict[str, JSON]] | None = None, @@ -152,15 +152,14 @@ async def create( shape = parse_shapelike(shape) - if chunk_shape is None: - if chunks is None: - chunk_shape = chunks = _guess_chunks(shape=shape, typesize=np.dtype(dtype).itemsize) - else: - chunks = parse_shapelike(chunks) + if chunks is not None and chunk_shape is not None: + raise ValueError("Only one of chunk_shape or chunks can be provided.") - chunk_shape = chunks - elif chunks is not None: - raise ValueError("Only one of chunk_shape or chunks must be provided.") + dtype = np.dtype(dtype) + if chunks: + _chunks = normalize_chunks(chunks, shape, dtype.itemsize) + if chunk_shape: + _chunks = normalize_chunks(chunk_shape, shape, dtype.itemsize) if zarr_format == 3: if dimension_separator is not None: @@ -183,7 +182,7 @@ async def create( store_path, shape=shape, dtype=dtype, - chunk_shape=chunk_shape, + chunk_shape=_chunks, fill_value=fill_value, chunk_key_encoding=chunk_key_encoding, codecs=codecs, @@ -206,7 +205,7 @@ async def create( store_path, shape=shape, dtype=dtype, - chunks=chunk_shape, + chunks=_chunks, dimension_separator=dimension_separator, fill_value=fill_value, order=order, @@ -393,6 +392,10 @@ async def open( metadata=ArrayV3Metadata.from_dict(json.loads(zarr_json_bytes.to_bytes())), ) + @property + def store(self) -> Store: + return self.store_path.store + @property def ndim(self) -> int: return len(self.metadata.shape) @@ -697,6 +700,10 @@ def open( async_array = sync(AsyncArray.open(store)) return cls(async_array) + @property + def store(self) -> Store: + return self._async_array.store + @property def ndim(self) -> int: return self._async_array.ndim diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index 61723215c6..4dc97da144 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -2,11 +2,12 @@ import itertools import math +import numbers import operator from abc import abstractmethod from dataclasses import dataclass from functools import reduce -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import numpy as np @@ -98,6 +99,50 @@ def _guess_chunks( return tuple(int(x) for x in chunks) +def normalize_chunks(chunks: Any, shape: tuple[int, ...], typesize: int) -> tuple[int, ...]: + """Convenience function to normalize the `chunks` argument for an array + with the given `shape`.""" + + # N.B., expect shape already normalized + + # handle auto-chunking + if chunks is None or chunks is True: + return _guess_chunks(shape, typesize) + + # handle no chunking + if chunks is False: + return shape + + # handle 1D convenience form + if isinstance(chunks, numbers.Integral): + chunks = tuple(int(chunks) for _ in shape) + + # handle dask-style chunks (iterable of iterables) + if all(isinstance(c, (tuple | list)) for c in chunks): + # take first chunk size for each dimension + chunks = ( + c[0] for c in chunks + ) # TODO: check/error/warn for irregular chunks (e.g. if c[0] != c[1:-1]) + + # handle bad dimensionality + if len(chunks) > len(shape): + raise ValueError("too many dimensions in chunks") + + # handle underspecified chunks + if len(chunks) < len(shape): + # assume chunks across remaining dimensions + chunks += shape[len(chunks) :] + + # handle None or -1 in chunks + if -1 in chunks or None in chunks: + chunks = tuple( + s if c == -1 or c is None else int(c) for s, c in zip(shape, chunks, strict=False) + ) + + out = tuple(int(c) for c in chunks) + return out + + @dataclass(frozen=True) class ChunkGrid(Metadata): @classmethod diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 40815b96c8..cfa3781b7b 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -173,7 +173,9 @@ async def open( # alternatively, we could warn and favor v3 raise ValueError("Both zarr.json and .zgroup objects exist") if zarr_json_bytes is None and zgroup_bytes is None: - raise FileNotFoundError(store_path) + raise FileNotFoundError( + f"could not find zarr.json or .zgroup objects in {store_path}" + ) # set zarr_format based on which keys were found if zarr_json_bytes is not None: zarr_format = 3 diff --git a/src/zarr/store/common.py b/src/zarr/store/common.py index 8028c9af3d..f7a8ca5cac 100644 --- a/src/zarr/store/common.py +++ b/src/zarr/store/common.py @@ -78,12 +78,12 @@ async def make_store_path( store_like: StoreLike | None, *, mode: AccessModeLiteral | None = None ) -> StorePath: if isinstance(store_like, StorePath): - if mode is not None: - assert AccessMode.from_literal(mode) == store_like.store.mode + if (mode is not None) and (AccessMode.from_literal(mode) != store_like.store.mode): + raise ValueError(f"mode mismatch (mode={mode} != store.mode={store_like.store.mode})") return store_like elif isinstance(store_like, Store): - if mode is not None: - assert AccessMode.from_literal(mode) == store_like.mode + if (mode is not None) and (AccessMode.from_literal(mode) != store_like.mode): + raise ValueError(f"mode mismatch (mode={mode} != store.mode={store_like.mode})") await store_like._ensure_open() return StorePath(store_like) elif store_like is None: From 9a1580b7f97667e0d024e5a5898fb38e61977e38 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 16 Sep 2024 16:47:50 -0700 Subject: [PATCH 02/15] move zarr.store to zarr.storage also fix failing ci --- src/zarr/api/asynchronous.py | 40 +++++++++++++++++----- src/zarr/api/synchronous.py | 2 +- src/zarr/core/array.py | 6 ++-- src/zarr/core/group.py | 4 +-- src/zarr/storage/__init__.py | 21 ++++++++++++ src/zarr/{store => storage}/_utils.py | 0 src/zarr/{store => storage}/common.py | 10 +++--- src/zarr/{store => storage}/local.py | 0 src/zarr/{store => storage}/memory.py | 2 +- src/zarr/{store => storage}/remote.py | 2 +- src/zarr/{store => storage}/zip.py | 0 src/zarr/store/__init__.py | 15 -------- src/zarr/testing/buffer.py | 2 +- src/zarr/testing/store.py | 2 +- src/zarr/testing/strategies.py | 2 +- tests/v3/conftest.py | 4 +-- tests/v3/test_api.py | 7 ++-- tests/v3/test_array.py | 4 +-- tests/v3/test_buffer.py | 4 +-- tests/v3/test_codecs/test_blosc.py | 2 +- tests/v3/test_codecs/test_codecs.py | 20 +++++------ tests/v3/test_codecs/test_endian.py | 2 +- tests/v3/test_codecs/test_gzip.py | 2 +- tests/v3/test_codecs/test_sharding.py | 2 +- tests/v3/test_codecs/test_transpose.py | 2 +- tests/v3/test_codecs/test_zstd.py | 2 +- tests/v3/test_group.py | 4 +-- tests/v3/test_indexing.py | 4 +-- tests/v3/test_store/test_core.py | 6 ++-- tests/v3/test_store/test_local.py | 2 +- tests/v3/test_store/test_memory.py | 2 +- tests/v3/test_store/test_remote.py | 2 +- tests/v3/test_store/test_stateful_store.py | 2 +- tests/v3/test_store/test_zip.py | 2 +- tests/v3/test_v2.py | 2 +- 35 files changed, 109 insertions(+), 76 deletions(-) create mode 100644 src/zarr/storage/__init__.py rename src/zarr/{store => storage}/_utils.py (100%) rename src/zarr/{store => storage}/common.py (97%) rename src/zarr/{store => storage}/local.py (100%) rename src/zarr/{store => storage}/memory.py (99%) rename src/zarr/{store => storage}/remote.py (99%) rename src/zarr/{store => storage}/zip.py (100%) delete mode 100644 src/zarr/store/__init__.py diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index fe9bc824ec..3bbf908bea 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -7,14 +7,16 @@ import numpy as np import numpy.typing as npt +from zarr.abc.store import Store from zarr.core.array import Array, AsyncArray from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, MemoryOrder, ZarrFormat from zarr.core.config import config from zarr.core.group import AsyncGroup from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata -from zarr.store import ( +from zarr.storage import ( StoreLike, + StorePath, make_store_path, ) @@ -221,15 +223,16 @@ async def open( Return type depends on what exists in the given store. """ zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) + store_path = await make_store_path(store, mode=mode) if path is not None: store_path = store_path / path try: - return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) + return await open_array(store=store_path, zarr_format=zarr_format, **kwargs) except KeyError: - return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) + return await open_group(store=store_path, zarr_format=zarr_format, **kwargs) async def open_consolidated(*args: Any, **kwargs: Any) -> AsyncGroup: @@ -299,7 +302,14 @@ async def save_array( or _default_zarr_version() ) - store_path = await make_store_path(store, mode="w") + mode = kwargs.pop("mode", None) + if isinstance(store, Store): + if mode is not None: + raise ValueError("mode cannot be specified if store is a Store") + elif mode is None: + mode = cast(AccessModeLiteral, "a") + + store_path = await make_store_path(store, mode=mode) if path is not None: store_path = store_path / path new = await AsyncArray.create( @@ -453,7 +463,9 @@ async def group( zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) - store_path = await make_store_path(store) + mode = None if isinstance(store, Store) else cast(AccessModeLiteral, "a") + + store_path = await make_store_path(store, mode=mode) if path is not None: store_path = store_path / path @@ -551,6 +563,9 @@ async def open_group( if storage_options is not None: warnings.warn("storage_options is not yet implemented", RuntimeWarning, stacklevel=2) + if mode is not None and isinstance(store, Store | StorePath): + raise ValueError("store and mode cannot be specified at the same time") + store_path = await make_store_path(store, mode=mode) if path is not None: store_path = store_path / path @@ -724,7 +739,13 @@ async def create( if meta_array is not None: warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2) - mode = kwargs.pop("mode", cast(AccessModeLiteral, "r" if read_only else "a")) + mode = kwargs.pop("mode", None) + if isinstance(store, Store | StorePath): + if mode is not None: + raise ValueError("mode cannot be set when store is already initialized") + elif mode is None: + mode = cast(AccessModeLiteral, "r" if read_only else "a") + store_path = await make_store_path(store, mode=mode) if path is not None: store_path = store_path / path @@ -896,8 +917,11 @@ async def open_array( The opened array. """ - store_path = await make_store_path(store) - if path is not None: + mode = kwargs.pop("mode", None) + store_path = await make_store_path(store, mode=mode) + if ( + path is not None + ): # FIXME: apply path before opening store in w or risk deleting existing data store_path = store_path / path zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 93a33b8d3f..8c05f873c6 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -10,7 +10,7 @@ if TYPE_CHECKING: from zarr.core.buffer import NDArrayLike from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, ZarrFormat - from zarr.store import StoreLike + from zarr.storage import StoreLike __all__ = [ "consolidate_metadata", diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 18ff09c75d..eea3eaace0 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -59,8 +59,8 @@ from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync from zarr.registry import get_pipeline_class -from zarr.store import StoreLike, StorePath, make_store_path -from zarr.store.common import ( +from zarr.storage import StoreLike, StorePath, make_store_path +from zarr.storage.common import ( ensure_no_existing_node, ) @@ -158,7 +158,7 @@ async def create( dtype = np.dtype(dtype) if chunks: _chunks = normalize_chunks(chunks, shape, dtype.itemsize) - if chunk_shape: + else: _chunks = normalize_chunks(chunk_shape, shape, dtype.itemsize) if zarr_format == 3: diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index cfa3781b7b..579c66c0b2 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -28,8 +28,8 @@ ) from zarr.core.config import config from zarr.core.sync import SyncMixin, sync -from zarr.store import StoreLike, StorePath, make_store_path -from zarr.store.common import ensure_no_existing_node +from zarr.storage import StoreLike, StorePath, make_store_path +from zarr.storage.common import ensure_no_existing_node if TYPE_CHECKING: from collections.abc import AsyncGenerator, Iterable, Iterator diff --git a/src/zarr/storage/__init__.py b/src/zarr/storage/__init__.py new file mode 100644 index 0000000000..216dcbac95 --- /dev/null +++ b/src/zarr/storage/__init__.py @@ -0,0 +1,21 @@ +from zarr.storage.common import StoreLike, StorePath, make_store_path +from zarr.storage.local import LocalStore +from zarr.storage.memory import MemoryStore +from zarr.storage.remote import RemoteStore +from zarr.storage.zip import ZipStore + +# alias for backwards compatibility +FSStore = RemoteStore +DirectoryStore = LocalStore + +__all__ = [ + "DirectoryStore", + "FSStore", + "StorePath", + "StoreLike", + "make_store_path", + "RemoteStore", + "LocalStore", + "MemoryStore", + "ZipStore", +] diff --git a/src/zarr/store/_utils.py b/src/zarr/storage/_utils.py similarity index 100% rename from src/zarr/store/_utils.py rename to src/zarr/storage/_utils.py diff --git a/src/zarr/store/common.py b/src/zarr/storage/common.py similarity index 97% rename from src/zarr/store/common.py rename to src/zarr/storage/common.py index f7a8ca5cac..a27e794a2d 100644 --- a/src/zarr/store/common.py +++ b/src/zarr/storage/common.py @@ -8,8 +8,8 @@ from zarr.core.buffer import Buffer, default_buffer_prototype from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZGROUP_JSON, ZarrFormat from zarr.errors import ContainsArrayAndGroupError, ContainsArrayError, ContainsGroupError -from zarr.store.local import LocalStore -from zarr.store.memory import MemoryStore +from zarr.storage.local import LocalStore +from zarr.storage.memory import MemoryStore if TYPE_CHECKING: from zarr.core.buffer import BufferPrototype @@ -79,11 +79,13 @@ async def make_store_path( ) -> StorePath: if isinstance(store_like, StorePath): if (mode is not None) and (AccessMode.from_literal(mode) != store_like.store.mode): - raise ValueError(f"mode mismatch (mode={mode} != store.mode={store_like.store.mode})") + raise ValueError( + f"mode mismatch (mode={mode} != store.mode={store_like.store.mode.str})" + ) return store_like elif isinstance(store_like, Store): if (mode is not None) and (AccessMode.from_literal(mode) != store_like.mode): - raise ValueError(f"mode mismatch (mode={mode} != store.mode={store_like.mode})") + raise ValueError(f"mode mismatch (mode={mode} != store.mode={store_like.mode.str})") await store_like._ensure_open() return StorePath(store_like) elif store_like is None: diff --git a/src/zarr/store/local.py b/src/zarr/storage/local.py similarity index 100% rename from src/zarr/store/local.py rename to src/zarr/storage/local.py diff --git a/src/zarr/store/memory.py b/src/zarr/storage/memory.py similarity index 99% rename from src/zarr/store/memory.py rename to src/zarr/storage/memory.py index 13e289f374..bf56e2d95f 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/storage/memory.py @@ -6,7 +6,7 @@ from zarr.abc.store import Store from zarr.core.buffer import Buffer, gpu from zarr.core.common import concurrent_map -from zarr.store._utils import _normalize_interval_index +from zarr.storage._utils import _normalize_interval_index if TYPE_CHECKING: from collections.abc import AsyncGenerator, MutableMapping diff --git a/src/zarr/store/remote.py b/src/zarr/storage/remote.py similarity index 99% rename from src/zarr/store/remote.py rename to src/zarr/storage/remote.py index e3e2ba3447..e532f3ac0c 100644 --- a/src/zarr/store/remote.py +++ b/src/zarr/storage/remote.py @@ -5,7 +5,7 @@ import fsspec from zarr.abc.store import Store -from zarr.store.common import _dereference_path +from zarr.storage.common import _dereference_path if TYPE_CHECKING: from collections.abc import AsyncGenerator diff --git a/src/zarr/store/zip.py b/src/zarr/storage/zip.py similarity index 100% rename from src/zarr/store/zip.py rename to src/zarr/storage/zip.py diff --git a/src/zarr/store/__init__.py b/src/zarr/store/__init__.py deleted file mode 100644 index 47bbccd66e..0000000000 --- a/src/zarr/store/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -from zarr.store.common import StoreLike, StorePath, make_store_path -from zarr.store.local import LocalStore -from zarr.store.memory import MemoryStore -from zarr.store.remote import RemoteStore -from zarr.store.zip import ZipStore - -__all__ = [ - "StorePath", - "StoreLike", - "make_store_path", - "RemoteStore", - "LocalStore", - "MemoryStore", - "ZipStore", -] diff --git a/src/zarr/testing/buffer.py b/src/zarr/testing/buffer.py index 9d640d2c64..e4affa59ac 100644 --- a/src/zarr/testing/buffer.py +++ b/src/zarr/testing/buffer.py @@ -7,7 +7,7 @@ import numpy.typing as npt from zarr.core.buffer import Buffer, BufferPrototype, cpu -from zarr.store import MemoryStore +from zarr.storage import MemoryStore if TYPE_CHECKING: from collections.abc import Iterable diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index a08b6960db..7ff73135d6 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -6,7 +6,7 @@ import zarr.api.asynchronous from zarr.abc.store import AccessMode, Store from zarr.core.buffer import Buffer, default_buffer_prototype -from zarr.store._utils import _normalize_interval_index +from zarr.storage._utils import _normalize_interval_index from zarr.testing.utils import assert_bytes_equal __all__ = ["StoreTests"] diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 83de3d92ce..4f02ba9e57 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -9,7 +9,7 @@ from zarr.core.array import Array from zarr.core.group import Group -from zarr.store import MemoryStore, StoreLike +from zarr.storage import MemoryStore, StoreLike # Copied from Xarray _attr_keys = st.text(st.characters(), min_size=1) diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index 41cd359346..db462a021b 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -10,8 +10,8 @@ from hypothesis import HealthCheck, Verbosity, settings from zarr import AsyncGroup, config -from zarr.store import LocalStore, MemoryStore, StorePath, ZipStore -from zarr.store.remote import RemoteStore +from zarr.storage import LocalStore, MemoryStore, StorePath, ZipStore +from zarr.storage.remote import RemoteStore if TYPE_CHECKING: from collections.abc import Generator, Iterator diff --git a/tests/v3/test_api.py b/tests/v3/test_api.py index ddfab587cc..a75077e1c7 100644 --- a/tests/v3/test_api.py +++ b/tests/v3/test_api.py @@ -9,7 +9,7 @@ from zarr.abc.store import Store from zarr.api.synchronous import create, load, open, open_group, save, save_array, save_group from zarr.core.common import ZarrFormat -from zarr.store.memory import MemoryStore +from zarr.storage.memory import MemoryStore def test_create_array(memory_store: Store) -> None: @@ -42,8 +42,9 @@ async def test_open_array(memory_store: MemoryStore) -> None: assert z.shape == (100,) # open array, overwrite - store._store_dict = {} - z = open(store=store, shape=200, mode="w") # mode="w" + # store._store_dict = {} + store = MemoryStore(mode="w") + z = open(store=store, shape=200) assert isinstance(z, Array) assert z.shape == (200,) diff --git a/tests/v3/test_array.py b/tests/v3/test_array.py index 11be51682c..cf938e3375 100644 --- a/tests/v3/test_array.py +++ b/tests/v3/test_array.py @@ -7,8 +7,8 @@ from zarr import Array, AsyncArray, Group from zarr.core.common import ZarrFormat from zarr.errors import ContainsArrayError, ContainsGroupError -from zarr.store import LocalStore, MemoryStore -from zarr.store.common import StorePath +from zarr.storage import LocalStore, MemoryStore +from zarr.storage.common import StorePath @pytest.mark.parametrize("store", ("local", "memory", "zip"), indirect=["store"]) diff --git a/tests/v3/test_buffer.py b/tests/v3/test_buffer.py index 5a313dc1ab..883a17a87c 100644 --- a/tests/v3/test_buffer.py +++ b/tests/v3/test_buffer.py @@ -13,8 +13,8 @@ from zarr.codecs.transpose import TransposeCodec from zarr.codecs.zstd import ZstdCodec from zarr.core.buffer import ArrayLike, BufferPrototype, NDArrayLike, cpu, gpu -from zarr.store.common import StorePath -from zarr.store.memory import MemoryStore +from zarr.storage.common import StorePath +from zarr.storage.memory import MemoryStore from zarr.testing.buffer import ( NDBufferUsingTestNDArrayLike, StoreExpectingTestBuffer, diff --git a/tests/v3/test_codecs/test_blosc.py b/tests/v3/test_codecs/test_blosc.py index 4c569055b7..386101035b 100644 --- a/tests/v3/test_codecs/test_blosc.py +++ b/tests/v3/test_codecs/test_blosc.py @@ -7,7 +7,7 @@ from zarr.abc.store import Store from zarr.codecs import BloscCodec, BytesCodec, ShardingCodec from zarr.core.buffer import default_buffer_prototype -from zarr.store.common import StorePath +from zarr.storage.common import StorePath @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) diff --git a/tests/v3/test_codecs/test_codecs.py b/tests/v3/test_codecs/test_codecs.py index 57103d17c2..7a9cd311e5 100644 --- a/tests/v3/test_codecs/test_codecs.py +++ b/tests/v3/test_codecs/test_codecs.py @@ -17,7 +17,7 @@ ) from zarr.core.buffer import default_buffer_prototype from zarr.core.indexing import Selection, morton_order_iter -from zarr.store import StorePath +from zarr.storage import StorePath from zarr.testing.utils import assert_bytes_equal if TYPE_CHECKING: @@ -375,15 +375,15 @@ async def test_dimension_names(store: Store) -> None: @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) def test_invalid_metadata(store: Store) -> None: - spath = StorePath(store, "invalid_metadata") - with pytest.raises(ValueError): - Array.create( - spath, - shape=(16, 16, 16), - chunk_shape=(16, 16), - dtype=np.dtype("uint8"), - fill_value=0, - ) + # spath = StorePath(store, "invalid_metadata") + # with pytest.raises(ValueError): + # Array.create( + # spath, + # shape=(16, 16, 16), + # chunk_shape=(16, 16), # this is now allowed (like in v2) + # dtype=np.dtype("uint8"), + # fill_value=0, + # ) spath2 = StorePath(store, "invalid_endian") with pytest.raises(TypeError): Array.create( diff --git a/tests/v3/test_codecs/test_endian.py b/tests/v3/test_codecs/test_endian.py index 3c36c90b81..cc7abcb272 100644 --- a/tests/v3/test_codecs/test_endian.py +++ b/tests/v3/test_codecs/test_endian.py @@ -8,7 +8,7 @@ from zarr.abc.store import Store from zarr.codecs import BytesCodec from zarr.core.buffer import default_buffer_prototype -from zarr.store.common import StorePath +from zarr.storage.common import StorePath from zarr.testing.utils import assert_bytes_equal from .test_codecs import _AsyncArrayProxy diff --git a/tests/v3/test_codecs/test_gzip.py b/tests/v3/test_codecs/test_gzip.py index 6495f8236c..91a7852d3c 100644 --- a/tests/v3/test_codecs/test_gzip.py +++ b/tests/v3/test_codecs/test_gzip.py @@ -4,7 +4,7 @@ from zarr import Array from zarr.abc.store import Store from zarr.codecs import BytesCodec, GzipCodec -from zarr.store.common import StorePath +from zarr.storage.common import StorePath @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) diff --git a/tests/v3/test_codecs/test_sharding.py b/tests/v3/test_codecs/test_sharding.py index bd8aab5e03..20bf0b60fe 100644 --- a/tests/v3/test_codecs/test_sharding.py +++ b/tests/v3/test_codecs/test_sharding.py @@ -15,7 +15,7 @@ TransposeCodec, ) from zarr.core.buffer import default_buffer_prototype -from zarr.store.common import StorePath +from zarr.storage.common import StorePath from ..conftest import ArrayRequest from .test_codecs import _AsyncArrayProxy, order_from_dim diff --git a/tests/v3/test_codecs/test_transpose.py b/tests/v3/test_codecs/test_transpose.py index c42c56034a..315746321f 100644 --- a/tests/v3/test_codecs/test_transpose.py +++ b/tests/v3/test_codecs/test_transpose.py @@ -9,7 +9,7 @@ from zarr.codecs import BytesCodec, ShardingCodec, TransposeCodec from zarr.core.buffer import default_buffer_prototype from zarr.core.common import MemoryOrder -from zarr.store.common import StorePath +from zarr.storage.common import StorePath from .test_codecs import _AsyncArrayProxy diff --git a/tests/v3/test_codecs/test_zstd.py b/tests/v3/test_codecs/test_zstd.py index 0726e5944c..e95e2355d7 100644 --- a/tests/v3/test_codecs/test_zstd.py +++ b/tests/v3/test_codecs/test_zstd.py @@ -4,7 +4,7 @@ from zarr import Array from zarr.abc.store import Store from zarr.codecs import BytesCodec, ZstdCodec -from zarr.store.common import StorePath +from zarr.storage.common import StorePath @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 94b839a186..1dda4f1ce3 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -13,8 +13,8 @@ from zarr.core.group import GroupMetadata from zarr.core.sync import sync from zarr.errors import ContainsArrayError, ContainsGroupError -from zarr.store import LocalStore, StorePath -from zarr.store.common import make_store_path +from zarr.storage import LocalStore, StorePath +from zarr.storage.common import make_store_path from .conftest import parse_store diff --git a/tests/v3/test_indexing.py b/tests/v3/test_indexing.py index efb11f36a1..b1bf229146 100644 --- a/tests/v3/test_indexing.py +++ b/tests/v3/test_indexing.py @@ -19,8 +19,8 @@ replace_ellipsis, ) from zarr.registry import get_ndbuffer_class -from zarr.store.common import StorePath -from zarr.store.memory import MemoryStore +from zarr.storage.common import StorePath +from zarr.storage.memory import MemoryStore if TYPE_CHECKING: from collections.abc import Iterator diff --git a/tests/v3/test_store/test_core.py b/tests/v3/test_store/test_core.py index c65d91f9d0..c62ba19bb2 100644 --- a/tests/v3/test_store/test_core.py +++ b/tests/v3/test_store/test_core.py @@ -2,9 +2,9 @@ import pytest -from zarr.store.common import make_store_path -from zarr.store.local import LocalStore -from zarr.store.memory import MemoryStore +from zarr.storage.common import make_store_path +from zarr.storage.local import LocalStore +from zarr.storage.memory import MemoryStore async def test_make_store_path(tmpdir: str) -> None: diff --git a/tests/v3/test_store/test_local.py b/tests/v3/test_store/test_local.py index 59cae22de3..f34e84863f 100644 --- a/tests/v3/test_store/test_local.py +++ b/tests/v3/test_store/test_local.py @@ -3,7 +3,7 @@ import pytest from zarr.core.buffer import Buffer, cpu -from zarr.store.local import LocalStore +from zarr.storage.local import LocalStore from zarr.testing.store import StoreTests diff --git a/tests/v3/test_store/test_memory.py b/tests/v3/test_store/test_memory.py index 04d17eb240..6eebc4df39 100644 --- a/tests/v3/test_store/test_memory.py +++ b/tests/v3/test_store/test_memory.py @@ -5,7 +5,7 @@ import pytest from zarr.core.buffer import Buffer, cpu -from zarr.store.memory import MemoryStore +from zarr.storage.memory import MemoryStore from zarr.testing.store import StoreTests diff --git a/tests/v3/test_store/test_remote.py b/tests/v3/test_store/test_remote.py index afa991209f..13f993c9c5 100644 --- a/tests/v3/test_store/test_remote.py +++ b/tests/v3/test_store/test_remote.py @@ -8,7 +8,7 @@ from zarr.core.buffer import Buffer, cpu, default_buffer_prototype from zarr.core.sync import sync -from zarr.store import RemoteStore +from zarr.storage import RemoteStore from zarr.testing.store import StoreTests s3fs = pytest.importorskip("s3fs") diff --git a/tests/v3/test_store/test_stateful_store.py b/tests/v3/test_store/test_stateful_store.py index 1ecbd87cc1..be36961b0c 100644 --- a/tests/v3/test_store/test_stateful_store.py +++ b/tests/v3/test_store/test_stateful_store.py @@ -14,7 +14,7 @@ import zarr from zarr.abc.store import AccessMode, Store from zarr.core.buffer import BufferPrototype, cpu, default_buffer_prototype -from zarr.store import MemoryStore +from zarr.storage import MemoryStore from zarr.testing.strategies import key_ranges, paths diff --git a/tests/v3/test_store/test_zip.py b/tests/v3/test_store/test_zip.py index 7c332e9a2e..31df03eaba 100644 --- a/tests/v3/test_store/test_zip.py +++ b/tests/v3/test_store/test_zip.py @@ -10,7 +10,7 @@ import zarr from zarr.abc.store import AccessMode from zarr.core.buffer import Buffer, cpu, default_buffer_prototype -from zarr.store.zip import ZipStore +from zarr.storage.zip import ZipStore from zarr.testing.store import StoreTests if TYPE_CHECKING: diff --git a/tests/v3/test_v2.py b/tests/v3/test_v2.py index 9ddde68e23..7909acab30 100644 --- a/tests/v3/test_v2.py +++ b/tests/v3/test_v2.py @@ -4,7 +4,7 @@ import pytest from zarr import Array -from zarr.store import MemoryStore, StorePath +from zarr.storage import MemoryStore, StorePath @pytest.fixture From 0d89912bec3e796e19f719baf3552c95aa01572f Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 16 Sep 2024 19:36:38 -0700 Subject: [PATCH 03/15] make chunks a tuple --- src/zarr/core/chunk_grids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index 4dc97da144..91827daaf9 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -120,7 +120,7 @@ def normalize_chunks(chunks: Any, shape: tuple[int, ...], typesize: int) -> tupl # handle dask-style chunks (iterable of iterables) if all(isinstance(c, (tuple | list)) for c in chunks): # take first chunk size for each dimension - chunks = ( + chunks = tuple( c[0] for c in chunks ) # TODO: check/error/warn for irregular chunks (e.g. if c[0] != c[1:-1]) From e534279a06c98fdd8f36342bf9ff5a7929291e52 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 17 Sep 2024 08:46:42 -0700 Subject: [PATCH 04/15] Apply suggestions from code review --- src/zarr/api/asynchronous.py | 6 +++--- tests/v3/test_codecs/test_codecs.py | 9 --------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 3bbf908bea..37ab9eb69f 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -303,9 +303,9 @@ async def save_array( ) mode = kwargs.pop("mode", None) - if isinstance(store, Store): + if isinstance(store, Store | StorePath): if mode is not None: - raise ValueError("mode cannot be specified if store is a Store") + raise ValueError("mode cannot be set when store is already initialized") elif mode is None: mode = cast(AccessModeLiteral, "a") @@ -564,7 +564,7 @@ async def open_group( warnings.warn("storage_options is not yet implemented", RuntimeWarning, stacklevel=2) if mode is not None and isinstance(store, Store | StorePath): - raise ValueError("store and mode cannot be specified at the same time") + raise ValueError("mode cannot be set when store is already initialized") store_path = await make_store_path(store, mode=mode) if path is not None: diff --git a/tests/v3/test_codecs/test_codecs.py b/tests/v3/test_codecs/test_codecs.py index 7a9cd311e5..bef13c56f1 100644 --- a/tests/v3/test_codecs/test_codecs.py +++ b/tests/v3/test_codecs/test_codecs.py @@ -375,15 +375,6 @@ async def test_dimension_names(store: Store) -> None: @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) def test_invalid_metadata(store: Store) -> None: - # spath = StorePath(store, "invalid_metadata") - # with pytest.raises(ValueError): - # Array.create( - # spath, - # shape=(16, 16, 16), - # chunk_shape=(16, 16), # this is now allowed (like in v2) - # dtype=np.dtype("uint8"), - # fill_value=0, - # ) spath2 = StorePath(store, "invalid_endian") with pytest.raises(TypeError): Array.create( From 93b61fca2f7075b47d7bfc70976e24caccdd113e Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 23 Sep 2024 12:09:46 -0700 Subject: [PATCH 05/15] more merge conflict resolution --- src/zarr/storage/common.py | 8 +++++++- tests/v3/test_attributes.py | 4 ++-- tests/v3/test_store/test_core.py | 2 +- tests/v3/test_sync.py | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/zarr/storage/common.py b/src/zarr/storage/common.py index 83f3956c06..58efa3f46f 100644 --- a/src/zarr/storage/common.py +++ b/src/zarr/storage/common.py @@ -82,7 +82,7 @@ async def make_store_path( mode: AccessModeLiteral | None = None, storage_options: dict[str, Any] | None = None, ) -> StorePath: - from zarr.store.remote import RemoteStore # circular import + from zarr.storage.remote import RemoteStore # circular import used_storage_options = False @@ -91,10 +91,14 @@ async def make_store_path( raise ValueError( f"mode mismatch (mode={mode} != store.mode={store_like.store.mode.str})" ) + if storage_options: + raise TypeError("storage_options passed but store has already been initialized") return store_like elif isinstance(store_like, Store): if (mode is not None) and (AccessMode.from_literal(mode) != store_like.mode): raise ValueError(f"mode mismatch (mode={mode} != store.mode={store_like.mode.str})") + if storage_options: + raise TypeError("storage_options passed but store has already been initialized") await store_like._ensure_open() result = StorePath(store_like) elif store_like is None: @@ -116,6 +120,8 @@ async def make_store_path( elif isinstance(store_like, dict): # We deliberate only consider dict[str, Buffer] here, and not arbitrary mutable mappings. # By only allowing dictionaries, which are in-memory, we know that MemoryStore appropriate. + if mode is None: + mode = "r" result = StorePath(await MemoryStore.open(store_dict=store_like, mode=mode)) else: msg = f"Unsupported type for store_like: '{type(store_like).__name__}'" # type: ignore[unreachable] diff --git a/tests/v3/test_attributes.py b/tests/v3/test_attributes.py index 65b6a02e8d..c4d37ebec5 100644 --- a/tests/v3/test_attributes.py +++ b/tests/v3/test_attributes.py @@ -1,10 +1,10 @@ import zarr.core import zarr.core.attributes -import zarr.store +import zarr.storage def test_put() -> None: - store = zarr.store.MemoryStore({}, mode="w") + store = zarr.storage.MemoryStore({}, mode="w") attrs = zarr.core.attributes.Attributes( zarr.Group.from_store(store, attributes={"a": 1, "b": 2}) ) diff --git a/tests/v3/test_store/test_core.py b/tests/v3/test_store/test_core.py index e0fb12c680..b2a8292ea9 100644 --- a/tests/v3/test_store/test_core.py +++ b/tests/v3/test_store/test_core.py @@ -59,7 +59,7 @@ async def test_make_store_path_fsspec(monkeypatch) -> None: ) async def test_make_store_path_storage_options_raises(store_like: StoreLike) -> None: with pytest.raises(TypeError, match="storage_options"): - await make_store_path(store_like, storage_options={"foo": "bar"}, mode="w") + await make_store_path(store_like, storage_options={"foo": "bar"}) async def test_unsupported() -> None: diff --git a/tests/v3/test_sync.py b/tests/v3/test_sync.py index 864c9e01cb..081e65f3d2 100644 --- a/tests/v3/test_sync.py +++ b/tests/v3/test_sync.py @@ -6,7 +6,7 @@ import zarr from zarr.core.sync import SyncError, SyncMixin, _get_lock, _get_loop, sync -from zarr.store.memory import MemoryStore +from zarr.storage.memory import MemoryStore @pytest.fixture(params=[True, False]) From fb6752d586e14960c0dd0545c1553e0b0e51c4f5 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sun, 29 Sep 2024 14:51:43 -0700 Subject: [PATCH 06/15] fixups --- src/zarr/core/array.py | 14 ++++++++++++-- src/zarr/core/group.py | 11 +++++++++-- src/zarr/storage/logging.py | 4 ++-- src/zarr/storage/memory.py | 4 ++-- tests/v3/test_array.py | 5 ++--- tests/v3/test_group.py | 11 ++++------- 6 files changed, 31 insertions(+), 18 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 0f529ddc87..539b791446 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -2385,8 +2385,18 @@ def chunks_initialized(array: Array | AsyncArray) -> tuple[str, ...]: def _build_parents(node: AsyncArray | AsyncGroup) -> list[AsyncGroup]: from zarr.core.group import AsyncGroup, GroupMetadata - required_parts = node.store_path.path.split("/")[:-1] - parents = [] + print("path", node.store_path.path) + + if "/" in node.store_path.path: + required_parts = node.store_path.path.split("/")[:-1] + else: + required_parts = [] + parents = [ + AsyncGroup( + metadata=GroupMetadata(zarr_format=node.metadata.zarr_format), + store_path=StorePath(store=node.store_path.store, path=""), + ) + ] for i, part in enumerate(required_parts): path = "/".join(required_parts[:i] + [part]) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 63dfc75adc..96e72fd25e 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -700,6 +700,10 @@ async def _members( "Object at %s is not recognized as a component of a Zarr hierarchy.", key ) + async def keys(self) -> AsyncGenerator[str, None]: + async for key, _ in self.members(): + yield key + async def contains(self, member: str) -> bool: # TODO: this can be made more efficient. try: @@ -823,10 +827,10 @@ def __delitem__(self, key: str) -> None: self._sync(self._async_group.delitem(key)) def __iter__(self) -> Iterator[str]: - raise NotImplementedError + yield from self.keys() def __len__(self) -> int: - raise NotImplementedError + return self.nmembers() def __setitem__(self, key: str, value: Any) -> None: """__setitem__ is not supported in v3""" @@ -906,6 +910,9 @@ def members(self, max_depth: int | None = 0) -> tuple[tuple[str, Array | Group], return tuple((kv[0], _parse_async_node(kv[1])) for kv in _members) + def keys(self) -> Generator[str, None]: + yield from self._sync_iter(self._async_group.keys()) + def __contains__(self, member: str) -> bool: return self._sync(self._async_group.contains(member)) diff --git a/src/zarr/storage/logging.py b/src/zarr/storage/logging.py index a9113aabe4..52c7c7b84d 100644 --- a/src/zarr/storage/logging.py +++ b/src/zarr/storage/logging.py @@ -150,9 +150,9 @@ async def set(self, key: str, value: Buffer) -> None: with self.log(): return await self._store.set(key=key, value=value) - async def set_if_not_exists(self, key: str, default: Buffer) -> None: + async def set_if_not_exists(self, key: str, value: Buffer) -> None: with self.log(): - return await self._store.set_if_not_exists(key=key, value=default) + return await self._store.set_if_not_exists(key=key, value=value) async def delete(self, key: str) -> None: with self.log(): diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py index 8dcdd14bce..24ea7e0040 100644 --- a/src/zarr/storage/memory.py +++ b/src/zarr/storage/memory.py @@ -101,10 +101,10 @@ async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None else: self._store_dict[key] = value - async def set_if_not_exists(self, key: str, default: Buffer) -> None: + async def set_if_not_exists(self, key: str, value: Buffer) -> None: self._check_writable() await self._ensure_open() - self._store_dict.setdefault(key, default) + self._store_dict.setdefault(key, value) async def delete(self, key: str) -> None: self._check_writable() diff --git a/tests/v3/test_array.py b/tests/v3/test_array.py index 7ffb62491b..5778f7e8fa 100644 --- a/tests/v3/test_array.py +++ b/tests/v3/test_array.py @@ -92,10 +92,9 @@ async def test_create_creates_parents( expected = [f"{part}/{file}" for file in files for part in parts] if zarr_format == 2: - expected.append("a/b/c/d/.zarray") - expected.append("a/b/c/d/.zattrs") + expected.extend([".zattrs", ".zgroup", "a/b/c/d/.zarray", "a/b/c/d/.zattrs"]) else: - expected.append("a/b/c/d/zarr.json") + expected.extend(["zarr.json", "a/b/c/d/zarr.json"]) expected = sorted(expected) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 7ddc7c54b8..63c5e036fe 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -74,10 +74,9 @@ async def test_create_creates_parents(store: Store, zarr_format: ZarrFormat) -> expected = [f"{part}/{file}" for file in files for part in parts] if zarr_format == 2: - expected.append("a/b/c/d/.zgroup") - expected.append("a/b/c/d/.zattrs") + expected.extend([".zgroup", ".zattrs", "a/b/c/d/.zgroup", "a/b/c/d/.zattrs"]) else: - expected.append("a/b/c/d/zarr.json") + expected.extend(["zarr.json", "a/b/c/d/zarr.json"]) expected = sorted(expected) @@ -311,8 +310,7 @@ def test_group_iter(store: Store, zarr_format: ZarrFormat) -> None: """ group = Group.from_store(store, zarr_format=zarr_format) - with pytest.raises(NotImplementedError): - list(group) + assert list(group) == [] def test_group_len(store: Store, zarr_format: ZarrFormat) -> None: @@ -321,8 +319,7 @@ def test_group_len(store: Store, zarr_format: ZarrFormat) -> None: """ group = Group.from_store(store, zarr_format=zarr_format) - with pytest.raises(NotImplementedError): - len(group) + assert len(group) == 0 def test_group_setitem(store: Store, zarr_format: ZarrFormat) -> None: From 0b1dedc8c0f6955e99af601f669a45af120c186f Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sun, 29 Sep 2024 15:04:33 -0700 Subject: [PATCH 07/15] fixup zipstore --- src/zarr/storage/zip.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zarr/storage/zip.py b/src/zarr/storage/zip.py index 116d6de83a..14d0a4362c 100644 --- a/src/zarr/storage/zip.py +++ b/src/zarr/storage/zip.py @@ -191,12 +191,12 @@ async def set(self, key: str, value: Buffer) -> None: async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, bytes]]) -> None: raise NotImplementedError - async def set_if_not_exists(self, key: str, default: Buffer) -> None: + async def set_if_not_exists(self, key: str, value: Buffer) -> None: self._check_writable() with self._lock: members = self._zf.namelist() if key not in members: - self._set(key, default) + self._set(key, value) async def delete(self, key: str) -> None: raise NotImplementedError From 322918a20d559f59027334138e48a90c85ffbaa4 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Sun, 29 Sep 2024 15:24:58 -0700 Subject: [PATCH 08/15] Apply suggestions from code review --- src/zarr/api/asynchronous.py | 12 +++++------- src/zarr/core/array.py | 1 - 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index ccf85dca83..456ed174a9 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -228,7 +228,7 @@ async def open( """ zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) - store_path = await make_store_path(store, mode=mode) + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) if path is not None: store_path = store_path / path @@ -323,7 +323,7 @@ async def save_array( ) mode = kwargs.pop("mode", None) - store_path = await make_store_path(store, mode=mode) + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) if path is not None: store_path = store_path / path new = await AsyncArray.create( @@ -502,7 +502,7 @@ async def group( mode = None if isinstance(store, Store) else cast(AccessModeLiteral, "a") - store_path = await make_store_path(store, mode=mode) + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) if path is not None: store_path = store_path / path @@ -780,7 +780,7 @@ async def create( if not isinstance(store, Store | StorePath): mode = "a" - store_path = await make_store_path(store, mode=mode) + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) if path is not None: store_path = store_path / path @@ -957,9 +957,7 @@ async def open_array( mode = kwargs.pop("mode", None) store_path = await make_store_path(store, mode=mode) - if ( - path is not None - ): # FIXME: apply path before opening store in w or risk deleting existing data + if path is not None: store_path = store_path / path zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 539b791446..9d055f4981 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -2385,7 +2385,6 @@ def chunks_initialized(array: Array | AsyncArray) -> tuple[str, ...]: def _build_parents(node: AsyncArray | AsyncGroup) -> list[AsyncGroup]: from zarr.core.group import AsyncGroup, GroupMetadata - print("path", node.store_path.path) if "/" in node.store_path.path: required_parts = node.store_path.path.split("/")[:-1] From a95d54ad7e3c16adcb2bb2ba2db8acd471477dc4 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 30 Sep 2024 13:12:08 -0700 Subject: [PATCH 09/15] Apply suggestions from code review --- src/zarr/api/asynchronous.py | 2 +- src/zarr/core/array.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 456ed174a9..62a973257c 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -598,7 +598,7 @@ async def open_group( if chunk_store is not None: warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2) - store_path = await make_store_path(store, mode=mode) + store_path = await make_store_path(store, mode=mode, storage_options=storage_options) if path is not None: store_path = store_path / path diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 9d055f4981..8167f3266d 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -197,7 +197,7 @@ async def create( fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, # v3 only - chunk_shape: ChunkCoords | None = None, # TODO: handle bool and iterable of iterable types + chunk_shape: ChunkCoords | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] @@ -207,7 +207,7 @@ async def create( codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: Iterable[str] | None = None, # v2 only - chunks: ShapeLike | None = None, # TODO: handle bool and iterable of iterable types + chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, order: Literal["C", "F"] | None = None, filters: list[dict[str, JSON]] | None = None, From 128eb5332b8665d38d109449f4589f8726debb7b Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 30 Sep 2024 13:28:48 -0700 Subject: [PATCH 10/15] add test --- src/zarr/core/array.py | 1 - tests/v3/test_chunk_grids.py | 38 +++++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 8167f3266d..c15b4201be 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -2385,7 +2385,6 @@ def chunks_initialized(array: Array | AsyncArray) -> tuple[str, ...]: def _build_parents(node: AsyncArray | AsyncGroup) -> list[AsyncGroup]: from zarr.core.group import AsyncGroup, GroupMetadata - if "/" in node.store_path.path: required_parts = node.store_path.path.split("/")[:-1] else: diff --git a/tests/v3/test_chunk_grids.py b/tests/v3/test_chunk_grids.py index 12166bd210..4c69c483ae 100644 --- a/tests/v3/test_chunk_grids.py +++ b/tests/v3/test_chunk_grids.py @@ -1,7 +1,9 @@ +from typing import Any + import numpy as np import pytest -from zarr.core.chunk_grids import _guess_chunks +from zarr.core.chunk_grids import _guess_chunks, normalize_chunks @pytest.mark.parametrize( @@ -16,3 +18,37 @@ def test_guess_chunks(shape: tuple[int, ...], itemsize: int) -> None: assert chunk_size < (64 * 1024 * 1024) # doesn't make any sense to allow chunks to have zero length dimension assert all(0 < c <= max(s, 1) for c, s in zip(chunks, shape, strict=False)) + + +@pytest.mark.parametrize( + ("chunks", "shape", "typesize", "expected"), + [ + ((10,), (100,), 1, (10,)), + ([10], (100,), 1, (10,)), + (10, (100,), 1, (10,)), + ((10, 10), (100, 10), 1, (10, 10)), + (10, (100, 10), 1, (10, 10)), + ((10, None), (100, 10), 1, (10, 10)), + (30, (100, 20, 10), 1, (30, 30, 30)), + ((30,), (100, 20, 10), 1, (30, 20, 10)), + ((30, None), (100, 20, 10), 1, (30, 20, 10)), + ((30, None, None), (100, 20, 10), 1, (30, 20, 10)), + ((30, 20, None), (100, 20, 10), 1, (30, 20, 10)), + ((30, 20, 10), (100, 20, 10), 1, (30, 20, 10)), + # auto chunking + (None, (100,), 1, (100,)), + (-1, (100,), 1, (100,)), + ((30, -1, None), (100, 20, 10), 1, (30, 20, 10)), + ], +) +def test_normalize_chunks( + chunks: Any, shape: tuple[int, ...], typesize: int, expected: tuple[int, ...] +) -> None: + assert expected == normalize_chunks(chunks, shape, typesize) + + +def test_normalize_chunks_errors() -> None: + with pytest.raises(ValueError): + normalize_chunks("foo", (100,), 1) + with pytest.raises(ValueError): + normalize_chunks((100, 10), (100,), 1) From 54ab9efa63667615afe5d24a7a119305f2ad054e Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 30 Sep 2024 14:56:50 -0700 Subject: [PATCH 11/15] extend test --- tests/v3/test_group.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 63c5e036fe..51605dd7ef 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -62,6 +62,14 @@ async def test_create_creates_parents(store: Store, zarr_format: ZarrFormat) -> await zarr.api.asynchronous.open_group( store=store, path="a", zarr_format=zarr_format, attributes={"key": "value"} ) + + # test that root group node was created + root = await zarr.api.asynchronous.open_group( + store=store, + ) + agroup = await root.getitem("a") + assert agroup.attrs == {"key": "value"} + # create a child node with a couple intermediates await zarr.api.asynchronous.open_group(store=store, path="a/b/c/d", zarr_format=zarr_format) parts = ["a", "a/b", "a/b/c"] From 77f293882eca6b4193a5d7645fcc0058d12934a0 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 30 Sep 2024 15:44:49 -0700 Subject: [PATCH 12/15] clean up parents --- src/zarr/core/array.py | 11 +++++++---- src/zarr/core/group.py | 3 +++ tests/v3/test_group.py | 4 +--- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index c15b4201be..f8c14fe389 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -3,6 +3,7 @@ import json from asyncio import gather from dataclasses import dataclass, field, replace +from logging import getLogger from typing import TYPE_CHECKING, Any, Literal, cast import numpy as np @@ -80,6 +81,8 @@ # Array and AsyncArray are defined in the base ``zarr`` namespace __all__ = ["create_codec_pipeline", "parse_array_metadata"] +logger = getLogger(__name__) + def parse_array_metadata(data: Any) -> ArrayV2Metadata | ArrayV3Metadata: if isinstance(data, ArrayV2Metadata | ArrayV3Metadata): @@ -636,6 +639,8 @@ async def _save_metadata(self, metadata: ArrayMetadata, ensure_parents: bool = F # To enable zarr.create(store, path="a/b/c"), we need to create all the intermediate groups. parents = _build_parents(self) + logger.debug("Ensure parents: %s", parents) + for parent in parents: awaitables.extend( [ @@ -2385,11 +2390,9 @@ def chunks_initialized(array: Array | AsyncArray) -> tuple[str, ...]: def _build_parents(node: AsyncArray | AsyncGroup) -> list[AsyncGroup]: from zarr.core.group import AsyncGroup, GroupMetadata - if "/" in node.store_path.path: - required_parts = node.store_path.path.split("/")[:-1] - else: - required_parts = [] + required_parts = node.store_path.path.split("/")[:-1] parents = [ + # the root group AsyncGroup( metadata=GroupMetadata(zarr_format=node.metadata.zarr_format), store_path=StorePath(store=node.store_path.store, path=""), diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 96e72fd25e..18caea3fd4 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -836,6 +836,9 @@ def __setitem__(self, key: str, value: Any) -> None: """__setitem__ is not supported in v3""" raise NotImplementedError + def __repr__(self) -> str: + return f"" + async def update_attributes_async(self, new_attributes: dict[str, Any]) -> Group: new_metadata = replace(self.metadata, attributes=new_attributes) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 51605dd7ef..106663c6dc 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -237,9 +237,7 @@ def test_group_create(store: Store, exists_ok: bool, zarr_format: ZarrFormat) -> if not exists_ok: with pytest.raises(ContainsGroupError): - group = Group.from_store( - store, attributes=attributes, exists_ok=exists_ok, zarr_format=zarr_format - ) + _ = Group.from_store(store, exists_ok=exists_ok, zarr_format=zarr_format) def test_group_open(store: Store, zarr_format: ZarrFormat, exists_ok: bool) -> None: From 2295d76a5de869d9dee259589ee7622e773a9d1a Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 30 Sep 2024 16:06:18 -0700 Subject: [PATCH 13/15] debug race condition --- src/zarr/core/array.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index f8c14fe389..67489f8acc 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -651,7 +651,10 @@ async def _save_metadata(self, metadata: ArrayMetadata, ensure_parents: bool = F ] ) - await gather(*awaitables) + for awaitable in awaitables: + await awaitable + + # await gather(*awaitables) async def _set_selection( self, From 5879d67ed4ac55c71d9b95ae7c26c36e9b06ae2f Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 30 Sep 2024 16:19:20 -0700 Subject: [PATCH 14/15] more debug --- src/zarr/core/array.py | 18 ++++++++++-------- tests/v3/test_group.py | 5 +++++ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 67489f8acc..5251b309d3 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -651,10 +651,7 @@ async def _save_metadata(self, metadata: ArrayMetadata, ensure_parents: bool = F ] ) - for awaitable in awaitables: - await awaitable - - # await gather(*awaitables) + await gather(*awaitables) async def _set_selection( self, @@ -2393,21 +2390,26 @@ def chunks_initialized(array: Array | AsyncArray) -> tuple[str, ...]: def _build_parents(node: AsyncArray | AsyncGroup) -> list[AsyncGroup]: from zarr.core.group import AsyncGroup, GroupMetadata - required_parts = node.store_path.path.split("/")[:-1] + store = node.store_path.store + path = node.store_path.path + if not path: + return [] + + required_parts = path.split("/")[:-1] parents = [ # the root group AsyncGroup( metadata=GroupMetadata(zarr_format=node.metadata.zarr_format), - store_path=StorePath(store=node.store_path.store, path=""), + store_path=StorePath(store=store, path=""), ) ] for i, part in enumerate(required_parts): - path = "/".join(required_parts[:i] + [part]) + p = "/".join(required_parts[:i] + [part]) parents.append( AsyncGroup( metadata=GroupMetadata(zarr_format=node.metadata.zarr_format), - store_path=StorePath(store=node.store_path.store, path=path), + store_path=StorePath(store=store, path=p), ) ) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 106663c6dc..15b9658e43 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -62,6 +62,11 @@ async def test_create_creates_parents(store: Store, zarr_format: ZarrFormat) -> await zarr.api.asynchronous.open_group( store=store, path="a", zarr_format=zarr_format, attributes={"key": "value"} ) + objs = {x async for x in store.list()} + if zarr_format == 2: + assert objs == {".zgroup", ".zattrs", "a/.zgroup", "a/.zattrs"} + else: + assert objs == {"zarr.json", "a/zarr.json"} # test that root group node was created root = await zarr.api.asynchronous.open_group( From 3940d22fb86cd17aa9eb48ddcdad9b03ac15a010 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 1 Oct 2024 07:43:27 -0700 Subject: [PATCH 15/15] Update src/zarr/core/array.py --- src/zarr/core/array.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 5251b309d3..9a78297c6f 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -639,8 +639,6 @@ async def _save_metadata(self, metadata: ArrayMetadata, ensure_parents: bool = F # To enable zarr.create(store, path="a/b/c"), we need to create all the intermediate groups. parents = _build_parents(self) - logger.debug("Ensure parents: %s", parents) - for parent in parents: awaitables.extend( [