Skip to content

[v3] fix: zarr v2 compatibility fixes for Dask #2186

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 32 additions & 8 deletions src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@
import numpy as np
import numpy.typing as npt

from zarr.abc.store import Store
from zarr.core.array import Array, AsyncArray
from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, MemoryOrder, ZarrFormat
from zarr.core.config import config
from zarr.core.group import AsyncGroup
from zarr.core.metadata.v2 import ArrayV2Metadata
from zarr.core.metadata.v3 import ArrayV3Metadata
from zarr.store import (
from zarr.storage import (
StoreLike,
StorePath,
make_store_path,
)

Expand Down Expand Up @@ -221,15 +223,16 @@ async def open(
Return type depends on what exists in the given store.
"""
zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)

store_path = await make_store_path(store, mode=mode)

if path is not None:
store_path = store_path / path

try:
return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs)
return await open_array(store=store_path, zarr_format=zarr_format, **kwargs)
except KeyError:
return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs)
return await open_group(store=store_path, zarr_format=zarr_format, **kwargs)


async def open_consolidated(*args: Any, **kwargs: Any) -> AsyncGroup:
Expand Down Expand Up @@ -299,7 +302,14 @@ async def save_array(
or _default_zarr_version()
)

store_path = await make_store_path(store, mode="w")
mode = kwargs.pop("mode", None)
if isinstance(store, Store | StorePath):
if mode is not None:
raise ValueError("mode cannot be set when store is already initialized")
elif mode is None:
mode = cast(AccessModeLiteral, "a")

store_path = await make_store_path(store, mode=mode)
if path is not None:
store_path = store_path / path
new = await AsyncArray.create(
Expand Down Expand Up @@ -453,7 +463,9 @@ async def group(

zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)

store_path = await make_store_path(store)
mode = None if isinstance(store, Store) else cast(AccessModeLiteral, "a")

store_path = await make_store_path(store, mode=mode)
if path is not None:
store_path = store_path / path

Expand Down Expand Up @@ -551,6 +563,9 @@ async def open_group(
if storage_options is not None:
warnings.warn("storage_options is not yet implemented", RuntimeWarning, stacklevel=2)

if mode is not None and isinstance(store, Store | StorePath):
raise ValueError("mode cannot be set when store is already initialized")

store_path = await make_store_path(store, mode=mode)
if path is not None:
store_path = store_path / path
Expand Down Expand Up @@ -724,7 +739,13 @@ async def create(
if meta_array is not None:
warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2)

mode = kwargs.pop("mode", cast(AccessModeLiteral, "r" if read_only else "w"))
mode = kwargs.pop("mode", None)
if isinstance(store, Store | StorePath):
if mode is not None:
raise ValueError("mode cannot be set when store is already initialized")
elif mode is None:
mode = cast(AccessModeLiteral, "r" if read_only else "a")

store_path = await make_store_path(store, mode=mode)
if path is not None:
store_path = store_path / path
Expand Down Expand Up @@ -896,8 +917,11 @@ async def open_array(
The opened array.
"""

store_path = await make_store_path(store)
if path is not None:
mode = kwargs.pop("mode", None)
store_path = await make_store_path(store, mode=mode)
if (
path is not None
): # FIXME: apply path before opening store in w or risk deleting existing data
store_path = store_path / path

zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
Expand Down
2 changes: 1 addition & 1 deletion src/zarr/api/synchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
if TYPE_CHECKING:
from zarr.core.buffer import NDArrayLike
from zarr.core.common import JSON, AccessModeLiteral, ChunkCoords, ZarrFormat
from zarr.store import StoreLike
from zarr.storage import StoreLike

__all__ = [
"consolidate_metadata",
Expand Down
39 changes: 23 additions & 16 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
import numpy as np
import numpy.typing as npt

from zarr.abc.store import set_or_delete
from zarr.abc.store import Store, set_or_delete
from zarr.codecs import BytesCodec
from zarr.codecs._v2 import V2Compressor, V2Filters
from zarr.core.attributes import Attributes
from zarr.core.buffer import BufferPrototype, NDArrayLike, NDBuffer, default_buffer_prototype
from zarr.core.chunk_grids import RegularChunkGrid, _guess_chunks
from zarr.core.chunk_grids import RegularChunkGrid, normalize_chunks
from zarr.core.chunk_key_encodings import (
ChunkKeyEncoding,
DefaultChunkKeyEncoding,
Expand Down Expand Up @@ -59,8 +59,8 @@
from zarr.core.metadata.v3 import ArrayV3Metadata
from zarr.core.sync import sync
from zarr.registry import get_pipeline_class
from zarr.store import StoreLike, StorePath, make_store_path
from zarr.store.common import (
from zarr.storage import StoreLike, StorePath, make_store_path
from zarr.storage.common import (
ensure_no_existing_node,
)

Expand Down Expand Up @@ -129,7 +129,7 @@ async def create(
fill_value: Any | None = None,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ChunkCoords | None = None,
chunk_shape: ChunkCoords | None = None, # TODO: handle bool and iterable of iterable types
chunk_key_encoding: (
ChunkKeyEncoding
| tuple[Literal["default"], Literal[".", "/"]]
Expand All @@ -139,7 +139,7 @@ async def create(
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
dimension_names: Iterable[str] | None = None,
# v2 only
chunks: ShapeLike | None = None,
chunks: ShapeLike | None = None, # TODO: handle bool and iterable of iterable types
dimension_separator: Literal[".", "/"] | None = None,
order: Literal["C", "F"] | None = None,
filters: list[dict[str, JSON]] | None = None,
Expand All @@ -152,15 +152,14 @@ async def create(

shape = parse_shapelike(shape)

if chunk_shape is None:
if chunks is None:
chunk_shape = chunks = _guess_chunks(shape=shape, typesize=np.dtype(dtype).itemsize)
else:
chunks = parse_shapelike(chunks)
if chunks is not None and chunk_shape is not None:
raise ValueError("Only one of chunk_shape or chunks can be provided.")

chunk_shape = chunks
elif chunks is not None:
raise ValueError("Only one of chunk_shape or chunks must be provided.")
dtype = np.dtype(dtype)
if chunks:
_chunks = normalize_chunks(chunks, shape, dtype.itemsize)
else:
_chunks = normalize_chunks(chunk_shape, shape, dtype.itemsize)

if zarr_format == 3:
if dimension_separator is not None:
Expand All @@ -183,7 +182,7 @@ async def create(
store_path,
shape=shape,
dtype=dtype,
chunk_shape=chunk_shape,
chunk_shape=_chunks,
fill_value=fill_value,
chunk_key_encoding=chunk_key_encoding,
codecs=codecs,
Expand All @@ -206,7 +205,7 @@ async def create(
store_path,
shape=shape,
dtype=dtype,
chunks=chunk_shape,
chunks=_chunks,
dimension_separator=dimension_separator,
fill_value=fill_value,
order=order,
Expand Down Expand Up @@ -393,6 +392,10 @@ async def open(
metadata=ArrayV3Metadata.from_dict(json.loads(zarr_json_bytes.to_bytes())),
)

@property
def store(self) -> Store:
return self.store_path.store

@property
def ndim(self) -> int:
return len(self.metadata.shape)
Expand Down Expand Up @@ -697,6 +700,10 @@ def open(
async_array = sync(AsyncArray.open(store))
return cls(async_array)

@property
def store(self) -> Store:
return self._async_array.store

@property
def ndim(self) -> int:
return self._async_array.ndim
Expand Down
47 changes: 46 additions & 1 deletion src/zarr/core/chunk_grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@

import itertools
import math
import numbers
import operator
from abc import abstractmethod
from dataclasses import dataclass
from functools import reduce
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any

import numpy as np

Expand Down Expand Up @@ -98,6 +99,50 @@ def _guess_chunks(
return tuple(int(x) for x in chunks)


def normalize_chunks(chunks: Any, shape: tuple[int, ...], typesize: int) -> tuple[int, ...]:
"""Convenience function to normalize the `chunks` argument for an array
with the given `shape`."""

# N.B., expect shape already normalized

# handle auto-chunking
if chunks is None or chunks is True:
return _guess_chunks(shape, typesize)

# handle no chunking
if chunks is False:
return shape

# handle 1D convenience form
if isinstance(chunks, numbers.Integral):
chunks = tuple(int(chunks) for _ in shape)

# handle dask-style chunks (iterable of iterables)
if all(isinstance(c, (tuple | list)) for c in chunks):
# take first chunk size for each dimension
chunks = tuple(
c[0] for c in chunks
) # TODO: check/error/warn for irregular chunks (e.g. if c[0] != c[1:-1])

# handle bad dimensionality
if len(chunks) > len(shape):
raise ValueError("too many dimensions in chunks")

# handle underspecified chunks
if len(chunks) < len(shape):
# assume chunks across remaining dimensions
chunks += shape[len(chunks) :]

# handle None or -1 in chunks
if -1 in chunks or None in chunks:
chunks = tuple(
s if c == -1 or c is None else int(c) for s, c in zip(shape, chunks, strict=False)
)

out = tuple(int(c) for c in chunks)
return out


@dataclass(frozen=True)
class ChunkGrid(Metadata):
@classmethod
Expand Down
8 changes: 5 additions & 3 deletions src/zarr/core/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
)
from zarr.core.config import config
from zarr.core.sync import SyncMixin, sync
from zarr.store import StoreLike, StorePath, make_store_path
from zarr.store.common import ensure_no_existing_node
from zarr.storage import StoreLike, StorePath, make_store_path
from zarr.storage.common import ensure_no_existing_node

if TYPE_CHECKING:
from collections.abc import AsyncGenerator, Iterable, Iterator
Expand Down Expand Up @@ -173,7 +173,9 @@ async def open(
# alternatively, we could warn and favor v3
raise ValueError("Both zarr.json and .zgroup objects exist")
if zarr_json_bytes is None and zgroup_bytes is None:
raise FileNotFoundError(store_path)
raise FileNotFoundError(
f"could not find zarr.json or .zgroup objects in {store_path}"
)
# set zarr_format based on which keys were found
if zarr_json_bytes is not None:
zarr_format = 3
Expand Down
21 changes: 21 additions & 0 deletions src/zarr/storage/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from zarr.storage.common import StoreLike, StorePath, make_store_path
from zarr.storage.local import LocalStore
from zarr.storage.memory import MemoryStore
from zarr.storage.remote import RemoteStore
from zarr.storage.zip import ZipStore

# alias for backwards compatibility
FSStore = RemoteStore
DirectoryStore = LocalStore

__all__ = [
"DirectoryStore",
"FSStore",
"StorePath",
"StoreLike",
"make_store_path",
"RemoteStore",
"LocalStore",
"MemoryStore",
"ZipStore",
]
File renamed without changes.
14 changes: 8 additions & 6 deletions src/zarr/store/common.py → src/zarr/storage/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
from zarr.core.buffer import Buffer, default_buffer_prototype
from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZGROUP_JSON, ZarrFormat
from zarr.errors import ContainsArrayAndGroupError, ContainsArrayError, ContainsGroupError
from zarr.store.local import LocalStore
from zarr.store.memory import MemoryStore
from zarr.storage.local import LocalStore
from zarr.storage.memory import MemoryStore

if TYPE_CHECKING:
from zarr.core.buffer import BufferPrototype
Expand Down Expand Up @@ -78,12 +78,14 @@ async def make_store_path(
store_like: StoreLike | None, *, mode: AccessModeLiteral | None = None
) -> StorePath:
if isinstance(store_like, StorePath):
if mode is not None:
assert AccessMode.from_literal(mode) == store_like.store.mode
if (mode is not None) and (AccessMode.from_literal(mode) != store_like.store.mode):
raise ValueError(
f"mode mismatch (mode={mode} != store.mode={store_like.store.mode.str})"
)
return store_like
elif isinstance(store_like, Store):
if mode is not None:
assert AccessMode.from_literal(mode) == store_like.mode
if (mode is not None) and (AccessMode.from_literal(mode) != store_like.mode):
raise ValueError(f"mode mismatch (mode={mode} != store.mode={store_like.mode.str})")
await store_like._ensure_open()
return StorePath(store_like)
elif store_like is None:
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion src/zarr/store/memory.py → src/zarr/storage/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from zarr.abc.store import Store
from zarr.core.buffer import Buffer, gpu
from zarr.core.common import concurrent_map
from zarr.store._utils import _normalize_interval_index
from zarr.storage._utils import _normalize_interval_index

if TYPE_CHECKING:
from collections.abc import AsyncGenerator, MutableMapping
Expand Down
2 changes: 1 addition & 1 deletion src/zarr/store/remote.py → src/zarr/storage/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import fsspec

from zarr.abc.store import Store
from zarr.store.common import _dereference_path
from zarr.storage.common import _dereference_path

if TYPE_CHECKING:
from collections.abc import AsyncGenerator
Expand Down
File renamed without changes.
15 changes: 0 additions & 15 deletions src/zarr/store/__init__.py

This file was deleted.

Loading