"""
Interface to zarr arrays
"""
import contextlib
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Optional, Sequence, Union
import numpy as np
from pydantic import SerializationInfo
from numpydantic.interface.interface import Interface
from numpydantic.types import DtypeType
try:
import zarr
from numcodecs import VLenUTF8
from zarr.core import Array as ZarrArray
from zarr.storage import StoreLike
except ImportError: # pragma: no cover
ZarrArray = None
StoreLike = None
storage = None
VLenUTF8 = None
[docs]
@dataclass
class ZarrArrayPath:
    """
    Map to an array within a zarr store.

    See :func:`zarr.open`
    """

    file: Union[Path, str]
    """Location of Zarr store file or directory"""
    path: Optional[str] = None
    """Path to array within hierarchical zarr store"""

    def open(self, **kwargs: Any) -> "ZarrArray":
        """
        Open the zarr array at the provided path.

        Args:
            **kwargs: Forwarded unchanged to :func:`zarr.open`
                (callers in this module pass ``mode``).

        Returns:
            The object returned by :func:`zarr.open` for :attr:`.file`
            and :attr:`.path`.
        """
        # ``file`` may be a Path; it is handed to zarr as a plain string
        return zarr.open(str(self.file), path=self.path, **kwargs)

    @classmethod
    def from_iterable(cls, spec: Sequence) -> "ZarrArrayPath":
        """
        Construct a :class:`.ZarrArrayPath` specifier from an iterable,
        rather than kwargs.

        Args:
            spec: Either ``(file,)`` or ``(file, path)``.

        Returns:
            An instance of ``cls`` (so subclasses build instances of
            themselves) populated from ``spec``.

        Raises:
            ValueError: If ``spec`` does not contain exactly one or two items.
        """
        n_items = len(spec)
        if n_items == 1:
            return cls(file=spec[0])
        if n_items == 2:
            return cls(file=spec[0], path=spec[1])
        raise ValueError("Only len 1-2 iterables can be used for a ZarrArrayPath")
[docs]
class ZarrInterface(Interface):
    """
    Interface to in-memory or on-disk zarr arrays
    """

    input_types = (Path, ZarrArray, ZarrArrayPath)
    return_type = ZarrArray

    @classmethod
    def enabled(cls) -> bool:
        """True if zarr is installed"""
        # ZarrArray is set to None by the module's import fallback
        return ZarrArray is not None

    @staticmethod
    def _get_array(
        array: Union[ZarrArray, str, Path, ZarrArrayPath, Sequence]
    ) -> ZarrArray:
        """
        Resolve an array specifier to an opened zarr array.

        Args:
            array: An already-open zarr array (returned as-is), a ``str`` or
                :class:`~pathlib.Path` to a store, a ``tuple``/``list`` accepted
                by :meth:`.ZarrArrayPath.from_iterable`, or a
                :class:`.ZarrArrayPath`.

        Returns:
            The zarr array; anything that is not already an array is opened
            with ``mode="a"``.
        """
        if isinstance(array, ZarrArray):
            return array

        # Normalize path-like and sequence specs to a ZarrArrayPath
        if isinstance(array, (str, Path)):
            array = ZarrArrayPath(file=array)
        elif isinstance(array, (tuple, list)):
            array = ZarrArrayPath.from_iterable(array)

        return array.open(mode="a")

    @classmethod
    def check(cls, array: Any) -> bool:
        """
        Check if array is in-memory zarr array,
        a path to a zarr array, or a :class:`.ZarrArrayPath`

        Returns:
            ``True`` if ``array`` is a zarr array, or can be coerced to a
            :class:`.ZarrArrayPath` that opens (read-only) to a zarr array;
            ``False`` otherwise. Errors raised while opening are suppressed.
        """
        if isinstance(array, ZarrArray):
            return True

        # See if can be coerced to ZarrArrayPath
        if isinstance(array, (Path, str)):
            array = ZarrArrayPath(file=array)

        if isinstance(array, (tuple, list)):
            # something that can be coerced to ZarrArrayPath;
            # wrong-length sequences are left as-is and fall through to False
            with contextlib.suppress(ValueError):
                array = ZarrArrayPath.from_iterable(array)

        if isinstance(array, ZarrArrayPath):
            # Best-effort probe: any failure to open means "not ours"
            with contextlib.suppress(Exception):
                opened = array.open(mode="r")
                if isinstance(opened, ZarrArray):
                    return True

        return False

    def before_validation(
        self, array: Union[ZarrArray, str, Path, ZarrArrayPath, Sequence]
    ) -> ZarrArray:
        """
        Ensure that the zarr array is opened
        """
        return self._get_array(array)

    def get_dtype(self, array: ZarrArray) -> DtypeType:
        """
        Override base dtype getter to handle zarr's string-as-object encoding.

        Returns:
            ``np.str_`` when the array's dtype type is ``np.object_`` and one
            of its filters is a ``VLenUTF8`` codec; otherwise ``array.dtype``.
        """
        is_object = getattr(array.dtype, "type", None) is np.object_
        if (
            is_object
            and array.filters
            and any(isinstance(f, VLenUTF8) for f in array.filters)
        ):
            return np.str_
        return array.dtype

    @classmethod
    def to_json(
        cls,
        array: Union[ZarrArray, str, Path, ZarrArrayPath, Sequence],
        info: Optional[SerializationInfo] = None,
    ) -> dict:
        """
        Dump just the metadata for an array from :meth:`zarr.core.Array.info_items`
        plus the :meth:`zarr.core.Array.hexdigest`.

        The full array can be returned by passing ``'zarr_dump_array': True`` to the
        serialization ``context`` ::

            model.model_dump_json(context={'zarr_dump_array': True})

        Args:
            array: Any specifier accepted by :meth:`._get_array`.
            info: Serialization info; only ``info.context`` is consulted.

        Returns:
            A dict of the array's info items, a ``"hexdigest"`` entry, and,
            if requested via the context, an ``"array"`` entry holding the
            full contents as nested lists.
        """
        dump_array = False
        if info is not None and info.context is not None:
            dump_array = info.context.get("zarr_dump_array", False)

        array = cls._get_array(array)

        # Use a distinct local name so the ``info`` parameter is not shadowed
        info_items = array.info_items()
        info_dict = {item[0]: item[1] for item in info_items}
        info_dict["hexdigest"] = array.hexdigest()

        if dump_array:
            info_dict["array"] = array[:].tolist()

        return info_dict