bioimage_py.sources
Data sources: serializable, array-like handles for the runner.
1"""Data sources: serializable, array-like handles for the runner.""" 2from .array_source import ArraySource 3from .base import Source, SourceSpec 4from .cloudvolume_source import CloudVolumeSource, open_cloudvolume 5from .dispatch import SourceLike, as_source, from_spec, register_source 6from .file_source import FileSource, open_source 7from .webknossos_source import WebKnossosSource, open_webknossos 8 9__all__ = [ 10 "ArraySource", 11 "Source", 12 "SourceSpec", 13 "SourceLike", 14 "as_source", 15 "from_spec", 16 "register_source", 17 "FileSource", 18 "open_source", 19 "CloudVolumeSource", 20 "open_cloudvolume", 21 "WebKnossosSource", 22 "open_webknossos", 23]
class ArraySource(Source):
    """Expose a numpy, zarr or z5py array through the :class:`Source` interface.

    Args:
        array: The array to wrap. numpy arrays work for local execution only:
            :meth:`to_spec` raises for them because they cannot be reopened on another node.
    """

    def __init__(self, array: Any) -> None:
        self._array = array

    @property
    def array(self) -> Any:
        """The array object this source wraps."""
        return self._array

    def _getitem(self, roi: Tuple[slice, ...]) -> np.ndarray:
        # Materialize whatever the backend returns as a numpy array.
        region = self._array[roi]
        return np.asarray(region)

    def _setitem(self, roi: Tuple[slice, ...], value: np.ndarray) -> None:
        self._array[roi] = value

    @property
    def shape(self) -> Tuple[int, ...]:
        return tuple(map(int, self._array.shape))

    @property
    def dtype(self) -> np.dtype:
        return np.dtype(self._array.dtype)

    @property
    def chunks(self) -> Optional[Tuple[int, ...]]:
        raw = getattr(self._array, "chunks", None)
        if raw is None:
            return None
        return tuple(map(int, raw))

    @property
    def shards(self) -> Optional[Tuple[int, ...]]:
        raw = getattr(self._array, "shards", None)
        if raw is None:
            return None
        return tuple(map(int, raw))

    def to_spec(self) -> SourceSpec:
        """Build the spec needed to reopen the wrapped array elsewhere.

        Raises:
            ValueError: For in-memory numpy arrays, and for arrays whose type does not
                come from the ``zarr`` or ``z5py`` package.
        """
        wrapped = self._array
        if _is_numpy(wrapped):
            raise ValueError(
                "Cannot serialize an in-memory numpy array for distributed execution. "
                "numpy arrays are supported for local execution only; pass a zarr or "
                "n5 (z5py) array for the 'subprocess' or 'slurm' backends."
            )
        # The top-level package of the array's type decides which spec builder applies.
        package = type(wrapped).__module__.split(".")[0]
        builder = {"zarr": _zarr_spec, "z5py": _z5py_spec}.get(package)
        if builder is None:
            raise ValueError(f"Cannot serialize array of type {type(wrapped)!r}: unsupported source kind.")
        return builder(wrapped)

    @staticmethod
    def reopen(spec: SourceSpec) -> "ArraySource":
        """Open the array described by ``spec`` in ``"r+"`` mode and wrap it.

        Raises:
            ValueError: If ``spec.kind`` is neither ``"zarr"`` nor ``"z5py"``.
        """
        kind = spec.kind
        if kind == "zarr":
            import zarr

            return ArraySource(zarr.open_array(spec.path, path=spec.internal_path, mode="r+"))
        if kind == "z5py":
            import z5py

            container = z5py.File(spec.path, mode="r+")
            return ArraySource(container[spec.internal_path])
        raise ValueError(f"ArraySource cannot reopen spec of kind {spec.kind!r}.")
Wrap a numpy, zarr or z5py array as a Source.
Args:
array: The wrapped array. numpy arrays are usable for local execution only;
their to_spec() raises because they cannot be reopened on another node.
@property
def chunks(self) -> Optional[Tuple[int, ...]]:
    """Chunk shape of the wrapped array as ints, or ``None`` when it reports no ``chunks``."""
    raw = getattr(self._array, "chunks", None)
    if raw is None:
        return None
    return tuple(map(int, raw))
The chunk shape of the underlying array, or None if unchunked.
@property
def shards(self) -> Optional[Tuple[int, ...]]:
    """Shard shape of the wrapped array as ints, or ``None`` when it reports no ``shards``."""
    raw = getattr(self._array, "shards", None)
    if raw is None:
        return None
    return tuple(map(int, raw))
The shard shape of the underlying array, or None if unsharded.
def to_spec(self) -> SourceSpec:
    """Build the spec needed to reopen the wrapped array elsewhere.

    Raises:
        ValueError: For in-memory numpy arrays, and for arrays whose type does not
            come from the ``zarr`` or ``z5py`` package.
    """
    wrapped = self._array
    if _is_numpy(wrapped):
        raise ValueError(
            "Cannot serialize an in-memory numpy array for distributed execution. "
            "numpy arrays are supported for local execution only; pass a zarr or "
            "n5 (z5py) array for the 'subprocess' or 'slurm' backends."
        )
    # The top-level package of the array's type decides which spec builder applies.
    package = type(wrapped).__module__.split(".")[0]
    builder = {"zarr": _zarr_spec, "z5py": _z5py_spec}.get(package)
    if builder is None:
        raise ValueError(f"Cannot serialize array of type {type(wrapped)!r}: unsupported source kind.")
    return builder(wrapped)
Return a reopen spec, raising for in-memory (numpy) arrays.
@staticmethod
def reopen(spec: SourceSpec) -> "ArraySource":
    """Open the array described by ``spec`` in ``"r+"`` mode and wrap it.

    Raises:
        ValueError: If ``spec.kind`` is neither ``"zarr"`` nor ``"z5py"``.
    """
    kind = spec.kind
    if kind == "zarr":
        import zarr

        return ArraySource(zarr.open_array(spec.path, path=spec.internal_path, mode="r+"))
    if kind == "z5py":
        import z5py

        container = z5py.File(spec.path, mode="r+")
        return ArraySource(container[spec.internal_path])
    raise ValueError(f"ArraySource cannot reopen spec of kind {spec.kind!r}.")
Reopen an array source from its spec (read-write).
class Source(ABC):
    """Array-like data handle with a serializable open-spec.

    A source supports numpy-style basic indexing: an integer, slice or ellipsis, or a tuple thereof.
    The index is normalized to a full tuple of in-bounds slices (axes selected by an integer are
    squeezed out of the result), so ``src[5]``, ``src[5, :]``, ``src[..., 0]`` and
    ``src[(slice(0, 8), slice(0, 8))]`` all work. Subclasses implement the normalized read/write via
    :meth:`_getitem` / :meth:`_setitem`, which always receive a full tuple of slices.

    In the runner hot path, per-block compute functions build that full tuple explicitly with
    :func:`bioimage_py.util.to_roi` (rather than relying on this normalization) so it is clear which
    region -- outer / inner / inner-local -- is being indexed.
    """

    def __getitem__(self, index: Index) -> np.ndarray:
        """Read a region, normalizing ``index`` and squeezing integer-indexed axes."""
        roi, to_squeeze = normalize_index(index, self.shape)
        return squeeze_singletons(self._getitem(roi), to_squeeze)

    def __setitem__(self, index: Index, value: np.ndarray) -> None:
        """Write a region, normalizing ``index`` and re-inserting integer-indexed axes.

        ``value`` may have fewer dimensions than the indexed region (e.g. a scalar, or a
        row written into every plane); missing leading axes are added as singletons so the
        value stays broadcast-compatible with the region, following numpy's broadcasting rule.
        """
        roi, to_squeeze = normalize_index(index, self.shape)
        value = np.asarray(value)
        if to_squeeze:
            # np.expand_dims places the new axes relative to the *result* rank, so the value
            # must already have the rank of the squeezed region. Otherwise a lower-rank value
            # (such as a scalar) raises AxisError or gets its axes inserted in the wrong place.
            # NOTE(review): assumes to_squeeze is a sequence of axis positions within roi.
            missing = len(roi) - len(to_squeeze) - value.ndim
            if missing > 0:
                value = value.reshape((1,) * missing + value.shape)
            value = np.expand_dims(value, to_squeeze)
        self._setitem(roi, value)

    @abstractmethod
    def _getitem(self, roi: Tuple[slice, ...]) -> np.ndarray:
        """Read the region given by a full tuple of in-bounds slices."""
        ...

    @abstractmethod
    def _setitem(self, roi: Tuple[slice, ...], value: np.ndarray) -> None:
        """Write the region given by a full tuple of in-bounds slices."""
        ...

    @property
    @abstractmethod
    def shape(self) -> Tuple[int, ...]:
        """The shape of the source; used to normalize indices."""
        ...

    @property
    @abstractmethod
    def dtype(self) -> np.dtype:
        """The numpy dtype of the source's data."""
        ...

    @property
    def chunks(self) -> Optional[Tuple[int, ...]]:
        """The chunk shape of the underlying array, or ``None`` if unchunked."""
        return None

    @property
    def shards(self) -> Optional[Tuple[int, ...]]:
        """The shard shape of the underlying array, or ``None`` if unsharded."""
        return None

    @property
    def ndim(self) -> int:
        """Number of dimensions."""
        return len(self.shape)

    @property
    def writable(self) -> bool:
        """Whether this source supports writing via :meth:`__setitem__`.

        Distributed runs reject non-writable sources passed as outputs.
        """
        return True

    @abstractmethod
    def to_spec(self) -> SourceSpec:
        """Return a serializable spec to reopen this source on another process.

        Raises:
            ValueError: If the source cannot be reopened elsewhere (e.g. in-memory data).
        """
        ...
Array-like data handle with a serializable open-spec.
A source supports numpy-style basic indexing: an integer, slice or ellipsis, or a tuple thereof.
The index is normalized to a full tuple of in-bounds slices (axes selected by an integer are
squeezed out of the result), so src[5], src[5, :], src[..., 0] and
src[(slice(0, 8), slice(0, 8))] all work. Subclasses implement the normalized read/write via
_getitem() / _setitem(), which always receive a full tuple of slices.
In the runner hot path, per-block compute functions build that full tuple explicitly with
bioimage_py.util.to_roi() (rather than relying on this normalization) so it is clear which
region -- outer / inner / inner-local -- is being indexed.
@property
def chunks(self) -> Optional[Tuple[int, ...]]:
    """Chunk shape of the underlying array; this base implementation always reports ``None``."""
    return None
The chunk shape of the underlying array, or None if unchunked.
@property
def shards(self) -> Optional[Tuple[int, ...]]:
    """Shard shape of the underlying array; this base implementation always reports ``None``."""
    return None
The shard shape of the underlying array, or None if unsharded.
@property
def writable(self) -> bool:
    """Whether writes through :meth:`__setitem__` are supported (``True`` by default).

    Distributed runs reject non-writable sources passed as outputs.
    """
    return True
Whether this source supports writing via __setitem__().
Distributed runs reject non-writable sources passed as outputs.
@abstractmethod
def to_spec(self) -> SourceSpec:
    """Describe how to reopen this source on another process.

    Returns:
        A serializable :class:`SourceSpec`.

    Raises:
        ValueError: If the source cannot be reopened elsewhere (e.g. in-memory data).
    """
    ...
Return a serializable spec to reopen this source on another process.
Raises: ValueError: If the source cannot be reopened elsewhere (e.g. in-memory data).
@dataclass
class SourceSpec:
    """Serializable recipe for (re)opening a :class:`Source` on another process.

    Kept as a plain dataclass on purpose: it cloudpickles trivially and stays
    human-readable in the debug dump of a distributed job.

    Attributes:
        kind: The source kind, e.g. ``"zarr"``, ``"z5py"`` or ``"wrapper"``.
        path: Filesystem path of the container (for array sources).
        internal_path: Path of the array inside the container (for array sources).
        params: Extra keyword arguments needed to reconstruct the source.
        wrapped: The spec of the wrapped source, for wrapper sources.
    """

    # Only ``kind`` is required; every other field has a default.
    kind: str
    path: Optional[str] = None
    internal_path: Optional[str] = None
    # default_factory gives each spec its own dict instead of a shared mutable default.
    params: Dict[str, Any] = field(default_factory=dict)
    wrapped: Optional["SourceSpec"] = None
Serializable description of how to (re)open a Source on another process.
This is intentionally a plain dataclass so that it cloudpickles trivially and is human-readable in the debug dump of a distributed job.
Attributes:
kind: The source kind, e.g. "zarr", "z5py" or "wrapper".
path: Filesystem path of the container (for array sources).
internal_path: Path of the array inside the container (for array sources).
params: Extra keyword arguments needed to reconstruct the source.
wrapped: The spec of the wrapped source, for wrapper sources.
def as_source(obj: "SourceLike") -> Source:
    """Convert a supported object into a :class:`Source`.

    Idempotent on :class:`Source` inputs. numpy / zarr / z5py arrays are wrapped in an
    :class:`ArraySource`. Bare paths are intentionally not supported (see the design doc).

    Args:
        obj: The object to convert.

    Returns:
        A :class:`Source`.

    Raises:
        TypeError: If the object cannot be converted (e.g. a string path or an
            ``os.PathLike`` object such as ``pathlib.Path``).
    """
    import os  # local import: only needed to recognise path-like objects below

    path_message = (
        "Passing strings / file paths as a source is not supported. Open the array "
        "yourself (e.g. with zarr or z5py) and pass the handle."
    )
    if isinstance(obj, Source):
        return obj
    if isinstance(obj, (str, bytes)):
        raise TypeError(path_message)
    # Registered converters get the first chance at any non-string object.
    for predicate, converter in _CONVERTERS:
        if predicate(obj):
            return converter(obj)
    # numpy and any array-like with shape/dtype fall back to ArraySource.
    if isinstance(obj, np.ndarray) or (hasattr(obj, "shape") and hasattr(obj, "dtype")):
        return ArraySource(obj)
    # Path objects that nothing above handled get the same explanation as string paths.
    # This check runs last so inputs that were accepted before are still accepted.
    if isinstance(obj, os.PathLike):
        raise TypeError(path_message)
    raise TypeError(f"Cannot convert object of type {type(obj)!r} to a Source.")
Convert a supported object into a Source.
Idempotent on Source inputs. numpy / zarr / z5py arrays are wrapped in an
ArraySource. Bare paths are intentionally not supported (see the design doc).
Args: obj: The object to convert.
Returns:
A Source.
Raises: TypeError: If the object cannot be converted (e.g. a string path).
def from_spec(spec: SourceSpec) -> Source:
    """Rebuild the :class:`Source` described by ``spec``, dispatching on ``spec.kind``.

    Raises:
        ValueError: If ``spec.kind`` is not one of the kinds handled here.
    """
    kind = spec.kind
    if kind == "zarr" or kind == "z5py":
        return ArraySource.reopen(spec)
    # The remaining source modules are imported only when their kind is requested.
    if kind == "file":
        from .file_source import FileSource

        return FileSource.reopen(spec)
    if kind == "cloudvolume":
        from .cloudvolume_source import CloudVolumeSource

        return CloudVolumeSource.reopen(spec)
    if kind == "webknossos":
        from .webknossos_source import WebKnossosSource

        return WebKnossosSource.reopen(spec)
    if kind == "wrapper":
        from ..wrapper.base import wrapper_from_spec

        return wrapper_from_spec(spec)
    raise ValueError(f"Cannot reconstruct source from spec of kind {spec.kind!r}.")
Reconstruct a Source from its SourceSpec.
def register_source(predicate: Callable[[object], bool], converter: Callable[[object], Source]) -> None:
    """Add a ``(predicate, converter)`` pair to the converters tried by :func:`as_source`.

    Args:
        predicate: Returns ``True`` if ``converter`` can handle the object.
        converter: Builds a :class:`Source` from the object.
    """
    entry = (predicate, converter)
    # Appended at the end, so earlier registrations are tried first.
    _CONVERTERS.append(entry)
Register a converter used by as_source().
Args:
predicate: Returns True if converter can handle the object.
converter: Builds a Source from the object.
class FileSource(ArraySource):
    """A :class:`Source` wrapping a file-backed dataset, reopenable from its path + format.

    Unlike a plain :class:`ArraySource` (whose spec is introspected from a live zarr/z5py handle),
    a :class:`FileSource` stores the recipe to reopen the dataset via :func:`open_source`, so it
    round-trips for any registered format (hdf5, mrc, nifti, tif, ...).

    Args:
        array: The opened array-like dataset.
        path: Path of the container/file the dataset was opened from.
        internal_path: Key of the dataset inside the container (``""`` for single-array files).
        format: The registered format name (e.g. ``"hdf5"``, ``"mrc"``).
        mode: The mode the file was opened in.
        open_kwargs: Extra keyword arguments forwarded to the backend constructor.
        writable: Whether writes are permitted (format is writable and ``mode`` is not read-only).
    """

    def __init__(
        self,
        array: Any,
        *,
        path: PathLike,
        internal_path: str,
        format: str,
        mode: str = "r",
        open_kwargs: Optional[Dict[str, Any]] = None,
        writable: bool = False,
    ) -> None:
        super().__init__(array)
        self._path = str(path)
        self._internal_path = internal_path
        self._format = format
        self._mode = mode
        # Copy so later mutation of the caller's dict cannot change the recorded recipe.
        self._open_kwargs = dict(open_kwargs or {})
        self._writable = bool(writable)

    @property
    def format(self) -> str:
        """The registered format name of this source."""
        return self._format

    @property
    def writable(self) -> bool:
        """Whether this source supports writing via :meth:`__setitem__`."""
        return self._writable

    def _setitem(self, roi: Tuple[slice, ...], value: np.ndarray) -> None:
        """Write ``value`` into ``roi``, raising ``TypeError`` if the source is read-only."""
        if not self._writable:
            raise TypeError(
                f"FileSource for format {self._format!r} opened in mode {self._mode!r} is read-only."
            )
        self._array[roi] = value

    def to_spec(self) -> SourceSpec:
        """Return a ``kind="file"`` spec recording the path, key, format and open options.

        ``"format"`` and ``"mode"`` are reserved keys in ``params``: :meth:`reopen` pops them
        to decide how to reopen the file, so an ``open_kwargs`` entry with the same name never
        overrides the recorded format/mode.
        """
        params: Dict[str, Any] = {"format": self._format, "mode": self._mode}
        # Skip colliding keys instead of letting open_kwargs overwrite the reserved entries.
        params.update({key: val for key, val in self._open_kwargs.items() if key not in params})
        return SourceSpec(
            kind="file",
            path=self._path,
            internal_path=self._internal_path,
            params=params,
        )

    @staticmethod
    def reopen(spec: SourceSpec) -> "FileSource":
        """Reopen a file source from its spec (mirrors the original :func:`open_source` call).

        Raises:
            KeyError: If ``spec.params`` has no ``"format"`` entry.
        """
        params = dict(spec.params)
        fmt = params.pop("format")
        mode = params.pop("mode", "r")
        return open_source(spec.path, spec.internal_path, format=fmt, mode=mode, **params)
A Source wrapping a file-backed dataset, reopenable from its path + format.
Unlike a plain ArraySource (whose spec is introspected from a live zarr/z5py handle),
a FileSource stores the recipe to reopen the dataset via open_source(), so it
round-trips for any registered format (hdf5, mrc, nifti, tif, ...).
Args:
array: The opened array-like dataset.
path: Path of the container/file the dataset was opened from.
internal_path: Key of the dataset inside the container ("" for single-array files).
format: The registered format name (e.g. "hdf5", "mrc").
mode: The mode the file was opened in.
open_kwargs: Extra keyword arguments forwarded to the backend constructor.
writable: Whether writes are permitted (format is writable and mode is not read-only).
def __init__(
    self,
    array: Any,
    *,
    path: PathLike,
    internal_path: str,
    format: str,
    mode: str = "r",
    open_kwargs: Optional[Dict[str, Any]] = None,
    writable: bool = False,
) -> None:
    """Wrap ``array`` and record the recipe (path, key, format, mode, options) to reopen it."""
    super().__init__(array)
    # Location of the dataset.
    self._path = str(path)
    self._internal_path = internal_path
    # How it was opened.
    self._format = format
    self._mode = mode
    self._open_kwargs = dict(open_kwargs or {})  # private copy of the caller's options
    self._writable = bool(writable)
@property
def format(self) -> str:
    """Name of the registered format this source was opened with."""
    return self._format
The registered format name of this source.
@property
def writable(self) -> bool:
    """Whether writes through :meth:`__setitem__` are permitted for this source."""
    return self._writable
Whether this source supports writing via __setitem__().
def to_spec(self) -> SourceSpec:
    """Return a ``kind="file"`` spec recording the path, key, format and open options.

    ``"format"`` and ``"mode"`` are reserved keys in ``params``: they record how the file
    was opened, so an ``open_kwargs`` entry with the same name never overrides them.
    """
    params: Dict[str, Any] = {"format": self._format, "mode": self._mode}
    # Skip colliding keys instead of letting open_kwargs overwrite the reserved entries.
    params.update({key: val for key, val in self._open_kwargs.items() if key not in params})
    return SourceSpec(
        kind="file",
        path=self._path,
        internal_path=self._internal_path,
        params=params,
    )
Return a kind="file" spec recording the path, key, format and open options.
@staticmethod
def reopen(spec: SourceSpec) -> "FileSource":
    """Replay the :func:`open_source` call recorded in ``spec``.

    ``"format"`` (required) and ``"mode"`` (default ``"r"``) are taken out of
    ``spec.params``; every remaining entry is forwarded as a keyword argument.
    """
    extra = dict(spec.params)  # copy so the spec itself is left untouched
    fmt = extra.pop("format")
    mode = extra.pop("mode", "r")
    return open_source(spec.path, spec.internal_path, format=fmt, mode=mode, **extra)
Reopen a file source from its spec (mirrors the original open_source() call).
def open_source(
    path: PathLike,
    internal_path: Optional[str] = None,
    format: Optional[str] = None,
    mode: str = "r",
    **kwargs: Any,
) -> FileSource:
    """Open a file-backed array and wrap it in a reopenable :class:`FileSource`.

    The format is inferred from the path extension unless ``format`` is given. ``internal_path``
    selects the array inside a container; when omitted, the opened handle's ``default_key``
    attribute (if any) is used instead.

    Args:
        path: Path to the file or folder to open.
        internal_path: Key of the array inside the container; format-dependent default if omitted.
        format: Force a registered format name, overriding extension inference.
        mode: Open mode. ``"r"`` (default) is read-only; any other mode makes the source
            writable only when the format itself is writable.
        kwargs: Extra keyword arguments forwarded to the backend constructor.

    Returns:
        A :class:`FileSource` with a reopenable ``kind="file"`` spec.
    """
    fmt = infer_format(path) if format is None else format
    # Called only for validation: fails early when the format is not available.
    constructor_for_format(fmt)

    handle = open_file(path, mode=mode, format=fmt, **kwargs)
    if internal_path is None:
        key = getattr(handle, "default_key", None)
    else:
        key = internal_path
    dataset, recorded_key = _resolve_dataset(handle, key)

    read_only = mode == "r"
    return FileSource(
        dataset,
        path=path,
        internal_path=recorded_key,
        format=fmt,
        mode=mode,
        open_kwargs=kwargs,
        writable=is_writable_format(fmt) and not read_only,
    )
Open a file-backed array as a Source.
The format is inferred from the path extension (overridable via format). internal_path
selects the array inside a container; when omitted it defaults to the format's natural key
(e.g. "data" for mrc/nifti, "mag1" for knossos, "" for a single image stack), and is
required for multi-array containers (hdf5/zarr/n5).
Args:
path: Path to the file or folder to open.
internal_path: Key of the array inside the container; format-dependent default if omitted.
format: Force a registered format name, overriding extension inference.
mode: Open mode. "r" (default) is read-only; write modes ("a"/"r+"/"w")
are only honored for writable formats (zarr/n5/hdf5).
kwargs: Extra keyword arguments forwarded to the backend constructor.
Returns:
A FileSource with a reopenable kind="file" spec.
class CloudVolumeSource(Source):
    """A ZYX-ordered, writable :class:`Source` view of a CloudVolume layer.

    CloudVolume stores data in ``(x, y, z, channel)`` order; this source exposes a 3D ``(z, y, x)``
    numpy-order view (single channel only), transposing on read and write. Indices are local to the
    source origin (``offset``) and translated to absolute CloudVolume coordinates internally.

    Not thread-safe: the CloudVolume handle is not safe to share across threads, so do not run the
    ``local`` backend with ``num_workers > 1`` over this source. For parallelism use the
    ``subprocess``/``slurm`` backends, where each worker reopens the source from its spec; concurrent
    block writes must still be chunk-aligned (the runner's write-safety guard enforces this).

    Args:
        volume: An opened CloudVolume (precomputed) handle.
        offset: Absolute XYZ origin of the view; defaults to the volume's ``voxel_offset``.
        size: XYZ size of the view; defaults to the volume's ``volume_size``.
        open_params: The constructor parameters used to (re)open the volume, recorded in the spec.

    Raises:
        ValueError: If the volume has more than one channel.
    """

    def __init__(
        self,
        volume: Any,
        offset: Optional[Tuple[int, int, int]] = None,
        size: Optional[Tuple[int, int, int]] = None,
        open_params: Optional[Dict[str, Any]] = None,
    ) -> None:
        self._vol = volume
        if int(volume.shape[3]) != 1:
            raise ValueError(
                f"CloudVolumeSource supports single-channel volumes only, got {int(volume.shape[3])} channels."
            )
        self._offset = tuple(int(v) for v in (offset if offset is not None else volume.voxel_offset))
        size_xyz = tuple(int(v) for v in (size if size is not None else volume.volume_size))
        self._size = size_xyz  # XYZ
        self._open_params = dict(open_params or {})

    @property
    def volume(self) -> Any:
        """The wrapped CloudVolume handle."""
        return self._vol

    @property
    def shape(self) -> Tuple[int, ...]:
        """The ZYX shape of the view."""
        return (self._size[2], self._size[1], self._size[0])

    @property
    def dtype(self) -> np.dtype:
        """The numpy dtype of the volume."""
        return np.dtype(self._vol.dtype)

    @property
    def chunks(self) -> Optional[Tuple[int, ...]]:
        """The ZYX chunk shape of the volume."""
        cs = [int(c) for c in self._vol.chunk_size]
        return (cs[2], cs[1], cs[0])

    @property
    def writable(self) -> bool:
        """CloudVolume sources support writing."""
        return True

    def _abs_bounds(self, roi: Tuple[slice, ...]) -> Tuple[Tuple[int, int], Tuple[int, int], Tuple[int, int]]:
        """Return absolute XYZ ``(start, stop)`` bounds for a ZYX roi."""
        if not isinstance(roi, tuple):
            roi = (roi,)
        # Pad a short roi with full slices so all three axes are addressed.
        roi = roi + (slice(None),) * (3 - len(roi))
        z0, z1 = _start_stop(roi[0], self.shape[0])
        y0, y1 = _start_stop(roi[1], self.shape[1])
        x0, x1 = _start_stop(roi[2], self.shape[2])
        ox, oy, oz = self._offset
        return (ox + x0, ox + x1), (oy + y0, oy + y1), (oz + z0, oz + z1)

    def _getitem(self, roi: Tuple[slice, ...]) -> np.ndarray:
        """Read the ZYX region ``roi`` and return it as a ``(z, y, x)`` array."""
        (x0, x1), (y0, y1), (z0, z1) = self._abs_bounds(roi)
        block = np.asarray(self._vol[x0:x1, y0:y1, z0:z1])  # (x, y, z, c)
        block = block[..., 0]  # drop the single channel
        return block.transpose(2, 1, 0)  # -> (z, y, x)

    def _setitem(self, roi: Tuple[slice, ...], value: np.ndarray) -> None:
        """Write ``value`` into the ZYX region ``roi``.

        Scalars and arrays with fewer than three dimensions are broadcast to the region's
        ``(z, y, x)`` shape first, so ``src[...] = 0`` works like it does for numpy arrays.

        Raises:
            ValueError: If a lower-rank ``value`` cannot be broadcast to the region shape.
        """
        (x0, x1), (y0, y1), (z0, z1) = self._abs_bounds(roi)
        arr = np.asarray(value)
        if arr.ndim < 3:
            # transpose(2, 1, 0) needs exactly three axes; these inputs used to fail there.
            arr = np.broadcast_to(arr, (z1 - z0, y1 - y0, x1 - x0))
        arr = arr.transpose(2, 1, 0)[..., None]  # (z,y,x) -> (x,y,z,1)
        self._vol[x0:x1, y0:y1, z0:z1] = arr

    def to_spec(self) -> SourceSpec:
        """Return a ``kind="cloudvolume"`` spec recording the cloudpath, open params and ROI."""
        params = dict(self._open_params)
        # Lists rather than tuples; reopen() converts them back to tuples.
        params["offset"] = list(self._offset)
        params["size"] = list(self._size)
        return SourceSpec(kind="cloudvolume", path=str(self._vol.cloudpath), params=params)

    @staticmethod
    def reopen(spec: SourceSpec) -> "CloudVolumeSource":
        """Reopen a CloudVolume source from its spec."""
        params = dict(spec.params)
        offset = params.pop("offset", None)
        size = params.pop("size", None)
        return open_cloudvolume(
            spec.path,
            offset=None if offset is None else tuple(offset),
            size=None if size is None else tuple(size),
            **params,
        )
A ZYX-ordered, writable Source view of a CloudVolume layer.
CloudVolume stores data in (x, y, z, channel) order; this source exposes a 3D (z, y, x)
numpy-order view (single channel only), transposing on read and write. Indices are local to the
source origin (offset) and translated to absolute CloudVolume coordinates internally.
Not thread-safe: the CloudVolume handle is not safe to share across threads, so do not run the
local backend with num_workers > 1 over this source. For parallelism use the
subprocess/slurm backends, where each worker reopens the source from its spec; concurrent
block writes must still be chunk-aligned (the runner's write-safety guard enforces this).
Args:
volume: An opened CloudVolume (precomputed) handle.
offset: Absolute XYZ origin of the view; defaults to the volume's voxel_offset.
size: XYZ size of the view; defaults to the volume's volume_size.
open_params: The constructor parameters used to (re)open the volume, recorded in the spec.
def __init__(
    self,
    volume: Any,
    offset: Optional[Tuple[int, int, int]] = None,
    size: Optional[Tuple[int, int, int]] = None,
    open_params: Optional[Dict[str, Any]] = None,
) -> None:
    """Wrap ``volume`` and record the XYZ origin/size of the view plus its open parameters.

    Raises:
        ValueError: If the fourth entry of ``volume.shape`` (the channel count) is not 1.
    """
    self._vol = volume
    channels = int(volume.shape[3])
    if channels != 1:
        raise ValueError(
            f"CloudVolumeSource supports single-channel volumes only, got {channels} channels."
        )
    # Fall back to the volume's own origin/extent when no explicit view is given.
    origin = volume.voxel_offset if offset is None else offset
    self._offset = tuple(int(v) for v in origin)
    extent = volume.volume_size if size is None else size
    self._size = tuple(int(v) for v in extent)  # XYZ
    self._open_params = dict(open_params or {})
@property
def volume(self) -> Any:
    """The CloudVolume handle this source was built around."""
    return self._vol
The wrapped CloudVolume handle.
@property
def shape(self) -> Tuple[int, ...]:
    """Shape of the view in ZYX order (the stored XYZ size, reversed)."""
    return tuple(self._size[axis] for axis in (2, 1, 0))
The ZYX shape of the view.
@property
def dtype(self) -> np.dtype:
    """The volume's dtype, normalized to a :class:`numpy.dtype`."""
    raw = self._vol.dtype
    return np.dtype(raw)
The numpy dtype of the volume.
@property
def chunks(self) -> Optional[Tuple[int, ...]]:
    """Chunk shape of the volume in ZYX order (``chunk_size`` with its first three axes reversed)."""
    sizes = tuple(map(int, self._vol.chunk_size))
    return (sizes[2], sizes[1], sizes[0])
The ZYX chunk shape of the volume.
@property
def writable(self) -> bool:
    """Always ``True``: CloudVolume sources support writing."""
    return True
CloudVolume sources support writing.
def to_spec(self) -> SourceSpec:
    """Build a ``kind="cloudvolume"`` spec from the cloudpath, the open params and the view ROI.

    ``offset`` and ``size`` are stored as lists under ``params``, overriding any
    same-named entries in the recorded open parameters.
    """
    params = {
        **self._open_params,
        "offset": list(self._offset),
        "size": list(self._size),
    }
    cloudpath = str(self._vol.cloudpath)
    return SourceSpec(kind="cloudvolume", path=cloudpath, params=params)
Return a kind="cloudvolume" spec recording the cloudpath, open params and ROI.
@staticmethod
def reopen(spec: SourceSpec) -> "CloudVolumeSource":
    """Rebuild the source by calling :func:`open_cloudvolume` with the spec's contents.

    ``offset`` and ``size`` are taken out of ``spec.params`` and converted to tuples when
    present; every remaining entry is forwarded as a keyword argument.
    """
    extra = dict(spec.params)  # copy so the spec itself is left untouched
    offset = extra.pop("offset", None)
    size = extra.pop("size", None)
    if offset is not None:
        offset = tuple(offset)
    if size is not None:
        size = tuple(size)
    return open_cloudvolume(spec.path, offset=offset, size=size, **extra)
Reopen a CloudVolume source from its spec.
def open_cloudvolume(
    cloudpath: str,
    mip: int = 0,
    fill_missing: bool = False,
    bounded: bool = True,
    cache: bool = False,
    non_aligned_writes: bool = True,
    offset: Optional[Tuple[int, int, int]] = None,
    size: Optional[Tuple[int, int, int]] = None,
    **kwargs: Any,
) -> CloudVolumeSource:
    """Open a CloudVolume (precomputed) layer as a writable ZYX :class:`Source`.

    Args:
        cloudpath: The CloudVolume cloudpath (e.g. ``"precomputed://..."`` or ``"file://..."``).
        mip: The resolution (mip) level to open.
        fill_missing: Whether to zero-fill missing chunks instead of raising. For a *writable*
            output whose volume size is not a multiple of the chunk size, set this to ``True`` so
            the partial boundary chunks can be read-modify-written into a fresh layer.
        bounded: Whether reads/writes are restricted to the volume bounds.
        cache: Whether to enable CloudVolume's local cache.
        non_aligned_writes: Whether to allow writes that are not chunk-aligned (needed for the
            partial blocks at the volume boundary in block-wise writes).
        offset: Optional absolute XYZ origin of the view; defaults to the layer's ``voxel_offset``.
        size: Optional XYZ size of the view; defaults to the layer's ``volume_size``.
        kwargs: Extra keyword arguments forwarded to ``CloudVolume``. ``progress`` defaults to
            ``False`` but may be overridden here.

    Returns:
        A :class:`CloudVolumeSource`.
    """
    from cloudvolume import CloudVolume

    open_params: Dict[str, Any] = dict(
        mip=mip,
        fill_missing=fill_missing,
        bounded=bounded,
        cache=cache,
        non_aligned_writes=non_aligned_writes,
    )
    open_params.update(kwargs)
    # progress=False is only a default. Merging it into the keyword dict lets a caller-supplied
    # ``progress`` win; passing it as an explicit keyword raised TypeError (duplicate keyword).
    volume = CloudVolume(cloudpath, **{"progress": False, **open_params})
    return CloudVolumeSource(volume, offset=offset, size=size, open_params=open_params)
Open a CloudVolume (precomputed) layer as a writable ZYX Source.
Args:
cloudpath: The CloudVolume cloudpath (e.g. "precomputed://..." or "file://...").
mip: The resolution (mip) level to open.
fill_missing: Whether to zero-fill missing chunks instead of raising. For a writable
output whose volume size is not a multiple of the chunk size, set this to True so
the partial boundary chunks can be read-modify-written into a fresh layer.
bounded: Whether reads/writes are restricted to the volume bounds.
cache: Whether to enable CloudVolume's local cache.
non_aligned_writes: Whether to allow writes that are not chunk-aligned (needed for the
partial blocks at the volume boundary in block-wise writes).
offset: Optional absolute XYZ origin of the view; defaults to the layer's voxel_offset.
size: Optional XYZ size of the view; defaults to the layer's volume_size.
kwargs: Extra keyword arguments forwarded to CloudVolume.
Returns:
A CloudVolumeSource.
class WebKnossosSource(Source):
    """A ZYX-ordered, read-only :class:`Source` view of a WebKnossos layer.

    WebKnossos stores data in ``(x, y, z)`` order; this source exposes a 3D ``(z, y, x)`` numpy-order
    view (single channel only), transposing on read. ``offset`` and ``size`` are given in **Mag(1)
    (full-resolution) XYZ** coordinates (the convention of WebKnossos bounding boxes), defaulting to
    the layer's bounding box. The presented ``shape`` is at the opened magnification, i.e. the
    Mag(1) size divided by ``mag``; local indices are at that magnification and are scaled by ``mag``
    to address the Mag(1) coordinates that ``MagView.read`` expects.

    Not thread-safe: the remote layer handle is not safe to share across threads, so do not run the
    ``local`` backend with ``num_workers > 1`` over this source. For parallelism use the
    ``subprocess``/``slurm`` backends, where each worker reopens the source from its spec.

    Args:
        dataset_name_or_url: The WebKnossos dataset name or URL (or an annotation URL).
        organization_id: The organization id (required for opening by dataset name).
        layer_name: The name of the layer to open.
        mag: The magnification (resolution) level.
        offset: Optional Mag(1) XYZ origin of the view; defaults to the layer bbox ``topleft``.
        size: Optional Mag(1) XYZ size of the view; defaults to the layer bbox ``size``.

    Raises:
        ValueError: If ``offset`` or ``size`` is given without exactly three entries, or if the
            layer does not have exactly one channel.
    """

    def __init__(
        self,
        dataset_name_or_url: str,
        organization_id: Optional[str] = None,
        layer_name: str = "",
        mag: int = 1,
        offset: Optional[Tuple[int, int, int]] = None,
        size: Optional[Tuple[int, int, int]] = None,
    ) -> None:
        # Reject malformed ROIs before any layer is opened. Without this check, surplus entries
        # would be silently dropped and missing ones would surface as a bare IndexError.
        for label, triple in (("offset", offset), ("size", size)):
            if triple is not None and len(triple) != 3:
                raise ValueError(
                    f"{label} must have exactly 3 (x, y, z) entries, got {len(triple)}."
                )

        import webknossos as wk

        self._dataset_name_or_url = dataset_name_or_url
        self._organization_id = organization_id
        self._layer_name = layer_name
        self._mag = int(mag)

        # Open with the normalised mag so the opened view always matches the factor used for
        # coordinate scaling in _getitem and the value recorded by to_spec.
        self._layer = _open_layer(dataset_name_or_url, organization_id, layer_name, self._mag)
        num_channels = int(self._layer.layer.num_channels)
        if num_channels != 1:
            raise ValueError(
                f"WebKnossosSource supports single-channel layers only, got {num_channels} channels."
            )
        # MagView.bounding_box is the data extent in Mag(1) coordinates (x, y, z) -- not
        # info.bounding_box, which is the channel-prefixed, shard-padded storage extent.
        bbox = self._layer.bounding_box
        topleft = bbox.topleft if offset is None else offset
        size_xyz = bbox.size if size is None else size
        self._offset = (int(topleft[0]), int(topleft[1]), int(topleft[2]))  # Mag(1) XYZ
        self._size = (int(size_xyz[0]), int(size_xyz[1]), int(size_xyz[2]))  # Mag(1) XYZ
        # The extent of the view expressed at the opened magnification.
        # NOTE(review): BoundingBox.in_mag presumably requires a mag-aligned box -- confirm how
        # user-supplied offsets/sizes that are not multiples of mag should be handled.
        ml = wk.BoundingBox(topleft=self._offset, size=self._size).in_mag(wk.Mag(self._mag))
        self._size_ml = (int(ml.size[0]), int(ml.size[1]), int(ml.size[2]))  # mag-level XYZ
        info = self._layer.info
        chunk = info.chunk_shape  # mag-level XYZ
        self._chunks = (int(chunk[2]), int(chunk[1]), int(chunk[0]))  # ZYX
        self._dtype = np.dtype(info.voxel_type)

    @property
    def layer(self) -> Any:
        """The wrapped WebKnossos mag view."""
        return self._layer

    @property
    def shape(self) -> Tuple[int, ...]:
        """The ZYX shape of the view at the opened magnification."""
        return (self._size_ml[2], self._size_ml[1], self._size_ml[0])

    @property
    def dtype(self) -> np.dtype:
        """The numpy dtype of the layer."""
        return self._dtype

    @property
    def chunks(self) -> Optional[Tuple[int, ...]]:
        """The ZYX chunk shape of the layer."""
        return self._chunks

    @property
    def writable(self) -> bool:
        """WebKnossos sources are read-only."""
        return False

    def _getitem(self, roi: Tuple[slice, ...]) -> np.ndarray:
        """Read the region ``roi`` (local ZYX indices at the opened mag) as a ``(z, y, x)`` array.

        Raises:
            RuntimeError: If the data returned by WebKnossos does not have the requested shape.
        """
        import webknossos as wk

        z0, z1 = _start_stop(roi[0], self.shape[0])  # mag-level ZYX
        y0, y1 = _start_stop(roi[1], self.shape[1])
        x0, x1 = _start_stop(roi[2], self.shape[2])

        # Local indices are at the opened magnification; MagView.read expects a Mag(1) absolute
        # bounding box, so scale by mag (topleft = offset + local*mag, size = extent*mag).
        mag, (ox, oy, oz) = self._mag, self._offset
        wk_bbox = wk.BoundingBox(
            topleft=(ox + x0 * mag, oy + y0 * mag, oz + z0 * mag),  # Mag(1) XYZ
            size=((x1 - x0) * mag, (y1 - y0) * mag, (z1 - z0) * mag),  # Mag(1) XYZ
        )
        data = self._layer.read(absolute_bounding_box=wk_bbox)
        data = data[0]  # drop the single channel -> (x, y, z) at the opened magnification
        expected = (x1 - x0, y1 - y0, z1 - z0)
        if data.shape != expected:
            raise RuntimeError(
                f"WebKnossos read returned XYZ shape {data.shape}, expected {expected} (mag={mag}); "
                "this indicates a coordinate-system mismatch."
            )
        return np.transpose(data, (2, 1, 0))  # -> (z, y, x)

    def _setitem(self, roi: Tuple[slice, ...], value: np.ndarray) -> None:
        """Always raise: this source does not support writes."""
        raise TypeError("WebKnossosSource is read-only.")

    def to_spec(self) -> SourceSpec:
        """Return a ``kind="webknossos"`` spec recording the dataset, layer, mag and ROI."""
        params: Dict[str, Any] = {
            "dataset_name_or_url": self._dataset_name_or_url,
            "organization_id": self._organization_id,
            "layer_name": self._layer_name,
            "mag": self._mag,
            "offset": list(self._offset),
            "size": list(self._size),
        }
        return SourceSpec(kind="webknossos", params=params)

    @staticmethod
    def reopen(spec: SourceSpec) -> "WebKnossosSource":
        """Reopen a WebKnossos source from its spec."""
        params = dict(spec.params)
        offset = params.pop("offset", None)
        size = params.pop("size", None)
        return WebKnossosSource(
            dataset_name_or_url=params["dataset_name_or_url"],
            organization_id=params.get("organization_id"),
            layer_name=params["layer_name"],
            mag=params.get("mag", 1),
            offset=None if offset is None else tuple(offset),
            size=None if size is None else tuple(size),
        )
A ZYX-ordered, read-only Source view of a WebKnossos layer.
WebKnossos stores data in (x, y, z) order; this source exposes a 3D (z, y, x) numpy-order
view (single channel only), transposing on read. offset and size are given in Mag(1)
(full-resolution) XYZ coordinates (the convention of WebKnossos bounding boxes), defaulting to
the layer's bounding box. The presented shape is at the opened magnification, i.e. the
Mag(1) size divided by mag; local indices are at that magnification and are scaled by mag
to address the Mag(1) coordinates that MagView.read expects.
Not thread-safe: the remote layer handle is not safe to share across threads, so do not run the
local backend with num_workers > 1 over this source. For parallelism use the
subprocess/slurm backends, where each worker reopens the source from its spec.
Args:
dataset_name_or_url: The WebKnossos dataset name or URL (or an annotation URL).
organization_id: The organization id (required for opening by dataset name).
layer_name: The name of the layer to open.
mag: The magnification (resolution) level.
offset: Optional Mag(1) XYZ origin of the view; defaults to the layer bbox topleft.
size: Optional Mag(1) XYZ size of the view; defaults to the layer bbox size.
def __init__(
    self,
    dataset_name_or_url: str,
    organization_id: Optional[str] = None,
    layer_name: str = "",
    mag: int = 1,
    offset: Optional[Tuple[int, int, int]] = None,
    size: Optional[Tuple[int, int, int]] = None,
) -> None:
    """Open the layer and record the view's ROI, chunking and dtype.

    Args:
        dataset_name_or_url: The WebKnossos dataset name or URL (or an annotation URL).
        organization_id: The organization id (required for opening by dataset name).
        layer_name: The name of the layer to open.
        mag: The magnification (resolution) level; coerced with ``int``.
        offset: Optional Mag(1) XYZ origin of the view; defaults to the layer bbox ``topleft``.
        size: Optional Mag(1) XYZ size of the view; defaults to the layer bbox ``size``.

    Raises:
        ValueError: If ``offset`` or ``size`` is given without exactly three entries, or if the
            layer does not have exactly one channel.
    """
    # Reject malformed ROIs before any layer is opened. Without this check, surplus entries
    # would be silently dropped and missing ones would surface as a bare IndexError.
    for label, triple in (("offset", offset), ("size", size)):
        if triple is not None and len(triple) != 3:
            raise ValueError(
                f"{label} must have exactly 3 (x, y, z) entries, got {len(triple)}."
            )

    import webknossos as wk

    self._dataset_name_or_url = dataset_name_or_url
    self._organization_id = organization_id
    self._layer_name = layer_name
    self._mag = int(mag)

    # Open with the normalised mag so the opened view always matches the factor that is
    # stored on the instance and used for all later coordinate arithmetic.
    self._layer = _open_layer(dataset_name_or_url, organization_id, layer_name, self._mag)
    num_channels = int(self._layer.layer.num_channels)
    if num_channels != 1:
        raise ValueError(
            f"WebKnossosSource supports single-channel layers only, got {num_channels} channels."
        )
    # MagView.bounding_box is the data extent in Mag(1) coordinates (x, y, z) -- not
    # info.bounding_box, which is the channel-prefixed, shard-padded storage extent.
    bbox = self._layer.bounding_box
    topleft = bbox.topleft if offset is None else offset
    size_xyz = bbox.size if size is None else size
    self._offset = (int(topleft[0]), int(topleft[1]), int(topleft[2]))  # Mag(1) XYZ
    self._size = (int(size_xyz[0]), int(size_xyz[1]), int(size_xyz[2]))  # Mag(1) XYZ
    # The extent of the view expressed at the opened magnification.
    # NOTE(review): BoundingBox.in_mag presumably requires a mag-aligned box -- confirm how
    # user-supplied offsets/sizes that are not multiples of mag should be handled.
    ml = wk.BoundingBox(topleft=self._offset, size=self._size).in_mag(wk.Mag(self._mag))
    self._size_ml = (int(ml.size[0]), int(ml.size[1]), int(ml.size[2]))  # mag-level XYZ
    info = self._layer.info
    chunk = info.chunk_shape  # mag-level XYZ
    self._chunks = (int(chunk[2]), int(chunk[1]), int(chunk[0]))  # ZYX
    self._dtype = np.dtype(info.voxel_type)
@property
def layer(self) -> Any:
    """The wrapped WebKnossos mag view.

    Returns the object stored in ``self._layer`` as-is (no copy is made).
    """
    return self._layer
The wrapped WebKnossos mag view.
@property
def shape(self) -> Tuple[int, ...]:
    """Return the view's extent in ZYX order at the opened magnification.

    The mag-level size is kept in XYZ order, so its first three entries are reversed here.
    """
    size_xyz = self._size_ml
    return tuple(size_xyz[axis] for axis in (2, 1, 0))
The ZYX shape of the view at the opened magnification.
@property
def dtype(self) -> np.dtype:
    """The numpy dtype of the layer.

    Returns the value cached in ``self._dtype``; nothing is recomputed on access.
    """
    return self._dtype
The numpy dtype of the layer.
@property
def chunks(self) -> Optional[Tuple[int, ...]]:
    """The ZYX chunk shape of the layer.

    Returns the value cached in ``self._chunks`` unchanged.
    """
    return self._chunks
The ZYX chunk shape of the layer.
@property
def writable(self) -> bool:
    """WebKnossos sources are read-only.

    Always returns ``False``, regardless of instance state.
    """
    return False
WebKnossos sources are read-only.
def to_spec(self) -> SourceSpec:
    """Describe this source as a ``kind="webknossos"`` :class:`SourceSpec`.

    The params record the dataset name/URL, organization id, layer name, magnification and the
    view's offset and size; the two stored tuples are emitted as lists.
    """
    return SourceSpec(
        kind="webknossos",
        params={
            "dataset_name_or_url": self._dataset_name_or_url,
            "organization_id": self._organization_id,
            "layer_name": self._layer_name,
            "mag": self._mag,
            "offset": [*self._offset],
            "size": [*self._size],
        },
    )
Return a kind="webknossos" spec recording the dataset, layer, mag and ROI.
@staticmethod
def reopen(spec: SourceSpec) -> "WebKnossosSource":
    """Build a new :class:`WebKnossosSource` from the params of ``spec``.

    ``dataset_name_or_url`` and ``layer_name`` are required keys; ``organization_id``
    falls back to ``None``, ``mag`` to ``1``, and a missing or ``None`` ``offset``/``size``
    is passed on as ``None``. Any other value for those two is converted to a tuple.
    """
    fields = dict(spec.params)
    raw_offset = fields.get("offset")
    raw_size = fields.get("size")
    # Required keys are looked up first so a missing key is reported before any conversion.
    dataset = fields["dataset_name_or_url"]
    organization = fields.get("organization_id")
    layer_name = fields["layer_name"]
    mag = fields.get("mag", 1)
    offset = tuple(raw_offset) if raw_offset is not None else None
    size = tuple(raw_size) if raw_size is not None else None
    return WebKnossosSource(
        dataset_name_or_url=dataset,
        organization_id=organization,
        layer_name=layer_name,
        mag=mag,
        offset=offset,
        size=size,
    )
Reopen a WebKnossos source from its spec.
def open_webknossos(
    dataset_name_or_url: str,
    organization_id: Optional[str] = None,
    layer_name: str = "",
    mag: int = 1,
    offset: Optional[Tuple[int, int, int]] = None,
    size: Optional[Tuple[int, int, int]] = None,
) -> WebKnossosSource:
    """Open a (remote) WebKnossos layer as a read-only ZYX :class:`Source`.

    Convenience wrapper: every argument is forwarded unchanged to :class:`WebKnossosSource`.

    Args:
        dataset_name_or_url: The WebKnossos dataset name or URL (or an annotation URL).
        organization_id: The organization id (required when opening by dataset name).
        layer_name: The name of the layer to open.
        mag: The magnification (resolution) level.
        offset: Optional absolute Mag(1) (full-resolution) XYZ origin of the view; defaults to
            the layer bbox ``topleft``.
        size: Optional Mag(1) (full-resolution) XYZ size of the view; defaults to the layer
            bbox ``size``.

    Returns:
        A :class:`WebKnossosSource`.
    """
    return WebKnossosSource(
        dataset_name_or_url=dataset_name_or_url,
        organization_id=organization_id,
        layer_name=layer_name,
        mag=mag,
        offset=offset,
        size=size,
    )
Open a (remote) WebKnossos layer as a read-only ZYX Source.
Args:
dataset_name_or_url: The WebKnossos dataset name or URL (or an annotation URL).
organization_id: The organization id (required when opening by dataset name).
layer_name: The name of the layer to open.
mag: The magnification (resolution) level.
offset: Optional absolute Mag(1) (full-resolution) XYZ origin of the view; defaults to the layer bbox topleft.
size: Optional Mag(1) (full-resolution) XYZ size of the view; defaults to the layer bbox size.
Returns:
A WebKnossosSource.