bioimage_py.util

Shared helpers: block-to-roi conversion, blocking construction and filter halos.

View Source

  1"""Shared helpers: block-to-roi conversion, blocking construction and filter halos."""
  2from __future__ import annotations
  3
  4import itertools
  5import numbers
  6import warnings
  7from math import ceil
  8from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
  9
 10import bioimage_cpp as bic
 11import numpy as np
 12from bioimage_cpp.utils import Block, BlockWithHalo, Blocking
 13
 14from .sources.base import Source
 15
 16if TYPE_CHECKING:
 17    from .sources import SourceLike
 18
 19# A per-block descriptor handed to compute functions: a plain ``Block`` (no halo) or a
 20# ``BlockWithHalo`` (halo operations).
 21BlockDescriptor = Union[Block, BlockWithHalo]
 22
 23# Signature of a per-block compute function: ``function(block, inputs, outputs, mask)``.
 24ComputeFn = Callable[
 25    [BlockDescriptor, Sequence[Source], Sequence[Source], Optional[Source]], Any
 26]
 27
 28# Gated per-block subsampling for the dict relabeling kernel. ``bioimage_cpp.utils.take_dict``
 29# rebuilds a hash map from the *full* mapping on every call (O(dict size) per block, regardless of
 30# how few ids the block holds), so for a large dict we first restrict it to the ids actually present
 31# in the block. Benchmarked crossovers: the full rebuild is <10 ms below ~1e5 entries (not worth
 32# subsampling), and subsampling only pays off while the block's distinct-id count is well below the
 33# dict size -- for a nearly-as-diverse-as-the-dict block, building the per-block dict costs more than
 34# the rebuild it saves.
 35_SUBSAMPLE_MIN_DICT = 100_000
 36_SUBSAMPLE_MAX_DIVERSITY = 8
 37
 38
 39def take_mapping(mapping: Dict[int, int], seg: np.ndarray) -> np.ndarray:
 40    """Apply a ``{old_id: new_id}`` dict to an array, subsampling the dict when that is cheaper.
 41
 42    ``bioimage_cpp.utils.take_dict`` rebuilds a hash map from the whole ``mapping`` on every call, so
 43    for a large dict this first restricts it to the ids present in ``seg`` -- but only while they are
 44    far fewer than the dict (otherwise the per-block dict costs more than the rebuild it saves). This
 45    is the shared kernel behind the canonical :func:`bioimage_py.segmentation.relabel` and the other
 46    dict-based per-block relabel writers.
 47
 48    Args:
 49        mapping: The relabeling to apply; every id present in ``seg`` must be a key.
 50        seg: The array of ids to map.
 51
 52    Returns:
 53        The mapped array (same shape as ``seg``).
 54    """
 55    if len(mapping) > _SUBSAMPLE_MIN_DICT:
 56        present = np.unique(seg)
 57        if len(present) * _SUBSAMPLE_MAX_DIVERSITY < len(mapping):
 58            mapping = {int(x): mapping[int(x)] for x in present.tolist()}
 59    return bic.utils.take_dict(mapping, seg)
 60
 61
 62def to_roi(block: BlockDescriptor) -> Tuple[slice, ...]:
 63    """Convert a ``bioimage_cpp.utils`` ``Block`` into a tuple of slices.
 64
 65    Args:
 66        block: A ``Block`` (carrying ``begin``/``end`` coordinate lists). For halo
 67            operations pass one of ``block.outer_block`` / ``block.inner_block`` /
 68            ``block.inner_block_local``.
 69
 70    Returns:
 71        A tuple of slices that indexes a source or array.
 72    """
 73    return tuple(slice(int(b), int(e)) for b, e in zip(block.begin, block.end))
 74
 75
 76def full_roi(ndim: int) -> Tuple[slice, ...]:
 77    """Return a slicing tuple that selects an entire ``ndim``-dimensional array."""
 78    return tuple(slice(None) for _ in range(ndim))
 79
 80
 81def is_direct(job_type: str, num_workers: int, block_shape: Optional[Tuple[int, ...]]) -> bool:
 82    """Return whether a call qualifies for the direct (whole-array, non-blocked) fast path."""
 83    return job_type == "local" and num_workers == 1 and block_shape is None
 84
 85
 86def check_direct(job_type: str, num_workers: int, block_shape: Optional[Tuple[int, ...]],
 87                 mask: "Optional[SourceLike]", block_ids: Optional[Sequence[int]]) -> bool:
 88    """Like :func:`is_direct`, but reject mask/block_ids the direct reduction path cannot honor."""
 89    if is_direct(job_type, num_workers, block_shape):
 90        if mask is not None or block_ids is not None:
 91            raise ValueError("Direct computation does not support 'mask' or 'block_ids'.")
 92        return True
 93    return False
 94
 95
 96def same_array(a: Source, b: Source) -> bool:
 97    """Return whether two sources wrap the same underlying array object."""
 98    return getattr(a, "array", None) is getattr(b, "array", object())
 99
100
def normalize_halo(halo: Union[int, Sequence[int]], ndim: int) -> List[int]:
    """Expand a halo into one integer per axis.

    Args:
        halo: Either a single integer (repeated for every axis) or a per-axis sequence.
        ndim: The number of axes the result must cover.

    Returns:
        A list of ``ndim`` integers.

    Raises:
        ValueError: If a per-axis ``halo`` does not have exactly ``ndim`` entries.
    """
    if not isinstance(halo, numbers.Integral):
        per_axis = [int(value) for value in halo]
        if len(per_axis) == ndim:
            return per_axis
        raise ValueError(f"Halo {per_axis} does not match ndim {ndim}.")
    return [int(halo)] * ndim
109
110
def sigma_to_halo(sigma: Union[float, Sequence[float]], order: int) -> Union[int, List[int]]:
    """Derive the halo needed to apply an image filter block-wise.

    Mirrors elf's implementation, based on VIGRA's ``multi_blockwise.hxx``. Each halo is
    ``2 * ceil(3 * sigma + 0.5 * order + 0.5)``.

    Args:
        sigma: The sigma value(s) of the filter.
        order: The derivative order of the filter (0 for smoothing).

    Returns:
        A single int when ``sigma`` is a number, otherwise a list with one halo per
        entry of ``sigma``.
    """
    def _halo_for(value: float) -> int:
        # Same formula for the scalar and the per-axis case.
        return 2 * int(ceil(3.0 * value + 0.5 * order + 0.5))

    if isinstance(sigma, numbers.Number):
        return _halo_for(sigma)
    return [_halo_for(value) for value in sigma]
127
128
def downscale_shape(shape: Sequence[int], scale_factor: Union[int, Sequence[int]],
                    ceil_mode: bool = True) -> Tuple[int, ...]:
    """Return the shape obtained by downscaling ``shape`` by an integer factor.

    Mirrors elf's ``downscale_shape``.

    Args:
        shape: The input array shape.
        scale_factor: The downscaling factor: a single int (isotropic) or a per-axis sequence.
        ceil_mode: If true, a non-zero remainder adds one to the axis size (so no input
            voxel is dropped); otherwise plain floor division is used.

    Returns:
        The downscaled shape.

    Raises:
        ValueError: If a per-axis ``scale_factor`` does not match the dimensionality of ``shape``.
    """
    ndim = len(shape)
    if not isinstance(scale_factor, numbers.Integral):
        per_axis = [int(factor) for factor in scale_factor]
        if len(per_axis) != ndim:
            raise ValueError(
                f"scale_factor {scale_factor} does not match the dimensionality {ndim}."
            )
    else:
        per_axis = [int(scale_factor)] * ndim

    scaled = []
    for size, factor in zip(shape, per_axis):
        quotient, remainder = divmod(int(size), factor)
        if ceil_mode and remainder != 0:
            quotient += 1
        scaled.append(quotient)
    return tuple(scaled)
158
159
def derive_block_shape(source: Source, block_shape: Optional[Sequence[int]]) -> Tuple[int, ...]:
    """Pick the block shape: the explicit one if given, else the source's chunk shape.

    Args:
        source: A source exposing ``chunks``; only consulted when ``block_shape`` is ``None``.
        block_shape: The explicit block shape, or ``None`` to derive it from chunks.

    Returns:
        The resolved block shape as a tuple of ints.

    Raises:
        ValueError: If ``block_shape`` is ``None`` and ``source.chunks`` is ``None``.
    """
    if block_shape is None:
        chunk_shape = source.chunks
        if chunk_shape is None:
            raise ValueError(
                "block_shape is required for block-wise processing of an unchunked array "
                "(the source has no chunks to derive it from)."
            )
        return tuple(map(int, chunk_shape))
    return tuple(map(int, block_shape))
182
183
def get_blocking(shape: Sequence[int], block_shape: Sequence[int],
                 roi: Optional[Tuple[slice, ...]] = None) -> Blocking:
    """Create a ``bioimage_cpp.utils.Blocking`` covering ``shape`` or a sub-region of it.

    Args:
        shape: The full array shape.
        block_shape: The block shape.
        roi: Optional region of interest to restrict the blocking to. An open slice
            bound falls back to ``0`` (start) or the axis size (stop); the slice
            ``step`` is not read.

    Returns:
        A ``bioimage_cpp.utils.Blocking`` instance.
    """
    if roi is None:
        # No roi is the same as an open slice on every axis.
        roi = (slice(None),) * len(shape)

    lower = [0 if axis.start is None else int(axis.start) for axis in roi]
    upper = [
        int(size) if axis.stop is None else int(axis.stop)
        for axis, size in zip(roi, shape)
    ]
    blocks = [int(extent) for extent in block_shape]
    return bic.utils.Blocking(lower, upper, blocks)
204
205
def check_rerun_args(job_type: str, resume_from: Optional[str],
                     subset: Optional[Sequence[int]], *, subset_name: str = "block_ids") -> None:
    """Check that an operation's rerun arguments (``resume_from`` vs a subset) are compatible.

    Nothing is checked when ``resume_from`` is ``None``.

    Args:
        job_type: The execution backend (``"local"``/``"subprocess"``/``"slurm"``).
        resume_from: The preserved temp folder to resume from, or ``None``.
        subset: The explicit subset (``block_ids``/``item_ids``) to process, or ``None``.
        subset_name: The subset argument's name, for error messages.

    Raises:
        ValueError: If both ``resume_from`` and ``subset`` are given, or if ``resume_from`` is
            used with the local backend (which keeps no temp folder to resume from).
    """
    if resume_from is None:
        return

    # The two rerun mechanisms are mutually exclusive; this check comes first.
    if subset is not None:
        raise ValueError(f"Pass either 'resume_from' or '{subset_name}', not both.")

    if job_type == "local":
        raise ValueError(
            "resume_from is only valid for distributed backends (subprocess/slurm); the "
            "local runner keeps no temp folder. Re-run the operation in-process instead "
            f"(optionally with {subset_name}=err.failed_block_ids for a subset)."
        )
229
230
def group_blocks_by_shard(
    blocking: Blocking,
    outputs: Sequence[Source],
    block_ids: Sequence[int],
) -> Optional[List[List[int]]]:
    """Partition blocks so that each shard is only ever written from one group.

    For a sharded zarr v3 array the atomic write unit is the *shard*, not the inner chunk:
    two blocks writing different inner chunks of the same shard concurrently corrupt it.
    The runners route each group to one worker that processes its blocks sequentially, so
    same-shard writes never race. Blocks that touch a common shard cell (of any sharded
    output) are merged into one group with a union-find.

    The shard grid is anchored at coordinate 0 and only the trailing ``ndim`` shard axes
    are considered, where ``ndim`` is the dimensionality of the blocks; a leading channel
    axis on an output is not a routing axis (mirrors the chunk handling in
    :meth:`Runner._validate_write_safety`).

    Args:
        blocking: The blocking used to map a block id to its (non-halo) write region.
        outputs: The output sources; only those whose ``shards`` is not ``None`` drive
            the grouping.
        block_ids: The block ids to group.

    Returns:
        A list of groups (each a sorted list of block ids), ordered by each group's smallest
        id; ``None`` if no output is sharded (the caller should then use the default
        one-block-per-unit path); an empty list if ``block_ids`` is empty.
    """
    sharded_outputs = [
        (out_idx, out) for out_idx, out in enumerate(outputs) if out.shards is not None
    ]
    if not sharded_outputs:
        return None

    ids = [int(bid) for bid in block_ids]
    if not ids:
        return []

    ndim = len(blocking.get_block(ids[0]).begin)
    # Spatial (trailing) shard extent of every sharded output; this defines its cell grid.
    shard_extents = [
        (out_idx, tuple(int(size) for size in out.shards[-ndim:]))
        for out_idx, out in sharded_outputs
    ]

    # The union-find runs over positions in ``ids`` (dense 0..n-1), not over the ids.
    uf = bic.utils.UnionFind(len(ids))
    first_writer: Dict[Tuple[int, ...], int] = {}
    for pos, bid in enumerate(ids):
        block = blocking.get_block(bid)
        lower = [int(coord) for coord in block.begin]
        upper = [int(coord) for coord in block.end]
        for out_idx, extent in shard_extents:
            # Half-open range of shard cells per axis: floor(begin / shard) up to
            # ceil(end / shard).
            cell_ranges = [
                range(lower[axis] // extent[axis],
                      (upper[axis] + extent[axis] - 1) // extent[axis])
                for axis in range(ndim)
            ]
            for cell in itertools.product(*cell_ranges):
                key = (out_idx,) + cell
                if key not in first_writer:
                    first_writer[key] = pos
                    continue
                previous = first_writer[key]
                if previous != pos:
                    uf.merge(pos, previous)

    members_by_root: Dict[int, List[int]] = {}
    for pos, bid in enumerate(ids):
        root = int(uf.find(pos))
        members_by_root.setdefault(root, []).append(bid)

    grouped = [sorted(members) for members in members_by_root.values()]
    grouped.sort(key=lambda members: members[0])
    return grouped
291
292
def maybe_warn_imbalance(loads: Sequence[int], num_workers: int, n_groups: int,
                         name: str) -> None:
    """Emit a warning if shard-exclusive routing idles workers or skews their load.

    At most one warning is issued. The idle-worker warning takes precedence; the
    imbalance warning fires only when the largest load exceeds both the smallest load
    and 1.5 times the mean load. Nothing happens for empty ``loads``.

    Args:
        loads: The per-worker (or per-task) block counts of the assignment.
        num_workers: The requested number of workers.
        n_groups: The number of shard groups (schedulable units) the blocks formed.
        name: A short run name used in the warning message (``'run'`` if empty).
    """
    if not loads:
        return

    label = name or 'run'
    requested = int(num_workers)
    if n_groups < requested:
        idle = requested - n_groups
        # stacklevel=2 attributes the warning to the caller of this helper.
        warnings.warn(
            f"Shard routing for '{label}' produced only {n_groups} shard-group(s) for "
            f"{num_workers} workers, so {idle} worker(s) will be idle. "
            "A few shards span the data; use a smaller shard shape or fewer workers to balance. "
            "Results are still correct.",
            stacklevel=2,
        )
        return

    heaviest = max(loads)
    lightest = min(loads)
    average = sum(loads) / len(loads)
    skewed = heaviest > lightest and heaviest > 1.5 * average
    if skewed:
        warnings.warn(
            f"Uneven worker load for '{label}': block counts per worker range "
            f"{lightest}..{heaviest} (mean {average:.1f}). Some shards span "
            "disproportionately many blocks; results are still correct but parallelism "
            "is reduced.",
            stacklevel=2,
        )

BlockDescriptor = typing.Union[bioimage_cpp._core.Block, bioimage_cpp._core.BlockWithHalo]

ComputeFn = typing.Callable[[typing.Union[bioimage_cpp._core.Block, bioimage_cpp._core.BlockWithHalo], typing.Sequence[bioimage_py.sources.Source], typing.Sequence[bioimage_py.sources.Source], typing.Optional[bioimage_py.sources.Source]], typing.Any]

def take_mapping(mapping: Dict[int, int], seg: numpy.ndarray) -> numpy.ndarray: View Source

40def take_mapping(mapping: Dict[int, int], seg: np.ndarray) -> np.ndarray:
41    """Apply a ``{old_id: new_id}`` dict to an array, subsampling the dict when that is cheaper.
42
43    ``bioimage_cpp.utils.take_dict`` rebuilds a hash map from the whole ``mapping`` on every call, so
44    for a large dict this first restricts it to the ids present in ``seg`` -- but only while they are
45    far fewer than the dict (otherwise the per-block dict costs more than the rebuild it saves). This
46    is the shared kernel behind the canonical :func:`bioimage_py.segmentation.relabel` and the other
47    dict-based per-block relabel writers.
48
49    Args:
50        mapping: The relabeling to apply; every id present in ``seg`` must be a key.
51        seg: The array of ids to map.
52
53    Returns:
54        The mapped array (same shape as ``seg``).
55    """
56    if len(mapping) > _SUBSAMPLE_MIN_DICT:
57        present = np.unique(seg)
58        if len(present) * _SUBSAMPLE_MAX_DIVERSITY < len(mapping):
59            mapping = {int(x): mapping[int(x)] for x in present.tolist()}
60    return bic.utils.take_dict(mapping, seg)

Apply a {old_id: new_id} dict to an array, subsampling the dict when that is cheaper.

bioimage_cpp.utils.take_dict rebuilds a hash map from the whole mapping on every call, so for a large dict this first restricts it to the ids present in seg -- but only while they are far fewer than the dict (otherwise the per-block dict costs more than the rebuild it saves). This is the shared kernel behind the canonical bioimage_py.segmentation.relabel and the other dict-based per-block relabel writers.

Args:
- mapping: The relabeling to apply; every id present in seg must be a key.
- seg: The array of ids to map.

Returns: The mapped array (same shape as seg).

def to_roi( block: Union[bioimage_cpp._core.Block, bioimage_cpp._core.BlockWithHalo]) -> Tuple[slice, ...]: View Source

63def to_roi(block: BlockDescriptor) -> Tuple[slice, ...]:
64    """Convert a ``bioimage_cpp.utils`` ``Block`` into a tuple of slices.
65
66    Args:
67        block: A ``Block`` (carrying ``begin``/``end`` coordinate lists). For halo
68            operations pass one of ``block.outer_block`` / ``block.inner_block`` /
69            ``block.inner_block_local``.
70
71    Returns:
72        A tuple of slices that indexes a source or array.
73    """
74    return tuple(slice(int(b), int(e)) for b, e in zip(block.begin, block.end))

Convert a bioimage_cpp.utils Block into a tuple of slices.

Args:
- block: A Block (carrying begin/end coordinate lists). For halo operations pass one of block.outer_block / block.inner_block / block.inner_block_local.

Returns: A tuple of slices that indexes a source or array.

def full_roi(ndim: int) -> Tuple[slice, ...]: View Source

77def full_roi(ndim: int) -> Tuple[slice, ...]:
78    """Return a slicing tuple that selects an entire ``ndim``-dimensional array."""
79    return tuple(slice(None) for _ in range(ndim))

Return a slicing tuple that selects an entire ndim-dimensional array.

def is_direct( job_type: str, num_workers: int, block_shape: Optional[Tuple[int, ...]]) -> bool: View Source

82def is_direct(job_type: str, num_workers: int, block_shape: Optional[Tuple[int, ...]]) -> bool:
83    """Return whether a call qualifies for the direct (whole-array, non-blocked) fast path."""
84    return job_type == "local" and num_workers == 1 and block_shape is None

Return whether a call qualifies for the direct (whole-array, non-blocked) fast path.

def check_direct( job_type: str, num_workers: int, block_shape: Optional[Tuple[int, ...]], mask: Optional[SourceLike], block_ids: Optional[Sequence[int]]) -> bool: View Source

87def check_direct(job_type: str, num_workers: int, block_shape: Optional[Tuple[int, ...]],
88                 mask: "Optional[SourceLike]", block_ids: Optional[Sequence[int]]) -> bool:
89    """Like :func:`is_direct`, but reject mask/block_ids the direct reduction path cannot honor."""
90    if is_direct(job_type, num_workers, block_shape):
91        if mask is not None or block_ids is not None:
92            raise ValueError("Direct computation does not support 'mask' or 'block_ids'.")
93        return True
94    return False

Like is_direct(), but reject mask/block_ids the direct reduction path cannot honor.

def same_array( a: bioimage_py.sources.Source, b: bioimage_py.sources.Source) -> bool: View Source

97def same_array(a: Source, b: Source) -> bool:
98    """Return whether two sources wrap the same underlying array object."""
99    return getattr(a, "array", None) is getattr(b, "array", object())

Return whether two sources wrap the same underlying array object.

def normalize_halo(halo: Union[int, Sequence[int]], ndim: int) -> List[int]: View Source

102def normalize_halo(halo: Union[int, Sequence[int]], ndim: int) -> List[int]:
103    """Broadcast a halo to a per-axis list of length ``ndim``."""
104    if isinstance(halo, numbers.Integral):
105        return [int(halo)] * ndim
106    halo = [int(h) for h in halo]
107    if len(halo) != ndim:
108        raise ValueError(f"Halo {halo} does not match ndim {ndim}.")
109    return halo

Broadcast a halo to a per-axis list of length ndim.

def sigma_to_halo( sigma: Union[float, Sequence[float]], order: int) -> Union[int, List[int]]: View Source

112def sigma_to_halo(sigma: Union[float, Sequence[float]], order: int) -> Union[int, List[int]]:
113    """Compute the halo for applying an image filter block-wise.
114
115    Mirrors elf's implementation, based on VIGRA's ``multi_blockwise.hxx``.
116
117    Args:
118        sigma: The sigma value(s) of the filter.
119        order: The derivative order of the filter (0 for smoothing).
120
121    Returns:
122        The halo, as an int for scalar sigma or a per-axis list for sequence sigma.
123    """
124    multiplier = 2
125    if isinstance(sigma, numbers.Number):
126        return multiplier * int(ceil(3.0 * sigma + 0.5 * order + 0.5))
127    return [multiplier * int(ceil(3.0 * sig + 0.5 * order + 0.5)) for sig in sigma]

Compute the halo for applying an image filter block-wise.

Mirrors elf's implementation, based on VIGRA's multi_blockwise.hxx.

Args:
- sigma: The sigma value(s) of the filter.
- order: The derivative order of the filter (0 for smoothing).

Returns: The halo, as an int for scalar sigma or a per-axis list for sequence sigma.

def downscale_shape( shape: Sequence[int], scale_factor: Union[int, Sequence[int]], ceil_mode: bool = True) -> Tuple[int, ...]: View Source

130def downscale_shape(shape: Sequence[int], scale_factor: Union[int, Sequence[int]],
131                    ceil_mode: bool = True) -> Tuple[int, ...]:
132    """Compute the shape resulting from downscaling by an integer factor.
133
134    Mirrors elf's ``downscale_shape``.
135
136    Args:
137        shape: The input array shape.
138        scale_factor: The downscaling factor: a single int (isotropic) or a per-axis sequence.
139        ceil_mode: Whether to round the downscaled size up (so no input voxel is dropped) or
140            down (strict integer division).
141
142    Returns:
143        The downscaled shape.
144
145    Raises:
146        ValueError: If a per-axis ``scale_factor`` does not match the dimensionality of ``shape``.
147    """
148    if isinstance(scale_factor, numbers.Integral):
149        factors = [int(scale_factor)] * len(shape)
150    else:
151        factors = [int(f) for f in scale_factor]
152        if len(factors) != len(shape):
153            raise ValueError(
154                f"scale_factor {scale_factor} does not match the dimensionality {len(shape)}."
155            )
156    if ceil_mode:
157        return tuple(int(s) // f + int((int(s) % f) != 0) for s, f in zip(shape, factors))
158    return tuple(int(s) // f for s, f in zip(shape, factors))

Compute the shape resulting from downscaling by an integer factor.

Mirrors elf's downscale_shape.

Args:
- shape: The input array shape.
- scale_factor: The downscaling factor: a single int (isotropic) or a per-axis sequence.
- ceil_mode: Whether to round the downscaled size up (so no input voxel is dropped) or down (strict integer division).

Returns: The downscaled shape.

Raises: ValueError: If a per-axis scale_factor does not match the dimensionality of shape.

def derive_block_shape( source: bioimage_py.sources.Source, block_shape: Optional[Sequence[int]]) -> Tuple[int, ...]: View Source

161def derive_block_shape(source: Source, block_shape: Optional[Sequence[int]]) -> Tuple[int, ...]:
162    """Resolve the block shape, falling back to the source's chunks.
163
164    Args:
165        source: A source exposing ``shape`` and ``chunks``.
166        block_shape: The explicit block shape, or ``None`` to derive it from chunks.
167
168    Returns:
169        The resolved block shape.
170
171    Raises:
172        ValueError: If ``block_shape`` is ``None`` and the source is unchunked.
173    """
174    if block_shape is not None:
175        return tuple(int(b) for b in block_shape)
176    chunks = source.chunks
177    if chunks is not None:
178        return tuple(int(c) for c in chunks)
179    raise ValueError(
180        "block_shape is required for block-wise processing of an unchunked array "
181        "(the source has no chunks to derive it from)."
182    )

Resolve the block shape, falling back to the source's chunks.

Args:
- source: A source exposing shape and chunks.
- block_shape: The explicit block shape, or None to derive it from chunks.

Returns: The resolved block shape.

Raises: ValueError: If block_shape is None and the source is unchunked.

def get_blocking( shape: Sequence[int], block_shape: Sequence[int], roi: Optional[Tuple[slice, ...]] = None) -> bioimage_cpp._core.Blocking: View Source

185def get_blocking(shape: Sequence[int], block_shape: Sequence[int],
186                 roi: Optional[Tuple[slice, ...]] = None) -> Blocking:
187    """Build a ``bioimage_cpp.utils.Blocking`` over ``shape`` (or a sub-roi).
188
189    Args:
190        shape: The full array shape.
191        block_shape: The block shape.
192        roi: Optional region of interest to restrict the blocking to.
193
194    Returns:
195        A ``bioimage_cpp.utils.Blocking`` instance.
196    """
197    ndim = len(shape)
198    if roi is None:
199        roi_begin = [0] * ndim
200        roi_end = [int(s) for s in shape]
201    else:
202        roi_begin = [int(sl.start) if sl.start is not None else 0 for sl in roi]
203        roi_end = [int(sl.stop) if sl.stop is not None else int(s) for sl, s in zip(roi, shape)]
204    return bic.utils.Blocking(roi_begin, roi_end, [int(b) for b in block_shape])

Build a bioimage_cpp.utils.Blocking over shape (or a sub-roi).

Args:
- shape: The full array shape.
- block_shape: The block shape.
- roi: Optional region of interest to restrict the blocking to.

Returns: A bioimage_cpp.utils.Blocking instance.

def check_rerun_args( job_type: str, resume_from: Optional[str], subset: Optional[Sequence[int]], *, subset_name: str = 'block_ids') -> None: View Source

207def check_rerun_args(job_type: str, resume_from: Optional[str],
208                     subset: Optional[Sequence[int]], *, subset_name: str = "block_ids") -> None:
209    """Validate an operation's rerun arguments (``resume_from`` vs a subset).
210
211    Args:
212        job_type: The execution backend (``"local"``/``"subprocess"``/``"slurm"``).
213        resume_from: The preserved temp folder to resume from, or ``None``.
214        subset: The explicit subset (``block_ids``/``item_ids``) to process, or ``None``.
215        subset_name: The subset argument's name, for error messages.
216
217    Raises:
218        ValueError: If both ``resume_from`` and ``subset`` are given, or if ``resume_from`` is
219            used with the local backend (which keeps no temp folder to resume from).
220    """
221    if resume_from is not None:
222        if subset is not None:
223            raise ValueError(f"Pass either 'resume_from' or '{subset_name}', not both.")
224        if job_type == "local":
225            raise ValueError(
226                "resume_from is only valid for distributed backends (subprocess/slurm); the "
227                "local runner keeps no temp folder. Re-run the operation in-process instead "
228                f"(optionally with {subset_name}=err.failed_block_ids for a subset)."
229            )

Validate an operation's rerun arguments (resume_from vs a subset).

Args:
- job_type: The execution backend ("local"/"subprocess"/"slurm").
- resume_from: The preserved temp folder to resume from, or None.
- subset: The explicit subset (block_ids/item_ids) to process, or None.
- subset_name: The subset argument's name, for error messages.

Raises: ValueError: If both resume_from and subset are given, or if resume_from is used with the local backend (which keeps no temp folder to resume from).

def group_blocks_by_shard( blocking: bioimage_cpp._core.Blocking, outputs: Sequence[bioimage_py.sources.Source], block_ids: Sequence[int]) -> Optional[List[List[int]]]: View Source

232def group_blocks_by_shard(
233    blocking: Blocking,
234    outputs: Sequence[Source],
235    block_ids: Sequence[int],
236) -> Optional[List[List[int]]]:
237    """Group blocks so that every shard is written by a single worker.
238
239    For a sharded zarr v3 array the atomic write unit is the *shard*, not the inner chunk:
240    two blocks writing different inner chunks of the same shard concurrently corrupt it. To
241    keep the block shape flexible (rather than forcing it to a shard multiple) the runners
242    route each group to one worker, which processes its blocks sequentially — so same-shard
243    writes never race. This computes those groups: blocks that share any shard (for any
244    sharded output) are placed in the same group via a union-find over the block ids.
245
246    The shard grid is anchored at coordinate 0 and considered along the trailing (spatial)
247    shard axes only; a leading channel axis on an output is fully written by every block and
248    is not a routing axis (mirrors the chunk handling in
249    :meth:`Runner._validate_write_safety`).
250
251    Args:
252        blocking: The blocking used to map a block id to its (non-halo) write region.
253        outputs: The output sources; only those with a ``shards`` shape drive the grouping.
254        block_ids: The block ids to group.
255
256    Returns:
257        A list of groups (each a sorted list of block ids), ordered by each group's smallest
258        id; ``None`` if no output is sharded (the caller should then use the default
259        one-block-per-unit path); an empty list if ``block_ids`` is empty.
260    """
261    sharded = [(idx, out) for idx, out in enumerate(outputs) if out.shards is not None]
262    if not sharded:
263        return None
264    block_ids = [int(b) for b in block_ids]
265    if not block_ids:
266        return []
267
268    ndim = len(blocking.get_block(block_ids[0]).begin)
269    # Per sharded output, the spatial (trailing) shard extent that defines its cell grid.
270    shard_spatial = [(idx, tuple(int(s) for s in out.shards[-ndim:])) for idx, out in sharded]
271
272    # Union-find over the positions in block_ids (dense 0..n-1); bic's UnionFind, as used in
273    # segmentation/label.py, instead of a hand-rolled one.
274    uf = bic.utils.UnionFind(len(block_ids))
275    cell_owner: Dict[Tuple[int, ...], int] = {}
276    for pos, bid in enumerate(block_ids):
277        block = blocking.get_block(bid)
278        begin = [int(b) for b in block.begin]
279        end = [int(e) for e in block.end]
280        for out_idx, shard in shard_spatial:
281            ranges = [range(begin[d] // shard[d], (end[d] + shard[d] - 1) // shard[d])
282                      for d in range(ndim)]
283            for cell in itertools.product(*ranges):
284                owner = cell_owner.setdefault((out_idx,) + cell, pos)
285                if owner != pos:
286                    uf.merge(pos, owner)
287
288    groups: Dict[int, List[int]] = {}
289    for pos, bid in enumerate(block_ids):
290        groups.setdefault(int(uf.find(pos)), []).append(bid)
291    return sorted((sorted(g) for g in groups.values()), key=lambda g: g[0])

Group blocks so that every shard is written by a single worker.

For a sharded zarr v3 array the atomic write unit is the shard, not the inner chunk: two blocks writing different inner chunks of the same shard concurrently corrupt it. To keep the block shape flexible (rather than forcing it to a shard multiple) the runners route each group to one worker, which processes its blocks sequentially — so same-shard writes never race. This computes those groups: blocks that share any shard (for any sharded output) are placed in the same group via a union-find over the block ids.

The shard grid is anchored at coordinate 0 and considered along the trailing (spatial) shard axes only; a leading channel axis on an output is fully written by every block and is not a routing axis (mirrors the chunk handling in Runner._validate_write_safety()).

Args:
- blocking: The blocking used to map a block id to its (non-halo) write region.
- outputs: The output sources; only those with a shards shape drive the grouping.
- block_ids: The block ids to group.

Returns: A list of groups (each a sorted list of block ids), ordered by each group's smallest id; None if no output is sharded (the caller should then use the default one-block-per-unit path); an empty list if block_ids is empty.

def maybe_warn_imbalance(loads: Sequence[int], num_workers: int, n_groups: int, name: str) -> None: View Source

294def maybe_warn_imbalance(loads: Sequence[int], num_workers: int, n_groups: int,
295                         name: str) -> None:
296    """Warn when shard-exclusive routing leaves workers idle or badly load-imbalanced.
297
298    Args:
299        loads: The per-worker (or per-task) block counts of the assignment.
300        num_workers: The requested number of workers.
301        n_groups: The number of shard groups (schedulable units) the blocks formed.
302        name: A short run name used in the warning message.
303    """
304    if not loads:
305        return
306    if n_groups < int(num_workers):
307        warnings.warn(
308            f"Shard routing for '{name or 'run'}' produced only {n_groups} shard-group(s) for "
309            f"{num_workers} workers, so {int(num_workers) - n_groups} worker(s) will be idle. "
310            "A few shards span the data; use a smaller shard shape or fewer workers to balance. "
311            "Results are still correct.",
312            stacklevel=2,
313        )
314        return
315    mx, mn = max(loads), min(loads)
316    mean = sum(loads) / len(loads)
317    if mx > mn and mx > 1.5 * mean:
318        warnings.warn(
319            f"Uneven worker load for '{name or 'run'}': block counts per worker range {mn}..{mx} "
320            f"(mean {mean:.1f}). Some shards span disproportionately many blocks; results are "
321            "still correct but parallelism is reduced.",
322            stacklevel=2,
323        )

Warn when shard-exclusive routing leaves workers idle or badly load-imbalanced.

Args:
- loads: The per-worker (or per-task) block counts of the assignment.
- num_workers: The requested number of workers.
- n_groups: The number of shard groups (schedulable units) the blocks formed.
- name: A short run name used in the warning message.