bioimage_py.segmentation

Segmentation: connected-component labeling and related operations.

View Source

 1"""Segmentation: connected-component labeling and related operations."""
 2from .label import label
 3from .multicut import (compute_edge_costs, multicut_decomposition, multicut_gaec,
 4                       multicut_kernighan_lin, transform_probabilities_to_costs)
 5from .relabel import relabel, relabel_consecutive
 6from .size_filter import segmentation_filter, size_filter
 7from .stitching import stitch_segmentation, stitch_tiled_segmentation
 8from .watershed import watershed
 9
10__all__ = [
11    "label",
12    "watershed",
13    "relabel",
14    "relabel_consecutive",
15    "segmentation_filter",
16    "size_filter",
17    "stitch_segmentation",
18    "stitch_tiled_segmentation",
19    "compute_edge_costs",
20    "transform_probabilities_to_costs",
21    "multicut_decomposition",
22    "multicut_gaec",
23    "multicut_kernighan_lin",
24]

def label( input: 'SourceLike', output: 'Optional[SourceLike]' = None, *, threshold: Optional[float] = None, connectivity: Optional[int] = None, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, num_workers: int = 1, mask: 'Optional[SourceLike]' = None) -> 'SourceLike': View Source

def label(
    input: SourceLike,
    output: Optional[SourceLike] = None,
    *,
    threshold: Optional[float] = None,
    connectivity: Optional[int] = None,
    block_shape: Optional[Tuple[int, ...]] = None,
    job_type: str = "local",
    job_config: Optional[RunnerConfig] = None,
    num_workers: int = 1,
    mask: Optional[SourceLike] = None,
) -> SourceLike:
    """Compute a block-wise connected-component labeling of (optionally thresholded) data.

    The operation is multi-stage: every block is labeled on its own, label pairs that touch
    across block faces are collected, the pairs are merged with a union-find, and the result is
    relabeled to consecutive ids. Because of this global cross-block merge, ``block_ids`` and
    ``resume_from`` are **not** supported: a failed run has to be repeated as a whole (it is
    idempotent given the same ``output``).

    Args:
        input: The input data (a numpy/zarr/n5 array or a `Source`).
        output: The ``uint64`` array the labels are written into. May be omitted for local
            execution, in which case a numpy array is allocated and returned; it is
            **required** for distributed execution.
        threshold: When given, the foreground is ``input > threshold``; when omitted the input
            itself is interpreted as a binary foreground mask.
        connectivity: Neighbour connectivity in ``[1, ndim]`` (``1`` = orthogonal). ``None``
            means ``1``. Values ``> 1`` are only available on the direct (single-block) path.
        block_shape: Shape of the processing blocks. Falls back to the input/output chunk
            shape; required for unchunked data.
        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
        num_workers: Number of parallel workers (threads for ``local``, tasks for distributed
            backends).
        mask: Optional binary mask; values outside the mask are excluded from the foreground.
            Passing a mask disables the direct path.

    Returns:
        The output array (the given ``output``, or the newly allocated numpy array), labeled
        with consecutive ids; background stays ``0``.

    Raises:
        ValueError: If ``connectivity`` is outside ``[1, ndim]``, if ``output`` is omitted for
            a non-local ``job_type``, if the output dtype is not ``uint64``, or if
            ``number_of_blocks * prod(block_shape)`` does not fit into ``uint64``.
        NotImplementedError: If ``connectivity > 1`` is requested on the block-wise path.
    """
    source = as_source(input)
    n_dims = source.ndim
    neighbourhood = int(connectivity) if connectivity is not None else 1
    if neighbourhood < 1 or neighbourhood > n_dims:
        raise ValueError(f"connectivity must be in [1, {n_dims}], got {neighbourhood}.")

    # The whole-array shortcut is only taken without a mask.
    use_direct = is_direct(job_type, num_workers, block_shape) and mask is None
    if neighbourhood > 1 and not use_direct:
        raise NotImplementedError(
            "Block-wise labeling only supports connectivity=1 (orthogonal). Use the direct "
            "path (local, single worker, no block_shape, no mask) for higher connectivity."
        )

    # Resolve the output: use what was passed, allocate for local runs, refuse otherwise.
    if output is not None:
        out_array: SourceLike = output
    elif job_type == "local":
        out_array = np.zeros(tuple(source.shape), dtype="uint64")
    else:
        raise ValueError(
            f"'output' is required for distributed execution (job_type={job_type!r}); "
            "pass a file-backed (zarr/n5) output array."
        )

    target = as_source(out_array)
    if target.dtype != np.dtype("uint64"):
        raise ValueError(f"output must have dtype uint64, got {target.dtype}.")

    if use_direct:
        # Single block: binarize and label the full array in one call.
        foreground = _binarize(source[full_roi(n_dims)], threshold)
        components = bic.segmentation.label(foreground, connectivity=neighbourhood)
        target[full_roi(n_dims)] = components.astype("uint64", copy=False)
        return out_array

    block_shape = _resolve_block_shape(source, target, block_shape)
    voxels_per_block = int(np.prod(block_shape))
    blocking = get_blocking(source.shape, block_shape)
    block_count = int(blocking.number_of_blocks)
    # Stage 1 offsets the ids of each block; make sure that offset space fits into uint64.
    uint64_limit = int(np.iinfo(np.uint64).max)
    if block_count * voxels_per_block >= uint64_limit:
        raise ValueError(
            "Label id overflow: number_of_blocks * prod(block_shape) exceeds uint64. "
            "Reduce the block shape or the volume size."
        )

    runner = get_runner(job_type, job_config)

    # Stage 1: every block is labeled independently, with a globally-unique id offset.
    label_block = _make_stage1(
        tuple(source.shape), block_shape, neighbourhood, threshold, voxels_per_block
    )
    per_block_ids = runner.run(
        label_block,
        [input],
        outputs=[out_array],
        block_shape=block_shape,
        mask=mask,
        num_workers=num_workers,
        has_return_val=True,
        name="label-blocks",
    )
    non_empty_ids = [ids for ids in per_block_ids if ids is not None and len(ids)]
    if non_empty_ids:
        real_labels = np.unique(np.concatenate(non_empty_ids))
    else:
        real_labels = np.zeros((0,), dtype="uint64")

    # Stage 2: gather the label equivalences found across the lower block faces.
    collect_faces = _make_stage2(tuple(source.shape), block_shape)
    per_block_pairs = runner.run(
        collect_faces,
        [out_array],
        block_shape=block_shape,
        num_workers=num_workers,
        has_return_val=True,
        name="merge-faces",
    )
    found_pairs = [pairs for pairs in per_block_pairs if pairs is not None]
    if found_pairs:
        assignments = np.unique(np.concatenate(found_pairs, axis=0), axis=0)
    else:
        assignments = np.zeros((0, 2), dtype="uint64")

    # Stage 3 (in process): merge with a union-find and derive consecutive ids.
    # The stage-1 id space is sparse, so the union-find works on a dense [0..K) index of the
    # labels that actually exist (K = number of components) instead of the maximal offset id.
    # Stage 2 only reads what stage 1 wrote, so every id in `assignments` has a dense index.
    mapping: Dict[int, int] = {0: 0}
    if real_labels.size:
        component_count = int(real_labels.size)
        existing_ids = real_labels.tolist()
        dense_index = {int(lab): position for position, lab in enumerate(existing_ids)}
        union_find = bic.utils.UnionFind(component_count)
        if len(assignments):
            left = bic.utils.take_dict(
                dense_index, np.ascontiguousarray(assignments[:, 0].astype("uint64"))
            )
            right = bic.utils.take_dict(
                dense_index, np.ascontiguousarray(assignments[:, 1].astype("uint64"))
            )
            union_find.merge(np.stack([left, right], axis=1).astype("uint64"))
        roots = np.asarray(union_find.find(np.arange(component_count, dtype="uint64")))
        # The inverse index of the unique roots is the 0-based rank of each component.
        ranks = np.unique(roots, return_inverse=True)[1]
        # Shift by one so that 0 stays reserved for the background.
        mapping.update(
            (int(lab), int(rank) + 1) for lab, rank in zip(existing_ids, ranks.tolist())
        )

    # Stage 4: apply the mapping in place through the canonical node-label writer (relabel).
    out_array = relabel(
        out_array,
        mapping,
        output=out_array,
        block_shape=block_shape,
        job_type=job_type,
        job_config=job_config,
        num_workers=num_workers,
    )
    return out_array

Label connected components of (optionally thresholded) data, block-wise.

Unlike the single-pass operations, label is multi-stage with a global cross-block merge (per-block labeling, then a union-find over touching components across block faces), so it does not accept block_ids or resume_from: a failed run must be re-run whole (it is idempotent given the same output).

Args:

- input: The input data (a numpy/zarr/n5 array or a Source).
- output: The uint64 output array to write the labels into. Optional for local execution — a numpy array is allocated and returned if omitted; required for distributed execution.
- threshold: If given, the input is binarized as input > threshold; otherwise the input is treated as a binary foreground mask.
- connectivity: Neighbour connectivity in [1, ndim] (1 = orthogonal). Defaults to 1; values > 1 are only supported for the direct (single-block) path.
- block_shape: Shape of the processing blocks. Defaults to the input/output chunk shape; required for unchunked data.
- job_type: Execution backend: one of "local", "subprocess" or "slurm".
- job_config: Backend configuration (a RunnerConfig / SlurmConfig).
- num_workers: Number of parallel workers (threads for local, tasks for distributed backends).
- mask: Optional binary mask; values outside the mask are excluded from the foreground.

Returns: The output array (the provided output, or a newly allocated numpy array), labeled with consecutive ids (background stays 0).

def watershed( input: 'SourceLike', seeds: 'SourceLike', output: 'Optional[SourceLike]' = None, *, halo: Optional[Sequence[int]] = None, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, num_workers: int = 1, mask: 'Optional[SourceLike]' = None, block_ids: Optional[Sequence[int]] = None, resume_from: Optional[str] = None) -> 'SourceLike': View Source

 67def watershed(
 68    input: SourceLike,
 69    seeds: SourceLike,
 70    output: Optional[SourceLike] = None,
 71    *,
 72    halo: Optional[Sequence[int]] = None,
 73    block_shape: Optional[Tuple[int, ...]] = None,
 74    job_type: str = "local",
 75    job_config: Optional[RunnerConfig] = None,
 76    num_workers: int = 1,
 77    mask: Optional[SourceLike] = None,
 78    block_ids: Optional[Sequence[int]] = None,
 79    resume_from: Optional[str] = None,
 80) -> SourceLike:
 81    """Compute a seeded watershed over a height map, block-wise.
 82
 83    Each block runs a seeded watershed (``bioimage_cpp.segmentation.watershed``) on a halo-padded
 84    region and writes back the halo-free inner block. The ``seeds`` define the segments and their
 85    ids are preserved verbatim -- there is no cross-block merge, so pass globally consistent seeds
 86    (e.g. a connected-component labeling of a seed mask) for a coherent result.
 87
 88    Being single-stage, this operation supports ``block_ids`` / ``resume_from`` re-runs. The
 89    block-wise output matches a whole-array watershed only where ``halo`` covers the relevant
 90    catchment basins, but is bit-identical across backends for a fixed ``(block_shape, halo)``.
 91
 92    Args:
 93        input: The height map (a numpy/zarr/n5 array or a `Source`). It is cast to ``float32`` for
 94            the watershed if it is not already a float type.
 95        seeds: The pre-computed integer seed markers (``0`` = background), same shape as ``input``.
 96            A non-integer seed map is cast to ``uint32``.
 97        output: The integer output array to write the segmentation into. Optional for local
 98            execution -- a ``uint64`` numpy array is allocated and returned if omitted; **required**
 99            for distributed execution. It must be wide enough to hold the seed ids (``uint64``
100            recommended; a ``uint32`` watershed result writes losslessly into a ``uint64`` output).
101        halo: Per-axis halo enlarging each block; **required** for the block-wise path (there is no
102            principled default for a watershed). Choose it large enough to cover object extents at
103            block boundaries. Ignored by the direct (single-block) path.
104        block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required
105            for unchunked data.
106        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
107        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
108        num_workers: Number of parallel workers (threads for ``local``, tasks for distributed
109            backends).
110        mask: Optional binary mask; voxels outside the mask are excluded and stay ``0``.
111        block_ids: Restrict processing to these block ids (e.g. to re-run previously failed blocks
112            into the existing ``output``). Mutually exclusive with ``resume_from``.
113        resume_from: Distributed only; the preserved temp folder of a failed run to resume (see
114            ``runner.run``); the missing blocks are written into ``output``. Mutually exclusive
115            with ``block_ids``.
116
117    Returns:
118        The output array (the provided ``output``, or a newly allocated ``uint64`` numpy array).
119    """
120    check_rerun_args(job_type, resume_from, block_ids)
121
122    src = as_source(input)
123    seeds_src = as_source(seeds)
124    ndim = src.ndim
125    if tuple(seeds_src.shape) != tuple(src.shape):
126        raise ValueError(
127            f"seeds shape {seeds_src.shape} does not match input shape {src.shape}."
128        )
129
130    # A subset/resume rerun is inherently block-wise, so it cannot use the direct (whole-array) path.
131    direct = (is_direct(job_type, num_workers, block_shape)
132              and block_ids is None and resume_from is None)
133
134    if output is None:
135        if job_type != "local":
136            raise ValueError(
137                f"'output' is required for distributed execution (job_type={job_type!r}); "
138                "pass a file-backed (zarr/n5) output array."
139            )
140        out_array: SourceLike = np.zeros(tuple(src.shape), dtype="uint64")
141    else:
142        out_array = output
143
144    out = as_source(out_array)
145    if not np.issubdtype(out.dtype, np.integer):
146        raise ValueError(f"output must have an integer dtype, got {out.dtype}.")
147
148    if direct:
149        block_hmap = _as_hmap(src[full_roi(ndim)])
150        block_seeds = _as_seeds(seeds_src[full_roi(ndim)])
151        block_mask = as_source(mask)[full_roi(ndim)].astype(bool) if mask is not None else None
152        out[full_roi(ndim)] = bic.segmentation.watershed(block_hmap, block_seeds, mask=block_mask)
153        return out_array
154
155    if halo is None:
156        raise ValueError(
157            "halo is required for block-wise watershed; choose one large enough to cover object "
158            "extents at block boundaries."
159        )
160
161    runner = get_runner(job_type, job_config)
162    runner.run(_watershed_block, [input, seeds], outputs=[out_array], halo=halo,
163               block_shape=block_shape, mask=mask, num_workers=num_workers,
164               block_ids=block_ids, resume_from=resume_from, name="watershed")
165    return out_array

Compute a seeded watershed over a height map, block-wise.

Each block runs a seeded watershed (bioimage_cpp.segmentation.watershed) on a halo-padded region and writes back the halo-free inner block. The seeds define the segments and their ids are preserved verbatim -- there is no cross-block merge, so pass globally consistent seeds (e.g. a connected-component labeling of a seed mask) for a coherent result.

Being single-stage, this operation supports block_ids / resume_from re-runs. The block-wise output matches a whole-array watershed only where halo covers the relevant catchment basins, but is bit-identical across backends for a fixed (block_shape, halo).

Args:

- input: The height map (a numpy/zarr/n5 array or a Source). It is cast to float32 for the watershed if it is not already a float type.
- seeds: The pre-computed integer seed markers (0 = background), same shape as input. A non-integer seed map is cast to uint32.
- output: The integer output array to write the segmentation into. Optional for local execution -- a uint64 numpy array is allocated and returned if omitted; required for distributed execution. It must be wide enough to hold the seed ids (uint64 recommended; a uint32 watershed result writes losslessly into a uint64 output).
- halo: Per-axis halo enlarging each block; required for the block-wise path (there is no principled default for a watershed). Choose it large enough to cover object extents at block boundaries. Ignored by the direct (single-block) path.
- block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required for unchunked data.
- job_type: Execution backend: one of "local", "subprocess" or "slurm".
- job_config: Backend configuration (a RunnerConfig / SlurmConfig).
- num_workers: Number of parallel workers (threads for local, tasks for distributed backends).
- mask: Optional binary mask; voxels outside the mask are excluded and stay 0.
- block_ids: Restrict processing to these block ids (e.g. to re-run previously failed blocks into the existing output). Mutually exclusive with resume_from.
- resume_from: Distributed only; the preserved temp folder of a failed run to resume (see runner.run); the missing blocks are written into output. Mutually exclusive with block_ids.

Returns: The output array (the provided output, or a newly allocated uint64 numpy array).

def relabel( input: 'SourceLike', labeling: 'Union[SourceLike, Mapping[int, int]]', output: 'Optional[SourceLike]' = None, *, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, num_workers: int = 1, mask: 'Optional[SourceLike]' = None, block_ids: Optional[Sequence[int]] = None, resume_from: Optional[str] = None) -> 'SourceLike': View Source

def relabel(
    input: SourceLike,
    labeling: Union[SourceLike, Mapping[int, int]],
    output: Optional[SourceLike] = None,
    *,
    block_shape: Optional[Tuple[int, ...]] = None,
    job_type: str = "local",
    job_config: Optional[RunnerConfig] = None,
    num_workers: int = 1,
    mask: Optional[SourceLike] = None,
    block_ids: Optional[Sequence[int]] = None,
    resume_from: Optional[str] = None,
) -> SourceLike:
    """Map the ids of a segmentation through a labeling, block-wise.

    Every block of ``input`` is read, its ids are looked up in ``labeling`` and the result is
    written to ``output`` (or back into ``input`` if no ``output`` is given). As a single-stage,
    disjoint per-block point operation it can run in place and supports ``block_ids`` /
    ``resume_from`` re-runs.

    Args:
        input: The input segmentation (a numpy/zarr/n5 array or a `Source`); must be
            integer-typed.
        labeling: The relabeling to apply, in one of two forms. A mapping ``{old_id: new_id}``
            is applied with ``bioimage_cpp.utils.take_dict`` and must contain every id of the
            (masked) input as a key. A dense 1D array/source, where ``labeling[old_id]`` is the
            new id, is applied with ``numpy.take`` and must cover the id range ``0 .. max_id``.
        output: The array the relabeled segmentation is written into. When omitted the
            relabeling happens **in place** on ``input`` (which must then be writable, and
            file-backed for distributed execution). The exception is a plain in-memory numpy
            input (local-only), which is never mutated: a fresh array is allocated and returned
            instead. A given ``output`` must match the input shape; ids are cast to its dtype
            on write.
        block_shape: Shape of the processing blocks. Falls back to the input chunk shape;
            required for unchunked data.
        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
        num_workers: Number of parallel workers (threads for ``local``, tasks for distributed
            backends).
        mask: Optional binary mask; only voxels inside the mask are relabeled (output voxels
            outside the mask are left unchanged).
        block_ids: Process only these block ids (e.g. to re-run previously failed blocks into
            the existing ``output``). Mutually exclusive with ``resume_from``.
        resume_from: Distributed only; the preserved temp folder of a failed run to resume (see
            ``runner.run``). The missing blocks are relabeled with the labeling of the original
            run. Mutually exclusive with ``block_ids``. (A numpy labeling array persisted by
            the failed run is preserved with that temp folder; after a successful resume it is
            best-effort left behind since the labeling is small.)

    Returns:
        The output array: the given ``output``, else ``input`` itself when a file-backed source
        is relabeled in place, or a freshly allocated array for an in-memory numpy input.

    Raises:
        ValueError: If a dense ``labeling`` is not one-dimensional.
    """
    check_rerun_args(job_type, resume_from, block_ids)
    src = as_source(input)
    _require_integer(src, "relabel expects an integer label image")
    n_dims = src.ndim

    # Resolve the output. In place is the default, which is safe for a disjoint per-block point
    # op without halo. A plain in-memory numpy input is local-only (distributed rejects it) and
    # mutating a passed-in array silently would be surprising, so it is copied instead.
    if output is None:
        copy_numpy_input = job_type == "local" and _is_inmemory_numpy(src)
        out_array: SourceLike = np.array(src.array) if copy_numpy_input else input
    else:
        out_array = output
    target = as_source(out_array)

    runner = get_runner(job_type, job_config)

    # A resume short-circuits inside run() to the payload of the preserved run (its own sources
    # and labeling); function and inputs given here are placeholders that run() does not use.
    if resume_from is not None:
        runner.run(
            _resume_placeholder, [src], outputs=[target], resume_from=resume_from, name="relabel"
        )
        return out_array

    use_direct = (
        is_direct(job_type, num_workers, block_shape) and mask is None and block_ids is None
    )
    # Keyword arguments shared by both block-wise runs below.
    run_options = dict(
        block_shape=block_shape,
        mask=mask,
        num_workers=num_workers,
        block_ids=block_ids,
        name="relabel",
    )

    # Mapping mode: the ids are looked up with take_dict.
    if isinstance(labeling, Mapping):
        lookup = dict(labeling)
        if use_direct:
            target[full_roi(n_dims)] = bic.utils.take_dict(lookup, src[full_roi(n_dims)])
        else:
            runner.run(_make_relabel_block(lookup), [src], outputs=[target], **run_options)
        return out_array

    # Dense mode: a 1D labeling array/source, applied with numpy.take.
    dense_src = as_source(labeling)
    if dense_src.ndim != 1:
        raise ValueError(f"Dense labeling must be a 1D array; got shape {dense_src.shape}.")
    _require_integer(dense_src, "labeling must be integer-typed")

    if use_direct:
        table = np.asarray(dense_src[full_roi(1)])
        target[full_roi(n_dims)] = np.take(table, src[full_roi(n_dims)])
        return out_array

    # The labeling travels in the (cloudpickled) closure: as the array itself for local
    # execution, or as a reopen spec for distributed workers.
    labeling_array: Optional[np.ndarray] = None
    spec: Optional[SourceSpec] = None
    pre_cleanup: Optional[Callable[[str], None]] = None
    if job_type != "local":
        try:
            spec = dense_src.to_spec()
        except ValueError:
            # An in-memory numpy array: persist it so that worker tasks can reopen it.
            spec, tmp_dir = _persist_labeling(
                np.asarray(dense_src[full_roi(1)]), runner.config.tmp_root
            )
            pre_cleanup = _make_cleanup(tmp_dir)
    else:
        labeling_array = np.asarray(dense_src[full_roi(1)])

    runner.run(
        _make_take_array_block(labeling_array, spec),
        [src],
        outputs=[target],
        pre_cleanup=pre_cleanup,
        **run_options,
    )
    return out_array

Apply a labeling (relabeling map) to a segmentation, block-wise.

Each block of input is read, its ids are mapped through labeling, and the result is written to output (or back to input when output is omitted). This is a single-stage, disjoint per-block point operation, so it may be applied in place and supports block_ids / resume_from re-runs.

Args:

- input: The input segmentation (a numpy/zarr/n5 array or a Source); must be integer-typed.
- labeling: The relabeling to apply. Either a dict {old_id: new_id} (applied with bioimage_cpp.utils.take_dict; every id present in input must be a key) or a dense 1D array/source where labeling[old_id] is the new id (applied with numpy.take; it must be long enough to index every id present in input). A dict must map every id of the (masked) input; a dense array must cover the id range 0 .. max_id.
- output: The output array to write the relabeled segmentation into. Optional -- when omitted the relabeling is applied in place to input (which must then be writable, and file-backed for distributed execution). As an exception, a plain in-memory numpy input (which is local-only) is never mutated: a fresh array is allocated and returned. When given, output must match the input shape; ids are cast to its dtype on write.
- block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required for unchunked data.
- job_type: Execution backend: one of "local", "subprocess" or "slurm".
- job_config: Backend configuration (a RunnerConfig / SlurmConfig).
- num_workers: Number of parallel workers (threads for local, tasks for distributed backends).
- mask: Optional binary mask; only voxels within the mask are relabeled (out-of-mask output voxels are left unchanged).
- block_ids: Restrict processing to these block ids (e.g. to re-run previously failed blocks into the existing output). Mutually exclusive with resume_from.
- resume_from: Distributed only; the preserved temp folder of a failed run to resume (see runner.run); the missing blocks are relabeled using the original run's labeling. Mutually exclusive with block_ids. (A numpy labeling array persisted by the failed run is preserved with that temp folder; after a successful resume it is best-effort left behind since the labeling is small.)

Returns: The output array: the provided output if given, else input itself when relabeling a file-backed source in place, or a freshly allocated array for an in-memory numpy input.

def relabel_consecutive( input: 'SourceLike', output: 'Optional[SourceLike]' = None, *, start_label: int = 0, keep_zeros: bool = True, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, num_workers: int = 1, mask: 'Optional[SourceLike]' = None) -> 'Tuple[SourceLike, int, Dict[int, int]]': View Source

def relabel_consecutive(
    input: SourceLike,
    output: Optional[SourceLike] = None,
    *,
    start_label: int = 0,
    keep_zeros: bool = True,
    block_shape: Optional[Tuple[int, ...]] = None,
    job_type: str = "local",
    job_config: Optional[RunnerConfig] = None,
    num_workers: int = 1,
    mask: Optional[SourceLike] = None,
) -> Tuple[SourceLike, int, Dict[int, int]]:
    """Map the ids of a segmentation onto a consecutive range, block-wise.

    The operation is multi-stage: the ``{old_id: new_id}`` mapping is derived from a global
    ``unique`` reduction and the block-wise write is then handed to :func:`relabel`. Due to the
    reduction, ``block_ids`` and ``resume_from`` are **not** supported; a failed run is repeated
    as a whole (it is idempotent given the same ``output``).

    Args:
        input: The input label image (a numpy/zarr/n5 array or a `Source`); must be
            integer-typed.
        output: The array the relabeled segmentation is written into. When omitted the
            relabeling happens **in place** on ``input`` (which must then be writable, and
            file-backed for distributed execution); a plain in-memory numpy input (local-only)
            is never mutated -- a fresh array is allocated and returned. A given ``output``
            must match the input shape.
        start_label: The new id of the smallest unique id; the following unique ids receive
            the subsequent values.
        keep_zeros: If true and ``0`` is among the unique ids, ``0`` is mapped to ``0``
            (background) regardless of ``start_label``. The ids of the other labels are not
            affected by this override; if ``0`` is not among the unique ids the flag has no
            effect.
        block_shape: Shape of the processing blocks. Falls back to the input chunk shape;
            required for unchunked data.
        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
        num_workers: Number of parallel workers (threads for ``local``, tasks for distributed
            backends).
        mask: Optional binary mask; values outside the mask do not take part in the computation
            and their output voxels are left unchanged.

    Returns:
        A ``(output, max_id, mapping)`` tuple: the relabeled output array (``input`` itself when
        a file-backed source is relabeled in place, or a freshly allocated array for a numpy
        input), the largest id of the mapping (``0`` for an empty mapping), and the
        ``{old_id: new_id}`` mapping that was applied.
    """
    src = as_source(input)
    _require_integer(src, "relabel_consecutive expects an integer label image")
    n_dims = src.ndim

    # Pass 1: collect the global set of unique ids, directly or via the block-wise reduction.
    if is_direct(job_type, num_workers, block_shape) and mask is None:
        uniques = np.unique(src[full_roi(n_dims)])
    else:
        uniques = unique(
            input,
            block_shape=block_shape,
            job_type=job_type,
            job_config=job_config,
            num_workers=num_workers,
            mask=mask,
        )

    # In process: pair every old id with the next consecutive id, counting from start_label.
    old_ids = uniques.tolist()
    new_ids = range(start_label, start_label + len(old_ids))
    mapping: Dict[int, int] = {int(old_id): new_id for old_id, new_id in zip(old_ids, new_ids)}
    if keep_zeros and 0 in mapping:
        mapping[0] = 0

    if mapping:
        max_id = max(mapping.values())
    else:
        max_id = 0

    # Pass 2: write the result through relabel's mapping path (in place without an output).
    relabeled = relabel(
        input,
        mapping,
        output,
        block_shape=block_shape,
        job_type=job_type,
        job_config=job_config,
        num_workers=num_workers,
        mask=mask,
    )
    return relabeled, max_id, mapping

Relabel a segmentation to consecutive ids, block-wise.

This is multi-stage: a global unique reduction derives the {old_id: new_id} mapping, then the block-wise write is delegated to relabel(). Because of the reduction it does not accept block_ids or resume_from: a failed run is re-run whole (it is idempotent given the same output).

Args:

- input: The input label image (a numpy/zarr/n5 array or a Source); must be integer-typed.
- output: The output array to write the relabeled segmentation into. Optional -- when omitted the relabeling is applied in place to input (which must then be writable, and file-backed for distributed execution); a plain in-memory numpy input (local-only) is never mutated -- a fresh array is allocated and returned. When given, it must match the input shape.
- start_label: The value the smallest unique id is mapped to (subsequent ids follow consecutively).
- keep_zeros: Whether to always keep 0 mapped to 0 (background), regardless of start_label.
- block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required for unchunked data.
- job_type: Execution backend: one of "local", "subprocess" or "slurm".
- job_config: Backend configuration (a RunnerConfig / SlurmConfig).
- num_workers: Number of parallel workers (threads for local, tasks for distributed backends).
- mask: Optional binary mask; values outside the mask are excluded from the computation and their output voxels are left unchanged.

Returns: A (output, max_id, mapping) tuple: the relabeled output array (input itself when relabeling a file-backed source in place, or a freshly allocated array for a numpy input), the maximum label id after relabeling, and the {old_id: new_id} mapping that was applied.

def segmentation_filter( input: 'SourceLike', filter_function: Callable[[numpy.ndarray, Optional[numpy.ndarray]], numpy.ndarray], output: 'Optional[SourceLike]' = None, *, relabel: Optional[Callable[[numpy.ndarray, Optional[numpy.ndarray]], numpy.ndarray]] = None, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, num_workers: int = 1, mask: 'Optional[SourceLike]' = None, block_ids: Optional[Sequence[int]] = None, resume_from: Optional[str] = None) -> 'SourceLike': View Source

 57def segmentation_filter(
 58    input: SourceLike,
 59    filter_function: BlockFn,
 60    output: Optional[SourceLike] = None,
 61    *,
 62    relabel: Optional[BlockFn] = None,
 63    block_shape: Optional[Tuple[int, ...]] = None,
 64    job_type: str = "local",
 65    job_config: Optional[RunnerConfig] = None,
 66    num_workers: int = 1,
 67    mask: Optional[SourceLike] = None,
 68    block_ids: Optional[Sequence[int]] = None,
 69    resume_from: Optional[str] = None,
 70) -> SourceLike:
 71    """Filter a segmentation with a custom per-block criterion, block-wise.
 72
 73    Args:
 74        input: The input segmentation (a numpy/zarr/n5 array or a `Source`).
 75        filter_function: A picklable callable ``filter_function(block_seg, block_mask)`` returning the
 76            filtered block. ``block_mask`` is the block's boolean in-mask array, or ``None`` when no
 77            mask is used; when a mask is used, restrict the criterion to the in-mask voxels.
 78        output: The output array to write into. Optional for local execution -- a numpy array
 79            matching the input shape and dtype is allocated and returned if omitted; **required** for
 80            distributed execution (a writable, file-backed zarr/n5 array).
 81        relabel: Optional picklable callable ``relabel(block_seg, block_mask)`` applied after
 82            ``filter_function`` (e.g. a consecutive relabeling); same masking contract.
 83        block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required
 84            for unchunked data.
 85        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
 86        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
 87        num_workers: Number of parallel workers (threads for ``local``, tasks for distributed
 88            backends).
 89        mask: Optional binary mask; out-of-mask output voxels are left unchanged.
 90        block_ids: Restrict processing to these block ids (e.g. to re-run previously failed blocks).
 91            Mutually exclusive with ``resume_from``.
 92        resume_from: Distributed only; the preserved temp folder of a failed run to resume (see
 93            ``runner.run``). Mutually exclusive with ``block_ids``.
 94
 95    Returns:
 96        The output array (the provided ``output``, or a newly allocated numpy array).
 97    """
 98    check_rerun_args(job_type, resume_from, block_ids)
 99    src = as_source(input)
100    ndim = src.ndim
101    direct = (is_direct(job_type, num_workers, block_shape) and mask is None
102              and block_ids is None and resume_from is None)
103
104    if output is None:
105        if job_type != "local":
106            raise ValueError(
107                f"'output' is required for distributed execution (job_type={job_type!r}); "
108                "pass a file-backed (zarr/n5) output array."
109            )
110        out_array: SourceLike = np.zeros(tuple(src.shape), dtype=src.dtype)
111    else:
112        out_array = output
113    out = as_source(out_array)
114    if not direct and same_array(out, src):
115        raise ValueError("Block-wise segmentation_filter needs 'output' to differ from 'input'.")
116
117    if direct:
118        filtered = filter_function(src[full_roi(ndim)], None)
119        if relabel is not None:
120            filtered = relabel(filtered, None)
121        out[full_roi(ndim)] = filtered
122        return out_array
123
124    runner = get_runner(job_type, job_config)
125    runner.run(_make_filter_block(filter_function, relabel), [input], outputs=[out_array],
126               block_shape=block_shape, mask=mask, num_workers=num_workers,
127               block_ids=block_ids, resume_from=resume_from, name="segmentation_filter")
128    return out_array

Filter a segmentation with a custom per-block criterion, block-wise.

Args: input: The input segmentation (a numpy/zarr/n5 array or a Source). filter_function: A picklable callable filter_function(block_seg, block_mask) returning the filtered block. block_mask is the block's boolean in-mask array, or None when no mask is used; when a mask is used, restrict the criterion to the in-mask voxels. output: The output array to write into. Optional for local execution -- a numpy array matching the input shape and dtype is allocated and returned if omitted; required for distributed execution (a writable, file-backed zarr/n5 array). relabel: Optional picklable callable relabel(block_seg, block_mask) applied after filter_function (e.g. a consecutive relabeling); same masking contract. block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required for unchunked data. job_type: Execution backend: one of "local", "subprocess" or "slurm". job_config: Backend configuration (a RunnerConfig / SlurmConfig). num_workers: Number of parallel workers (threads for local, tasks for distributed backends). mask: Optional binary mask; out-of-mask output voxels are left unchanged. block_ids: Restrict processing to these block ids (e.g. to re-run previously failed blocks). Mutually exclusive with resume_from. resume_from: Distributed only; the preserved temp folder of a failed run to resume (see runner.run). Mutually exclusive with block_ids.

Returns: The output array (the provided output, or a newly allocated numpy array).

def size_filter( input: 'SourceLike', output: 'Optional[SourceLike]' = None, *, min_size: Optional[int] = None, max_size: Optional[int] = None, relabel: bool = True, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, num_workers: int = 1, mask: 'Optional[SourceLike]' = None) -> 'SourceLike': View Source

def size_filter(
    input: SourceLike,
    output: Optional[SourceLike] = None,
    *,
    min_size: Optional[int] = None,
    max_size: Optional[int] = None,
    relabel: bool = True,
    block_shape: Optional[Tuple[int, ...]] = None,
    job_type: str = "local",
    job_config: Optional[RunnerConfig] = None,
    num_workers: int = 1,
    mask: Optional[SourceLike] = None,
) -> SourceLike:
    """Remove objects smaller than ``min_size`` and/or larger than ``max_size`` from a segmentation.

    Multi-stage (a global ``unique`` count reduction, then a filter pass), so it does **not** accept
    ``block_ids`` / ``resume_from``. By default it relabels the result consecutively; pass
    ``relabel=False`` to keep the original ids of the surviving objects.

    Args:
        input: The input segmentation (a numpy/zarr/n5 array or a `Source`); must be integer-typed.
        output: The output array to write into. Optional for local execution -- a numpy array
            matching the input shape and dtype is allocated and returned if omitted; **required** for
            distributed execution (a writable, file-backed zarr/n5 array).
        min_size: The minimum object size; smaller objects are removed. At least one of ``min_size`` /
            ``max_size`` is required.
        max_size: The maximum object size; larger objects are removed.
        relabel: Whether to relabel the surviving objects consecutively after filtering.
        block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required
            for unchunked data. Required when a ``mask`` is given (the size reduction is block-wise).
        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
        num_workers: Number of parallel workers (threads for ``local``, tasks for distributed
            backends).
        mask: Optional binary mask; out-of-mask output voxels are left unchanged.

    Returns:
        The output array (the provided ``output``, or a newly allocated numpy array).

    Raises:
        ValueError: If neither ``min_size`` nor ``max_size`` is given, if ``output`` is omitted for
            a non-local ``job_type``, or if the input is not integer-typed.
    """
    if min_size is None and max_size is None:
        raise ValueError("size_filter requires at least one of 'min_size' or 'max_size'.")
    # Fail fast: the filter pass (segmentation_filter) rejects a missing output for distributed
    # execution with this same error, but only after the global ``unique`` reduction has already
    # run. Reject it before any work is scheduled.
    if output is None and job_type != "local":
        raise ValueError(
            f"'output' is required for distributed execution (job_type={job_type!r}); "
            "pass a file-backed (zarr/n5) output array."
        )
    src = as_source(input)
    if not np.issubdtype(np.dtype(src.dtype), np.integer):
        raise ValueError(f"size_filter expects an integer label image, got dtype {src.dtype}.")

    # Pass 1: unique ids with their sizes.
    ids, counts = unique(input, return_counts=True, block_shape=block_shape, job_type=job_type,
                         job_config=job_config, num_workers=num_workers, mask=mask)

    # In-process: ids to discard and the consecutive relabeling of the survivors.
    discard = np.zeros(ids.shape, dtype=bool)
    if min_size is not None:
        discard |= counts < min_size
    if max_size is not None:
        discard |= counts > max_size
    filter_ids = ids[discard]

    relabel_fn: Optional[BlockFn] = None
    if relabel:
        # Reserve 0 for background and map the surviving foreground ids to 1..K consecutively, so a
        # surviving object can never collide with the (possibly newly introduced) background 0.
        remaining_fg = ids[(~discard) & (ids != 0)]
        mapping: Dict[int, int] = {int(v): i for i, v in enumerate(remaining_fg.tolist(), start=1)}
        mapping[0] = 0
        relabel_fn = _make_size_relabel(mapping)

    # Pass 2: the block-wise filter (and optional relabel) is delegated to segmentation_filter.
    return segmentation_filter(input, _make_size_filter(filter_ids), output, relabel=relabel_fn,
                               block_shape=block_shape, job_type=job_type, job_config=job_config,
                               num_workers=num_workers, mask=mask)

Remove objects smaller than min_size and/or larger than max_size from a segmentation.

Multi-stage (a global unique count reduction, then a filter pass), so it does not accept block_ids / resume_from. By default it relabels the result consecutively; pass relabel=False to keep the original ids of the surviving objects.

Args: input: The input segmentation (a numpy/zarr/n5 array or a Source); must be integer-typed. output: The output array to write into. Optional for local execution -- a numpy array matching the input shape and dtype is allocated and returned if omitted; required for distributed execution (a writable, file-backed zarr/n5 array). min_size: The minimum object size; smaller objects are removed. At least one of min_size / max_size is required. max_size: The maximum object size; larger objects are removed. relabel: Whether to relabel the surviving objects consecutively after filtering. block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required for unchunked data. Required when a mask is given (the size reduction is block-wise). job_type: Execution backend: one of "local", "subprocess" or "slurm". job_config: Backend configuration (a RunnerConfig / SlurmConfig). num_workers: Number of parallel workers (threads for local, tasks for distributed backends). mask: Optional binary mask; out-of-mask output voxels are left unchanged.

Returns: The output array (the provided output, or a newly allocated numpy array).

def stitch_segmentation( input: 'SourceLike', segmentation_function: Callable, tile_shape: Tuple[int, ...], tile_overlap: Tuple[int, ...], output: 'Optional[SourceLike]' = None, *, beta: float = 0.5, shape: Optional[Tuple[int, ...]] = None, with_background: bool = True, num_workers: int = 1, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, return_before_stitching: bool = False) -> 'Union[SourceLike, Tuple[SourceLike, np.ndarray]]': View Source

def stitch_segmentation(
    input: SourceLike,
    segmentation_function: Callable,
    tile_shape: Tuple[int, ...],
    tile_overlap: Tuple[int, ...],
    output: Optional[SourceLike] = None,
    *,
    beta: float = 0.5,
    shape: Optional[Tuple[int, ...]] = None,
    with_background: bool = True,
    num_workers: int = 1,
    job_type: str = "local",
    job_config: Optional[RunnerConfig] = None,
    return_before_stitching: bool = False,
) -> Union[SourceLike, Tuple[SourceLike, np.ndarray]]:
    """Run a segmentation function tile-wise and stitch the results based on overlap.

    Each tile is read with a halo (``tile_shape + 2 * tile_overlap``) and segmented independently;
    the halo region is where the overlap with the neighbouring tile's segmentation of the same
    pixels is measured. Objects that overlap strongly there are merged via a multicut over the
    region adjacency graph of the per-tile labeling.

    Args:
        input: The input data (a numpy/zarr/n5 array or a `Source`). If it has channels they must be
            the last (trailing) axes, and `shape` must give the spatial shape.
        segmentation_function: The per-tile segmentation function with signature
            ``f(tile_input, tile_id) -> labels``. It receives the haloed tile input and the tile id
            (passed in case the segmentation depends on the tile; ignore it otherwise) and returns a
            label image of the tile's (haloed) spatial shape. It is cloudpickled for distributed
            execution, so it must be picklable.
        tile_shape: The shape of the individual tiles; one entry per spatial axis.
        tile_overlap: The halo added on each side of a tile; the input to the segmentation function
            has size ``tile_shape + 2 * tile_overlap``, and the overlap is measured in the halo.
            One entry per spatial axis.
        output: The ``uint64`` output array. Optional for local execution — a numpy array is
            allocated and returned if omitted; **required** (file-backed) for distributed execution.
        beta: The boundary bias of the multicut; ``> 0.5`` biases towards over-segmentation,
            ``< 0.5`` towards under-segmentation. Must be in the exclusive range ``(0, 1)``.
        shape: The spatial shape of the segmentation. Defaults to the input shape; must be passed if
            the input has trailing channel axes.
        with_background: Whether the problem has a background label (hard-coded ``0``) that must not
            be merged with foreground objects.
        num_workers: Number of parallel workers (threads for ``local``, tasks for distributed
            backends) for the tile-segmentation and overlap-counting phases.
        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`). For distributed
            backends a temporary per-tile store is created under ``job_config.tmp_root``.
        return_before_stitching: Also return the (relabeled) pre-stitch segmentation, for debugging.

    Returns:
        The output array with the stitched segmentation, or ``(output, pre_stitch)`` if
        ``return_before_stitching`` is set (``pre_stitch`` is an in-memory numpy array).

    Raises:
        ValueError: If ``beta`` is outside ``(0, 1)``, if ``tile_shape`` / ``tile_overlap`` do not
            have one entry per spatial axis, if the output is not ``uint64``, or if the offset
            label ids would not fit into ``uint64``.
    """
    # Validate the documented beta range up front: it is only consumed by the final multicut, so
    # without this check an invalid value would surface after every tile had been segmented.
    if not 0.0 < beta < 1.0:
        raise ValueError(f"beta must be in the exclusive range (0, 1), got {beta}.")

    src = as_source(input)
    in_ndim = src.ndim
    shape = tuple(int(s) for s in (src.shape if shape is None else shape))
    ndim = len(shape)
    tile_shape = tuple(int(t) for t in tile_shape)
    tile_overlap = tuple(int(t) for t in tile_overlap)
    # zip() below would silently truncate mismatched tuples, so reject them explicitly.
    if len(tile_shape) != ndim or len(tile_overlap) != ndim:
        raise ValueError(
            f"tile_shape and tile_overlap must have one entry per spatial axis ({ndim}), "
            f"got tile_shape={tile_shape} and tile_overlap={tile_overlap}."
        )

    out_array = _prepare_output(output, shape, job_type)
    out = as_source(out_array)
    if out.dtype != np.dtype("uint64"):
        raise ValueError(f"output must have dtype uint64, got {out.dtype}.")

    blocking = get_blocking(shape, tile_shape)
    n_blocks = int(blocking.number_of_blocks)
    max_halo = tuple(ts + 2 * ov for ts, ov in zip(tile_shape, tile_overlap))
    # Exact Python-int product: np.prod accumulates in a fixed-width integer and could itself wrap
    # around, which would defeat the overflow guard below.
    offset_factor = 1
    for extent in max_halo:
        offset_factor *= extent
    if n_blocks * offset_factor >= int(np.iinfo(np.uint64).max):
        raise ValueError("Label id overflow: number_of_blocks * prod(haloed tile shape) exceeds "
                         "uint64. Reduce the tile shape or the volume size.")

    runner = get_runner(job_type, job_config)
    input_handle = _capture(src, job_type)
    store, store_cleanup, store_handle = _make_tile_store(n_blocks, max_halo, job_type, job_config)
    try:
        # Stage 1: segment each haloed tile, offset its ids, write the inner block + the store slot.
        stage1 = _make_segment(shape, tile_shape, tile_overlap, segmentation_function,
                               with_background, offset_factor, store_handle, input_handle,
                               ndim, in_ndim)
        id_results = runner.run(stage1, [], outputs=[out_array], block_shape=tile_shape,
                                halo=tile_overlap, num_workers=num_workers, has_return_val=True,
                                name="stitch-segment")
        id_arrays = [a for a in id_results if a is not None and len(a)]
        real = (np.unique(np.concatenate(id_arrays)) if id_arrays
                else np.zeros((0,), dtype="uint64"))

        # Build the dense relabeling (offset ids are sparse; this keeps the RAG node space compact).
        mapping = {0: 0}
        for i, lab in enumerate(real.tolist()):
            mapping[int(lab)] = i + 1

        # Stage 2: apply the dense relabeling to the pre-stitch output in place, through the canonical
        # node-label writer (relabel). output=out_array keeps it in place (no copy for a numpy output).
        if real.size:
            relabel(out_array, mapping, output=out_array, block_shape=tile_shape,
                    job_type=job_type, job_config=job_config, num_workers=num_workers)

        # Stage 3: count object overlaps in the halo bands, reading haloed tiles from the store.
        overlap_fn = _make_seg_overlap(shape, tile_shape, tile_overlap, store_handle, ndim)
        ov_results = runner.run(overlap_fn, [out_array], block_shape=tile_shape,
                                num_workers=num_workers, has_return_val=True, name="stitch-overlaps")
    finally:
        # The temporary tile store is released whether or not the stages succeeded.
        store_cleanup()

    uv, frac = _collect_edges(ov_results)
    uv, frac = _map_edges(uv, frac, mapping)  # overlap ids are offset ids -> map to dense.

    seg = out[full_roi(ndim)]
    stitched = _stitch_via_multicut(seg, uv, frac, with_background, beta, n_threads=None)

    # Relabel to consecutive ids (elf semantics): keep 0 as background, or renumber 0 too when there
    # is no background.
    if with_background:
        stitched, _, _ = bic.segmentation.relabel_sequential(stitched.astype("uint64"), offset=1)
    else:
        stitched, _, _ = bic.segmentation.relabel_sequential(stitched.astype("uint64") + 1, offset=1)

    # Snapshot the pre-stitch labels before the output is overwritten with the stitched result.
    pre_stitch = seg.copy() if return_before_stitching else None
    out[full_roi(ndim)] = stitched.astype(out.dtype, copy=False)
    if return_before_stitching:
        return out_array, pre_stitch
    return out_array

Run a segmentation function tile-wise and stitch the results based on overlap.

Each tile is read with a halo (tile_shape + 2 * tile_overlap) and segmented independently; the halo region is where the overlap with the neighbouring tile's segmentation of the same pixels is measured. Objects that overlap strongly there are merged via a multicut over the region adjacency graph of the per-tile labeling.

Args: input: The input data (a numpy/zarr/n5 array or a Source). If it has channels they must be the last (trailing) axes, and shape must give the spatial shape. segmentation_function: The per-tile segmentation function with signature f(tile_input, tile_id) -> labels. It receives the haloed tile input and the tile id (passed in case the segmentation depends on the tile; ignore it otherwise) and returns a label image of the tile's (haloed) spatial shape. It is cloudpickled for distributed execution, so it must be picklable. tile_shape: The shape of the individual tiles. tile_overlap: The halo added on each side of a tile; the input to the segmentation function has size tile_shape + 2 * tile_overlap, and the overlap is measured in the halo. output: The uint64 output array. Optional for local execution — a numpy array is allocated and returned if omitted; required (file-backed) for distributed execution. beta: The boundary bias of the multicut; > 0.5 biases towards over-segmentation, < 0.5 towards under-segmentation. Must be in the exclusive range (0, 1). shape: The spatial shape of the segmentation. Defaults to the input shape; must be passed if the input has trailing channel axes. with_background: Whether the problem has a background label (hard-coded 0) that must not be merged with foreground objects. num_workers: Number of parallel workers (threads for local, tasks for distributed backends) for the tile-segmentation and overlap-counting phases. job_type: Execution backend: one of "local", "subprocess" or "slurm". job_config: Backend configuration (a RunnerConfig / SlurmConfig). For distributed backends a temporary per-tile store is created under job_config.tmp_root. return_before_stitching: Also return the (relabeled) pre-stitch segmentation, for debugging.

Returns: The output array with the stitched segmentation, or (output, pre_stitch) if return_before_stitching is set (pre_stitch is an in-memory numpy array).

def stitch_tiled_segmentation( segmentation: 'SourceLike', tile_shape: Tuple[int, ...], output: 'Optional[SourceLike]' = None, *, overlap: int = 1, with_background: bool = True, beta: float = 0.5, num_workers: int = 1, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None) -> 'SourceLike': View Source

def stitch_tiled_segmentation(
    segmentation: SourceLike,
    tile_shape: Tuple[int, ...],
    output: Optional[SourceLike] = None,
    *,
    overlap: int = 1,
    with_background: bool = True,
    beta: float = 0.5,
    num_workers: int = 1,
    job_type: str = "local",
    job_config: Optional[RunnerConfig] = None,
) -> SourceLike:
    """Stitch a segmentation that is already split into tiles with unique ids per tile.

    The ids in the tiles of the input have to be unique (the segmentations are separate across
    tiles). Objects that touch across a tile interface are merged based on how strongly they overlap
    there, via a multicut over the region adjacency graph of the tiled segmentation.

    Args:
        segmentation: The input tiled segmentation (a numpy/zarr/n5 array or a `Source`); must be
            integer-typed with ids unique across tiles.
        tile_shape: The shape of the tiles (the block shape of the tiling); one entry per axis of
            ``segmentation``.
        output: The ``uint64`` output array. Optional for local execution — a numpy array is
            allocated and returned if omitted; **required** (file-backed) for distributed execution.
        overlap: The thickness (in pixels) of the tile-interface slab used to measure object overlap.
        with_background: Whether the problem has a background label (hard-coded ``0``) that must not
            be merged with foreground objects.
        beta: The boundary bias of the multicut; ``> 0.5`` biases towards over-segmentation,
            ``< 0.5`` towards under-segmentation. Must be in the exclusive range ``(0, 1)``.
        num_workers: Number of parallel workers (threads for ``local``, tasks for distributed
            backends) for the overlap-counting phase.
        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).

    Returns:
        The output array with merged labels.

    Raises:
        ValueError: If ``beta`` is outside ``(0, 1)``, if the input is not integer-typed, or if
            ``tile_shape`` does not have one entry per axis of the input.
    """
    # beta is only consumed by the final multicut; validate it before the overlap-counting pass
    # is launched so an invalid value does not cost a full run.
    if not 0.0 < beta < 1.0:
        raise ValueError(f"beta must be in the exclusive range (0, 1), got {beta}.")

    src = as_source(segmentation)
    if not np.issubdtype(np.dtype(src.dtype), np.integer):
        raise ValueError(f"stitch_tiled_segmentation expects an integer label image, got {src.dtype}.")
    ndim = src.ndim
    shape = tuple(int(s) for s in src.shape)
    tile_shape = tuple(int(t) for t in tile_shape)
    if len(tile_shape) != ndim:
        raise ValueError(
            f"tile_shape must have one entry per axis of the segmentation ({ndim}), "
            f"got tile_shape={tile_shape}."
        )
    out_array = _prepare_output(output, shape, job_type)

    # Count the object overlaps at the tile interfaces, block-wise.
    runner = get_runner(job_type, job_config)
    overlap_fn = _make_tiled_overlap(shape, tile_shape, int(overlap), ndim)
    results = runner.run(overlap_fn, [segmentation], block_shape=tile_shape, num_workers=num_workers,
                         has_return_val=True, name="stitch-overlaps")
    uv, frac = _collect_edges(results)

    # The merge itself runs in-process on the full segmentation.
    seg = src[full_roi(ndim)]
    stitched = _stitch_via_multicut(seg, uv, frac, with_background, beta, n_threads=None)

    out = as_source(out_array)
    # NOTE(review): unlike stitch_segmentation, the output dtype is not checked here; the stitched
    # labels are cast to whatever dtype ``output`` has -- confirm that is intended.
    out[full_roi(ndim)] = stitched.astype(out.dtype, copy=False)
    return out_array

Stitch a segmentation that is already split into tiles with unique ids per tile.

The ids in the tiles of the input have to be unique (the segmentations are separate across tiles). Objects that touch across a tile interface are merged based on how strongly they overlap there, via a multicut over the region adjacency graph of the tiled segmentation.

Args: segmentation: The input tiled segmentation (a numpy/zarr/n5 array or a Source); must be integer-typed with ids unique across tiles. tile_shape: The shape of the tiles (the block shape of the tiling). output: The uint64 output array. Optional for local execution — a numpy array is allocated and returned if omitted; required (file-backed) for distributed execution. overlap: The thickness (in pixels) of the tile-interface slab used to measure object overlap. with_background: Whether the problem has a background label (hard-coded 0) that must not be merged with foreground objects. beta: The boundary bias of the multicut; > 0.5 biases towards over-segmentation, < 0.5 towards under-segmentation. num_workers: Number of parallel workers (threads for local, tasks for distributed backends) for the overlap-counting phase. job_type: Execution backend: one of "local", "subprocess" or "slurm". job_config: Backend configuration (a RunnerConfig / SlurmConfig).

Returns: The output array with merged labels.

def compute_edge_costs( probs: numpy.ndarray, edge_sizes: Optional[numpy.ndarray] = None, z_edge_mask: Optional[numpy.ndarray] = None, beta: float = 0.5, weighting_scheme: Optional[str] = None, weighting_exponent: float = 1.0) -> numpy.ndarray: View Source

 87def compute_edge_costs(
 88    probs: np.ndarray,
 89    edge_sizes: Optional[np.ndarray] = None,
 90    z_edge_mask: Optional[np.ndarray] = None,
 91    beta: float = 0.5,
 92    weighting_scheme: Optional[str] = None,
 93    weighting_exponent: float = 1.0,
 94) -> np.ndarray:
 95    """Compute multicut edge costs from probabilities with a pre-defined weighting scheme.
 96
 97    Args:
 98        probs: The input edge probabilities, in ``[0, 1]``.
 99        edge_sizes: The sizes of the edges; required for all weighting schemes except ``None`` /
100            ``"none"``.
101        z_edge_mask: A boolean mask of inter-slice edges; required for the ``"xyz"`` and ``"z"``
102            schemes (for flat superpixels in a 3d problem).
103        beta: The boundary bias (see `transform_probabilities_to_costs`).
104        weighting_scheme: How to weight the costs by edge size; one of ``None``, ``"none"``,
105            ``"all"``, ``"xyz"`` or ``"z"``.
106        weighting_exponent: The exponent applied to the normalized edge sizes when weighting.
107
108    Returns:
109        The edge costs.
110
111    Raises:
112        ValueError: If ``weighting_scheme`` is unknown or a scheme's required inputs are missing.
113    """
114    schemes = (None, "all", "none", "xyz", "z")
115    if weighting_scheme not in schemes:
116        schemes_str = ", ".join([str(scheme) for scheme in schemes])
117        raise ValueError(f"Weighting scheme must be one of {schemes_str}, got {weighting_scheme}.")
118
119    if weighting_scheme is None or weighting_scheme == "none":
120        edge_pop = edge_sizes_ = None
121
122    elif weighting_scheme == "all":
123        if edge_sizes is None:
124            raise ValueError("Need edge sizes for weighting scheme 'all'.")
125        if len(edge_sizes) != len(probs):
126            raise ValueError("Invalid edge sizes.")
127        edge_sizes_ = edge_sizes
128        edge_pop = None
129
130    elif weighting_scheme == "xyz":
131        if edge_sizes is None or z_edge_mask is None:
132            raise ValueError("Need edge sizes and z edge mask for weighting scheme 'xyz'.")
133        if len(edge_sizes) != len(probs) or len(z_edge_mask) != len(probs):
134            raise ValueError("Invalid edge sizes or z edge mask.")
135        edge_pop = [z_edge_mask, np.logical_not(z_edge_mask)]
136        edge_sizes_ = edge_sizes
137
138    elif weighting_scheme == "z":
139        if edge_sizes is None or z_edge_mask is None:
140            raise ValueError("Need edge sizes and z edge mask for weighting scheme 'z'.")
141        if len(edge_sizes) != len(probs) or len(z_edge_mask) != len(probs):
142            raise ValueError("Invalid edge sizes or z edge mask.")
143        edge_pop = [z_edge_mask, np.logical_not(z_edge_mask)]
144        edge_sizes_ = edge_sizes.copy()
145        edge_sizes_[edge_pop[1]] = 1.0
146
147    return transform_probabilities_to_costs(
148        probs, beta=beta, edge_sizes=edge_sizes_, edge_populations=edge_pop,
149        weighting_exponent=weighting_exponent,
150    )

Compute multicut edge costs from probabilities with a pre-defined weighting scheme.

Args: probs: The input edge probabilities, in [0, 1]. edge_sizes: The sizes of the edges; required for all weighting schemes except None / "none". z_edge_mask: A boolean mask of inter-slice edges; required for the "xyz" and "z" schemes (for flat superpixels in a 3d problem). beta: The boundary bias (see transform_probabilities_to_costs). weighting_scheme: How to weight the costs by edge size; one of None, "none", "all", "xyz" or "z". weighting_exponent: The exponent applied to the normalized edge sizes when weighting.

Returns: The edge costs.

Raises: ValueError: If weighting_scheme is unknown or a scheme's required inputs are missing.

def transform_probabilities_to_costs( probs: numpy.ndarray, beta: float = 0.5, edge_sizes: Optional[numpy.ndarray] = None, edge_populations: Optional[List[numpy.ndarray]] = None, weighting_exponent: float = 1.0) -> numpy.ndarray: View Source

def transform_probabilities_to_costs(
    probs: np.ndarray,
    beta: float = 0.5,
    edge_sizes: Optional[np.ndarray] = None,
    edge_populations: Optional[List[np.ndarray]] = None,
    weighting_exponent: float = 1.0,
) -> np.ndarray:
    """Transform edge (boundary) probabilities to multicut costs via the negative log-likelihood.

    The cost of an edge is ``log((1 - p) / p) + log((1 - beta) / beta)``, where ``p`` is the
    probability rescaled into ``[0.001, 0.999]``. Probabilities near ``0`` therefore map to
    large positive costs (attractive, likely same segment) and probabilities near ``1`` to
    large negative costs (repulsive, likely a boundary). The boundary bias ``beta`` shifts the
    decision threshold.

    Args:
        probs: The input edge probabilities, in ``[0, 1]``.
        beta: The boundary bias term; ``> 0.5`` biases towards over-segmentation (more cuts),
            ``< 0.5`` towards under-segmentation. Must be in the exclusive range ``(0, 1)``.
        edge_sizes: The sizes of the edges, used for weighting if given.
        edge_populations: Disjoint edge populations (lists of masks or index arrays) that are
            size-weighted independently, e.g. flat superpixels in a 3d problem. Only used when
            ``edge_sizes`` is given.
        weighting_exponent: The exponent applied to the normalized edge sizes when weighting.

    Returns:
        The edge costs.

    Raises:
        ValueError: If ``beta`` is outside ``(0, 1)`` or if ``edge_sizes`` does not have the
            same length as ``probs``.
    """
    # Validate eagerly: beta == 0 would divide by zero and beta == 1 would silently turn every
    # cost into -inf. The chained comparison also rejects NaN.
    if not 0.0 < beta < 1.0:
        raise ValueError(f"beta must be in the exclusive range (0, 1), got {beta}.")
    # Raise instead of assert so the check survives `python -O`.
    if edge_sizes is not None and len(edge_sizes) != len(probs):
        raise ValueError(
            f"edge_sizes must have the same length as probs, "
            f"got {len(edge_sizes)} and {len(probs)}."
        )

    # Rescale the probabilities into [p_min, p_max] so that the logarithm below stays finite.
    p_min = 0.001
    p_max = 1.0 - p_min
    costs = (p_max - p_min) * probs + p_min
    # Probabilities to costs; the second term is the boundary bias.
    costs = np.log((1.0 - costs) / costs) + np.log((1.0 - beta) / beta)

    # Weight the costs with edge sizes, if they are given.
    if edge_sizes is None:
        return costs
    if edge_populations is None:
        return _weight_edges(costs, edge_sizes, weighting_exponent)
    return _weight_populations(costs, edge_sizes, edge_populations, weighting_exponent)

Transform edge (boundary) probabilities to multicut costs via the negative log-likelihood.

Probabilities near 0 map to large positive costs (attractive, likely same segment) and probabilities near 1 to large negative costs (repulsive, likely a boundary). The boundary bias beta shifts the decision threshold.

Args: probs: The input edge probabilities, in [0, 1]. beta: The boundary bias term; > 0.5 biases towards over-segmentation (more cuts), < 0.5 towards under-segmentation. Must be in the exclusive range (0, 1). edge_sizes: The sizes of the edges, used for weighting if given. edge_populations: Disjoint edge populations (lists of masks or index arrays) that are size-weighted independently, e.g. flat superpixels in a 3d problem. weighting_exponent: The exponent applied to the normalized edge sizes when weighting.

Returns: The edge costs.

def multicut_decomposition( graph, costs: numpy.ndarray, n_threads: int = 1, internal_solver: str = 'kernighan-lin') -> numpy.ndarray: View Source

def multicut_decomposition(
    graph,
    costs: np.ndarray,
    n_threads: int = 1,
    internal_solver: str = "kernighan-lin",
) -> np.ndarray:
    """Solve a multicut problem by decomposing it into independent sub-problems.

    Strongly repulsive edges are removed, the remaining connected components are solved
    separately (in parallel) with ``internal_solver``, and the partial solutions are merged.
    The method is described in "Break and Conquer: Efficient Correlation Clustering for Image
    Segmentation" (https://link.springer.com/chapter/10.1007/978-3-642-39140-8_9).

    Args:
        graph: The graph (or region adjacency graph) of the multicut problem.
        costs: The edge costs of the multicut problem.
        n_threads: The number of threads used to solve sub-problems in parallel.
        internal_solver: The name of the solver used for the sub-problems; one of
            ``"kernighan-lin"``, ``"greedy-additive"`` or ``"greedy-fixation"``.

    Returns:
        The node label solution to the multicut problem.
    """
    problem = _to_objective(graph, costs)
    multicut = bic.graph.multicut
    # The same solver type serves both roles, but each role gets its own instance.
    decomposer = multicut.MulticutDecomposer(
        sub_solver=_get_solver(internal_solver),
        fallthrough_solver=_get_solver(internal_solver),
        number_of_threads=n_threads,
    )
    node_labels = decomposer.optimize(problem)
    return node_labels

Solve the multicut problem with the decomposition solver.

The graph is split into its connected components after removing strongly repulsive edges, each component is solved independently (in parallel) with internal_solver, and the solutions are combined. Introduced in "Break and Conquer: Efficient Correlation Clustering for Image Segmentation" (https://link.springer.com/chapter/10.1007/978-3-642-39140-8_9).

Args: graph: The graph (or region adjacency graph) of the multicut problem. costs: The edge costs of the multicut problem. n_threads: The number of threads used to solve sub-problems in parallel. internal_solver: The name of the solver used for the sub-problems; one of "kernighan-lin", "greedy-additive" or "greedy-fixation".

Returns: The node label solution to the multicut problem.

def multicut_gaec(graph, costs: numpy.ndarray) -> numpy.ndarray: View Source

def multicut_gaec(graph, costs: np.ndarray) -> np.ndarray:
    """Solve a multicut problem with greedy-additive edge contraction (GAEC).

    The solver is described in "Fusion moves for correlation clustering"
    (http://openaccess.thecvf.com/content_cvpr_2015/papers/Beier_Fusion_Moves_for_2015_CVPR_paper.pdf).

    Args:
        graph: The graph (or region adjacency graph) of the multicut problem.
        costs: The edge costs of the multicut problem.

    Returns:
        The node label solution to the multicut problem.
    """
    problem = _to_objective(graph, costs)
    gaec = bic.graph.multicut.GreedyAdditiveMulticut()
    node_labels = gaec.optimize(problem)
    return node_labels

Solve the multicut problem with the greedy-additive edge contraction solver.

Introduced in "Fusion moves for correlation clustering" (http://openaccess.thecvf.com/content_cvpr_2015/papers/Beier_Fusion_Moves_for_2015_CVPR_paper.pdf).

Args: graph: The graph (or region adjacency graph) of the multicut problem. costs: The edge costs of the multicut problem.

Returns: The node label solution to the multicut problem.

def multicut_kernighan_lin(graph, costs: numpy.ndarray, warmstart: bool = True) -> numpy.ndarray: View Source

def multicut_kernighan_lin(graph, costs: np.ndarray, warmstart: bool = True) -> np.ndarray:
    """Solve a multicut problem with the Kernighan-Lin solver.

    The underlying heuristic is described in "An efficient heuristic procedure for
    partitioning graphs" (http://xilinx.asia/_hdl/4/eda.ee.ucla.edu/EE201A-04Spring/kl.pdf).

    Args:
        graph: The graph (or region adjacency graph) of the multicut problem.
        costs: The edge costs of the multicut problem.
        warmstart: Whether to warmstart with the greedy-additive solution.

    Returns:
        The node label solution to the multicut problem.
    """
    problem = _to_objective(graph, costs)
    multicut = bic.graph.multicut
    if not warmstart:
        # Cold start: run Kernighan-Lin on its own.
        return multicut.KernighanLinMulticut().optimize(problem)
    # Warm start: chain the greedy-additive solver in front of Kernighan-Lin.
    stages = [
        multicut.GreedyAdditiveMulticut(),
        multicut.KernighanLinMulticut(),
    ]
    chained = multicut.ChainedMulticutSolvers(stages)
    return chained.optimize(problem)

Solve the multicut problem with the Kernighan-Lin solver.

Introduced in "An efficient heuristic procedure for partitioning graphs" (http://xilinx.asia/_hdl/4/eda.ee.ucla.edu/EE201A-04Spring/kl.pdf).

Args: graph: The graph (or region adjacency graph) of the multicut problem. costs: The edge costs of the multicut problem. warmstart: Whether to warmstart with the greedy-additive solution.

Returns: The node label solution to the multicut problem.