bioimage_py.segmentation

Segmentation: connected-component labeling and related operations.

 1"""Segmentation: connected-component labeling and related operations."""
 2from .label import label
 3from .multicut import (compute_edge_costs, multicut_decomposition, multicut_gaec,
 4                       multicut_kernighan_lin, transform_probabilities_to_costs)
 5from .relabel import relabel_consecutive
 6from .size_filter import segmentation_filter, size_filter
 7from .stitching import stitch_segmentation, stitch_tiled_segmentation
 8from .watershed import watershed
 9
10__all__ = [
11    "label",
12    "watershed",
13    "relabel_consecutive",
14    "segmentation_filter",
15    "size_filter",
16    "stitch_segmentation",
17    "stitch_tiled_segmentation",
18    "compute_edge_costs",
19    "transform_probabilities_to_costs",
20    "multicut_decomposition",
21    "multicut_gaec",
22    "multicut_kernighan_lin",
23]
def label( input: 'SourceLike', output: 'Optional[SourceLike]' = None, *, threshold: Optional[float] = None, connectivity: Optional[int] = None, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, num_workers: int = 1, mask: 'Optional[SourceLike]' = None) -> 'SourceLike':
127def label(
128    input: SourceLike,
129    output: Optional[SourceLike] = None,
130    *,
131    threshold: Optional[float] = None,
132    connectivity: Optional[int] = None,
133    block_shape: Optional[Tuple[int, ...]] = None,
134    job_type: str = "local",
135    job_config: Optional[RunnerConfig] = None,
136    num_workers: int = 1,
137    mask: Optional[SourceLike] = None,
138) -> SourceLike:
139    """Label connected components of (optionally thresholded) data, block-wise.
140
141    Unlike the single-pass operations, ``label`` is multi-stage with a global cross-block merge
142    (per-block labeling, then a union-find over touching components across block faces), so it
143    does **not** accept ``block_ids`` or ``resume_from``: a failed run must be re-run whole (it is
144    idempotent given the same ``output``).
145
146    Args:
147        input: The input data (a numpy/zarr/n5 array or a `Source`).
148        output: The ``uint64`` output array to write the labels into. Optional for local
149            execution — a numpy array is allocated and returned if omitted; **required** for
150            distributed execution.
151        threshold: If given, the input is binarized as ``input > threshold``; otherwise the
152            input is treated as a binary foreground mask.
153        connectivity: Neighbour connectivity in ``[1, ndim]`` (``1`` = orthogonal). Defaults
154            to ``1``; values ``> 1`` are only supported for the direct (single-block) path.
155        block_shape: Shape of the processing blocks. Defaults to the input/output chunk shape;
156            required for unchunked data.
157        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
158        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
159        num_workers: Number of parallel workers (threads for ``local``, tasks for distributed
160            backends).
161        mask: Optional binary mask; values outside the mask are excluded from the foreground.
162
163    Returns:
164        The output array (the provided ``output``, or a newly allocated numpy array), labeled
165        with consecutive ids (background stays ``0``).
166    """
167    src = as_source(input)
168    ndim = src.ndim
169    conn = 1 if connectivity is None else int(connectivity)
170    if not 1 <= conn <= ndim:
171        raise ValueError(f"connectivity must be in [1, {ndim}], got {conn}.")
172
173    direct = is_direct(job_type, num_workers, block_shape) and mask is None
174    if conn > 1 and not direct:
175        raise NotImplementedError(
176            "Block-wise labeling only supports connectivity=1 (orthogonal). Use the direct "
177            "path (local, single worker, no block_shape, no mask) for higher connectivity."
178        )
179
180    if output is None:
181        if job_type != "local":
182            raise ValueError(
183                f"'output' is required for distributed execution (job_type={job_type!r}); "
184                "pass a file-backed (zarr/n5) output array."
185            )
186        out_array: SourceLike = np.zeros(tuple(src.shape), dtype="uint64")
187    else:
188        out_array = output
189
190    out = as_source(out_array)
191    if out.dtype != np.dtype("uint64"):
192        raise ValueError(f"output must have dtype uint64, got {out.dtype}.")
193
194    if direct:
195        binary = _binarize(src[full_roi(ndim)], threshold)
196        comp = bic.segmentation.label(binary, connectivity=conn).astype("uint64", copy=False)
197        out[full_roi(ndim)] = comp
198        return out_array
199
200    block_shape = _resolve_block_shape(src, out, block_shape)
201    offset_factor = int(np.prod(block_shape))
202    blocking = get_blocking(src.shape, block_shape)
203    n_blocks = int(blocking.number_of_blocks)
204    if (n_blocks * offset_factor) >= int(np.iinfo(np.uint64).max):
205        raise ValueError(
206            "Label id overflow: number_of_blocks * prod(block_shape) exceeds uint64. "
207            "Reduce the block shape or the volume size."
208        )
209
210    runner = get_runner(job_type, job_config)
211
212    # Stage 1: label each block independently with a globally-unique offset.
213    stage1 = _make_stage1(tuple(src.shape), block_shape, conn, threshold, offset_factor)
214    id_results = runner.run(stage1, [input], outputs=[out_array], block_shape=block_shape,
215                            mask=mask, num_workers=num_workers, has_return_val=True,
216                            name="label-blocks")
217    id_arrays = [a for a in id_results if a is not None and len(a)]
218    real_labels = np.unique(np.concatenate(id_arrays)) if id_arrays else np.zeros((0,), dtype="uint64")
219    max_label = int(real_labels.max()) if real_labels.size else 0
220
221    # Stage 2: collect label equivalences across lower block faces.
222    stage2 = _make_stage2(tuple(src.shape), block_shape)
223    pair_results = runner.run(stage2, [out_array], block_shape=block_shape,
224                              num_workers=num_workers, has_return_val=True, name="merge-faces")
225    pairs = [p for p in pair_results if p is not None]
226    assignments = (np.unique(np.concatenate(pairs, axis=0), axis=0)
227                   if pairs else np.zeros((0, 2), dtype="uint64"))
228
229    # Stage 3 (in process): union-find merge, then relabel only the labels that exist to
230    # consecutive ids (the offset space is sparse, so relabeling over it would not be compact).
231    uf = bic.utils.UnionFind(max_label + 1)
232    if len(assignments):
233        uf.merge(assignments.astype("uint64"))
234    mapping: Dict[int, int] = {0: 0}
235    if real_labels.size:
236        roots = np.asarray(uf.find(real_labels.astype("uint64")))
237        root_to_new = {int(r): i + 1 for i, r in enumerate(np.unique(roots).tolist())}
238        for lab, root in zip(real_labels.tolist(), roots.tolist()):
239            mapping[int(lab)] = root_to_new[int(root)]
240
241    # Stage 4: apply the mapping in place.
242    stage4 = _make_stage4(mapping)
243    runner.run(stage4, [], outputs=[out_array], block_shape=block_shape,
244               num_workers=num_workers, has_return_val=False, name="relabel")
245    return out_array

Label connected components of (optionally thresholded) data, block-wise.

Unlike the single-pass operations, label is multi-stage with a global cross-block merge (per-block labeling, then a union-find over touching components across block faces), so it does not accept block_ids or resume_from: a failed run must be re-run whole (it is idempotent given the same output).

Args: input: The input data (a numpy/zarr/n5 array or a Source). output: The uint64 output array to write the labels into. Optional for local execution — a numpy array is allocated and returned if omitted; required for distributed execution. threshold: If given, the input is binarized as input > threshold; otherwise the input is treated as a binary foreground mask. connectivity: Neighbour connectivity in [1, ndim] (1 = orthogonal). Defaults to 1; values > 1 are only supported for the direct (single-block) path. block_shape: Shape of the processing blocks. Defaults to the input/output chunk shape; required for unchunked data. job_type: Execution backend: one of "local", "subprocess" or "slurm". job_config: Backend configuration (a RunnerConfig / SlurmConfig). num_workers: Number of parallel workers (threads for local, tasks for distributed backends). mask: Optional binary mask; values outside the mask are excluded from the foreground.

Returns: The output array (the provided output, or a newly allocated numpy array), labeled with consecutive ids (background stays 0).

def watershed( input: 'SourceLike', seeds: 'SourceLike', output: 'Optional[SourceLike]' = None, *, halo: Optional[Sequence[int]] = None, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, num_workers: int = 1, mask: 'Optional[SourceLike]' = None, block_ids: Optional[Sequence[int]] = None, resume_from: Optional[str] = None) -> 'SourceLike':
 67def watershed(
 68    input: SourceLike,
 69    seeds: SourceLike,
 70    output: Optional[SourceLike] = None,
 71    *,
 72    halo: Optional[Sequence[int]] = None,
 73    block_shape: Optional[Tuple[int, ...]] = None,
 74    job_type: str = "local",
 75    job_config: Optional[RunnerConfig] = None,
 76    num_workers: int = 1,
 77    mask: Optional[SourceLike] = None,
 78    block_ids: Optional[Sequence[int]] = None,
 79    resume_from: Optional[str] = None,
 80) -> SourceLike:
 81    """Compute a seeded watershed over a height map, block-wise.
 82
 83    Each block runs a seeded watershed (``bioimage_cpp.segmentation.watershed``) on a halo-padded
 84    region and writes back the halo-free inner block. The ``seeds`` define the segments and their
 85    ids are preserved verbatim -- there is no cross-block merge, so pass globally consistent seeds
 86    (e.g. a connected-component labeling of a seed mask) for a coherent result.
 87
 88    Being single-stage, this operation supports ``block_ids`` / ``resume_from`` re-runs. The
 89    block-wise output matches a whole-array watershed only where ``halo`` covers the relevant
 90    catchment basins, but is bit-identical across backends for a fixed ``(block_shape, halo)``.
 91
 92    Args:
 93        input: The height map (a numpy/zarr/n5 array or a `Source`). It is cast to ``float32`` for
 94            the watershed if it is not already a float type.
 95        seeds: The pre-computed integer seed markers (``0`` = background), same shape as ``input``.
 96            A non-integer seed map is cast to ``uint32``.
 97        output: The integer output array to write the segmentation into. Optional for local
 98            execution -- a ``uint64`` numpy array is allocated and returned if omitted; **required**
 99            for distributed execution. It must be wide enough to hold the seed ids (``uint64``
100            recommended; a ``uint32`` watershed result writes losslessly into a ``uint64`` output).
101        halo: Per-axis halo enlarging each block; **required** for the block-wise path (there is no
102            principled default for a watershed). Choose it large enough to cover object extents at
103            block boundaries. Ignored by the direct (single-block) path.
104        block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required
105            for unchunked data.
106        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
107        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
108        num_workers: Number of parallel workers (threads for ``local``, tasks for distributed
109            backends).
110        mask: Optional binary mask; voxels outside the mask are excluded and stay ``0``.
111        block_ids: Restrict processing to these block ids (e.g. to re-run previously failed blocks
112            into the existing ``output``). Mutually exclusive with ``resume_from``.
113        resume_from: Distributed only; the preserved temp folder of a failed run to resume (see
114            ``runner.run``); the missing blocks are written into ``output``. Mutually exclusive
115            with ``block_ids``.
116
117    Returns:
118        The output array (the provided ``output``, or a newly allocated ``uint64`` numpy array).
119    """
120    check_rerun_args(job_type, resume_from, block_ids)
121
122    src = as_source(input)
123    seeds_src = as_source(seeds)
124    ndim = src.ndim
125    if tuple(seeds_src.shape) != tuple(src.shape):
126        raise ValueError(
127            f"seeds shape {seeds_src.shape} does not match input shape {src.shape}."
128        )
129
130    # A subset/resume rerun is inherently block-wise, so it cannot use the direct (whole-array) path.
131    direct = (is_direct(job_type, num_workers, block_shape)
132              and block_ids is None and resume_from is None)
133
134    if output is None:
135        if job_type != "local":
136            raise ValueError(
137                f"'output' is required for distributed execution (job_type={job_type!r}); "
138                "pass a file-backed (zarr/n5) output array."
139            )
140        out_array: SourceLike = np.zeros(tuple(src.shape), dtype="uint64")
141    else:
142        out_array = output
143
144    out = as_source(out_array)
145    if not np.issubdtype(out.dtype, np.integer):
146        raise ValueError(f"output must have an integer dtype, got {out.dtype}.")
147
148    if direct:
149        block_hmap = _as_hmap(src[full_roi(ndim)])
150        block_seeds = _as_seeds(seeds_src[full_roi(ndim)])
151        block_mask = as_source(mask)[full_roi(ndim)].astype(bool) if mask is not None else None
152        out[full_roi(ndim)] = bic.segmentation.watershed(block_hmap, block_seeds, mask=block_mask)
153        return out_array
154
155    if halo is None:
156        raise ValueError(
157            "halo is required for block-wise watershed; choose one large enough to cover object "
158            "extents at block boundaries."
159        )
160
161    runner = get_runner(job_type, job_config)
162    runner.run(_watershed_block, [input, seeds], outputs=[out_array], halo=halo,
163               block_shape=block_shape, mask=mask, num_workers=num_workers,
164               block_ids=block_ids, resume_from=resume_from, name="watershed")
165    return out_array

Compute a seeded watershed over a height map, block-wise.

Each block runs a seeded watershed (bioimage_cpp.segmentation.watershed) on a halo-padded region and writes back the halo-free inner block. The seeds define the segments and their ids are preserved verbatim -- there is no cross-block merge, so pass globally consistent seeds (e.g. a connected-component labeling of a seed mask) for a coherent result.

Being single-stage, this operation supports block_ids / resume_from re-runs. The block-wise output matches a whole-array watershed only where halo covers the relevant catchment basins, but is bit-identical across backends for a fixed (block_shape, halo).

Args: input: The height map (a numpy/zarr/n5 array or a Source). It is cast to float32 for the watershed if it is not already a float type. seeds: The pre-computed integer seed markers (0 = background), same shape as input. A non-integer seed map is cast to uint32. output: The integer output array to write the segmentation into. Optional for local execution -- a uint64 numpy array is allocated and returned if omitted; required for distributed execution. It must be wide enough to hold the seed ids (uint64 recommended; a uint32 watershed result writes losslessly into a uint64 output). halo: Per-axis halo enlarging each block; required for the block-wise path (there is no principled default for a watershed). Choose it large enough to cover object extents at block boundaries. Ignored by the direct (single-block) path. block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required for unchunked data. job_type: Execution backend: one of "local", "subprocess" or "slurm". job_config: Backend configuration (a RunnerConfig / SlurmConfig). num_workers: Number of parallel workers (threads for local, tasks for distributed backends). mask: Optional binary mask; voxels outside the mask are excluded and stay 0. block_ids: Restrict processing to these block ids (e.g. to re-run previously failed blocks into the existing output). Mutually exclusive with resume_from. resume_from: Distributed only; the preserved temp folder of a failed run to resume (see runner.run); the missing blocks are written into output. Mutually exclusive with block_ids.

Returns: The output array (the provided output, or a newly allocated uint64 numpy array).

def relabel_consecutive( input: 'SourceLike', output: 'Optional[SourceLike]' = None, *, start_label: int = 0, keep_zeros: bool = True, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, num_workers: int = 1, mask: 'Optional[SourceLike]' = None) -> 'Tuple[SourceLike, int, Dict[int, int]]':
 49def relabel_consecutive(
 50    input: SourceLike,
 51    output: Optional[SourceLike] = None,
 52    *,
 53    start_label: int = 0,
 54    keep_zeros: bool = True,
 55    block_shape: Optional[Tuple[int, ...]] = None,
 56    job_type: str = "local",
 57    job_config: Optional[RunnerConfig] = None,
 58    num_workers: int = 1,
 59    mask: Optional[SourceLike] = None,
 60) -> Tuple[SourceLike, int, Dict[int, int]]:
 61    """Relabel a segmentation to consecutive ids, block-wise.
 62
 63    Like ``label``, this is multi-stage (a global ``unique`` reduction, an in-process mapping, then a
 64    block-wise write), so it does **not** accept ``block_ids`` or ``resume_from``: a failed run is
 65    re-run whole (it is idempotent given the same ``output``).
 66
 67    Args:
 68        input: The input label image (a numpy/zarr/n5 array or a `Source`); must be integer-typed.
 69        output: The output array to write the relabeled segmentation into. Optional for local
 70            execution -- a numpy array matching the input shape and dtype is allocated and returned
 71            if omitted; **required** for distributed execution (a writable, file-backed zarr/n5
 72            array).
 73        start_label: The value the smallest unique id is mapped to (subsequent ids follow
 74            consecutively).
 75        keep_zeros: Whether to always keep ``0`` mapped to ``0`` (background), regardless of
 76            ``start_label``.
 77        block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required
 78            for unchunked data.
 79        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
 80        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
 81        num_workers: Number of parallel workers (threads for ``local``, tasks for distributed
 82            backends).
 83        mask: Optional binary mask; values outside the mask are excluded from the computation and
 84            their output voxels are left unchanged.
 85
 86    Returns:
 87        A ``(output, max_id, mapping)`` tuple: the relabeled output array, the maximum label id
 88        after relabeling, and the ``{old_id: new_id}`` mapping that was applied.
 89    """
 90    src = as_source(input)
 91    if not np.issubdtype(np.dtype(src.dtype), np.integer):
 92        raise ValueError(f"relabel_consecutive expects an integer label image, got dtype {src.dtype}.")
 93    ndim = src.ndim
 94
 95    direct = is_direct(job_type, num_workers, block_shape) and mask is None
 96
 97    if output is None:
 98        if job_type != "local":
 99            raise ValueError(
100                f"'output' is required for distributed execution (job_type={job_type!r}); "
101                "pass a file-backed (zarr/n5) output array."
102            )
103        out_array: SourceLike = np.zeros(tuple(src.shape), dtype=src.dtype)
104    else:
105        out_array = output
106    out = as_source(out_array)
107    if not direct and same_array(out, src):
108        raise ValueError("Block-wise relabel_consecutive needs 'output' to differ from 'input'.")
109
110    # Pass 1: the global set of unique values.
111    if direct:
112        uniques = np.unique(src[full_roi(ndim)])
113    else:
114        uniques = unique(input, block_shape=block_shape, job_type=job_type, job_config=job_config,
115                         num_workers=num_workers, mask=mask)
116
117    # In-process: build the old -> new mapping (consecutive ids from start_label).
118    mapping: Dict[int, int] = {int(v): i for i, v in enumerate(uniques.tolist(), start_label)}
119    if keep_zeros and 0 in mapping:
120        mapping[0] = 0
121    max_id = max(mapping.values()) if mapping else 0
122
123    # Pass 2: apply the mapping.
124    if direct:
125        out[full_roi(ndim)] = bic.utils.take_dict(mapping, src[full_roi(ndim)])
126        return out_array, max_id, mapping
127
128    runner = get_runner(job_type, job_config)
129    runner.run(_make_relabel_block(mapping), [input], outputs=[out_array], block_shape=block_shape,
130               mask=mask, num_workers=num_workers, name="relabel")
131    return out_array, max_id, mapping

Relabel a segmentation to consecutive ids, block-wise.

Like label, this is multi-stage (a global unique reduction, an in-process mapping, then a block-wise write), so it does not accept block_ids or resume_from: a failed run is re-run whole (it is idempotent given the same output).

Args: input: The input label image (a numpy/zarr/n5 array or a Source); must be integer-typed. output: The output array to write the relabeled segmentation into. Optional for local execution -- a numpy array matching the input shape and dtype is allocated and returned if omitted; required for distributed execution (a writable, file-backed zarr/n5 array). start_label: The value the smallest unique id is mapped to (subsequent ids follow consecutively). keep_zeros: Whether to always keep 0 mapped to 0 (background), regardless of start_label. block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required for unchunked data. job_type: Execution backend: one of "local", "subprocess" or "slurm". job_config: Backend configuration (a RunnerConfig / SlurmConfig). num_workers: Number of parallel workers (threads for local, tasks for distributed backends). mask: Optional binary mask; values outside the mask are excluded from the computation and their output voxels are left unchanged.

Returns: A (output, max_id, mapping) tuple: the relabeled output array, the maximum label id after relabeling, and the {old_id: new_id} mapping that was applied.

def segmentation_filter( input: 'SourceLike', filter_function: Callable[[numpy.ndarray, Optional[numpy.ndarray]], numpy.ndarray], output: 'Optional[SourceLike]' = None, *, relabel: Optional[Callable[[numpy.ndarray, Optional[numpy.ndarray]], numpy.ndarray]] = None, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, num_workers: int = 1, mask: 'Optional[SourceLike]' = None, block_ids: Optional[Sequence[int]] = None, resume_from: Optional[str] = None) -> 'SourceLike':
 57def segmentation_filter(
 58    input: SourceLike,
 59    filter_function: BlockFn,
 60    output: Optional[SourceLike] = None,
 61    *,
 62    relabel: Optional[BlockFn] = None,
 63    block_shape: Optional[Tuple[int, ...]] = None,
 64    job_type: str = "local",
 65    job_config: Optional[RunnerConfig] = None,
 66    num_workers: int = 1,
 67    mask: Optional[SourceLike] = None,
 68    block_ids: Optional[Sequence[int]] = None,
 69    resume_from: Optional[str] = None,
 70) -> SourceLike:
 71    """Filter a segmentation with a custom per-block criterion, block-wise.
 72
 73    Args:
 74        input: The input segmentation (a numpy/zarr/n5 array or a `Source`).
 75        filter_function: A picklable callable ``filter_function(block_seg, block_mask)`` returning the
 76            filtered block. ``block_mask`` is the block's boolean in-mask array, or ``None`` when no
 77            mask is used; when a mask is used, restrict the criterion to the in-mask voxels.
 78        output: The output array to write into. Optional for local execution -- a numpy array
 79            matching the input shape and dtype is allocated and returned if omitted; **required** for
 80            distributed execution (a writable, file-backed zarr/n5 array).
 81        relabel: Optional picklable callable ``relabel(block_seg, block_mask)`` applied after
 82            ``filter_function`` (e.g. a consecutive relabeling); same masking contract.
 83        block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required
 84            for unchunked data.
 85        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
 86        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
 87        num_workers: Number of parallel workers (threads for ``local``, tasks for distributed
 88            backends).
 89        mask: Optional binary mask; out-of-mask output voxels are left unchanged.
 90        block_ids: Restrict processing to these block ids (e.g. to re-run previously failed blocks).
 91            Mutually exclusive with ``resume_from``.
 92        resume_from: Distributed only; the preserved temp folder of a failed run to resume (see
 93            ``runner.run``). Mutually exclusive with ``block_ids``.
 94
 95    Returns:
 96        The output array (the provided ``output``, or a newly allocated numpy array).
 97    """
 98    check_rerun_args(job_type, resume_from, block_ids)
 99    src = as_source(input)
100    ndim = src.ndim
101    direct = (is_direct(job_type, num_workers, block_shape) and mask is None
102              and block_ids is None and resume_from is None)
103
104    if output is None:
105        if job_type != "local":
106            raise ValueError(
107                f"'output' is required for distributed execution (job_type={job_type!r}); "
108                "pass a file-backed (zarr/n5) output array."
109            )
110        out_array: SourceLike = np.zeros(tuple(src.shape), dtype=src.dtype)
111    else:
112        out_array = output
113    out = as_source(out_array)
114    if not direct and same_array(out, src):
115        raise ValueError("Block-wise segmentation_filter needs 'output' to differ from 'input'.")
116
117    if direct:
118        filtered = filter_function(src[full_roi(ndim)], None)
119        if relabel is not None:
120            filtered = relabel(filtered, None)
121        out[full_roi(ndim)] = filtered
122        return out_array
123
124    runner = get_runner(job_type, job_config)
125    runner.run(_make_filter_block(filter_function, relabel), [input], outputs=[out_array],
126               block_shape=block_shape, mask=mask, num_workers=num_workers,
127               block_ids=block_ids, resume_from=resume_from, name="segmentation_filter")
128    return out_array

Filter a segmentation with a custom per-block criterion, block-wise.

Args: input: The input segmentation (a numpy/zarr/n5 array or a Source). filter_function: A picklable callable filter_function(block_seg, block_mask) returning the filtered block. block_mask is the block's boolean in-mask array, or None when no mask is used; when a mask is used, restrict the criterion to the in-mask voxels. output: The output array to write into. Optional for local execution -- a numpy array matching the input shape and dtype is allocated and returned if omitted; required for distributed execution (a writable, file-backed zarr/n5 array). relabel: Optional picklable callable relabel(block_seg, block_mask) applied after filter_function (e.g. a consecutive relabeling); same masking contract. block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required for unchunked data. job_type: Execution backend: one of "local", "subprocess" or "slurm". job_config: Backend configuration (a RunnerConfig / SlurmConfig). num_workers: Number of parallel workers (threads for local, tasks for distributed backends). mask: Optional binary mask; out-of-mask output voxels are left unchanged. block_ids: Restrict processing to these block ids (e.g. to re-run previously failed blocks). Mutually exclusive with resume_from. resume_from: Distributed only; the preserved temp folder of a failed run to resume (see runner.run). Mutually exclusive with block_ids.

Returns: The output array (the provided output, or a newly allocated numpy array).

def size_filter( input: 'SourceLike', output: 'Optional[SourceLike]' = None, *, min_size: Optional[int] = None, max_size: Optional[int] = None, relabel: bool = True, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, num_workers: int = 1, mask: 'Optional[SourceLike]' = None) -> 'SourceLike':
158def size_filter(
159    input: SourceLike,
160    output: Optional[SourceLike] = None,
161    *,
162    min_size: Optional[int] = None,
163    max_size: Optional[int] = None,
164    relabel: bool = True,
165    block_shape: Optional[Tuple[int, ...]] = None,
166    job_type: str = "local",
167    job_config: Optional[RunnerConfig] = None,
168    num_workers: int = 1,
169    mask: Optional[SourceLike] = None,
170) -> SourceLike:
171    """Remove objects smaller than ``min_size`` and/or larger than ``max_size`` from a segmentation.
172
173    Multi-stage (a global ``unique`` count reduction, then a filter pass), so it does **not** accept
174    ``block_ids`` / ``resume_from``. By default it relabels the result consecutively; pass
175    ``relabel=False`` to keep the original ids of the surviving objects.
176
177    Args:
178        input: The input segmentation (a numpy/zarr/n5 array or a `Source`); must be integer-typed.
179        output: The output array to write into. Optional for local execution -- a numpy array
180            matching the input shape and dtype is allocated and returned if omitted; **required** for
181            distributed execution (a writable, file-backed zarr/n5 array).
182        min_size: The minimum object size; smaller objects are removed. At least one of ``min_size`` /
183            ``max_size`` is required.
184        max_size: The maximum object size; larger objects are removed.
185        relabel: Whether to relabel the surviving objects consecutively after filtering.
186        block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required
187            for unchunked data. Required when a ``mask`` is given (the size reduction is block-wise).
188        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
189        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
190        num_workers: Number of parallel workers (threads for ``local``, tasks for distributed
191            backends).
192        mask: Optional binary mask; out-of-mask output voxels are left unchanged.
193
194    Returns:
195        The output array (the provided ``output``, or a newly allocated numpy array).
196    """
197    if min_size is None and max_size is None:
198        raise ValueError("size_filter requires at least one of 'min_size' or 'max_size'.")
199    src = as_source(input)
200    if not np.issubdtype(np.dtype(src.dtype), np.integer):
201        raise ValueError(f"size_filter expects an integer label image, got dtype {src.dtype}.")
202
203    # Pass 1: unique ids with their sizes.
204    ids, counts = unique(input, return_counts=True, block_shape=block_shape, job_type=job_type,
205                         job_config=job_config, num_workers=num_workers, mask=mask)
206
207    # In-process: ids to discard and the consecutive relabeling of the survivors.
208    discard = np.zeros(ids.shape, dtype=bool)
209    if min_size is not None:
210        discard |= counts < min_size
211    if max_size is not None:
212        discard |= counts > max_size
213    filter_ids = ids[discard]
214
215    relabel_fn: Optional[BlockFn] = None
216    if relabel:
217        # Reserve 0 for background and map the surviving foreground ids to 1..K consecutively, so a
218        # surviving object can never collide with the (possibly newly introduced) background 0.
219        remaining_fg = ids[(~discard) & (ids != 0)]
220        mapping: Dict[int, int] = {int(v): i for i, v in enumerate(remaining_fg.tolist(), start=1)}
221        mapping[0] = 0
222        relabel_fn = _make_size_relabel(mapping)
223
224    return segmentation_filter(input, _make_size_filter(filter_ids), output, relabel=relabel_fn,
225                               block_shape=block_shape, job_type=job_type, job_config=job_config,
226                               num_workers=num_workers, mask=mask)

Remove objects smaller than min_size and/or larger than max_size from a segmentation.

Multi-stage (a global unique count reduction, then a filter pass), so it does not accept block_ids / resume_from. By default it relabels the result consecutively; pass relabel=False to keep the original ids of the surviving objects.

Args: input: The input segmentation (a numpy/zarr/n5 array or a Source); must be integer-typed. output: The output array to write into. Optional for local execution -- a numpy array matching the input shape and dtype is allocated and returned if omitted; required for distributed execution (a writable, file-backed zarr/n5 array). min_size: The minimum object size; smaller objects are removed. At least one of min_size / max_size is required. max_size: The maximum object size; larger objects are removed. relabel: Whether to relabel the surviving objects consecutively after filtering. block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required for unchunked data. Required when a mask is given (the size reduction is block-wise). job_type: Execution backend: one of "local", "subprocess" or "slurm". job_config: Backend configuration (a RunnerConfig / SlurmConfig). num_workers: Number of parallel workers (threads for local, tasks for distributed backends). mask: Optional binary mask; out-of-mask output voxels are left unchanged.

Returns: The output array (the provided output, or a newly allocated numpy array).

def stitch_segmentation( input: 'SourceLike', segmentation_function: Callable, tile_shape: Tuple[int, ...], tile_overlap: Tuple[int, ...], output: 'Optional[SourceLike]' = None, *, beta: float = 0.5, shape: Optional[Tuple[int, ...]] = None, with_background: bool = True, num_workers: int = 1, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, return_before_stitching: bool = False) -> 'Union[SourceLike, Tuple[SourceLike, np.ndarray]]':
411def stitch_segmentation(
412    input: SourceLike,
413    segmentation_function: Callable,
414    tile_shape: Tuple[int, ...],
415    tile_overlap: Tuple[int, ...],
416    output: Optional[SourceLike] = None,
417    *,
418    beta: float = 0.5,
419    shape: Optional[Tuple[int, ...]] = None,
420    with_background: bool = True,
421    num_workers: int = 1,
422    job_type: str = "local",
423    job_config: Optional[RunnerConfig] = None,
424    return_before_stitching: bool = False,
425) -> Union[SourceLike, Tuple[SourceLike, np.ndarray]]:
426    """Run a segmentation function tile-wise and stitch the results based on overlap.
427
428    Each tile is read with a halo (``tile_shape + 2 * tile_overlap``) and segmented independently;
429    the halo region is where the overlap with the neighbouring tile's segmentation of the same
430    pixels is measured. Objects that overlap strongly there are merged via a multicut over the
431    region adjacency graph of the per-tile labeling.
432
433    Args:
434        input: The input data (a numpy/zarr/n5 array or a `Source`). If it has channels they must be
435            the last (trailing) axes, and `shape` must give the spatial shape.
436        segmentation_function: The per-tile segmentation function with signature
437            ``f(tile_input, tile_id) -> labels``. It receives the haloed tile input and the tile id
438            (passed in case the segmentation depends on the tile; ignore it otherwise) and returns a
439            label image of the tile's (haloed) spatial shape. It is cloudpickled for distributed
440            execution, so it must be picklable.
441        tile_shape: The shape of the individual tiles.
442        tile_overlap: The halo added on each side of a tile; the input to the segmentation function
443            has size ``tile_shape + 2 * tile_overlap``, and the overlap is measured in the halo.
444        output: The ``uint64`` output array. Optional for local execution — a numpy array is
445            allocated and returned if omitted; **required** (file-backed) for distributed execution.
446        beta: The boundary bias of the multicut; ``> 0.5`` biases towards over-segmentation,
447            ``< 0.5`` towards under-segmentation. Must be in the exclusive range ``(0, 1)``.
448        shape: The spatial shape of the segmentation. Defaults to the input shape; must be passed if
449            the input has trailing channel axes.
450        with_background: Whether the problem has a background label (hard-coded ``0``) that must not
451            be merged with foreground objects.
452        num_workers: Number of parallel workers (threads for ``local``, tasks for distributed
453            backends) for the tile-segmentation and overlap-counting phases.
454        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
455        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`). For distributed
456            backends a temporary per-tile store is created under ``job_config.tmp_root``.
457        return_before_stitching: Also return the (relabeled) pre-stitch segmentation, for debugging.
458
459    Returns:
460        The output array with the stitched segmentation, or ``(output, pre_stitch)`` if
461        ``return_before_stitching`` is set (``pre_stitch`` is an in-memory numpy array).
462    """
463    src = as_source(input)
464    in_ndim = src.ndim
465    shape = tuple(int(s) for s in (src.shape if shape is None else shape))
466    ndim = len(shape)
467    tile_shape = tuple(int(t) for t in tile_shape)
468    tile_overlap = tuple(int(t) for t in tile_overlap)
469
470    out_array = _prepare_output(output, shape, job_type)
471    out = as_source(out_array)
472    if out.dtype != np.dtype("uint64"):
473        raise ValueError(f"output must have dtype uint64, got {out.dtype}.")
474
475    blocking = get_blocking(shape, tile_shape)
476    n_blocks = int(blocking.number_of_blocks)
477    max_halo = tuple(ts + 2 * ov for ts, ov in zip(tile_shape, tile_overlap))
478    offset_factor = int(np.prod(max_halo))
479    if n_blocks * offset_factor >= int(np.iinfo(np.uint64).max):
480        raise ValueError("Label id overflow: number_of_blocks * prod(haloed tile shape) exceeds "
481                         "uint64. Reduce the tile shape or the volume size.")
482
483    runner = get_runner(job_type, job_config)
484    input_handle = _capture(src, job_type)
485    store, store_cleanup, store_handle = _make_tile_store(n_blocks, max_halo, job_type, job_config)
486    try:
487        # Stage 1: segment each haloed tile, offset its ids, write the inner block + the store slot.
488        stage1 = _make_segment(shape, tile_shape, tile_overlap, segmentation_function,
489                               with_background, offset_factor, store_handle, input_handle,
490                               ndim, in_ndim)
491        id_results = runner.run(stage1, [], outputs=[out_array], block_shape=tile_shape,
492                                halo=tile_overlap, num_workers=num_workers, has_return_val=True,
493                                name="stitch-segment")
494        id_arrays = [a for a in id_results if a is not None and len(a)]
495        real = (np.unique(np.concatenate(id_arrays)) if id_arrays
496                else np.zeros((0,), dtype="uint64"))
497
498        # Build the dense relabeling (offset ids are sparse; this keeps the RAG node space compact).
499        mapping = {0: 0}
500        for i, lab in enumerate(real.tolist()):
501            mapping[int(lab)] = i + 1
502
503        # Stage 2: apply the dense relabeling to the pre-stitch output in place.
504        if real.size:
505            runner.run(_make_relabel(mapping), [], outputs=[out_array], block_shape=tile_shape,
506                       num_workers=num_workers, has_return_val=False, name="stitch-densify")
507
508        # Stage 3: count object overlaps in the halo bands, reading haloed tiles from the store.
509        overlap_fn = _make_seg_overlap(shape, tile_shape, tile_overlap, store_handle, ndim)
510        ov_results = runner.run(overlap_fn, [out_array], block_shape=tile_shape,
511                                num_workers=num_workers, has_return_val=True, name="stitch-overlaps")
512    finally:
513        store_cleanup()
514
515    uv, frac = _collect_edges(ov_results)
516    uv, frac = _map_edges(uv, frac, mapping)  # overlap ids are offset ids -> map to dense.
517
518    seg = out[full_roi(ndim)]
519    stitched = _stitch_via_multicut(seg, uv, frac, with_background, beta, n_threads=None)
520
521    # Relabel to consecutive ids (elf semantics): keep 0 as background, or renumber 0 too when there
522    # is no background.
523    if with_background:
524        stitched, _, _ = bic.segmentation.relabel_sequential(stitched.astype("uint64"), offset=1)
525    else:
526        stitched, _, _ = bic.segmentation.relabel_sequential(stitched.astype("uint64") + 1, offset=1)
527
528    pre_stitch = seg.copy() if return_before_stitching else None
529    out[full_roi(ndim)] = stitched.astype(out.dtype, copy=False)
530    if return_before_stitching:
531        return out_array, pre_stitch
532    return out_array

Run a segmentation function tile-wise and stitch the results based on overlap.

Each tile is read with a halo (tile_shape + 2 * tile_overlap) and segmented independently; the halo region is where the overlap with the neighbouring tile's segmentation of the same pixels is measured. Objects that overlap strongly there are merged via a multicut over the region adjacency graph of the per-tile labeling.

Args: input: The input data (a numpy/zarr/n5 array or a Source). If it has channels they must be the last (trailing) axes, and shape must give the spatial shape. segmentation_function: The per-tile segmentation function with signature f(tile_input, tile_id) -> labels. It receives the haloed tile input and the tile id (passed in case the segmentation depends on the tile; ignore it otherwise) and returns a label image of the tile's (haloed) spatial shape. It is cloudpickled for distributed execution, so it must be picklable. tile_shape: The shape of the individual tiles. tile_overlap: The halo added on each side of a tile; the input to the segmentation function has size tile_shape + 2 * tile_overlap, and the overlap is measured in the halo. output: The uint64 output array. Optional for local execution — a numpy array is allocated and returned if omitted; required (file-backed) for distributed execution. beta: The boundary bias of the multicut; > 0.5 biases towards over-segmentation, < 0.5 towards under-segmentation. Must be in the exclusive range (0, 1). shape: The spatial shape of the segmentation. Defaults to the input shape; must be passed if the input has trailing channel axes. with_background: Whether the problem has a background label (hard-coded 0) that must not be merged with foreground objects. num_workers: Number of parallel workers (threads for local, tasks for distributed backends) for the tile-segmentation and overlap-counting phases. job_type: Execution backend: one of "local", "subprocess" or "slurm". job_config: Backend configuration (a RunnerConfig / SlurmConfig). For distributed backends a temporary per-tile store is created under job_config.tmp_root. return_before_stitching: Also return the (relabeled) pre-stitch segmentation, for debugging.

Returns: The output array with the stitched segmentation, or (output, pre_stitch) if return_before_stitching is set (pre_stitch is an in-memory numpy array).

def stitch_tiled_segmentation( segmentation: 'SourceLike', tile_shape: Tuple[int, ...], output: 'Optional[SourceLike]' = None, *, overlap: int = 1, with_background: bool = True, beta: float = 0.5, num_workers: int = 1, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None) -> 'SourceLike':
352def stitch_tiled_segmentation(
353    segmentation: SourceLike,
354    tile_shape: Tuple[int, ...],
355    output: Optional[SourceLike] = None,
356    *,
357    overlap: int = 1,
358    with_background: bool = True,
359    beta: float = 0.5,
360    num_workers: int = 1,
361    job_type: str = "local",
362    job_config: Optional[RunnerConfig] = None,
363) -> SourceLike:
364    """Stitch a segmentation that is already split into tiles with unique ids per tile.
365
366    The ids in the tiles of the input have to be unique (the segmentations are separate across
367    tiles). Objects that touch across a tile interface are merged based on how strongly they overlap
368    there, via a multicut over the region adjacency graph of the tiled segmentation.
369
370    Args:
371        segmentation: The input tiled segmentation (a numpy/zarr/n5 array or a `Source`); must be
372            integer-typed with ids unique across tiles.
373        tile_shape: The shape of the tiles (the block shape of the tiling).
374        output: The ``uint64`` output array. Optional for local execution — a numpy array is
375            allocated and returned if omitted; **required** (file-backed) for distributed execution.
376        overlap: The thickness (in pixels) of the tile-interface slab used to measure object overlap.
377        with_background: Whether the problem has a background label (hard-coded ``0``) that must not
378            be merged with foreground objects.
379        beta: The boundary bias of the multicut; ``> 0.5`` biases towards over-segmentation,
380            ``< 0.5`` towards under-segmentation.
381        num_workers: Number of parallel workers (threads for ``local``, tasks for distributed
382            backends) for the overlap-counting phase.
383        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
384        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
385
386    Returns:
387        The output array with merged labels.
388    """
389    src = as_source(segmentation)
390    if not np.issubdtype(np.dtype(src.dtype), np.integer):
391        raise ValueError(f"stitch_tiled_segmentation expects an integer label image, got {src.dtype}.")
392    ndim = src.ndim
393    shape = tuple(int(s) for s in src.shape)
394    tile_shape = tuple(int(t) for t in tile_shape)
395    out_array = _prepare_output(output, shape, job_type)
396
397    runner = get_runner(job_type, job_config)
398    overlap_fn = _make_tiled_overlap(shape, tile_shape, int(overlap), ndim)
399    results = runner.run(overlap_fn, [segmentation], block_shape=tile_shape, num_workers=num_workers,
400                         has_return_val=True, name="stitch-overlaps")
401    uv, frac = _collect_edges(results)
402
403    seg = src[full_roi(ndim)]
404    stitched = _stitch_via_multicut(seg, uv, frac, with_background, beta, n_threads=None)
405
406    out = as_source(out_array)
407    out[full_roi(ndim)] = stitched.astype(out.dtype, copy=False)
408    return out_array

Stitch a segmentation that is already split into tiles with unique ids per tile.

The ids in the tiles of the input have to be unique (the segmentations are separate across tiles). Objects that touch across a tile interface are merged based on how strongly they overlap there, via a multicut over the region adjacency graph of the tiled segmentation.

Args: segmentation: The input tiled segmentation (a numpy/zarr/n5 array or a Source); must be integer-typed with ids unique across tiles. tile_shape: The shape of the tiles (the block shape of the tiling). output: The uint64 output array. Optional for local execution — a numpy array is allocated and returned if omitted; required (file-backed) for distributed execution. overlap: The thickness (in pixels) of the tile-interface slab used to measure object overlap. with_background: Whether the problem has a background label (hard-coded 0) that must not be merged with foreground objects. beta: The boundary bias of the multicut; > 0.5 biases towards over-segmentation, < 0.5 towards under-segmentation. num_workers: Number of parallel workers (threads for local, tasks for distributed backends) for the overlap-counting phase. job_type: Execution backend: one of "local", "subprocess" or "slurm". job_config: Backend configuration (a RunnerConfig / SlurmConfig).

Returns: The output array with merged labels.

def compute_edge_costs( probs: numpy.ndarray, edge_sizes: Optional[numpy.ndarray] = None, z_edge_mask: Optional[numpy.ndarray] = None, beta: float = 0.5, weighting_scheme: Optional[str] = None, weighting_exponent: float = 1.0) -> numpy.ndarray:
 87def compute_edge_costs(
 88    probs: np.ndarray,
 89    edge_sizes: Optional[np.ndarray] = None,
 90    z_edge_mask: Optional[np.ndarray] = None,
 91    beta: float = 0.5,
 92    weighting_scheme: Optional[str] = None,
 93    weighting_exponent: float = 1.0,
 94) -> np.ndarray:
 95    """Compute multicut edge costs from probabilities with a pre-defined weighting scheme.
 96
 97    Args:
 98        probs: The input edge probabilities, in ``[0, 1]``.
 99        edge_sizes: The sizes of the edges; required for all weighting schemes except ``None`` /
100            ``"none"``.
101        z_edge_mask: A boolean mask of inter-slice edges; required for the ``"xyz"`` and ``"z"``
102            schemes (for flat superpixels in a 3d problem).
103        beta: The boundary bias (see `transform_probabilities_to_costs`).
104        weighting_scheme: How to weight the costs by edge size; one of ``None``, ``"none"``,
105            ``"all"``, ``"xyz"`` or ``"z"``.
106        weighting_exponent: The exponent applied to the normalized edge sizes when weighting.
107
108    Returns:
109        The edge costs.
110
111    Raises:
112        ValueError: If ``weighting_scheme`` is unknown or a scheme's required inputs are missing.
113    """
114    schemes = (None, "all", "none", "xyz", "z")
115    if weighting_scheme not in schemes:
116        schemes_str = ", ".join([str(scheme) for scheme in schemes])
117        raise ValueError(f"Weighting scheme must be one of {schemes_str}, got {weighting_scheme}.")
118
119    if weighting_scheme is None or weighting_scheme == "none":
120        edge_pop = edge_sizes_ = None
121
122    elif weighting_scheme == "all":
123        if edge_sizes is None:
124            raise ValueError("Need edge sizes for weighting scheme 'all'.")
125        if len(edge_sizes) != len(probs):
126            raise ValueError("Invalid edge sizes.")
127        edge_sizes_ = edge_sizes
128        edge_pop = None
129
130    elif weighting_scheme == "xyz":
131        if edge_sizes is None or z_edge_mask is None:
132            raise ValueError("Need edge sizes and z edge mask for weighting scheme 'xyz'.")
133        if len(edge_sizes) != len(probs) or len(z_edge_mask) != len(probs):
134            raise ValueError("Invalid edge sizes or z edge mask.")
135        edge_pop = [z_edge_mask, np.logical_not(z_edge_mask)]
136        edge_sizes_ = edge_sizes
137
138    elif weighting_scheme == "z":
139        if edge_sizes is None or z_edge_mask is None:
140            raise ValueError("Need edge sizes and z edge mask for weighting scheme 'z'.")
141        if len(edge_sizes) != len(probs) or len(z_edge_mask) != len(probs):
142            raise ValueError("Invalid edge sizes or z edge mask.")
143        edge_pop = [z_edge_mask, np.logical_not(z_edge_mask)]
144        edge_sizes_ = edge_sizes.copy()
145        edge_sizes_[edge_pop[1]] = 1.0
146
147    return transform_probabilities_to_costs(
148        probs, beta=beta, edge_sizes=edge_sizes_, edge_populations=edge_pop,
149        weighting_exponent=weighting_exponent,
150    )

Compute multicut edge costs from probabilities with a pre-defined weighting scheme.

Args: probs: The input edge probabilities, in [0, 1]. edge_sizes: The sizes of the edges; required for all weighting schemes except None / "none". z_edge_mask: A boolean mask of inter-slice edges; required for the "xyz" and "z" schemes (for flat superpixels in a 3d problem). beta: The boundary bias (see transform_probabilities_to_costs). weighting_scheme: How to weight the costs by edge size; one of None, "none", "all", "xyz" or "z". weighting_exponent: The exponent applied to the normalized edge sizes when weighting.

Returns: The edge costs.

Raises: ValueError: If weighting_scheme is unknown or a scheme's required inputs are missing.

def transform_probabilities_to_costs( probs: numpy.ndarray, beta: float = 0.5, edge_sizes: Optional[numpy.ndarray] = None, edge_populations: Optional[List[numpy.ndarray]] = None, weighting_exponent: float = 1.0) -> numpy.ndarray:
47def transform_probabilities_to_costs(
48    probs: np.ndarray,
49    beta: float = 0.5,
50    edge_sizes: Optional[np.ndarray] = None,
51    edge_populations: Optional[List[np.ndarray]] = None,
52    weighting_exponent: float = 1.0,
53) -> np.ndarray:
54    """Transform merge probabilities to multicut costs via the negative log-likelihood.
55
56    Probabilities near ``1`` map to large positive costs (attractive, likely same segment) and
57    probabilities near ``0`` to large negative costs (repulsive). The boundary bias ``beta``
58    shifts the decision threshold.
59
60    Args:
61        probs: The input edge probabilities, in ``[0, 1]``.
62        beta: The boundary bias term; ``> 0.5`` biases towards over-segmentation (more cuts),
63            ``< 0.5`` towards under-segmentation. Must be in the exclusive range ``(0, 1)``.
64        edge_sizes: The sizes of the edges, used for weighting if given.
65        edge_populations: Disjoint edge populations (lists of masks or index arrays) that are
66            size-weighted independently, e.g. flat superpixels in a 3d problem.
67        weighting_exponent: The exponent applied to the normalized edge sizes when weighting.
68
69    Returns:
70        The edge costs.
71    """
72    p_min = 0.001
73    p_max = 1.0 - p_min
74    costs = (p_max - p_min) * probs + p_min
75    # Probabilities to costs; the second term is the boundary bias.
76    costs = np.log((1.0 - costs) / costs) + np.log((1.0 - beta) / beta)
77    # Weight the costs with edge sizes, if they are given.
78    if edge_sizes is not None:
79        assert len(edge_sizes) == len(costs)
80        if edge_populations is None:
81            costs = _weight_edges(costs, edge_sizes, weighting_exponent)
82        else:
83            costs = _weight_populations(costs, edge_sizes, edge_populations, weighting_exponent)
84    return costs

Transform merge probabilities to multicut costs via the negative log-likelihood.

Probabilities near 1 map to large positive costs (attractive, likely same segment) and probabilities near 0 to large negative costs (repulsive). The boundary bias beta shifts the decision threshold.

Args: probs: The input edge probabilities, in [0, 1]. beta: The boundary bias term; > 0.5 biases towards over-segmentation (more cuts), < 0.5 towards under-segmentation. Must be in the exclusive range (0, 1). edge_sizes: The sizes of the edges, used for weighting if given. edge_populations: Disjoint edge populations (lists of masks or index arrays) that are size-weighted independently, e.g. flat superpixels in a 3d problem. weighting_exponent: The exponent applied to the normalized edge sizes when weighting.

Returns: The edge costs.

def multicut_decomposition( graph, costs: numpy.ndarray, n_threads: int = 1, internal_solver: str = 'kernighan-lin') -> numpy.ndarray:
218def multicut_decomposition(
219    graph,
220    costs: np.ndarray,
221    n_threads: int = 1,
222    internal_solver: str = "kernighan-lin",
223) -> np.ndarray:
224    """Solve the multicut problem with the decomposition solver.
225
226    The graph is split into its connected components after removing strongly repulsive edges, each
227    component is solved independently (in parallel) with ``internal_solver``, and the solutions are
228    combined. Introduced in "Break and Conquer: Efficient Correlation Clustering for Image
229    Segmentation" (https://link.springer.com/chapter/10.1007/978-3-642-39140-8_9).
230
231    Args:
232        graph: The graph (or region adjacency graph) of the multicut problem.
233        costs: The edge costs of the multicut problem.
234        n_threads: The number of threads used to solve sub-problems in parallel.
235        internal_solver: The name of the solver used for the sub-problems; one of
236            ``"kernighan-lin"``, ``"greedy-additive"`` or ``"greedy-fixation"``.
237
238    Returns:
239        The node label solution to the multicut problem.
240    """
241    objective = _to_objective(graph, costs)
242    solver = bic.graph.multicut.MulticutDecomposer(
243        sub_solver=_get_solver(internal_solver),
244        fallthrough_solver=_get_solver(internal_solver),
245        number_of_threads=n_threads,
246    )
247    return solver.optimize(objective)

Solve the multicut problem with the decomposition solver.

The graph is split into its connected components after removing strongly repulsive edges, each component is solved independently (in parallel) with internal_solver, and the solutions are combined. Introduced in "Break and Conquer: Efficient Correlation Clustering for Image Segmentation" (https://link.springer.com/chapter/10.1007/978-3-642-39140-8_9).

Args: graph: The graph (or region adjacency graph) of the multicut problem. costs: The edge costs of the multicut problem. n_threads: The number of threads used to solve sub-problems in parallel. internal_solver: The name of the solver used for the sub-problems; one of "kernighan-lin", "greedy-additive" or "greedy-fixation".

Returns: The node label solution to the multicut problem.

def multicut_gaec(graph, costs: numpy.ndarray) -> numpy.ndarray:
201def multicut_gaec(graph, costs: np.ndarray) -> np.ndarray:
202    """Solve the multicut problem with the greedy-additive edge contraction solver.
203
204    Introduced in "Fusion moves for correlation clustering"
205    (http://openaccess.thecvf.com/content_cvpr_2015/papers/Beier_Fusion_Moves_for_2015_CVPR_paper.pdf).
206
207    Args:
208        graph: The graph (or region adjacency graph) of the multicut problem.
209        costs: The edge costs of the multicut problem.
210
211    Returns:
212        The node label solution to the multicut problem.
213    """
214    objective = _to_objective(graph, costs)
215    return bic.graph.multicut.GreedyAdditiveMulticut().optimize(objective)

Solve the multicut problem with the greedy-additive edge contraction solver.

Introduced in "Fusion moves for correlation clustering" (http://openaccess.thecvf.com/content_cvpr_2015/papers/Beier_Fusion_Moves_for_2015_CVPR_paper.pdf).

Args: graph: The graph (or region adjacency graph) of the multicut problem. costs: The edge costs of the multicut problem.

Returns: The node label solution to the multicut problem.

def multicut_kernighan_lin(graph, costs: numpy.ndarray, warmstart: bool = True) -> numpy.ndarray:
176def multicut_kernighan_lin(graph, costs: np.ndarray, warmstart: bool = True) -> np.ndarray:
177    """Solve the multicut problem with the Kernighan-Lin solver.
178
179    Introduced in "An efficient heuristic procedure for partitioning graphs"
180    (http://xilinx.asia/_hdl/4/eda.ee.ucla.edu/EE201A-04Spring/kl.pdf).
181
182    Args:
183        graph: The graph (or region adjacency graph) of the multicut problem.
184        costs: The edge costs of the multicut problem.
185        warmstart: Whether to warmstart with the greedy-additive solution.
186
187    Returns:
188        The node label solution to the multicut problem.
189    """
190    objective = _to_objective(graph, costs)
191    if warmstart:
192        solver = bic.graph.multicut.ChainedMulticutSolvers([
193            bic.graph.multicut.GreedyAdditiveMulticut(),
194            bic.graph.multicut.KernighanLinMulticut(),
195        ])
196    else:
197        solver = bic.graph.multicut.KernighanLinMulticut()
198    return solver.optimize(objective)

Solve the multicut problem with the Kernighan-Lin solver.

Introduced in "An efficient heuristic procedure for partitioning graphs" (http://xilinx.asia/_hdl/4/eda.ee.ucla.edu/EE201A-04Spring/kl.pdf).

Args: graph: The graph (or region adjacency graph) of the multicut problem. costs: The edge costs of the multicut problem. warmstart: Whether to warmstart with the greedy-additive solution.

Returns: The node label solution to the multicut problem.