bioimage_py.segmentation
Segmentation: connected-component labeling and related operations.
1"""Segmentation: connected-component labeling and related operations.""" 2from .label import label 3from .multicut import (compute_edge_costs, multicut_decomposition, multicut_gaec, 4 multicut_kernighan_lin, transform_probabilities_to_costs) 5from .relabel import relabel_consecutive 6from .size_filter import segmentation_filter, size_filter 7from .stitching import stitch_segmentation, stitch_tiled_segmentation 8from .watershed import watershed 9 10__all__ = [ 11 "label", 12 "watershed", 13 "relabel_consecutive", 14 "segmentation_filter", 15 "size_filter", 16 "stitch_segmentation", 17 "stitch_tiled_segmentation", 18 "compute_edge_costs", 19 "transform_probabilities_to_costs", 20 "multicut_decomposition", 21 "multicut_gaec", 22 "multicut_kernighan_lin", 23]
127def label( 128 input: SourceLike, 129 output: Optional[SourceLike] = None, 130 *, 131 threshold: Optional[float] = None, 132 connectivity: Optional[int] = None, 133 block_shape: Optional[Tuple[int, ...]] = None, 134 job_type: str = "local", 135 job_config: Optional[RunnerConfig] = None, 136 num_workers: int = 1, 137 mask: Optional[SourceLike] = None, 138) -> SourceLike: 139 """Label connected components of (optionally thresholded) data, block-wise. 140 141 Unlike the single-pass operations, ``label`` is multi-stage with a global cross-block merge 142 (per-block labeling, then a union-find over touching components across block faces), so it 143 does **not** accept ``block_ids`` or ``resume_from``: a failed run must be re-run whole (it is 144 idempotent given the same ``output``). 145 146 Args: 147 input: The input data (a numpy/zarr/n5 array or a `Source`). 148 output: The ``uint64`` output array to write the labels into. Optional for local 149 execution — a numpy array is allocated and returned if omitted; **required** for 150 distributed execution. 151 threshold: If given, the input is binarized as ``input > threshold``; otherwise the 152 input is treated as a binary foreground mask. 153 connectivity: Neighbour connectivity in ``[1, ndim]`` (``1`` = orthogonal). Defaults 154 to ``1``; values ``> 1`` are only supported for the direct (single-block) path. 155 block_shape: Shape of the processing blocks. Defaults to the input/output chunk shape; 156 required for unchunked data. 157 job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``. 158 job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`). 159 num_workers: Number of parallel workers (threads for ``local``, tasks for distributed 160 backends). 161 mask: Optional binary mask; values outside the mask are excluded from the foreground. 162 163 Returns: 164 The output array (the provided ``output``, or a newly allocated numpy array), labeled 165 with consecutive ids (background stays ``0``). 166 """ 167 src = as_source(input) 168 ndim = src.ndim 169 conn = 1 if connectivity is None else int(connectivity) 170 if not 1 <= conn <= ndim: 171 raise ValueError(f"connectivity must be in [1, {ndim}], got {conn}.") 172 173 direct = is_direct(job_type, num_workers, block_shape) and mask is None 174 if conn > 1 and not direct: 175 raise NotImplementedError( 176 "Block-wise labeling only supports connectivity=1 (orthogonal). Use the direct " 177 "path (local, single worker, no block_shape, no mask) for higher connectivity." 178 ) 179 180 if output is None: 181 if job_type != "local": 182 raise ValueError( 183 f"'output' is required for distributed execution (job_type={job_type!r}); " 184 "pass a file-backed (zarr/n5) output array." 185 ) 186 out_array: SourceLike = np.zeros(tuple(src.shape), dtype="uint64") 187 else: 188 out_array = output 189 190 out = as_source(out_array) 191 if out.dtype != np.dtype("uint64"): 192 raise ValueError(f"output must have dtype uint64, got {out.dtype}.") 193 194 if direct: 195 binary = _binarize(src[full_roi(ndim)], threshold) 196 comp = bic.segmentation.label(binary, connectivity=conn).astype("uint64", copy=False) 197 out[full_roi(ndim)] = comp 198 return out_array 199 200 block_shape = _resolve_block_shape(src, out, block_shape) 201 offset_factor = int(np.prod(block_shape)) 202 blocking = get_blocking(src.shape, block_shape) 203 n_blocks = int(blocking.number_of_blocks) 204 if (n_blocks * offset_factor) >= int(np.iinfo(np.uint64).max): 205 raise ValueError( 206 "Label id overflow: number_of_blocks * prod(block_shape) exceeds uint64. " 207 "Reduce the block shape or the volume size." 208 ) 209 210 runner = get_runner(job_type, job_config) 211 212 # Stage 1: label each block independently with a globally-unique offset. 213 stage1 = _make_stage1(tuple(src.shape), block_shape, conn, threshold, offset_factor) 214 id_results = runner.run(stage1, [input], outputs=[out_array], block_shape=block_shape, 215 mask=mask, num_workers=num_workers, has_return_val=True, 216 name="label-blocks") 217 id_arrays = [a for a in id_results if a is not None and len(a)] 218 real_labels = np.unique(np.concatenate(id_arrays)) if id_arrays else np.zeros((0,), dtype="uint64") 219 max_label = int(real_labels.max()) if real_labels.size else 0 220 221 # Stage 2: collect label equivalences across lower block faces. 222 stage2 = _make_stage2(tuple(src.shape), block_shape) 223 pair_results = runner.run(stage2, [out_array], block_shape=block_shape, 224 num_workers=num_workers, has_return_val=True, name="merge-faces") 225 pairs = [p for p in pair_results if p is not None] 226 assignments = (np.unique(np.concatenate(pairs, axis=0), axis=0) 227 if pairs else np.zeros((0, 2), dtype="uint64")) 228 229 # Stage 3 (in process): union-find merge, then relabel only the labels that exist to 230 # consecutive ids (the offset space is sparse, so relabeling over it would not be compact). 231 uf = bic.utils.UnionFind(max_label + 1) 232 if len(assignments): 233 uf.merge(assignments.astype("uint64")) 234 mapping: Dict[int, int] = {0: 0} 235 if real_labels.size: 236 roots = np.asarray(uf.find(real_labels.astype("uint64"))) 237 root_to_new = {int(r): i + 1 for i, r in enumerate(np.unique(roots).tolist())} 238 for lab, root in zip(real_labels.tolist(), roots.tolist()): 239 mapping[int(lab)] = root_to_new[int(root)] 240 241 # Stage 4: apply the mapping in place. 242 stage4 = _make_stage4(mapping) 243 runner.run(stage4, [], outputs=[out_array], block_shape=block_shape, 244 num_workers=num_workers, has_return_val=False, name="relabel") 245 return out_array
Label connected components of (optionally thresholded) data, block-wise.
Unlike the single-pass operations, label is multi-stage with a global cross-block merge
(per-block labeling, then a union-find over touching components across block faces), so it
does not accept block_ids or resume_from: a failed run must be re-run whole (it is
idempotent given the same output).
Args:
input: The input data (a numpy/zarr/n5 array or a Source).
output: The uint64 output array to write the labels into. Optional for local
execution — a numpy array is allocated and returned if omitted; required for
distributed execution.
threshold: If given, the input is binarized as input > threshold; otherwise the
input is treated as a binary foreground mask.
connectivity: Neighbour connectivity in [1, ndim] (1 = orthogonal). Defaults
to 1; values > 1 are only supported for the direct (single-block) path.
block_shape: Shape of the processing blocks. Defaults to the input/output chunk shape;
required for unchunked data.
job_type: Execution backend: one of "local", "subprocess" or "slurm".
job_config: Backend configuration (a RunnerConfig / SlurmConfig).
num_workers: Number of parallel workers (threads for local, tasks for distributed
backends).
mask: Optional binary mask; values outside the mask are excluded from the foreground.
Returns:
The output array (the provided output, or a newly allocated numpy array), labeled
with consecutive ids (background stays 0).
67def watershed( 68 input: SourceLike, 69 seeds: SourceLike, 70 output: Optional[SourceLike] = None, 71 *, 72 halo: Optional[Sequence[int]] = None, 73 block_shape: Optional[Tuple[int, ...]] = None, 74 job_type: str = "local", 75 job_config: Optional[RunnerConfig] = None, 76 num_workers: int = 1, 77 mask: Optional[SourceLike] = None, 78 block_ids: Optional[Sequence[int]] = None, 79 resume_from: Optional[str] = None, 80) -> SourceLike: 81 """Compute a seeded watershed over a height map, block-wise. 82 83 Each block runs a seeded watershed (``bioimage_cpp.segmentation.watershed``) on a halo-padded 84 region and writes back the halo-free inner block. The ``seeds`` define the segments and their 85 ids are preserved verbatim -- there is no cross-block merge, so pass globally consistent seeds 86 (e.g. a connected-component labeling of a seed mask) for a coherent result. 87 88 Being single-stage, this operation supports ``block_ids`` / ``resume_from`` re-runs. The 89 block-wise output matches a whole-array watershed only where ``halo`` covers the relevant 90 catchment basins, but is bit-identical across backends for a fixed ``(block_shape, halo)``. 91 92 Args: 93 input: The height map (a numpy/zarr/n5 array or a `Source`). It is cast to ``float32`` for 94 the watershed if it is not already a float type. 95 seeds: The pre-computed integer seed markers (``0`` = background), same shape as ``input``. 96 A non-integer seed map is cast to ``uint32``. 97 output: The integer output array to write the segmentation into. Optional for local 98 execution -- a ``uint64`` numpy array is allocated and returned if omitted; **required** 99 for distributed execution. It must be wide enough to hold the seed ids (``uint64`` 100 recommended; a ``uint32`` watershed result writes losslessly into a ``uint64`` output). 101 halo: Per-axis halo enlarging each block; **required** for the block-wise path (there is no 102 principled default for a watershed). Choose it large enough to cover object extents at 103 block boundaries. Ignored by the direct (single-block) path. 104 block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required 105 for unchunked data. 106 job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``. 107 job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`). 108 num_workers: Number of parallel workers (threads for ``local``, tasks for distributed 109 backends). 110 mask: Optional binary mask; voxels outside the mask are excluded and stay ``0``. 111 block_ids: Restrict processing to these block ids (e.g. to re-run previously failed blocks 112 into the existing ``output``). Mutually exclusive with ``resume_from``. 113 resume_from: Distributed only; the preserved temp folder of a failed run to resume (see 114 ``runner.run``); the missing blocks are written into ``output``. Mutually exclusive 115 with ``block_ids``. 116 117 Returns: 118 The output array (the provided ``output``, or a newly allocated ``uint64`` numpy array). 119 """ 120 check_rerun_args(job_type, resume_from, block_ids) 121 122 src = as_source(input) 123 seeds_src = as_source(seeds) 124 ndim = src.ndim 125 if tuple(seeds_src.shape) != tuple(src.shape): 126 raise ValueError( 127 f"seeds shape {seeds_src.shape} does not match input shape {src.shape}." 128 ) 129 130 # A subset/resume rerun is inherently block-wise, so it cannot use the direct (whole-array) path. 131 direct = (is_direct(job_type, num_workers, block_shape) 132 and block_ids is None and resume_from is None) 133 134 if output is None: 135 if job_type != "local": 136 raise ValueError( 137 f"'output' is required for distributed execution (job_type={job_type!r}); " 138 "pass a file-backed (zarr/n5) output array." 139 ) 140 out_array: SourceLike = np.zeros(tuple(src.shape), dtype="uint64") 141 else: 142 out_array = output 143 144 out = as_source(out_array) 145 if not np.issubdtype(out.dtype, np.integer): 146 raise ValueError(f"output must have an integer dtype, got {out.dtype}.") 147 148 if direct: 149 block_hmap = _as_hmap(src[full_roi(ndim)]) 150 block_seeds = _as_seeds(seeds_src[full_roi(ndim)]) 151 block_mask = as_source(mask)[full_roi(ndim)].astype(bool) if mask is not None else None 152 out[full_roi(ndim)] = bic.segmentation.watershed(block_hmap, block_seeds, mask=block_mask) 153 return out_array 154 155 if halo is None: 156 raise ValueError( 157 "halo is required for block-wise watershed; choose one large enough to cover object " 158 "extents at block boundaries." 159 ) 160 161 runner = get_runner(job_type, job_config) 162 runner.run(_watershed_block, [input, seeds], outputs=[out_array], halo=halo, 163 block_shape=block_shape, mask=mask, num_workers=num_workers, 164 block_ids=block_ids, resume_from=resume_from, name="watershed") 165 return out_array
Compute a seeded watershed over a height map, block-wise.
Each block runs a seeded watershed (bioimage_cpp.segmentation.watershed) on a halo-padded
region and writes back the halo-free inner block. The seeds define the segments and their
ids are preserved verbatim -- there is no cross-block merge, so pass globally consistent seeds
(e.g. a connected-component labeling of a seed mask) for a coherent result.
Being single-stage, this operation supports block_ids / resume_from re-runs. The
block-wise output matches a whole-array watershed only where halo covers the relevant
catchment basins, but is bit-identical across backends for a fixed (block_shape, halo).
Args:
input: The height map (a numpy/zarr/n5 array or a Source). It is cast to float32 for
the watershed if it is not already a float type.
seeds: The pre-computed integer seed markers (0 = background), same shape as input.
A non-integer seed map is cast to uint32.
output: The integer output array to write the segmentation into. Optional for local
execution -- a uint64 numpy array is allocated and returned if omitted; required
for distributed execution. It must be wide enough to hold the seed ids (uint64
recommended; a uint32 watershed result writes losslessly into a uint64 output).
halo: Per-axis halo enlarging each block; required for the block-wise path (there is no
principled default for a watershed). Choose it large enough to cover object extents at
block boundaries. Ignored by the direct (single-block) path.
block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required
for unchunked data.
job_type: Execution backend: one of "local", "subprocess" or "slurm".
job_config: Backend configuration (a RunnerConfig / SlurmConfig).
num_workers: Number of parallel workers (threads for local, tasks for distributed
backends).
mask: Optional binary mask; voxels outside the mask are excluded and stay 0.
block_ids: Restrict processing to these block ids (e.g. to re-run previously failed blocks
into the existing output). Mutually exclusive with resume_from.
resume_from: Distributed only; the preserved temp folder of a failed run to resume (see
runner.run); the missing blocks are written into output. Mutually exclusive
with block_ids.
Returns:
The output array (the provided output, or a newly allocated uint64 numpy array).
49def relabel_consecutive( 50 input: SourceLike, 51 output: Optional[SourceLike] = None, 52 *, 53 start_label: int = 0, 54 keep_zeros: bool = True, 55 block_shape: Optional[Tuple[int, ...]] = None, 56 job_type: str = "local", 57 job_config: Optional[RunnerConfig] = None, 58 num_workers: int = 1, 59 mask: Optional[SourceLike] = None, 60) -> Tuple[SourceLike, int, Dict[int, int]]: 61 """Relabel a segmentation to consecutive ids, block-wise. 62 63 Like ``label``, this is multi-stage (a global ``unique`` reduction, an in-process mapping, then a 64 block-wise write), so it does **not** accept ``block_ids`` or ``resume_from``: a failed run is 65 re-run whole (it is idempotent given the same ``output``). 66 67 Args: 68 input: The input label image (a numpy/zarr/n5 array or a `Source`); must be integer-typed. 69 output: The output array to write the relabeled segmentation into. Optional for local 70 execution -- a numpy array matching the input shape and dtype is allocated and returned 71 if omitted; **required** for distributed execution (a writable, file-backed zarr/n5 72 array). 73 start_label: The value the smallest unique id is mapped to (subsequent ids follow 74 consecutively). 75 keep_zeros: Whether to always keep ``0`` mapped to ``0`` (background), regardless of 76 ``start_label``. 77 block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required 78 for unchunked data. 79 job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``. 80 job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`). 81 num_workers: Number of parallel workers (threads for ``local``, tasks for distributed 82 backends). 83 mask: Optional binary mask; values outside the mask are excluded from the computation and 84 their output voxels are left unchanged. 85 86 Returns: 87 A ``(output, max_id, mapping)`` tuple: the relabeled output array, the maximum label id 88 after relabeling, and the ``{old_id: new_id}`` mapping that was applied. 89 """ 90 src = as_source(input) 91 if not np.issubdtype(np.dtype(src.dtype), np.integer): 92 raise ValueError(f"relabel_consecutive expects an integer label image, got dtype {src.dtype}.") 93 ndim = src.ndim 94 95 direct = is_direct(job_type, num_workers, block_shape) and mask is None 96 97 if output is None: 98 if job_type != "local": 99 raise ValueError( 100 f"'output' is required for distributed execution (job_type={job_type!r}); " 101 "pass a file-backed (zarr/n5) output array." 102 ) 103 out_array: SourceLike = np.zeros(tuple(src.shape), dtype=src.dtype) 104 else: 105 out_array = output 106 out = as_source(out_array) 107 if not direct and same_array(out, src): 108 raise ValueError("Block-wise relabel_consecutive needs 'output' to differ from 'input'.") 109 110 # Pass 1: the global set of unique values. 111 if direct: 112 uniques = np.unique(src[full_roi(ndim)]) 113 else: 114 uniques = unique(input, block_shape=block_shape, job_type=job_type, job_config=job_config, 115 num_workers=num_workers, mask=mask) 116 117 # In-process: build the old -> new mapping (consecutive ids from start_label). 118 mapping: Dict[int, int] = {int(v): i for i, v in enumerate(uniques.tolist(), start_label)} 119 if keep_zeros and 0 in mapping: 120 mapping[0] = 0 121 max_id = max(mapping.values()) if mapping else 0 122 123 # Pass 2: apply the mapping. 124 if direct: 125 out[full_roi(ndim)] = bic.utils.take_dict(mapping, src[full_roi(ndim)]) 126 return out_array, max_id, mapping 127 128 runner = get_runner(job_type, job_config) 129 runner.run(_make_relabel_block(mapping), [input], outputs=[out_array], block_shape=block_shape, 130 mask=mask, num_workers=num_workers, name="relabel") 131 return out_array, max_id, mapping
Relabel a segmentation to consecutive ids, block-wise.
Like label, this is multi-stage (a global unique reduction, an in-process mapping, then a
block-wise write), so it does not accept block_ids or resume_from: a failed run is
re-run whole (it is idempotent given the same output).
Args:
input: The input label image (a numpy/zarr/n5 array or a Source); must be integer-typed.
output: The output array to write the relabeled segmentation into. Optional for local
execution -- a numpy array matching the input shape and dtype is allocated and returned
if omitted; required for distributed execution (a writable, file-backed zarr/n5
array).
start_label: The value the smallest unique id is mapped to (subsequent ids follow
consecutively).
keep_zeros: Whether to always keep 0 mapped to 0 (background), regardless of
start_label.
block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required
for unchunked data.
job_type: Execution backend: one of "local", "subprocess" or "slurm".
job_config: Backend configuration (a RunnerConfig / SlurmConfig).
num_workers: Number of parallel workers (threads for local, tasks for distributed
backends).
mask: Optional binary mask; values outside the mask are excluded from the computation and
their output voxels are left unchanged.
Returns:
A (output, max_id, mapping) tuple: the relabeled output array, the maximum label id
after relabeling, and the {old_id: new_id} mapping that was applied.
57def segmentation_filter( 58 input: SourceLike, 59 filter_function: BlockFn, 60 output: Optional[SourceLike] = None, 61 *, 62 relabel: Optional[BlockFn] = None, 63 block_shape: Optional[Tuple[int, ...]] = None, 64 job_type: str = "local", 65 job_config: Optional[RunnerConfig] = None, 66 num_workers: int = 1, 67 mask: Optional[SourceLike] = None, 68 block_ids: Optional[Sequence[int]] = None, 69 resume_from: Optional[str] = None, 70) -> SourceLike: 71 """Filter a segmentation with a custom per-block criterion, block-wise. 72 73 Args: 74 input: The input segmentation (a numpy/zarr/n5 array or a `Source`). 75 filter_function: A picklable callable ``filter_function(block_seg, block_mask)`` returning the 76 filtered block. ``block_mask`` is the block's boolean in-mask array, or ``None`` when no 77 mask is used; when a mask is used, restrict the criterion to the in-mask voxels. 78 output: The output array to write into. Optional for local execution -- a numpy array 79 matching the input shape and dtype is allocated and returned if omitted; **required** for 80 distributed execution (a writable, file-backed zarr/n5 array). 81 relabel: Optional picklable callable ``relabel(block_seg, block_mask)`` applied after 82 ``filter_function`` (e.g. a consecutive relabeling); same masking contract. 83 block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required 84 for unchunked data. 85 job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``. 86 job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`). 87 num_workers: Number of parallel workers (threads for ``local``, tasks for distributed 88 backends). 89 mask: Optional binary mask; out-of-mask output voxels are left unchanged. 90 block_ids: Restrict processing to these block ids (e.g. to re-run previously failed blocks). 91 Mutually exclusive with ``resume_from``. 92 resume_from: Distributed only; the preserved temp folder of a failed run to resume (see 93 ``runner.run``). Mutually exclusive with ``block_ids``. 94 95 Returns: 96 The output array (the provided ``output``, or a newly allocated numpy array). 97 """ 98 check_rerun_args(job_type, resume_from, block_ids) 99 src = as_source(input) 100 ndim = src.ndim 101 direct = (is_direct(job_type, num_workers, block_shape) and mask is None 102 and block_ids is None and resume_from is None) 103 104 if output is None: 105 if job_type != "local": 106 raise ValueError( 107 f"'output' is required for distributed execution (job_type={job_type!r}); " 108 "pass a file-backed (zarr/n5) output array." 109 ) 110 out_array: SourceLike = np.zeros(tuple(src.shape), dtype=src.dtype) 111 else: 112 out_array = output 113 out = as_source(out_array) 114 if not direct and same_array(out, src): 115 raise ValueError("Block-wise segmentation_filter needs 'output' to differ from 'input'.") 116 117 if direct: 118 filtered = filter_function(src[full_roi(ndim)], None) 119 if relabel is not None: 120 filtered = relabel(filtered, None) 121 out[full_roi(ndim)] = filtered 122 return out_array 123 124 runner = get_runner(job_type, job_config) 125 runner.run(_make_filter_block(filter_function, relabel), [input], outputs=[out_array], 126 block_shape=block_shape, mask=mask, num_workers=num_workers, 127 block_ids=block_ids, resume_from=resume_from, name="segmentation_filter") 128 return out_array
Filter a segmentation with a custom per-block criterion, block-wise.
Args:
input: The input segmentation (a numpy/zarr/n5 array or a Source).
filter_function: A picklable callable filter_function(block_seg, block_mask) returning the
filtered block. block_mask is the block's boolean in-mask array, or None when no
mask is used; when a mask is used, restrict the criterion to the in-mask voxels.
output: The output array to write into. Optional for local execution -- a numpy array
matching the input shape and dtype is allocated and returned if omitted; required for
distributed execution (a writable, file-backed zarr/n5 array).
relabel: Optional picklable callable relabel(block_seg, block_mask) applied after
filter_function (e.g. a consecutive relabeling); same masking contract.
block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required
for unchunked data.
job_type: Execution backend: one of "local", "subprocess" or "slurm".
job_config: Backend configuration (a RunnerConfig / SlurmConfig).
num_workers: Number of parallel workers (threads for local, tasks for distributed
backends).
mask: Optional binary mask; out-of-mask output voxels are left unchanged.
block_ids: Restrict processing to these block ids (e.g. to re-run previously failed blocks).
Mutually exclusive with resume_from.
resume_from: Distributed only; the preserved temp folder of a failed run to resume (see
runner.run). Mutually exclusive with block_ids.
Returns:
The output array (the provided output, or a newly allocated numpy array).
158def size_filter( 159 input: SourceLike, 160 output: Optional[SourceLike] = None, 161 *, 162 min_size: Optional[int] = None, 163 max_size: Optional[int] = None, 164 relabel: bool = True, 165 block_shape: Optional[Tuple[int, ...]] = None, 166 job_type: str = "local", 167 job_config: Optional[RunnerConfig] = None, 168 num_workers: int = 1, 169 mask: Optional[SourceLike] = None, 170) -> SourceLike: 171 """Remove objects smaller than ``min_size`` and/or larger than ``max_size`` from a segmentation. 172 173 Multi-stage (a global ``unique`` count reduction, then a filter pass), so it does **not** accept 174 ``block_ids`` / ``resume_from``. By default it relabels the result consecutively; pass 175 ``relabel=False`` to keep the original ids of the surviving objects. 176 177 Args: 178 input: The input segmentation (a numpy/zarr/n5 array or a `Source`); must be integer-typed. 179 output: The output array to write into. Optional for local execution -- a numpy array 180 matching the input shape and dtype is allocated and returned if omitted; **required** for 181 distributed execution (a writable, file-backed zarr/n5 array). 182 min_size: The minimum object size; smaller objects are removed. At least one of ``min_size`` / 183 ``max_size`` is required. 184 max_size: The maximum object size; larger objects are removed. 185 relabel: Whether to relabel the surviving objects consecutively after filtering. 186 block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required 187 for unchunked data. Required when a ``mask`` is given (the size reduction is block-wise). 188 job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``. 189 job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`). 190 num_workers: Number of parallel workers (threads for ``local``, tasks for distributed 191 backends). 192 mask: Optional binary mask; out-of-mask output voxels are left unchanged. 193 194 Returns: 195 The output array (the provided ``output``, or a newly allocated numpy array). 196 """ 197 if min_size is None and max_size is None: 198 raise ValueError("size_filter requires at least one of 'min_size' or 'max_size'.") 199 src = as_source(input) 200 if not np.issubdtype(np.dtype(src.dtype), np.integer): 201 raise ValueError(f"size_filter expects an integer label image, got dtype {src.dtype}.") 202 203 # Pass 1: unique ids with their sizes. 204 ids, counts = unique(input, return_counts=True, block_shape=block_shape, job_type=job_type, 205 job_config=job_config, num_workers=num_workers, mask=mask) 206 207 # In-process: ids to discard and the consecutive relabeling of the survivors. 208 discard = np.zeros(ids.shape, dtype=bool) 209 if min_size is not None: 210 discard |= counts < min_size 211 if max_size is not None: 212 discard |= counts > max_size 213 filter_ids = ids[discard] 214 215 relabel_fn: Optional[BlockFn] = None 216 if relabel: 217 # Reserve 0 for background and map the surviving foreground ids to 1..K consecutively, so a 218 # surviving object can never collide with the (possibly newly introduced) background 0. 219 remaining_fg = ids[(~discard) & (ids != 0)] 220 mapping: Dict[int, int] = {int(v): i for i, v in enumerate(remaining_fg.tolist(), start=1)} 221 mapping[0] = 0 222 relabel_fn = _make_size_relabel(mapping) 223 224 return segmentation_filter(input, _make_size_filter(filter_ids), output, relabel=relabel_fn, 225 block_shape=block_shape, job_type=job_type, job_config=job_config, 226 num_workers=num_workers, mask=mask)
Remove objects smaller than min_size and/or larger than max_size from a segmentation.
Multi-stage (a global unique count reduction, then a filter pass), so it does not accept
block_ids / resume_from. By default it relabels the result consecutively; pass
relabel=False to keep the original ids of the surviving objects.
Args:
input: The input segmentation (a numpy/zarr/n5 array or a Source); must be integer-typed.
output: The output array to write into. Optional for local execution -- a numpy array
matching the input shape and dtype is allocated and returned if omitted; required for
distributed execution (a writable, file-backed zarr/n5 array).
min_size: The minimum object size; smaller objects are removed. At least one of min_size /
max_size is required.
max_size: The maximum object size; larger objects are removed.
relabel: Whether to relabel the surviving objects consecutively after filtering.
block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required
for unchunked data. Required when a mask is given (the size reduction is block-wise).
job_type: Execution backend: one of "local", "subprocess" or "slurm".
job_config: Backend configuration (a RunnerConfig / SlurmConfig).
num_workers: Number of parallel workers (threads for local, tasks for distributed
backends).
mask: Optional binary mask; out-of-mask output voxels are left unchanged.
Returns:
The output array (the provided output, or a newly allocated numpy array).
411def stitch_segmentation( 412 input: SourceLike, 413 segmentation_function: Callable, 414 tile_shape: Tuple[int, ...], 415 tile_overlap: Tuple[int, ...], 416 output: Optional[SourceLike] = None, 417 *, 418 beta: float = 0.5, 419 shape: Optional[Tuple[int, ...]] = None, 420 with_background: bool = True, 421 num_workers: int = 1, 422 job_type: str = "local", 423 job_config: Optional[RunnerConfig] = None, 424 return_before_stitching: bool = False, 425) -> Union[SourceLike, Tuple[SourceLike, np.ndarray]]: 426 """Run a segmentation function tile-wise and stitch the results based on overlap. 427 428 Each tile is read with a halo (``tile_shape + 2 * tile_overlap``) and segmented independently; 429 the halo region is where the overlap with the neighbouring tile's segmentation of the same 430 pixels is measured. Objects that overlap strongly there are merged via a multicut over the 431 region adjacency graph of the per-tile labeling. 432 433 Args: 434 input: The input data (a numpy/zarr/n5 array or a `Source`). If it has channels they must be 435 the last (trailing) axes, and `shape` must give the spatial shape. 436 segmentation_function: The per-tile segmentation function with signature 437 ``f(tile_input, tile_id) -> labels``. It receives the haloed tile input and the tile id 438 (passed in case the segmentation depends on the tile; ignore it otherwise) and returns a 439 label image of the tile's (haloed) spatial shape. It is cloudpickled for distributed 440 execution, so it must be picklable. 441 tile_shape: The shape of the individual tiles. 442 tile_overlap: The halo added on each side of a tile; the input to the segmentation function 443 has size ``tile_shape + 2 * tile_overlap``, and the overlap is measured in the halo. 444 output: The ``uint64`` output array. Optional for local execution — a numpy array is 445 allocated and returned if omitted; **required** (file-backed) for distributed execution. 446 beta: The boundary bias of the multicut; ``> 0.5`` biases towards over-segmentation, 447 ``< 0.5`` towards under-segmentation. Must be in the exclusive range ``(0, 1)``. 448 shape: The spatial shape of the segmentation. Defaults to the input shape; must be passed if 449 the input has trailing channel axes. 450 with_background: Whether the problem has a background label (hard-coded ``0``) that must not 451 be merged with foreground objects. 452 num_workers: Number of parallel workers (threads for ``local``, tasks for distributed 453 backends) for the tile-segmentation and overlap-counting phases. 454 job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``. 455 job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`). For distributed 456 backends a temporary per-tile store is created under ``job_config.tmp_root``. 457 return_before_stitching: Also return the (relabeled) pre-stitch segmentation, for debugging. 458 459 Returns: 460 The output array with the stitched segmentation, or ``(output, pre_stitch)`` if 461 ``return_before_stitching`` is set (``pre_stitch`` is an in-memory numpy array). 462 """ 463 src = as_source(input) 464 in_ndim = src.ndim 465 shape = tuple(int(s) for s in (src.shape if shape is None else shape)) 466 ndim = len(shape) 467 tile_shape = tuple(int(t) for t in tile_shape) 468 tile_overlap = tuple(int(t) for t in tile_overlap) 469 470 out_array = _prepare_output(output, shape, job_type) 471 out = as_source(out_array) 472 if out.dtype != np.dtype("uint64"): 473 raise ValueError(f"output must have dtype uint64, got {out.dtype}.") 474 475 blocking = get_blocking(shape, tile_shape) 476 n_blocks = int(blocking.number_of_blocks) 477 max_halo = tuple(ts + 2 * ov for ts, ov in zip(tile_shape, tile_overlap)) 478 offset_factor = int(np.prod(max_halo)) 479 if n_blocks * offset_factor >= int(np.iinfo(np.uint64).max): 480 raise ValueError("Label id overflow: number_of_blocks * prod(haloed tile shape) exceeds " 481 "uint64. Reduce the tile shape or the volume size.") 482 483 runner = get_runner(job_type, job_config) 484 input_handle = _capture(src, job_type) 485 store, store_cleanup, store_handle = _make_tile_store(n_blocks, max_halo, job_type, job_config) 486 try: 487 # Stage 1: segment each haloed tile, offset its ids, write the inner block + the store slot. 488 stage1 = _make_segment(shape, tile_shape, tile_overlap, segmentation_function, 489 with_background, offset_factor, store_handle, input_handle, 490 ndim, in_ndim) 491 id_results = runner.run(stage1, [], outputs=[out_array], block_shape=tile_shape, 492 halo=tile_overlap, num_workers=num_workers, has_return_val=True, 493 name="stitch-segment") 494 id_arrays = [a for a in id_results if a is not None and len(a)] 495 real = (np.unique(np.concatenate(id_arrays)) if id_arrays 496 else np.zeros((0,), dtype="uint64")) 497 498 # Build the dense relabeling (offset ids are sparse; this keeps the RAG node space compact). 499 mapping = {0: 0} 500 for i, lab in enumerate(real.tolist()): 501 mapping[int(lab)] = i + 1 502 503 # Stage 2: apply the dense relabeling to the pre-stitch output in place. 504 if real.size: 505 runner.run(_make_relabel(mapping), [], outputs=[out_array], block_shape=tile_shape, 506 num_workers=num_workers, has_return_val=False, name="stitch-densify") 507 508 # Stage 3: count object overlaps in the halo bands, reading haloed tiles from the store. 509 overlap_fn = _make_seg_overlap(shape, tile_shape, tile_overlap, store_handle, ndim) 510 ov_results = runner.run(overlap_fn, [out_array], block_shape=tile_shape, 511 num_workers=num_workers, has_return_val=True, name="stitch-overlaps") 512 finally: 513 store_cleanup() 514 515 uv, frac = _collect_edges(ov_results) 516 uv, frac = _map_edges(uv, frac, mapping) # overlap ids are offset ids -> map to dense. 517 518 seg = out[full_roi(ndim)] 519 stitched = _stitch_via_multicut(seg, uv, frac, with_background, beta, n_threads=None) 520 521 # Relabel to consecutive ids (elf semantics): keep 0 as background, or renumber 0 too when there 522 # is no background. 523 if with_background: 524 stitched, _, _ = bic.segmentation.relabel_sequential(stitched.astype("uint64"), offset=1) 525 else: 526 stitched, _, _ = bic.segmentation.relabel_sequential(stitched.astype("uint64") + 1, offset=1) 527 528 pre_stitch = seg.copy() if return_before_stitching else None 529 out[full_roi(ndim)] = stitched.astype(out.dtype, copy=False) 530 if return_before_stitching: 531 return out_array, pre_stitch 532 return out_array
Run a segmentation function tile-wise and stitch the results based on overlap.
Each tile is read with a halo (tile_shape + 2 * tile_overlap) and segmented independently;
the halo region is where the overlap with the neighbouring tile's segmentation of the same
pixels is measured. Objects that overlap strongly there are merged via a multicut over the
region adjacency graph of the per-tile labeling.
Args:
input: The input data (a numpy/zarr/n5 array or a Source). If it has channels they must be
the last (trailing) axes, and shape must give the spatial shape.
segmentation_function: The per-tile segmentation function with signature
f(tile_input, tile_id) -> labels. It receives the haloed tile input and the tile id
(passed in case the segmentation depends on the tile; ignore it otherwise) and returns a
label image of the tile's (haloed) spatial shape. It is cloudpickled for distributed
execution, so it must be picklable.
tile_shape: The shape of the individual tiles.
tile_overlap: The halo added on each side of a tile; the input to the segmentation function
has size tile_shape + 2 * tile_overlap, and the overlap is measured in the halo.
output: The uint64 output array. Optional for local execution — a numpy array is
allocated and returned if omitted; required (file-backed) for distributed execution.
beta: The boundary bias of the multicut; > 0.5 biases towards over-segmentation,
< 0.5 towards under-segmentation. Must be in the exclusive range (0, 1).
shape: The spatial shape of the segmentation. Defaults to the input shape; must be passed if
the input has trailing channel axes.
with_background: Whether the problem has a background label (hard-coded 0) that must not
be merged with foreground objects.
num_workers: Number of parallel workers (threads for local, tasks for distributed
backends) for the tile-segmentation and overlap-counting phases.
job_type: Execution backend: one of "local", "subprocess" or "slurm".
job_config: Backend configuration (a RunnerConfig / SlurmConfig). For distributed
backends a temporary per-tile store is created under job_config.tmp_root.
return_before_stitching: Also return the (relabeled) pre-stitch segmentation, for debugging.
Returns:
The output array with the stitched segmentation, or (output, pre_stitch) if
return_before_stitching is set (pre_stitch is an in-memory numpy array).
352def stitch_tiled_segmentation( 353 segmentation: SourceLike, 354 tile_shape: Tuple[int, ...], 355 output: Optional[SourceLike] = None, 356 *, 357 overlap: int = 1, 358 with_background: bool = True, 359 beta: float = 0.5, 360 num_workers: int = 1, 361 job_type: str = "local", 362 job_config: Optional[RunnerConfig] = None, 363) -> SourceLike: 364 """Stitch a segmentation that is already split into tiles with unique ids per tile. 365 366 The ids in the tiles of the input have to be unique (the segmentations are separate across 367 tiles). Objects that touch across a tile interface are merged based on how strongly they overlap 368 there, via a multicut over the region adjacency graph of the tiled segmentation. 369 370 Args: 371 segmentation: The input tiled segmentation (a numpy/zarr/n5 array or a `Source`); must be 372 integer-typed with ids unique across tiles. 373 tile_shape: The shape of the tiles (the block shape of the tiling). 374 output: The ``uint64`` output array. Optional for local execution — a numpy array is 375 allocated and returned if omitted; **required** (file-backed) for distributed execution. 376 overlap: The thickness (in pixels) of the tile-interface slab used to measure object overlap. 377 with_background: Whether the problem has a background label (hard-coded ``0``) that must not 378 be merged with foreground objects. 379 beta: The boundary bias of the multicut; ``> 0.5`` biases towards over-segmentation, 380 ``< 0.5`` towards under-segmentation. 381 num_workers: Number of parallel workers (threads for ``local``, tasks for distributed 382 backends) for the overlap-counting phase. 383 job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``. 384 job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`). 385 386 Returns: 387 The output array with merged labels. 388 """ 389 src = as_source(segmentation) 390 if not np.issubdtype(np.dtype(src.dtype), np.integer): 391 raise ValueError(f"stitch_tiled_segmentation expects an integer label image, got {src.dtype}.") 392 ndim = src.ndim 393 shape = tuple(int(s) for s in src.shape) 394 tile_shape = tuple(int(t) for t in tile_shape) 395 out_array = _prepare_output(output, shape, job_type) 396 397 runner = get_runner(job_type, job_config) 398 overlap_fn = _make_tiled_overlap(shape, tile_shape, int(overlap), ndim) 399 results = runner.run(overlap_fn, [segmentation], block_shape=tile_shape, num_workers=num_workers, 400 has_return_val=True, name="stitch-overlaps") 401 uv, frac = _collect_edges(results) 402 403 seg = src[full_roi(ndim)] 404 stitched = _stitch_via_multicut(seg, uv, frac, with_background, beta, n_threads=None) 405 406 out = as_source(out_array) 407 out[full_roi(ndim)] = stitched.astype(out.dtype, copy=False) 408 return out_array
Stitch a segmentation that is already split into tiles with unique ids per tile.
The ids in the tiles of the input have to be unique (the segmentations are separate across tiles). Objects that touch across a tile interface are merged based on how strongly they overlap there, via a multicut over the region adjacency graph of the tiled segmentation.
Args:
segmentation: The input tiled segmentation (a numpy/zarr/n5 array or a Source); must be
integer-typed with ids unique across tiles.
tile_shape: The shape of the tiles (the block shape of the tiling).
output: The uint64 output array. Optional for local execution — a numpy array is
allocated and returned if omitted; required (file-backed) for distributed execution.
overlap: The thickness (in pixels) of the tile-interface slab used to measure object overlap.
with_background: Whether the problem has a background label (hard-coded 0) that must not
be merged with foreground objects.
beta: The boundary bias of the multicut; > 0.5 biases towards over-segmentation,
< 0.5 towards under-segmentation.
num_workers: Number of parallel workers (threads for local, tasks for distributed
backends) for the overlap-counting phase.
job_type: Execution backend: one of "local", "subprocess" or "slurm".
job_config: Backend configuration (a RunnerConfig / SlurmConfig).
Returns: The output array with merged labels.
87def compute_edge_costs( 88 probs: np.ndarray, 89 edge_sizes: Optional[np.ndarray] = None, 90 z_edge_mask: Optional[np.ndarray] = None, 91 beta: float = 0.5, 92 weighting_scheme: Optional[str] = None, 93 weighting_exponent: float = 1.0, 94) -> np.ndarray: 95 """Compute multicut edge costs from probabilities with a pre-defined weighting scheme. 96 97 Args: 98 probs: The input edge probabilities, in ``[0, 1]``. 99 edge_sizes: The sizes of the edges; required for all weighting schemes except ``None`` / 100 ``"none"``. 101 z_edge_mask: A boolean mask of inter-slice edges; required for the ``"xyz"`` and ``"z"`` 102 schemes (for flat superpixels in a 3d problem). 103 beta: The boundary bias (see `transform_probabilities_to_costs`). 104 weighting_scheme: How to weight the costs by edge size; one of ``None``, ``"none"``, 105 ``"all"``, ``"xyz"`` or ``"z"``. 106 weighting_exponent: The exponent applied to the normalized edge sizes when weighting. 107 108 Returns: 109 The edge costs. 110 111 Raises: 112 ValueError: If ``weighting_scheme`` is unknown or a scheme's required inputs are missing. 113 """ 114 schemes = (None, "all", "none", "xyz", "z") 115 if weighting_scheme not in schemes: 116 schemes_str = ", ".join([str(scheme) for scheme in schemes]) 117 raise ValueError(f"Weighting scheme must be one of {schemes_str}, got {weighting_scheme}.") 118 119 if weighting_scheme is None or weighting_scheme == "none": 120 edge_pop = edge_sizes_ = None 121 122 elif weighting_scheme == "all": 123 if edge_sizes is None: 124 raise ValueError("Need edge sizes for weighting scheme 'all'.") 125 if len(edge_sizes) != len(probs): 126 raise ValueError("Invalid edge sizes.") 127 edge_sizes_ = edge_sizes 128 edge_pop = None 129 130 elif weighting_scheme == "xyz": 131 if edge_sizes is None or z_edge_mask is None: 132 raise ValueError("Need edge sizes and z edge mask for weighting scheme 'xyz'.") 133 if len(edge_sizes) != len(probs) or len(z_edge_mask) != len(probs): 134 raise ValueError("Invalid edge sizes or z edge mask.") 135 edge_pop = [z_edge_mask, np.logical_not(z_edge_mask)] 136 edge_sizes_ = edge_sizes 137 138 elif weighting_scheme == "z": 139 if edge_sizes is None or z_edge_mask is None: 140 raise ValueError("Need edge sizes and z edge mask for weighting scheme 'z'.") 141 if len(edge_sizes) != len(probs) or len(z_edge_mask) != len(probs): 142 raise ValueError("Invalid edge sizes or z edge mask.") 143 edge_pop = [z_edge_mask, np.logical_not(z_edge_mask)] 144 edge_sizes_ = edge_sizes.copy() 145 edge_sizes_[edge_pop[1]] = 1.0 146 147 return transform_probabilities_to_costs( 148 probs, beta=beta, edge_sizes=edge_sizes_, edge_populations=edge_pop, 149 weighting_exponent=weighting_exponent, 150 )
Compute multicut edge costs from probabilities with a pre-defined weighting scheme.
Args:
probs: The input edge probabilities, in [0, 1].
edge_sizes: The sizes of the edges; required for all weighting schemes except None /
"none".
z_edge_mask: A boolean mask of inter-slice edges; required for the "xyz" and "z"
schemes (for flat superpixels in a 3d problem).
beta: The boundary bias (see transform_probabilities_to_costs).
weighting_scheme: How to weight the costs by edge size; one of None, "none",
"all", "xyz" or "z".
weighting_exponent: The exponent applied to the normalized edge sizes when weighting.
Returns: The edge costs.
Raises:
ValueError: If weighting_scheme is unknown or a scheme's required inputs are missing.
47def transform_probabilities_to_costs( 48 probs: np.ndarray, 49 beta: float = 0.5, 50 edge_sizes: Optional[np.ndarray] = None, 51 edge_populations: Optional[List[np.ndarray]] = None, 52 weighting_exponent: float = 1.0, 53) -> np.ndarray: 54 """Transform merge probabilities to multicut costs via the negative log-likelihood. 55 56 Probabilities near ``1`` map to large positive costs (attractive, likely same segment) and 57 probabilities near ``0`` to large negative costs (repulsive). The boundary bias ``beta`` 58 shifts the decision threshold. 59 60 Args: 61 probs: The input edge probabilities, in ``[0, 1]``. 62 beta: The boundary bias term; ``> 0.5`` biases towards over-segmentation (more cuts), 63 ``< 0.5`` towards under-segmentation. Must be in the exclusive range ``(0, 1)``. 64 edge_sizes: The sizes of the edges, used for weighting if given. 65 edge_populations: Disjoint edge populations (lists of masks or index arrays) that are 66 size-weighted independently, e.g. flat superpixels in a 3d problem. 67 weighting_exponent: The exponent applied to the normalized edge sizes when weighting. 68 69 Returns: 70 The edge costs. 71 """ 72 p_min = 0.001 73 p_max = 1.0 - p_min 74 costs = (p_max - p_min) * probs + p_min 75 # Probabilities to costs; the second term is the boundary bias. 76 costs = np.log((1.0 - costs) / costs) + np.log((1.0 - beta) / beta) 77 # Weight the costs with edge sizes, if they are given. 78 if edge_sizes is not None: 79 assert len(edge_sizes) == len(costs) 80 if edge_populations is None: 81 costs = _weight_edges(costs, edge_sizes, weighting_exponent) 82 else: 83 costs = _weight_populations(costs, edge_sizes, edge_populations, weighting_exponent) 84 return costs
Transform merge probabilities to multicut costs via the negative log-likelihood.
Probabilities near 1 map to large positive costs (attractive, likely same segment) and
probabilities near 0 to large negative costs (repulsive). The boundary bias beta
shifts the decision threshold.
Args:
probs: The input edge probabilities, in [0, 1].
beta: The boundary bias term; > 0.5 biases towards over-segmentation (more cuts),
< 0.5 towards under-segmentation. Must be in the exclusive range (0, 1).
edge_sizes: The sizes of the edges, used for weighting if given.
edge_populations: Disjoint edge populations (lists of masks or index arrays) that are
size-weighted independently, e.g. flat superpixels in a 3d problem.
weighting_exponent: The exponent applied to the normalized edge sizes when weighting.
Returns: The edge costs.
218def multicut_decomposition( 219 graph, 220 costs: np.ndarray, 221 n_threads: int = 1, 222 internal_solver: str = "kernighan-lin", 223) -> np.ndarray: 224 """Solve the multicut problem with the decomposition solver. 225 226 The graph is split into its connected components after removing strongly repulsive edges, each 227 component is solved independently (in parallel) with ``internal_solver``, and the solutions are 228 combined. Introduced in "Break and Conquer: Efficient Correlation Clustering for Image 229 Segmentation" (https://link.springer.com/chapter/10.1007/978-3-642-39140-8_9). 230 231 Args: 232 graph: The graph (or region adjacency graph) of the multicut problem. 233 costs: The edge costs of the multicut problem. 234 n_threads: The number of threads used to solve sub-problems in parallel. 235 internal_solver: The name of the solver used for the sub-problems; one of 236 ``"kernighan-lin"``, ``"greedy-additive"`` or ``"greedy-fixation"``. 237 238 Returns: 239 The node label solution to the multicut problem. 240 """ 241 objective = _to_objective(graph, costs) 242 solver = bic.graph.multicut.MulticutDecomposer( 243 sub_solver=_get_solver(internal_solver), 244 fallthrough_solver=_get_solver(internal_solver), 245 number_of_threads=n_threads, 246 ) 247 return solver.optimize(objective)
Solve the multicut problem with the decomposition solver.
The graph is split into its connected components after removing strongly repulsive edges, each
component is solved independently (in parallel) with internal_solver, and the solutions are
combined. Introduced in "Break and Conquer: Efficient Correlation Clustering for Image
Segmentation" (https://link.springer.com/chapter/10.1007/978-3-642-39140-8_9).
Args:
graph: The graph (or region adjacency graph) of the multicut problem.
costs: The edge costs of the multicut problem.
n_threads: The number of threads used to solve sub-problems in parallel.
internal_solver: The name of the solver used for the sub-problems; one of
"kernighan-lin", "greedy-additive" or "greedy-fixation".
Returns: The node label solution to the multicut problem.
201def multicut_gaec(graph, costs: np.ndarray) -> np.ndarray: 202 """Solve the multicut problem with the greedy-additive edge contraction solver. 203 204 Introduced in "Fusion moves for correlation clustering" 205 (http://openaccess.thecvf.com/content_cvpr_2015/papers/Beier_Fusion_Moves_for_2015_CVPR_paper.pdf). 206 207 Args: 208 graph: The graph (or region adjacency graph) of the multicut problem. 209 costs: The edge costs of the multicut problem. 210 211 Returns: 212 The node label solution to the multicut problem. 213 """ 214 objective = _to_objective(graph, costs) 215 return bic.graph.multicut.GreedyAdditiveMulticut().optimize(objective)
Solve the multicut problem with the greedy-additive edge contraction solver.
Introduced in "Fusion moves for correlation clustering" (http://openaccess.thecvf.com/content_cvpr_2015/papers/Beier_Fusion_Moves_for_2015_CVPR_paper.pdf).
Args: graph: The graph (or region adjacency graph) of the multicut problem. costs: The edge costs of the multicut problem.
Returns: The node label solution to the multicut problem.
176def multicut_kernighan_lin(graph, costs: np.ndarray, warmstart: bool = True) -> np.ndarray: 177 """Solve the multicut problem with the Kernighan-Lin solver. 178 179 Introduced in "An efficient heuristic procedure for partitioning graphs" 180 (http://xilinx.asia/_hdl/4/eda.ee.ucla.edu/EE201A-04Spring/kl.pdf). 181 182 Args: 183 graph: The graph (or region adjacency graph) of the multicut problem. 184 costs: The edge costs of the multicut problem. 185 warmstart: Whether to warmstart with the greedy-additive solution. 186 187 Returns: 188 The node label solution to the multicut problem. 189 """ 190 objective = _to_objective(graph, costs) 191 if warmstart: 192 solver = bic.graph.multicut.ChainedMulticutSolvers([ 193 bic.graph.multicut.GreedyAdditiveMulticut(), 194 bic.graph.multicut.KernighanLinMulticut(), 195 ]) 196 else: 197 solver = bic.graph.multicut.KernighanLinMulticut() 198 return solver.optimize(objective)
Solve the multicut problem with the Kernighan-Lin solver.
Introduced in "An efficient heuristic procedure for partitioning graphs" (http://xilinx.asia/_hdl/4/eda.ee.ucla.edu/EE201A-04Spring/kl.pdf).
Args: graph: The graph (or region adjacency graph) of the multicut problem. costs: The edge costs of the multicut problem. warmstart: Whether to warmstart with the greedy-additive solution.
Returns: The node label solution to the multicut problem.