bioimage_py.evaluation.matching

Object matching scores (precision / recall / segmentation accuracy / f1) and mean segmentation accuracy.

These densify the contingency table into an overlap matrix (rows = segmentation objects, columns = groundtruth objects), score every pair under a matching criterion, and find the optimal one-to-one assignment with the Hungarian algorithm. Implementation follows https://github.com/mpicbg-csbd/stardist/blob/master/stardist/matching.py.

Note: this is the only metric whose cost is not purely table-sized — the dense (n_seg, n_gt) matrix plus the Hungarian solve scale with the number of objects (O(n_obj^2) memory, O(n_obj^3) time), not the number of voxels. The contingency-table build is still the parallel, voxel-scale step.

  1"""Object matching scores (precision / recall / segmentation accuracy / f1) and mean segmentation accuracy.
  2
  3These densify the contingency table into an overlap matrix (rows = segmentation objects, columns =
  4groundtruth objects), score every pair under a matching criterion, and find the optimal one-to-one
  5assignment with the Hungarian algorithm. Implementation follows
  6https://github.com/mpicbg-csbd/stardist/blob/master/stardist/matching.py.
  7
  8Note: this is the only metric whose cost is not purely table-sized — the dense ``(n_seg, n_gt)`` matrix
  9plus the Hungarian solve scale with the number of objects (``O(n_obj^2)`` memory, ``O(n_obj^3)`` time),
 10not the number of voxels. The contingency-table build is still the parallel, voxel-scale step.
 11"""
 12from __future__ import annotations
 13
 14from typing import Dict, Optional, Sequence, Tuple, Union
 15
 16import numpy as np
 17from scipy.optimize import linear_sum_assignment
 18
 19from ..runner.config import RunnerConfig
 20from ..sources import SourceLike
 21from ._common import build_table
 22from .contingency_table import ContingencyTable
 23
 24__all__ = ["matching_scores", "mean_segmentation_accuracy_scores", "matching",
 25           "mean_segmentation_accuracy"]
 26
 27
 28def _intersection_over_union(overlap: np.ndarray) -> np.ndarray:
 29    """@private"""
 30    if overlap.sum() == 0:
 31        return overlap
 32    n_pixels_pred = np.sum(overlap, axis=0, keepdims=True)
 33    n_pixels_true = np.sum(overlap, axis=1, keepdims=True)
 34    return overlap / np.maximum(n_pixels_pred + n_pixels_true - overlap, 1e-7)
 35
 36
 37def _intersection_over_true(overlap: np.ndarray) -> np.ndarray:
 38    """@private"""
 39    if overlap.sum() == 0:
 40        return overlap
 41    return overlap / np.sum(overlap, axis=1, keepdims=True)
 42
 43
 44def _intersection_over_pred(overlap: np.ndarray) -> np.ndarray:
 45    """@private"""
 46    if overlap.sum() == 0:
 47        return overlap
 48    return overlap / np.sum(overlap, axis=0, keepdims=True)
 49
 50
 51_MATCHING_CRITERIA = {"iou": _intersection_over_union,
 52                      "iot": _intersection_over_true,
 53                      "iop": _intersection_over_pred}
 54
 55
 56def _precision(tp: int, fp: int, fn: int) -> float:
 57    """@private"""
 58    return tp / (tp + fp) if tp > 0 else 0.0
 59
 60
 61def _recall(tp: int, fp: int, fn: int) -> float:
 62    """@private"""
 63    return tp / (tp + fn) if tp > 0 else 0.0
 64
 65
 66def _segmentation_accuracy(tp: int, fp: int, fn: int) -> float:
 67    """@private"""
 68    return tp / (tp + fp + fn) if tp > 0 else 0.0
 69
 70
 71def _f1(tp: int, fp: int, fn: int) -> float:
 72    """@private"""
 73    return (2 * tp) / (2 * tp + fp + fn) if tp > 0 else 0.0
 74
 75
 76def _label_index(labels: np.ndarray, label: int) -> Optional[int]:
 77    """Return the position of ``label`` in the sorted ``labels`` array, or ``None`` if absent."""
 78    idx = int(np.searchsorted(labels, label))
 79    if idx < labels.size and int(labels[idx]) == label:
 80        return idx
 81    return None
 82
 83
 84def _dense_overlap(table: ContingencyTable) -> np.ndarray:
 85    """Densify the contingency table into a ``(n_seg, n_gt)`` overlap matrix (rows = A, cols = B)."""
 86    overlap = np.zeros((table.labels_a.size, table.labels_b.size), dtype="float64")
 87    if table.pairs.shape[0]:
 88        ai = np.searchsorted(table.labels_a, table.pairs[:, 0])
 89        bi = np.searchsorted(table.labels_b, table.pairs[:, 1])
 90        overlap[ai, bi] = table.counts.astype("float64")
 91    return overlap
 92
 93
 94def _compute_scores(table: ContingencyTable, criterion: str,
 95                    ignore_label: Optional[int]) -> Tuple[int, int, int, np.ndarray]:
 96    """Build the matching-criterion score matrix and drop the ignore label's row/column."""
 97    if criterion not in _MATCHING_CRITERIA:
 98        raise ValueError(f"Unknown matching criterion {criterion!r}; expected one of "
 99                         f"{sorted(_MATCHING_CRITERIA)}.")
100    overlap = _dense_overlap(table)
101    scores = _MATCHING_CRITERIA[criterion](overlap)
102    if scores.size:
103        assert 0.0 <= float(np.min(scores)) <= float(np.max(scores)) <= 1.0, \
104            f"{np.min(scores)}, {np.max(scores)}"
105
106    if ignore_label is not None:
107        ai = _label_index(table.labels_a, ignore_label)
108        if ai is not None:
109            scores = np.delete(scores, ai, axis=0)
110        bi = _label_index(table.labels_b, ignore_label)
111        if bi is not None:
112            scores = np.delete(scores, bi, axis=1)
113
114    n_pred, n_true = scores.shape
115    n_matched = min(n_true, n_pred)
116    return n_true, n_matched, n_pred, scores
117
118
119def _compute_tps(scores: np.ndarray, n_matched: int, threshold: float) -> int:
120    """The number of true positives: optimal assignment with the score as a tie-breaker."""
121    if n_matched > 0 and np.any(scores >= threshold):
122        costs = -(scores >= threshold).astype(float) - scores / (2 * n_matched)
123        pred_ind, true_ind = linear_sum_assignment(costs)
124        assert n_matched == len(true_ind) == len(pred_ind)
125        return int(np.count_nonzero(scores[pred_ind, true_ind] >= threshold))
126    return 0
127
128
129def matching_scores(table: ContingencyTable, *, threshold: float = 0.5, criterion: str = "iou",
130                    ignore_label: Optional[int] = 0) -> Dict[str, float]:
131    """Compute object-matching scores from a contingency table.
132
133    Args:
134        table: A contingency table built as ``contingency_table(segmentation, groundtruth)``.
135        threshold: Overlap threshold for a match.
136        criterion: Matching criterion, one of ``"iou"``, ``"iot"`` or ``"iop"``.
137        ignore_label: Object label removed from both axes before matching (e.g. background). ``None``
138            keeps all objects.
139
140    Returns:
141        A mapping with keys ``precision``, ``recall``, ``segmentation_accuracy`` and ``f1``.
142    """
143    n_true, n_matched, n_pred, scores = _compute_scores(table, criterion, ignore_label)
144    tp = _compute_tps(scores, n_matched, threshold)
145    fp, fn = n_pred - tp, n_true - tp
146    return {"precision": _precision(tp, fp, fn), "recall": _recall(tp, fp, fn),
147            "segmentation_accuracy": _segmentation_accuracy(tp, fp, fn), "f1": _f1(tp, fp, fn)}
148
149
150def mean_segmentation_accuracy_scores(
151    table: ContingencyTable,
152    *,
153    thresholds: Optional[Sequence[float]] = None,
154    ignore_label: Optional[int] = 0,
155    return_accuracies: bool = False,
156) -> Union[float, Tuple[float, np.ndarray]]:
157    """Compute the mean segmentation accuracy (DSB-2018 style) from a contingency table.
158
159    Args:
160        table: A contingency table built as ``contingency_table(segmentation, groundtruth)``.
161        thresholds: IoU thresholds to average over; defaults to ``np.arange(0.5, 1.0, 0.05)``.
162        ignore_label: Object label removed from both axes before matching. ``None`` keeps all objects.
163        return_accuracies: Whether to also return the per-threshold accuracies.
164
165    Returns:
166        The mean segmentation accuracy, and (only if ``return_accuracies``) the per-threshold accuracies.
167    """
168    n_true, n_matched, n_pred, scores = _compute_scores(table, "iou", ignore_label)
169    thresholds = np.arange(0.5, 1.0, 0.05) if thresholds is None else np.asarray(thresholds, "float64")
170    accuracies = np.array([
171        _segmentation_accuracy(tp, n_pred - tp, n_true - tp)
172        for tp in (_compute_tps(scores, n_matched, float(t)) for t in thresholds)
173    ])
174    mean_accuracy = float(np.mean(accuracies)) if accuracies.size else 0.0
175    if return_accuracies:
176        return mean_accuracy, accuracies
177    return mean_accuracy
178
179
180def matching(
181    segmentation: SourceLike,
182    groundtruth: SourceLike,
183    *,
184    threshold: float = 0.5,
185    criterion: str = "iou",
186    ignore_label: Optional[int] = 0,
187    num_workers: int = 1,
188    block_shape: Optional[Tuple[int, ...]] = None,
189    job_type: str = "local",
190    job_config: Optional[RunnerConfig] = None,
191    mask: Optional[SourceLike] = None,
192) -> Dict[str, float]:
193    """Compute object-matching scores between two segmentations.
194
195    Args:
196        segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a `Source`).
197        groundtruth: The groundtruth segmentation; same shape as ``segmentation``.
198        threshold: Overlap threshold for a match.
199        criterion: Matching criterion, one of ``"iou"``, ``"iot"`` or ``"iop"``.
200        ignore_label: Object label removed from both axes before matching (e.g. background). ``None``
201            keeps all objects.
202        num_workers: Number of parallel workers used to build the contingency table.
203        block_shape: Shape of the processing blocks. Defaults to the input chunk shape.
204        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
205        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
206        mask: Optional binary mask; voxels outside the mask are excluded.
207
208    Returns:
209        A mapping with keys ``precision``, ``recall``, ``segmentation_accuracy`` and ``f1``.
210    """
211    table = build_table(segmentation, groundtruth, num_workers=num_workers, block_shape=block_shape,
212                        job_type=job_type, job_config=job_config, mask=mask)
213    return matching_scores(table, threshold=threshold, criterion=criterion, ignore_label=ignore_label)
214
215
216def mean_segmentation_accuracy(
217    segmentation: SourceLike,
218    groundtruth: SourceLike,
219    *,
220    thresholds: Optional[Sequence[float]] = None,
221    ignore_label: Optional[int] = 0,
222    return_accuracies: bool = False,
223    num_workers: int = 1,
224    block_shape: Optional[Tuple[int, ...]] = None,
225    job_type: str = "local",
226    job_config: Optional[RunnerConfig] = None,
227    mask: Optional[SourceLike] = None,
228) -> Union[float, Tuple[float, np.ndarray]]:
229    """Compute the mean segmentation accuracy between two segmentations.
230
231    Args:
232        segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a `Source`).
233        groundtruth: The groundtruth segmentation; same shape as ``segmentation``.
234        thresholds: IoU thresholds to average over; defaults to ``np.arange(0.5, 1.0, 0.05)``.
235        ignore_label: Object label removed from both axes before matching. ``None`` keeps all objects.
236        return_accuracies: Whether to also return the per-threshold accuracies.
237        num_workers: Number of parallel workers used to build the contingency table.
238        block_shape: Shape of the processing blocks. Defaults to the input chunk shape.
239        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
240        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
241        mask: Optional binary mask; voxels outside the mask are excluded.
242
243    Returns:
244        The mean segmentation accuracy, and (only if ``return_accuracies``) the per-threshold accuracies.
245    """
246    table = build_table(segmentation, groundtruth, num_workers=num_workers, block_shape=block_shape,
247                        job_type=job_type, job_config=job_config, mask=mask)
248    return mean_segmentation_accuracy_scores(table, thresholds=thresholds, ignore_label=ignore_label,
249                                             return_accuracies=return_accuracies)
def matching_scores( table: bioimage_py.evaluation.ContingencyTable, *, threshold: float = 0.5, criterion: str = 'iou', ignore_label: Optional[int] = 0) -> Dict[str, float]:
130def matching_scores(table: ContingencyTable, *, threshold: float = 0.5, criterion: str = "iou",
131                    ignore_label: Optional[int] = 0) -> Dict[str, float]:
132    """Compute object-matching scores from a contingency table.
133
134    Args:
135        table: A contingency table built as ``contingency_table(segmentation, groundtruth)``.
136        threshold: Overlap threshold for a match.
137        criterion: Matching criterion, one of ``"iou"``, ``"iot"`` or ``"iop"``.
138        ignore_label: Object label removed from both axes before matching (e.g. background). ``None``
139            keeps all objects.
140
141    Returns:
142        A mapping with keys ``precision``, ``recall``, ``segmentation_accuracy`` and ``f1``.
143    """
144    n_true, n_matched, n_pred, scores = _compute_scores(table, criterion, ignore_label)
145    tp = _compute_tps(scores, n_matched, threshold)
146    fp, fn = n_pred - tp, n_true - tp
147    return {"precision": _precision(tp, fp, fn), "recall": _recall(tp, fp, fn),
148            "segmentation_accuracy": _segmentation_accuracy(tp, fp, fn), "f1": _f1(tp, fp, fn)}

Compute object-matching scores from a contingency table.

Args: table: A contingency table built as contingency_table(segmentation, groundtruth). threshold: Overlap threshold for a match. criterion: Matching criterion, one of "iou", "iot" or "iop". ignore_label: Object label removed from both axes before matching (e.g. background). None keeps all objects.

Returns: A mapping with keys precision, recall, segmentation_accuracy and f1.

def mean_segmentation_accuracy_scores( table: bioimage_py.evaluation.ContingencyTable, *, thresholds: Optional[Sequence[float]] = None, ignore_label: Optional[int] = 0, return_accuracies: bool = False) -> Union[float, Tuple[float, numpy.ndarray]]:
151def mean_segmentation_accuracy_scores(
152    table: ContingencyTable,
153    *,
154    thresholds: Optional[Sequence[float]] = None,
155    ignore_label: Optional[int] = 0,
156    return_accuracies: bool = False,
157) -> Union[float, Tuple[float, np.ndarray]]:
158    """Compute the mean segmentation accuracy (DSB-2018 style) from a contingency table.
159
160    Args:
161        table: A contingency table built as ``contingency_table(segmentation, groundtruth)``.
162        thresholds: IoU thresholds to average over; defaults to ``np.arange(0.5, 1.0, 0.05)``.
163        ignore_label: Object label removed from both axes before matching. ``None`` keeps all objects.
164        return_accuracies: Whether to also return the per-threshold accuracies.
165
166    Returns:
167        The mean segmentation accuracy, and (only if ``return_accuracies``) the per-threshold accuracies.
168    """
169    n_true, n_matched, n_pred, scores = _compute_scores(table, "iou", ignore_label)
170    thresholds = np.arange(0.5, 1.0, 0.05) if thresholds is None else np.asarray(thresholds, "float64")
171    accuracies = np.array([
172        _segmentation_accuracy(tp, n_pred - tp, n_true - tp)
173        for tp in (_compute_tps(scores, n_matched, float(t)) for t in thresholds)
174    ])
175    mean_accuracy = float(np.mean(accuracies)) if accuracies.size else 0.0
176    if return_accuracies:
177        return mean_accuracy, accuracies
178    return mean_accuracy

Compute the mean segmentation accuracy (DSB-2018 style) from a contingency table.

Args: table: A contingency table built as contingency_table(segmentation, groundtruth). thresholds: IoU thresholds to average over; defaults to np.arange(0.5, 1.0, 0.05). ignore_label: Object label removed from both axes before matching. None keeps all objects. return_accuracies: Whether to also return the per-threshold accuracies.

Returns: The mean segmentation accuracy, and (only if return_accuracies) the per-threshold accuracies.

def matching( segmentation: 'SourceLike', groundtruth: 'SourceLike', *, threshold: float = 0.5, criterion: str = 'iou', ignore_label: Optional[int] = 0, num_workers: int = 1, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, mask: 'Optional[SourceLike]' = None) -> Dict[str, float]:
181def matching(
182    segmentation: SourceLike,
183    groundtruth: SourceLike,
184    *,
185    threshold: float = 0.5,
186    criterion: str = "iou",
187    ignore_label: Optional[int] = 0,
188    num_workers: int = 1,
189    block_shape: Optional[Tuple[int, ...]] = None,
190    job_type: str = "local",
191    job_config: Optional[RunnerConfig] = None,
192    mask: Optional[SourceLike] = None,
193) -> Dict[str, float]:
194    """Compute object-matching scores between two segmentations.
195
196    Args:
197        segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a `Source`).
198        groundtruth: The groundtruth segmentation; same shape as ``segmentation``.
199        threshold: Overlap threshold for a match.
200        criterion: Matching criterion, one of ``"iou"``, ``"iot"`` or ``"iop"``.
201        ignore_label: Object label removed from both axes before matching (e.g. background). ``None``
202            keeps all objects.
203        num_workers: Number of parallel workers used to build the contingency table.
204        block_shape: Shape of the processing blocks. Defaults to the input chunk shape.
205        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
206        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
207        mask: Optional binary mask; voxels outside the mask are excluded.
208
209    Returns:
210        A mapping with keys ``precision``, ``recall``, ``segmentation_accuracy`` and ``f1``.
211    """
212    table = build_table(segmentation, groundtruth, num_workers=num_workers, block_shape=block_shape,
213                        job_type=job_type, job_config=job_config, mask=mask)
214    return matching_scores(table, threshold=threshold, criterion=criterion, ignore_label=ignore_label)

Compute object-matching scores between two segmentations.

Args: segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a Source). groundtruth: The groundtruth segmentation; same shape as segmentation. threshold: Overlap threshold for a match. criterion: Matching criterion, one of "iou", "iot" or "iop". ignore_label: Object label removed from both axes before matching (e.g. background). None keeps all objects. num_workers: Number of parallel workers used to build the contingency table. block_shape: Shape of the processing blocks. Defaults to the input chunk shape. job_type: Execution backend: one of "local", "subprocess" or "slurm". job_config: Backend configuration (a RunnerConfig / SlurmConfig). mask: Optional binary mask; voxels outside the mask are excluded.

Returns: A mapping with keys precision, recall, segmentation_accuracy and f1.

def mean_segmentation_accuracy( segmentation: 'SourceLike', groundtruth: 'SourceLike', *, thresholds: Optional[Sequence[float]] = None, ignore_label: Optional[int] = 0, return_accuracies: bool = False, num_workers: int = 1, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, mask: 'Optional[SourceLike]' = None) -> Union[float, Tuple[float, numpy.ndarray]]:
217def mean_segmentation_accuracy(
218    segmentation: SourceLike,
219    groundtruth: SourceLike,
220    *,
221    thresholds: Optional[Sequence[float]] = None,
222    ignore_label: Optional[int] = 0,
223    return_accuracies: bool = False,
224    num_workers: int = 1,
225    block_shape: Optional[Tuple[int, ...]] = None,
226    job_type: str = "local",
227    job_config: Optional[RunnerConfig] = None,
228    mask: Optional[SourceLike] = None,
229) -> Union[float, Tuple[float, np.ndarray]]:
230    """Compute the mean segmentation accuracy between two segmentations.
231
232    Args:
233        segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a `Source`).
234        groundtruth: The groundtruth segmentation; same shape as ``segmentation``.
235        thresholds: IoU thresholds to average over; defaults to ``np.arange(0.5, 1.0, 0.05)``.
236        ignore_label: Object label removed from both axes before matching. ``None`` keeps all objects.
237        return_accuracies: Whether to also return the per-threshold accuracies.
238        num_workers: Number of parallel workers used to build the contingency table.
239        block_shape: Shape of the processing blocks. Defaults to the input chunk shape.
240        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
241        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
242        mask: Optional binary mask; voxels outside the mask are excluded.
243
244    Returns:
245        The mean segmentation accuracy, and (only if ``return_accuracies``) the per-threshold accuracies.
246    """
247    table = build_table(segmentation, groundtruth, num_workers=num_workers, block_shape=block_shape,
248                        job_type=job_type, job_config=job_config, mask=mask)
249    return mean_segmentation_accuracy_scores(table, thresholds=thresholds, ignore_label=ignore_label,
250                                             return_accuracies=return_accuracies)

Compute the mean segmentation accuracy between two segmentations.

Args: segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a Source). groundtruth: The groundtruth segmentation; same shape as segmentation. thresholds: IoU thresholds to average over; defaults to np.arange(0.5, 1.0, 0.05). ignore_label: Object label removed from both axes before matching. None keeps all objects. return_accuracies: Whether to also return the per-threshold accuracies. num_workers: Number of parallel workers used to build the contingency table. block_shape: Shape of the processing blocks. Defaults to the input chunk shape. job_type: Execution backend: one of "local", "subprocess" or "slurm". job_config: Backend configuration (a RunnerConfig / SlurmConfig). mask: Optional binary mask; voxels outside the mask are excluded.

Returns: The mean segmentation accuracy, and (only if return_accuracies) the per-threshold accuracies.