bioimage_py.evaluation.matching
Object matching scores (precision / recall / segmentation accuracy / f1) and mean segmentation accuracy.
These densify the contingency table into an overlap matrix (rows = segmentation objects, columns = groundtruth objects), score every pair under a matching criterion, and find the optimal one-to-one assignment with the Hungarian algorithm. Implementation follows https://github.com/mpicbg-csbd/stardist/blob/master/stardist/matching.py.
Note: this is the only metric whose cost is not purely table-sized — the dense (n_seg, n_gt) matrix
plus the Hungarian solve scale with the number of objects (O(n_obj^2) memory, O(n_obj^3) time),
not the number of voxels. The contingency-table build is still the parallel, voxel-scale step.
1"""Object matching scores (precision / recall / segmentation accuracy / f1) and mean segmentation accuracy. 2 3These densify the contingency table into an overlap matrix (rows = segmentation objects, columns = 4groundtruth objects), score every pair under a matching criterion, and find the optimal one-to-one 5assignment with the Hungarian algorithm. Implementation follows 6https://github.com/mpicbg-csbd/stardist/blob/master/stardist/matching.py. 7 8Note: this is the only metric whose cost is not purely table-sized — the dense ``(n_seg, n_gt)`` matrix 9plus the Hungarian solve scale with the number of objects (``O(n_obj^2)`` memory, ``O(n_obj^3)`` time), 10not the number of voxels. The contingency-table build is still the parallel, voxel-scale step. 11""" 12from __future__ import annotations 13 14from typing import Dict, Optional, Sequence, Tuple, Union 15 16import numpy as np 17from scipy.optimize import linear_sum_assignment 18 19from ..runner.config import RunnerConfig 20from ..sources import SourceLike 21from ._common import build_table 22from .contingency_table import ContingencyTable 23 24__all__ = ["matching_scores", "mean_segmentation_accuracy_scores", "matching", 25 "mean_segmentation_accuracy"] 26 27 28def _intersection_over_union(overlap: np.ndarray) -> np.ndarray: 29 """@private""" 30 if overlap.sum() == 0: 31 return overlap 32 n_pixels_pred = np.sum(overlap, axis=0, keepdims=True) 33 n_pixels_true = np.sum(overlap, axis=1, keepdims=True) 34 return overlap / np.maximum(n_pixels_pred + n_pixels_true - overlap, 1e-7) 35 36 37def _intersection_over_true(overlap: np.ndarray) -> np.ndarray: 38 """@private""" 39 if overlap.sum() == 0: 40 return overlap 41 return overlap / np.sum(overlap, axis=1, keepdims=True) 42 43 44def _intersection_over_pred(overlap: np.ndarray) -> np.ndarray: 45 """@private""" 46 if overlap.sum() == 0: 47 return overlap 48 return overlap / np.sum(overlap, axis=0, keepdims=True) 49 50 51_MATCHING_CRITERIA = {"iou": _intersection_over_union, 52 "iot": _intersection_over_true, 53 "iop": _intersection_over_pred} 54 55 56def _precision(tp: int, fp: int, fn: int) -> float: 57 """@private""" 58 return tp / (tp + fp) if tp > 0 else 0.0 59 60 61def _recall(tp: int, fp: int, fn: int) -> float: 62 """@private""" 63 return tp / (tp + fn) if tp > 0 else 0.0 64 65 66def _segmentation_accuracy(tp: int, fp: int, fn: int) -> float: 67 """@private""" 68 return tp / (tp + fp + fn) if tp > 0 else 0.0 69 70 71def _f1(tp: int, fp: int, fn: int) -> float: 72 """@private""" 73 return (2 * tp) / (2 * tp + fp + fn) if tp > 0 else 0.0 74 75 76def _label_index(labels: np.ndarray, label: int) -> Optional[int]: 77 """Return the position of ``label`` in the sorted ``labels`` array, or ``None`` if absent.""" 78 idx = int(np.searchsorted(labels, label)) 79 if idx < labels.size and int(labels[idx]) == label: 80 return idx 81 return None 82 83 84def _dense_overlap(table: ContingencyTable) -> np.ndarray: 85 """Densify the contingency table into a ``(n_seg, n_gt)`` overlap matrix (rows = A, cols = B).""" 86 overlap = np.zeros((table.labels_a.size, table.labels_b.size), dtype="float64") 87 if table.pairs.shape[0]: 88 ai = np.searchsorted(table.labels_a, table.pairs[:, 0]) 89 bi = np.searchsorted(table.labels_b, table.pairs[:, 1]) 90 overlap[ai, bi] = table.counts.astype("float64") 91 return overlap 92 93 94def _compute_scores(table: ContingencyTable, criterion: str, 95 ignore_label: Optional[int]) -> Tuple[int, int, int, np.ndarray]: 96 """Build the matching-criterion score matrix and drop the ignore label's row/column.""" 97 if criterion not in _MATCHING_CRITERIA: 98 raise ValueError(f"Unknown matching criterion {criterion!r}; expected one of " 99 f"{sorted(_MATCHING_CRITERIA)}.") 100 overlap = _dense_overlap(table) 101 scores = _MATCHING_CRITERIA[criterion](overlap) 102 if scores.size: 103 assert 0.0 <= float(np.min(scores)) <= float(np.max(scores)) <= 1.0, \ 104 f"{np.min(scores)}, {np.max(scores)}" 105 106 if ignore_label is not None: 107 ai = _label_index(table.labels_a, ignore_label) 108 if ai is not None: 109 scores = np.delete(scores, ai, axis=0) 110 bi = _label_index(table.labels_b, ignore_label) 111 if bi is not None: 112 scores = np.delete(scores, bi, axis=1) 113 114 n_pred, n_true = scores.shape 115 n_matched = min(n_true, n_pred) 116 return n_true, n_matched, n_pred, scores 117 118 119def _compute_tps(scores: np.ndarray, n_matched: int, threshold: float) -> int: 120 """The number of true positives: optimal assignment with the score as a tie-breaker.""" 121 if n_matched > 0 and np.any(scores >= threshold): 122 costs = -(scores >= threshold).astype(float) - scores / (2 * n_matched) 123 pred_ind, true_ind = linear_sum_assignment(costs) 124 assert n_matched == len(true_ind) == len(pred_ind) 125 return int(np.count_nonzero(scores[pred_ind, true_ind] >= threshold)) 126 return 0 127 128 129def matching_scores(table: ContingencyTable, *, threshold: float = 0.5, criterion: str = "iou", 130 ignore_label: Optional[int] = 0) -> Dict[str, float]: 131 """Compute object-matching scores from a contingency table. 132 133 Args: 134 table: A contingency table built as ``contingency_table(segmentation, groundtruth)``. 135 threshold: Overlap threshold for a match. 136 criterion: Matching criterion, one of ``"iou"``, ``"iot"`` or ``"iop"``. 137 ignore_label: Object label removed from both axes before matching (e.g. background). ``None`` 138 keeps all objects. 139 140 Returns: 141 A mapping with keys ``precision``, ``recall``, ``segmentation_accuracy`` and ``f1``. 142 """ 143 n_true, n_matched, n_pred, scores = _compute_scores(table, criterion, ignore_label) 144 tp = _compute_tps(scores, n_matched, threshold) 145 fp, fn = n_pred - tp, n_true - tp 146 return {"precision": _precision(tp, fp, fn), "recall": _recall(tp, fp, fn), 147 "segmentation_accuracy": _segmentation_accuracy(tp, fp, fn), "f1": _f1(tp, fp, fn)} 148 149 150def mean_segmentation_accuracy_scores( 151 table: ContingencyTable, 152 *, 153 thresholds: Optional[Sequence[float]] = None, 154 ignore_label: Optional[int] = 0, 155 return_accuracies: bool = False, 156) -> Union[float, Tuple[float, np.ndarray]]: 157 """Compute the mean segmentation accuracy (DSB-2018 style) from a contingency table. 158 159 Args: 160 table: A contingency table built as ``contingency_table(segmentation, groundtruth)``. 161 thresholds: IoU thresholds to average over; defaults to ``np.arange(0.5, 1.0, 0.05)``. 162 ignore_label: Object label removed from both axes before matching. ``None`` keeps all objects. 163 return_accuracies: Whether to also return the per-threshold accuracies. 164 165 Returns: 166 The mean segmentation accuracy, and (only if ``return_accuracies``) the per-threshold accuracies. 167 """ 168 n_true, n_matched, n_pred, scores = _compute_scores(table, "iou", ignore_label) 169 thresholds = np.arange(0.5, 1.0, 0.05) if thresholds is None else np.asarray(thresholds, "float64") 170 accuracies = np.array([ 171 _segmentation_accuracy(tp, n_pred - tp, n_true - tp) 172 for tp in (_compute_tps(scores, n_matched, float(t)) for t in thresholds) 173 ]) 174 mean_accuracy = float(np.mean(accuracies)) if accuracies.size else 0.0 175 if return_accuracies: 176 return mean_accuracy, accuracies 177 return mean_accuracy 178 179 180def matching( 181 segmentation: SourceLike, 182 groundtruth: SourceLike, 183 *, 184 threshold: float = 0.5, 185 criterion: str = "iou", 186 ignore_label: Optional[int] = 0, 187 num_workers: int = 1, 188 block_shape: Optional[Tuple[int, ...]] = None, 189 job_type: str = "local", 190 job_config: Optional[RunnerConfig] = None, 191 mask: Optional[SourceLike] = None, 192) -> Dict[str, float]: 193 """Compute object-matching scores between two segmentations. 194 195 Args: 196 segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a `Source`). 197 groundtruth: The groundtruth segmentation; same shape as ``segmentation``. 198 threshold: Overlap threshold for a match. 199 criterion: Matching criterion, one of ``"iou"``, ``"iot"`` or ``"iop"``. 200 ignore_label: Object label removed from both axes before matching (e.g. background). ``None`` 201 keeps all objects. 202 num_workers: Number of parallel workers used to build the contingency table. 203 block_shape: Shape of the processing blocks. Defaults to the input chunk shape. 204 job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``. 205 job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`). 206 mask: Optional binary mask; voxels outside the mask are excluded. 207 208 Returns: 209 A mapping with keys ``precision``, ``recall``, ``segmentation_accuracy`` and ``f1``. 210 """ 211 table = build_table(segmentation, groundtruth, num_workers=num_workers, block_shape=block_shape, 212 job_type=job_type, job_config=job_config, mask=mask) 213 return matching_scores(table, threshold=threshold, criterion=criterion, ignore_label=ignore_label) 214 215 216def mean_segmentation_accuracy( 217 segmentation: SourceLike, 218 groundtruth: SourceLike, 219 *, 220 thresholds: Optional[Sequence[float]] = None, 221 ignore_label: Optional[int] = 0, 222 return_accuracies: bool = False, 223 num_workers: int = 1, 224 block_shape: Optional[Tuple[int, ...]] = None, 225 job_type: str = "local", 226 job_config: Optional[RunnerConfig] = None, 227 mask: Optional[SourceLike] = None, 228) -> Union[float, Tuple[float, np.ndarray]]: 229 """Compute the mean segmentation accuracy between two segmentations. 230 231 Args: 232 segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a `Source`). 233 groundtruth: The groundtruth segmentation; same shape as ``segmentation``. 234 thresholds: IoU thresholds to average over; defaults to ``np.arange(0.5, 1.0, 0.05)``. 235 ignore_label: Object label removed from both axes before matching. ``None`` keeps all objects. 236 return_accuracies: Whether to also return the per-threshold accuracies. 237 num_workers: Number of parallel workers used to build the contingency table. 238 block_shape: Shape of the processing blocks. Defaults to the input chunk shape. 239 job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``. 240 job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`). 241 mask: Optional binary mask; voxels outside the mask are excluded. 242 243 Returns: 244 The mean segmentation accuracy, and (only if ``return_accuracies``) the per-threshold accuracies. 245 """ 246 table = build_table(segmentation, groundtruth, num_workers=num_workers, block_shape=block_shape, 247 job_type=job_type, job_config=job_config, mask=mask) 248 return mean_segmentation_accuracy_scores(table, thresholds=thresholds, ignore_label=ignore_label, 249 return_accuracies=return_accuracies)
130def matching_scores(table: ContingencyTable, *, threshold: float = 0.5, criterion: str = "iou", 131 ignore_label: Optional[int] = 0) -> Dict[str, float]: 132 """Compute object-matching scores from a contingency table. 133 134 Args: 135 table: A contingency table built as ``contingency_table(segmentation, groundtruth)``. 136 threshold: Overlap threshold for a match. 137 criterion: Matching criterion, one of ``"iou"``, ``"iot"`` or ``"iop"``. 138 ignore_label: Object label removed from both axes before matching (e.g. background). ``None`` 139 keeps all objects. 140 141 Returns: 142 A mapping with keys ``precision``, ``recall``, ``segmentation_accuracy`` and ``f1``. 143 """ 144 n_true, n_matched, n_pred, scores = _compute_scores(table, criterion, ignore_label) 145 tp = _compute_tps(scores, n_matched, threshold) 146 fp, fn = n_pred - tp, n_true - tp 147 return {"precision": _precision(tp, fp, fn), "recall": _recall(tp, fp, fn), 148 "segmentation_accuracy": _segmentation_accuracy(tp, fp, fn), "f1": _f1(tp, fp, fn)}
Compute object-matching scores from a contingency table.
Args:
table: A contingency table built as contingency_table(segmentation, groundtruth).
threshold: Overlap threshold for a match.
criterion: Matching criterion, one of "iou", "iot" or "iop".
ignore_label: Object label removed from both axes before matching (e.g. background). None
keeps all objects.
Returns:
A mapping with keys precision, recall, segmentation_accuracy and f1.
151def mean_segmentation_accuracy_scores( 152 table: ContingencyTable, 153 *, 154 thresholds: Optional[Sequence[float]] = None, 155 ignore_label: Optional[int] = 0, 156 return_accuracies: bool = False, 157) -> Union[float, Tuple[float, np.ndarray]]: 158 """Compute the mean segmentation accuracy (DSB-2018 style) from a contingency table. 159 160 Args: 161 table: A contingency table built as ``contingency_table(segmentation, groundtruth)``. 162 thresholds: IoU thresholds to average over; defaults to ``np.arange(0.5, 1.0, 0.05)``. 163 ignore_label: Object label removed from both axes before matching. ``None`` keeps all objects. 164 return_accuracies: Whether to also return the per-threshold accuracies. 165 166 Returns: 167 The mean segmentation accuracy, and (only if ``return_accuracies``) the per-threshold accuracies. 168 """ 169 n_true, n_matched, n_pred, scores = _compute_scores(table, "iou", ignore_label) 170 thresholds = np.arange(0.5, 1.0, 0.05) if thresholds is None else np.asarray(thresholds, "float64") 171 accuracies = np.array([ 172 _segmentation_accuracy(tp, n_pred - tp, n_true - tp) 173 for tp in (_compute_tps(scores, n_matched, float(t)) for t in thresholds) 174 ]) 175 mean_accuracy = float(np.mean(accuracies)) if accuracies.size else 0.0 176 if return_accuracies: 177 return mean_accuracy, accuracies 178 return mean_accuracy
Compute the mean segmentation accuracy (DSB-2018 style) from a contingency table.
Args:
table: A contingency table built as contingency_table(segmentation, groundtruth).
thresholds: IoU thresholds to average over; defaults to np.arange(0.5, 1.0, 0.05).
ignore_label: Object label removed from both axes before matching. None keeps all objects.
return_accuracies: Whether to also return the per-threshold accuracies.
Returns:
The mean segmentation accuracy, and (only if return_accuracies) the per-threshold accuracies.
181def matching( 182 segmentation: SourceLike, 183 groundtruth: SourceLike, 184 *, 185 threshold: float = 0.5, 186 criterion: str = "iou", 187 ignore_label: Optional[int] = 0, 188 num_workers: int = 1, 189 block_shape: Optional[Tuple[int, ...]] = None, 190 job_type: str = "local", 191 job_config: Optional[RunnerConfig] = None, 192 mask: Optional[SourceLike] = None, 193) -> Dict[str, float]: 194 """Compute object-matching scores between two segmentations. 195 196 Args: 197 segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a `Source`). 198 groundtruth: The groundtruth segmentation; same shape as ``segmentation``. 199 threshold: Overlap threshold for a match. 200 criterion: Matching criterion, one of ``"iou"``, ``"iot"`` or ``"iop"``. 201 ignore_label: Object label removed from both axes before matching (e.g. background). ``None`` 202 keeps all objects. 203 num_workers: Number of parallel workers used to build the contingency table. 204 block_shape: Shape of the processing blocks. Defaults to the input chunk shape. 205 job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``. 206 job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`). 207 mask: Optional binary mask; voxels outside the mask are excluded. 208 209 Returns: 210 A mapping with keys ``precision``, ``recall``, ``segmentation_accuracy`` and ``f1``. 211 """ 212 table = build_table(segmentation, groundtruth, num_workers=num_workers, block_shape=block_shape, 213 job_type=job_type, job_config=job_config, mask=mask) 214 return matching_scores(table, threshold=threshold, criterion=criterion, ignore_label=ignore_label)
Compute object-matching scores between two segmentations.
Args:
segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a Source).
groundtruth: The groundtruth segmentation; same shape as segmentation.
threshold: Overlap threshold for a match.
criterion: Matching criterion, one of "iou", "iot" or "iop".
ignore_label: Object label removed from both axes before matching (e.g. background). None
keeps all objects.
num_workers: Number of parallel workers used to build the contingency table.
block_shape: Shape of the processing blocks. Defaults to the input chunk shape.
job_type: Execution backend: one of "local", "subprocess" or "slurm".
job_config: Backend configuration (a RunnerConfig / SlurmConfig).
mask: Optional binary mask; voxels outside the mask are excluded.
Returns:
A mapping with keys precision, recall, segmentation_accuracy and f1.
217def mean_segmentation_accuracy( 218 segmentation: SourceLike, 219 groundtruth: SourceLike, 220 *, 221 thresholds: Optional[Sequence[float]] = None, 222 ignore_label: Optional[int] = 0, 223 return_accuracies: bool = False, 224 num_workers: int = 1, 225 block_shape: Optional[Tuple[int, ...]] = None, 226 job_type: str = "local", 227 job_config: Optional[RunnerConfig] = None, 228 mask: Optional[SourceLike] = None, 229) -> Union[float, Tuple[float, np.ndarray]]: 230 """Compute the mean segmentation accuracy between two segmentations. 231 232 Args: 233 segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a `Source`). 234 groundtruth: The groundtruth segmentation; same shape as ``segmentation``. 235 thresholds: IoU thresholds to average over; defaults to ``np.arange(0.5, 1.0, 0.05)``. 236 ignore_label: Object label removed from both axes before matching. ``None`` keeps all objects. 237 return_accuracies: Whether to also return the per-threshold accuracies. 238 num_workers: Number of parallel workers used to build the contingency table. 239 block_shape: Shape of the processing blocks. Defaults to the input chunk shape. 240 job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``. 241 job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`). 242 mask: Optional binary mask; voxels outside the mask are excluded. 243 244 Returns: 245 The mean segmentation accuracy, and (only if ``return_accuracies``) the per-threshold accuracies. 246 """ 247 table = build_table(segmentation, groundtruth, num_workers=num_workers, block_shape=block_shape, 248 job_type=job_type, job_config=job_config, mask=mask) 249 return mean_segmentation_accuracy_scores(table, thresholds=thresholds, ignore_label=ignore_label, 250 return_accuracies=return_accuracies)
Compute the mean segmentation accuracy between two segmentations.
Args:
segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a Source).
groundtruth: The groundtruth segmentation; same shape as segmentation.
thresholds: IoU thresholds to average over; defaults to np.arange(0.5, 1.0, 0.05).
ignore_label: Object label removed from both axes before matching. None keeps all objects.
return_accuracies: Whether to also return the per-threshold accuracies.
num_workers: Number of parallel workers used to build the contingency table.
block_shape: Shape of the processing blocks. Defaults to the input chunk shape.
job_type: Execution backend: one of "local", "subprocess" or "slurm".
job_config: Backend configuration (a RunnerConfig / SlurmConfig).
mask: Optional binary mask; voxels outside the mask are excluded.
Returns:
The mean segmentation accuracy, and (only if return_accuracies) the per-threshold accuracies.