bioimage_py.evaluation.variation_of_information
Variation of information (split / merge) and its per-object decomposition.
Both are pure reductions of a ContingencyTable built as
contingency_table(segmentation, groundtruth) (axis A = segmentation, axis B = groundtruth). The
split score is the conditional entropy H(seg | gt) (over-segmentation) and the merge score is
H(gt | seg) (under-segmentation); their sum is the variation of information.
"""Split / merge variation of information, plus a per-groundtruth-object breakdown.

Every score in this module is derived from a :class:`ContingencyTable` created via
``contingency_table(segmentation, groundtruth)``: axis A holds the segmentation and axis B the
groundtruth. The split score equals the conditional entropy ``H(seg | gt)`` and reflects
over-segmentation; the merge score equals ``H(gt | seg)`` and reflects under-segmentation.
Adding the two gives the variation of information.
"""
from __future__ import annotations

from typing import Optional, Sequence, Tuple

import numpy as np
import pandas as pd

from ..runner.config import RunnerConfig
from ..sources import SourceLike
from ._common import build_table
from .contingency_table import ContingencyTable

__all__ = ["vi_scores", "object_vi_scores", "variation_of_information", "object_vi"]


def _pair_sizes(table: ContingencyTable) -> Tuple[np.ndarray, np.ndarray]:
    """Look up, for every pair, the marginal size of its A-label and of its B-label.

    Both returned arrays are float64 and aligned with the rows of ``table.pairs``.
    """
    # NOTE(review): the searchsorted lookup presumes ``labels_a`` / ``labels_b`` are sorted
    # ascending and contain every label found in ``pairs`` -- confirm against ContingencyTable.
    idx_a = np.searchsorted(table.labels_a, table.pairs[:, 0])
    idx_b = np.searchsorted(table.labels_b, table.pairs[:, 1])
    marginal_a = table.sizes_a.astype("float64")
    marginal_b = table.sizes_b.astype("float64")
    return marginal_a[idx_a], marginal_b[idx_b]


def vi_scores(table: ContingencyTable, *, use_log2: bool = True) -> Tuple[float, float]:
    """Reduce a contingency table to the split and merge variation of information.

    Args:
        table: A contingency table built as ``contingency_table(segmentation, groundtruth)``.
        use_log2: If true, entropies are measured with ``log2`` (bits); otherwise with the
            natural ``log`` (nats).

    Returns:
        A ``(split, merge)`` tuple, where split is ``H(seg | gt)`` and merge is ``H(gt | seg)``.
        A table without any points yields ``(0.0, 0.0)``.
    """
    total = table.n_points
    if total == 0:
        return 0.0, 0.0
    log = np.log2 if use_log2 else np.log

    def _entropy(sizes):
        # Shannon entropy of the label distribution given by the marginal sizes.
        prob = sizes.astype("float64") / total
        return -np.sum(prob * log(prob))

    entropy_seg = _entropy(table.sizes_a)
    entropy_gt = _entropy(table.sizes_b)

    overlap = table.counts.astype("float64")
    size_seg, size_gt = _pair_sizes(table)
    # Mutual information accumulated over the (seg, gt) pairs stored in the table.
    mutual = np.sum(overlap / total * log(total * overlap / (size_seg * size_gt)))

    # Conditional entropy H(X | Y) = H(X) - I(X; Y).
    return float(entropy_seg - mutual), float(entropy_gt - mutual)


def object_vi_scores(table: ContingencyTable, *, use_log2: bool = True) -> "pd.DataFrame":
    """Reduce a contingency table to split / merge scores for each groundtruth object.

    Based on https://arxiv.org/pdf/1708.02599.pdf (page 16).

    Args:
        table: A contingency table built as ``contingency_table(segmentation, groundtruth)``.
        use_log2: If true, use ``log2`` (bits); otherwise the natural ``log`` (nats).

    Returns:
        A pandas DataFrame holding one row per groundtruth object, ordered by label, with the
        columns ``label`` (groundtruth id), ``vi_split`` and ``vi_merge``. A table without
        pairs yields an empty frame with the same columns.
    """
    if table.pairs.shape[0] == 0:
        empty_columns = {
            "label": pd.Series(dtype="uint64"),
            "vi_split": pd.Series(dtype="float64"),
            "vi_merge": pd.Series(dtype="float64"),
        }
        return pd.DataFrame(empty_columns)

    log = np.log2 if use_log2 else np.log
    size_seg, size_gt = _pair_sizes(table)

    # Bring all pairs of one groundtruth (B) label next to each other.
    gt_ids = table.pairs[:, 1]
    perm = np.argsort(gt_ids, kind="stable")
    gt_sorted = gt_ids[perm]
    overlap = table.counts.astype("float64")[perm]
    size_seg = size_seg[perm]
    size_gt = size_gt[perm]

    # A run starts at index 0 and wherever the sorted label differs from its predecessor.
    run_begins = np.concatenate(([True], gt_sorted[1:] != gt_sorted[:-1]))
    group_starts = np.flatnonzero(run_begins)

    # Each pair is weighted by the share of its groundtruth object that it covers.
    share_of_gt = overlap / size_gt
    merge_terms = -share_of_gt * log(share_of_gt)
    split_terms = -share_of_gt * log(overlap / size_seg)

    frame = pd.DataFrame({
        "label": gt_sorted[group_starts].astype("uint64"),
        "vi_split": np.add.reduceat(split_terms, group_starts),
        "vi_merge": np.add.reduceat(merge_terms, group_starts),
    })
    return frame.reset_index(drop=True)


def variation_of_information(
    segmentation: SourceLike,
    groundtruth: SourceLike,
    *,
    ignore_seg: Optional[Sequence[int]] = None,
    ignore_gt: Optional[Sequence[int]] = None,
    use_log2: bool = True,
    num_workers: int = 1,
    block_shape: Optional[Tuple[int, ...]] = None,
    job_type: str = "local",
    job_config: Optional[RunnerConfig] = None,
    mask: Optional[SourceLike] = None,
) -> Tuple[float, float]:
    """Score a segmentation against a groundtruth with the split / merge variation of information.

    The contingency table is built with ``build_table`` and then reduced by ``vi_scores``.

    Args:
        segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a `Source`).
        groundtruth: The groundtruth segmentation; same shape as ``segmentation``.
        ignore_seg: Labels to ignore in the segmentation (their voxels are excluded).
        ignore_gt: Labels to ignore in the groundtruth (their voxels are excluded).
        use_log2: Whether to use ``log2`` (bits) or natural ``log`` (nats).
        num_workers: Number of parallel workers used to build the contingency table.
        block_shape: Shape of the processing blocks. Defaults to the input chunk shape.
        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
        mask: Optional binary mask; voxels outside the mask are excluded.

    Returns:
        The split variation of information followed by the merge variation of information.
    """
    table_options = dict(
        ignore_seg=ignore_seg,
        ignore_gt=ignore_gt,
        num_workers=num_workers,
        block_shape=block_shape,
        job_type=job_type,
        job_config=job_config,
        mask=mask,
    )
    table = build_table(segmentation, groundtruth, **table_options)
    return vi_scores(table, use_log2=use_log2)


def object_vi(
    segmentation: SourceLike,
    groundtruth: SourceLike,
    *,
    ignore_seg: Optional[Sequence[int]] = None,
    ignore_gt: Optional[Sequence[int]] = None,
    use_log2: bool = True,
    num_workers: int = 1,
    block_shape: Optional[Tuple[int, ...]] = None,
    job_type: str = "local",
    job_config: Optional[RunnerConfig] = None,
    mask: Optional[SourceLike] = None,
) -> "pd.DataFrame":
    """Score a segmentation against a groundtruth separately for each groundtruth object.

    The contingency table is built with ``build_table`` and then reduced by
    ``object_vi_scores``.

    Args:
        segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a `Source`).
        groundtruth: The groundtruth segmentation; same shape as ``segmentation``.
        ignore_seg: Labels to ignore in the segmentation (their voxels are excluded).
        ignore_gt: Labels to ignore in the groundtruth (their voxels are excluded).
        use_log2: Whether to use ``log2`` (bits) or natural ``log`` (nats).
        num_workers: Number of parallel workers used to build the contingency table.
        block_shape: Shape of the processing blocks. Defaults to the input chunk shape.
        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
        mask: Optional binary mask; voxels outside the mask are excluded.

    Returns:
        A pandas DataFrame with one row per groundtruth object (columns ``label``,
        ``vi_split``, ``vi_merge``).
    """
    table_options = dict(
        ignore_seg=ignore_seg,
        ignore_gt=ignore_gt,
        num_workers=num_workers,
        block_shape=block_shape,
        job_type=job_type,
        job_config=job_config,
        mask=mask,
    )
    table = build_table(segmentation, groundtruth, **table_options)
    return object_vi_scores(table, use_log2=use_log2)
def vi_scores(table: ContingencyTable, *, use_log2: bool = True) -> Tuple[float, float]:
    """Reduce a contingency table to the split and merge variation of information.

    Args:
        table: A contingency table built as ``contingency_table(segmentation, groundtruth)``.
        use_log2: If true, entropies are measured with ``log2`` (bits); otherwise with the
            natural ``log`` (nats).

    Returns:
        A ``(split, merge)`` tuple, where split is ``H(seg | gt)`` and merge is ``H(gt | seg)``.
        A table without any points yields ``(0.0, 0.0)``.
    """
    total = table.n_points
    if total == 0:
        return 0.0, 0.0
    log = np.log2 if use_log2 else np.log

    def _entropy(sizes):
        # Shannon entropy of the label distribution given by the marginal sizes.
        prob = sizes.astype("float64") / total
        return -np.sum(prob * log(prob))

    entropy_seg = _entropy(table.sizes_a)
    entropy_gt = _entropy(table.sizes_b)

    overlap = table.counts.astype("float64")
    size_seg, size_gt = _pair_sizes(table)
    # Mutual information accumulated over the (seg, gt) pairs stored in the table.
    mutual = np.sum(overlap / total * log(total * overlap / (size_seg * size_gt)))

    # Conditional entropy H(X | Y) = H(X) - I(X; Y).
    return float(entropy_seg - mutual), float(entropy_gt - mutual)
Compute the split and merge variation of information from a contingency table.
Args:
table: A contingency table built as contingency_table(segmentation, groundtruth).
use_log2: Whether to use log2 (bits) or natural log (nats).
Returns:
The split variation of information (H(seg | gt)) and the merge variation of information
(H(gt | seg)).
def object_vi_scores(table: ContingencyTable, *, use_log2: bool = True) -> "pd.DataFrame":
    """Reduce a contingency table to split / merge scores for each groundtruth object.

    Based on https://arxiv.org/pdf/1708.02599.pdf (page 16).

    Args:
        table: A contingency table built as ``contingency_table(segmentation, groundtruth)``.
        use_log2: If true, use ``log2`` (bits); otherwise the natural ``log`` (nats).

    Returns:
        A pandas DataFrame holding one row per groundtruth object, ordered by label, with the
        columns ``label`` (groundtruth id), ``vi_split`` and ``vi_merge``. A table without
        pairs yields an empty frame with the same columns.
    """
    if table.pairs.shape[0] == 0:
        empty_columns = {
            "label": pd.Series(dtype="uint64"),
            "vi_split": pd.Series(dtype="float64"),
            "vi_merge": pd.Series(dtype="float64"),
        }
        return pd.DataFrame(empty_columns)

    log = np.log2 if use_log2 else np.log
    size_seg, size_gt = _pair_sizes(table)

    # Bring all pairs of one groundtruth (B) label next to each other.
    gt_ids = table.pairs[:, 1]
    perm = np.argsort(gt_ids, kind="stable")
    gt_sorted = gt_ids[perm]
    overlap = table.counts.astype("float64")[perm]
    size_seg = size_seg[perm]
    size_gt = size_gt[perm]

    # A run starts at index 0 and wherever the sorted label differs from its predecessor.
    run_begins = np.concatenate(([True], gt_sorted[1:] != gt_sorted[:-1]))
    group_starts = np.flatnonzero(run_begins)

    # Each pair is weighted by the share of its groundtruth object that it covers.
    share_of_gt = overlap / size_gt
    merge_terms = -share_of_gt * log(share_of_gt)
    split_terms = -share_of_gt * log(overlap / size_seg)

    frame = pd.DataFrame({
        "label": gt_sorted[group_starts].astype("uint64"),
        "vi_split": np.add.reduceat(split_terms, group_starts),
        "vi_merge": np.add.reduceat(merge_terms, group_starts),
    })
    return frame.reset_index(drop=True)
Compute the per-groundtruth-object variation of information from a contingency table.
Based on https://arxiv.org/pdf/1708.02599.pdf (page 16).
Args:
table: A contingency table built as contingency_table(segmentation, groundtruth).
use_log2: Whether to use log2 (bits) or natural log (nats).
Returns:
A pandas DataFrame with one row per groundtruth object, sorted by label, with columns
label (groundtruth id), vi_split and vi_merge.
def variation_of_information(
    segmentation: SourceLike,
    groundtruth: SourceLike,
    *,
    ignore_seg: Optional[Sequence[int]] = None,
    ignore_gt: Optional[Sequence[int]] = None,
    use_log2: bool = True,
    num_workers: int = 1,
    block_shape: Optional[Tuple[int, ...]] = None,
    job_type: str = "local",
    job_config: Optional[RunnerConfig] = None,
    mask: Optional[SourceLike] = None,
) -> Tuple[float, float]:
    """Score a segmentation against a groundtruth with the split / merge variation of information.

    The contingency table is built with ``build_table`` and then reduced by ``vi_scores``.

    Args:
        segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a `Source`).
        groundtruth: The groundtruth segmentation; same shape as ``segmentation``.
        ignore_seg: Labels to ignore in the segmentation (their voxels are excluded).
        ignore_gt: Labels to ignore in the groundtruth (their voxels are excluded).
        use_log2: Whether to use ``log2`` (bits) or natural ``log`` (nats).
        num_workers: Number of parallel workers used to build the contingency table.
        block_shape: Shape of the processing blocks. Defaults to the input chunk shape.
        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
        mask: Optional binary mask; voxels outside the mask are excluded.

    Returns:
        The split variation of information followed by the merge variation of information.
    """
    table_options = dict(
        ignore_seg=ignore_seg,
        ignore_gt=ignore_gt,
        num_workers=num_workers,
        block_shape=block_shape,
        job_type=job_type,
        job_config=job_config,
        mask=mask,
    )
    table = build_table(segmentation, groundtruth, **table_options)
    return vi_scores(table, use_log2=use_log2)
Compute the split and merge variation of information between two segmentations.
Args:
segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a Source).
groundtruth: The groundtruth segmentation; same shape as segmentation.
ignore_seg: Labels to ignore in the segmentation (their voxels are excluded).
ignore_gt: Labels to ignore in the groundtruth (their voxels are excluded).
use_log2: Whether to use log2 (bits) or natural log (nats).
num_workers: Number of parallel workers used to build the contingency table.
block_shape: Shape of the processing blocks. Defaults to the input chunk shape.
job_type: Execution backend: one of "local", "subprocess" or "slurm".
job_config: Backend configuration (a RunnerConfig / SlurmConfig).
mask: Optional binary mask; voxels outside the mask are excluded.
Returns: The split variation of information (H(seg | gt)) and the merge variation of information (H(gt | seg)), in that order.
def object_vi(
    segmentation: SourceLike,
    groundtruth: SourceLike,
    *,
    ignore_seg: Optional[Sequence[int]] = None,
    ignore_gt: Optional[Sequence[int]] = None,
    use_log2: bool = True,
    num_workers: int = 1,
    block_shape: Optional[Tuple[int, ...]] = None,
    job_type: str = "local",
    job_config: Optional[RunnerConfig] = None,
    mask: Optional[SourceLike] = None,
) -> "pd.DataFrame":
    """Score a segmentation against a groundtruth separately for each groundtruth object.

    The contingency table is built with ``build_table`` and then reduced by
    ``object_vi_scores``.

    Args:
        segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a `Source`).
        groundtruth: The groundtruth segmentation; same shape as ``segmentation``.
        ignore_seg: Labels to ignore in the segmentation (their voxels are excluded).
        ignore_gt: Labels to ignore in the groundtruth (their voxels are excluded).
        use_log2: Whether to use ``log2`` (bits) or natural ``log`` (nats).
        num_workers: Number of parallel workers used to build the contingency table.
        block_shape: Shape of the processing blocks. Defaults to the input chunk shape.
        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
        mask: Optional binary mask; voxels outside the mask are excluded.

    Returns:
        A pandas DataFrame with one row per groundtruth object (columns ``label``,
        ``vi_split``, ``vi_merge``).
    """
    table_options = dict(
        ignore_seg=ignore_seg,
        ignore_gt=ignore_gt,
        num_workers=num_workers,
        block_shape=block_shape,
        job_type=job_type,
        job_config=job_config,
        mask=mask,
    )
    table = build_table(segmentation, groundtruth, **table_options)
    return object_vi_scores(table, use_log2=use_log2)
Compute the per-groundtruth-object variation of information between two segmentations.
Args:
segmentation: Candidate segmentation to evaluate (a numpy/zarr/n5 array or a Source).
groundtruth: The groundtruth segmentation; same shape as segmentation.
ignore_seg: Labels to ignore in the segmentation (their voxels are excluded).
ignore_gt: Labels to ignore in the groundtruth (their voxels are excluded).
use_log2: Whether to use log2 (bits) or natural log (nats).
num_workers: Number of parallel workers used to build the contingency table.
block_shape: Shape of the processing blocks. Defaults to the input chunk shape.
job_type: Execution backend: one of "local", "subprocess" or "slurm".
job_config: Backend configuration (a RunnerConfig / SlurmConfig).
mask: Optional binary mask; voxels outside the mask are excluded.
Returns:
A pandas DataFrame with one row per groundtruth object, sorted by label (columns label,
vi_split, vi_merge).