synapse_net.ground_truth.az_evaluation

import os
from typing import List, Optional

import h5py
import pandas as pd
import numpy as np
import vigra

from elf.evaluation.matching import _compute_scores, _compute_tps
from elf.evaluation import dice_score
from elf.segmentation.workflows import simple_multicut_workflow
from scipy.ndimage import binary_dilation, binary_closing, distance_transform_edt, binary_opening
from skimage.measure import label, regionprops, regionprops_table
from skimage.segmentation import relabel_sequential, watershed
from tqdm import tqdm


def _expand_seg(az, iterations):
    return binary_closing(binary_dilation(az, iterations=iterations), iterations=iterations)


def _crop(seg, gt, return_bb=False):
    bb_seg, bb_gt = np.where(seg), np.where(gt)

    # Handle empty segmentations.
    if bb_seg[0].size == 0:
        bb = tuple(slice(bgt.min(), bgt.max() + 1) for bseg, bgt in zip(bb_seg, bb_gt))
    else:
        bb = tuple(slice(
            min(bseg.min(), bgt.min()), max(bseg.max(), bgt.max()) + 1
        ) for bseg, bgt in zip(bb_seg, bb_gt))

    if return_bb:
        return seg[bb], gt[bb], bb
    else:
        return seg[bb], gt[bb]


def _postprocess(data, apply_cc, min_component_size, iterations=0):
    if iterations > 0:
        data = _expand_seg(data, iterations)
    if apply_cc:
        data = label(data)
        ids, sizes = np.unique(data, return_counts=True)
        filter_ids = ids[sizes < min_component_size]
        data[np.isin(data, filter_ids)] = 0
        data, _, _ = relabel_sequential(data)
    return data


def _single_az_evaluation(seg, gt, apply_cc, min_component_size, iterations, criterion):
    assert seg.shape == gt.shape, f"{seg.shape}, {gt.shape}"
    dice = dice_score(seg > 0, gt > 0)

    seg = _postprocess(seg, apply_cc, min_component_size, iterations=iterations)
    gt = _postprocess(gt, apply_cc, min_component_size=500)

    n_true, n_matched, n_pred, scores = _compute_scores(seg, gt, criterion=criterion, ignore_label=0)
    tp = _compute_tps(scores, n_matched, threshold=0.5)
    fp = n_pred - tp
    fn = n_true - tp

    return {"tp": tp, "fp": fp, "fn": fn, "dice": dice}


def az_evaluation(
    seg_paths: List[str],
    gt_paths: List[str],
    seg_key: str,
    gt_key: str,
    crop: bool = True,
    apply_cc: bool = True,
    min_component_size: int = 5000,
    iterations: int = 3,
    criterion: str = "iou",
    threshold: Optional[float] = None,
    **extra_cols
) -> pd.DataFrame:
    """Evaluate active zone segmentations against ground-truth annotations.

    This computes the dice score as well as false positives, false negatives and true positives
    for each segmented tomogram.

    Args:
        seg_paths: The filepaths to the segmentations, stored as hdf5 files.
        gt_paths: The filepaths to the ground-truth annotations, stored as hdf5 files.
        seg_key: The internal path to the data in the segmentation hdf5 file.
        gt_key: The internal path to the data in the ground-truth hdf5 file.
        crop: Whether to crop the segmentation and ground-truth to their joint bounding box.
        apply_cc: Whether to apply connected components before evaluation.
        min_component_size: Minimum component size for filtering the segmentation before evaluation.
        iterations: Post-processing iterations for expanding the AZ annotations.
        criterion: The criterion for matching annotations and segmentations.
            Possible values: 'iou', 'iop', 'iot'.
        threshold: Threshold applied to the segmentation. This is required if the segmentation is passed
            as a probability prediction instead of a binary segmentation.
        extra_cols: Additional columns for the result table.

    Returns:
        A data frame with the evaluation results per tomogram.
    """
    assert len(seg_paths) == len(gt_paths)

    results = {key: [] for key in extra_cols.keys()}
    results.update({
        "tomo_name": [],
        "tp": [],
        "fp": [],
        "fn": [],
        "dice": [],
    })

    i = 0
    for seg_path, gt_path in tqdm(zip(seg_paths, gt_paths), total=len(seg_paths), desc="Run AZ Eval"):
        with h5py.File(seg_path, "r") as f:
            if seg_key not in f:
                print("Segmentation", seg_key, "could not be found in", seg_path)
                i += 1
                continue
            seg = f[seg_key][:].squeeze()

        with h5py.File(gt_path, "r") as f:
            gt = f[gt_key][:]

        if threshold is not None:
            seg = seg > threshold

        if crop:
            seg, gt = _crop(seg, gt)

        result = _single_az_evaluation(seg, gt, apply_cc, min_component_size, iterations, criterion=criterion)
        results["tomo_name"].append(os.path.basename(seg_path))
        for res in ("tp", "fp", "fn", "dice"):
            results[res].append(result[res])
        for name, val in extra_cols.items():
            results[name].append(val[i])
        i += 1

    return pd.DataFrame(results)


def _get_presynaptic_mask(boundary_map, vesicles):
    mask = np.zeros(vesicles.shape, dtype="bool")

    def _compute_mask_2d(z):
        # Seed a watershed over-segmentation from the maxima of the distance transform
        # to the boundary predictions, then merge it with a 2D multicut.
        distances = distance_transform_edt(boundary_map[z] < 0.25).astype("float32")
        seeds = vigra.analysis.localMaxima(distances, marker=np.nan, allowAtBorder=True, allowPlateaus=True)
        seeds = label(np.isnan(seeds))
        overseg = watershed(boundary_map[z], markers=seeds)
        seg = simple_multicut_workflow(
            boundary_map[z], use_2dws=False, watershed=overseg, n_threads=1, beta=0.6
        )

        def n_vesicles(mask, seg):
            return len(np.unique(seg[mask])) - 1

        # The presynaptic compartment is the segment that contains the most vesicles.
        props = pd.DataFrame(regionprops_table(seg, vesicles[z], properties=["label"], extra_properties=[n_vesicles]))
        ids, n_ves = props.label.values, props.n_vesicles.values
        presyn_id = ids[np.argmax(n_ves)]

        mask[z] = seg == presyn_id

    for z in range(mask.shape[0]):
        _compute_mask_2d(z)

    mask = binary_opening(mask, iterations=5)

    return mask


def thin_az(
    az_segmentation: np.ndarray,
    boundary_map: np.typing.ArrayLike,
    vesicles: np.typing.ArrayLike,
    tomo: Optional[np.typing.ArrayLike] = None,
    presyn_dist: int = 6,
    min_thinning_size: int = 2500,
    post_closing: int = 2,
    check: bool = False,
) -> np.ndarray:
    """Thin the active zone annotations by restricting them to a certain distance from the presynaptic mask.

    Args:
        az_segmentation: The active zone annotations.
        boundary_map: The boundary / membrane predictions.
        vesicles: The vesicle segmentation.
        tomo: The tomogram data. Optional, only used for the visual check if `check` is set.
        presyn_dist: The maximal distance to the presynaptic compartment, which is used for thinning.
        min_thinning_size: The minimal size for a label component.
        post_closing: Closing iterations to apply to the AZ annotations after thinning.
        check: Whether to visually check the results.

    Returns:
        The thinned AZ annotations.
    """
    az_segmentation = label(az_segmentation)
    thinned_az = np.zeros(az_segmentation.shape, dtype="uint8")
    props = regionprops(az_segmentation)

    min_bb_shape = (32, 384, 384)

    for prop in props:
        az_id = prop.label

        # Compute the bounding box of the component and enlarge it towards the minimal bounding box shape.
        bb = tuple(slice(start, stop) for start, stop in zip(prop.bbox[:3], prop.bbox[3:]))
        pad_width = [max(sh - (b.stop - b.start), 0) // 2 for b, sh in zip(bb, min_bb_shape)]
        bb = tuple(
            slice(max(b.start - pw, 0), min(b.stop + pw, sh)) for b, pw, sh in zip(bb, pad_width, az_segmentation.shape)
        )

        # If this is a small component then we discard it. This is likely some artifact in the ground-truth.
        if prop.area < min_thinning_size:
            continue

        # First, get everything for this bounding box.
        az_bb = (az_segmentation[bb] == az_id)
        vesicles_bb = vesicles[bb]
        # Skip if we don't have a vesicle.
        if vesicles_bb.max() == 0:
            continue

        mask_bb = _get_presynaptic_mask(boundary_map[bb], vesicles_bb)

        # Apply post-processing to filter out only the parts of the AZ close to the presynaptic mask.
        distances = np.stack([distance_transform_edt(mask_bb[z] == 0) for z in range(mask_bb.shape[0])])
        az_bb[distances > presyn_dist] = 0
        az_bb = np.logical_or(binary_closing(az_bb, iterations=post_closing), az_bb)

        if check:
            import napari
            tomo_bb = tomo[bb]

            v = napari.Viewer()
            v.add_image(tomo_bb)
            v.add_labels(az_bb.astype("uint8"), name="az-thinned")
            v.add_labels(az_segmentation[bb], name="az", visible=False)
            v.add_labels(mask_bb, visible=False)
            v.title = f"{prop.label}: {prop.area}"

            napari.run()

        thinned_az[bb][az_bb] = 1

    return thinned_az

def az_evaluation(seg_paths: List[str], gt_paths: List[str], seg_key: str, gt_key: str, crop: bool = True, apply_cc: bool = True, min_component_size: int = 5000, iterations: int = 3, criterion: str = "iou", threshold: Optional[float] = None, **extra_cols) -> pd.DataFrame:

Evaluate active zone segmentations against ground-truth annotations.

This computes the dice score as well as false positives, false negatives and true positives for each segmented tomogram.

Arguments:
  • seg_paths: The filepaths to the segmentations, stored as hdf5 files.
  • gt_paths: The filepaths to the ground-truth annotations, stored as hdf5 files.
  • seg_key: The internal path to the data in the segmentation hdf5 file.
  • gt_key: The internal path to the data in the ground-truth hdf5 file.
  • crop: Whether to crop the segmentation and ground-truth to their joint bounding box.
  • apply_cc: Whether to apply connected components before evaluation.
  • min_component_size: Minimum component size for filtering the segmentation before evaluation.
  • iterations: Post-processing iterations for expanding the AZ annotations.
  • criterion: The criterion for matching annotations and segmentations. Possible values: 'iou', 'iop', 'iot'.
  • threshold: Threshold applied to the segmentation. This is required if the segmentation is passed as a probability prediction instead of a binary segmentation.
  • extra_cols: Additional columns for the result table.
Returns:

A data frame with the evaluation results per tomogram.
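
A minimal usage sketch (the file names, hdf5 keys, and the extra "dataset" column below are hypothetical; adapt them to your data layout). It evaluates a set of tomograms and derives precision, recall, and F1 score from the tp/fp/fn counts in the returned data frame:

from synapse_net.ground_truth.az_evaluation import az_evaluation

# Hypothetical file lists and internal hdf5 paths.
seg_paths = ["tomo1_seg.h5", "tomo2_seg.h5"]
gt_paths = ["tomo1_gt.h5", "tomo2_gt.h5"]

results = az_evaluation(
    seg_paths, gt_paths,
    seg_key="predictions/az", gt_key="labels/az",
    threshold=0.5,  # Only needed if the segmentation is a probability map.
    dataset=["tomo1", "tomo2"],  # Example of an extra result column.
)

# Derive precision, recall and F1 score per tomogram from the matching counts.
results["precision"] = results.tp / (results.tp + results.fp)
results["recall"] = results.tp / (results.tp + results.fn)
results["f1"] = 2 * results.tp / (2 * results.tp + results.fp + results.fn)
print(results)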

def thin_az(az_segmentation: np.ndarray, boundary_map: np.typing.ArrayLike, vesicles: np.typing.ArrayLike, tomo: Optional[np.typing.ArrayLike] = None, presyn_dist: int = 6, min_thinning_size: int = 2500, post_closing: int = 2, check: bool = False) -> np.ndarray:

Thin the active zone annotations by restricting them to a certain distance from the presynaptic mask.

Arguments:
  • az_segmentation: The active zone annotations.
  • boundary_map: The boundary / membrane predictions.
  • vesicles: The vesicle segmentation.
  • tomo: The tomogram data. Optional, only used for the visual check if check is set.
  • presyn_dist: The maximal distance to the presynaptic compartment, which is used for thinning.
  • min_thinning_size: The minimal size for a label component.
  • post_closing: Closing iterations to apply to the AZ annotations after thinning.
  • check: Whether to visually check the results.
Returns:

The thinned AZ annotations.
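
A minimal sketch for thinning the AZ annotations of a single tomogram (the file names and hdf5 keys are hypothetical; boundary predictions and a vesicle segmentation must already be available):

import h5py
from synapse_net.ground_truth.az_evaluation import thin_az

# Hypothetical file layout; adjust paths and internal hdf5 keys to your data.
with h5py.File("tomo1_gt.h5", "r") as f:
    az_annotations = f["labels/az"][:]
    vesicles = f["labels/vesicles"][:]
with h5py.File("tomo1_predictions.h5", "r") as f:
    boundary_map = f["predictions/boundaries"][:]

thinned = thin_az(az_annotations, boundary_map, vesicles, presyn_dist=6, post_closing=2)

# Save the thinned annotations next to the original ground-truth.
with h5py.File("tomo1_gt.h5", "a") as f:
    f.create_dataset("labels/az_thinned", data=thinned, compression="gzip")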