synapse_net.ground_truth.az_evaluation
1import os 2from typing import List, Optional 3 4import h5py 5import pandas as pd 6import numpy as np 7import vigra 8 9from elf.evaluation.matching import _compute_scores, _compute_tps 10from elf.evaluation import dice_score 11from elf.segmentation.workflows import simple_multicut_workflow 12from scipy.ndimage import binary_dilation, binary_closing, distance_transform_edt, binary_opening 13from skimage.measure import label, regionprops, regionprops_table 14from skimage.segmentation import relabel_sequential, watershed 15from tqdm import tqdm 16 17 18def _expand_seg(az, iterations): 19 return binary_closing(binary_dilation(az, iterations=iterations), iterations=iterations) 20 21 22def _crop(seg, gt, return_bb=False): 23 bb_seg, bb_gt = np.where(seg), np.where(gt) 24 25 # Handle empty segmentations. 26 if bb_seg[0].size == 0: 27 bb = tuple(slice(bgt.min(), bgt.max() + 1) for bseg, bgt in zip(bb_seg, bb_gt)) 28 else: 29 bb = tuple(slice( 30 min(bseg.min(), bgt.min()), max(bseg.max(), bgt.max()) + 1 31 ) for bseg, bgt in zip(bb_seg, bb_gt)) 32 33 if return_bb: 34 return seg[bb], gt[bb], bb 35 else: 36 return seg[bb], gt[bb] 37 38 39def _postprocess(data, apply_cc, min_component_size, iterations=0): 40 if iterations > 0: 41 data = _expand_seg(data, iterations) 42 if apply_cc: 43 data = label(data) 44 ids, sizes = np.unique(data, return_counts=True) 45 filter_ids = ids[sizes < min_component_size] 46 data[np.isin(data, filter_ids)] = 0 47 data, _, _ = relabel_sequential(data) 48 return data 49 50 51def _single_az_evaluation(seg, gt, apply_cc, min_component_size, iterations, criterion): 52 assert seg.shape == gt.shape, f"{seg.shape}, {gt.shape}" 53 dice = dice_score(seg > 0, gt > 0) 54 55 seg = _postprocess(seg, apply_cc, min_component_size, iterations=iterations) 56 gt = _postprocess(gt, apply_cc, min_component_size=500) 57 58 n_true, n_matched, n_pred, scores = _compute_scores(seg, gt, criterion=criterion, ignore_label=0) 59 tp = _compute_tps(scores, n_matched, threshold=0.5) 60 fp = n_pred - tp 61 fn = n_true - tp 62 63 return {"tp": tp, "fp": fp, "fn": fn, "dice": dice} 64 65 66def az_evaluation( 67 seg_paths: List[str], 68 gt_paths: List[str], 69 seg_key: str, 70 gt_key: str, 71 crop: bool = True, 72 apply_cc: bool = True, 73 min_component_size: int = 5000, 74 iterations: int = 3, 75 criterion: str = "iou", 76 threshold: Optional[float] = None, 77 **extra_cols 78) -> pd.DataFrame: 79 """Evaluate active zone segmentations against ground-truth annotations. 80 81 This computes the dice score as well as false positives, false negatives and true positives 82 for each segmented tomogram. 83 84 Args: 85 seg_paths: The filepaths to the segmentations, stored as hd5 files. 86 gt_paths: The filepaths to the ground-truth annotatons, stored as hdf5 files. 87 seg_key: The internal path to the data in the segmentation hdf5 file. 88 gt_key: The internal path to the data in the ground-truth hdf5 file. 89 crop: Whether to crop the segmentation and ground-truth to the bounding box. 90 apply_cc: Whether to apply connected components before evaluation. 91 min_component_size: Minimum component size for filtering the segmentation before evaluation. 92 iterations: Post-processing iterations for expanding the AZ annotations. 93 criterion: The criterion for matching annotations and segmentations 94 threshold: Threshold applied to the segmentation. This is required if the segmentation is passed as 95 probability prediction instead of a binary segmentation. Possible values: 'iou', 'iop', 'iot'. 96 extra_cols: Additional columns for the result table. 97 98 Returns: 99 A data frame with the evaluation results per tomogram. 100 """ 101 assert len(seg_paths) == len(gt_paths) 102 103 results = {key: [] for key in extra_cols.keys()} 104 results.update({ 105 "tomo_name": [], 106 "tp": [], 107 "fp": [], 108 "fn": [], 109 "dice": [], 110 }) 111 112 i = 0 113 for seg_path, gt_path in tqdm(zip(seg_paths, gt_paths), total=len(seg_paths), desc="Run AZ Eval"): 114 with h5py.File(seg_path, "r") as f: 115 if seg_key not in f: 116 print("Segmentation", seg_key, "could not be found in", seg_path) 117 i += 1 118 continue 119 seg = f[seg_key][:].squeeze() 120 121 with h5py.File(gt_path, "r") as f: 122 gt = f[gt_key][:] 123 124 if threshold is not None: 125 seg = seg > threshold 126 127 if crop: 128 seg, gt = _crop(seg, gt) 129 130 result = _single_az_evaluation(seg, gt, apply_cc, min_component_size, iterations, criterion=criterion) 131 results["tomo_name"].append(os.path.basename(seg_path)) 132 for res in ("tp", "fp", "fn", "dice"): 133 results[res].append(result[res]) 134 for name, val in extra_cols.items(): 135 results[name].append(val[i]) 136 i += 1 137 138 return pd.DataFrame(results) 139 140 141def _get_presynaptic_mask(boundary_map, vesicles): 142 mask = np.zeros(vesicles.shape, dtype="bool") 143 144 def _compute_mask_2d(z): 145 distances = distance_transform_edt(boundary_map[z] < 0.25).astype("float32") 146 seeds = vigra.analysis.localMaxima(distances, marker=np.nan, allowAtBorder=True, allowPlateaus=True) 147 seeds = label(np.isnan(seeds)) 148 overseg = watershed(boundary_map[z], markers=seeds) 149 seg = simple_multicut_workflow( 150 boundary_map[z], use_2dws=False, watershed=overseg, n_threads=1, beta=0.6 151 ) 152 153 def n_vesicles(mask, seg): 154 return len(np.unique(seg[mask])) - 1 155 156 props = pd.DataFrame(regionprops_table(seg, vesicles[z], properties=["label"], extra_properties=[n_vesicles])) 157 ids, n_ves = props.label.values, props.n_vesicles.values 158 presyn_id = ids[np.argmax(n_ves)] 159 160 mask[z] = seg == presyn_id 161 162 for z in range(mask.shape[0]): 163 _compute_mask_2d(z) 164 165 mask = binary_opening(mask, iterations=5) 166 167 return mask 168 169 170def thin_az( 171 az_segmentation: np.ndarray, 172 boundary_map: np.typing.ArrayLike, 173 vesicles: np.typing.ArrayLike, 174 tomo: Optional[np.typing.ArrayLike] = None, 175 presyn_dist: int = 6, 176 min_thinning_size: int = 2500, 177 post_closing: int = 2, 178 check: bool = False, 179) -> np.ndarray: 180 """Thin the active zone annotations by restricting them to a certain distance from the presynaptic mask. 181 182 Args: 183 az_segmentation: The active zone annotations. 184 boundary_map: The boundary / membrane predictions. 185 vesicles: The vesicle segmentation. 186 tomo: The tomogram data. Optional, will only be used for evaluation. 187 presyn_dist: The maximal distance to the presynaptic compartment, which is used for thinning. 188 min_thinning_size: The minimal size for a label component. 189 post_closing: Closing iterations to apply to the AZ annotations after thinning. 190 check: Whether to visually check the results. 191 192 Returns: 193 The thinned AZ annotations. 194 """ 195 az_segmentation = label(az_segmentation) 196 thinned_az = np.zeros(az_segmentation.shape, dtype="uint8") 197 props = regionprops(az_segmentation) 198 199 min_bb_shape = (32, 384, 384) 200 201 for prop in props: 202 az_id = prop.label 203 204 bb = tuple(slice(start, stop) for start, stop in zip(prop.bbox[:3], prop.bbox[3:])) 205 pad_width = [max(sh - (b.stop - b.start), 0) // 2 for b, sh in zip(bb, min_bb_shape)] 206 bb = tuple( 207 slice(max(b.start - pw, 0), min(b.stop + pw, sh)) for b, pw, sh in zip(bb, pad_width, az_segmentation.shape) 208 ) 209 210 # If this is a small component then we discard it. This is likely some artifact in the ground-truth. 211 if prop.area < min_thinning_size: 212 continue 213 214 # First, get everything for this bounding box. 215 az_bb = (az_segmentation[bb] == az_id) 216 vesicles_bb = vesicles[bb] 217 # Skip if we don't have a vesicle. 218 if vesicles[bb].max() == 0: 219 continue 220 221 mask_bb = _get_presynaptic_mask(boundary_map[bb], vesicles_bb) 222 223 # Apply post-processing to filter out only the parts of the AZ close to the presynaptic mask. 224 distances = np.stack([distance_transform_edt(mask_bb[z] == 0) for z in range(mask_bb.shape[0])]) 225 az_bb[distances > presyn_dist] = 0 226 az_bb = np.logical_or(binary_closing(az_bb, iterations=post_closing), az_bb) 227 228 if check: 229 import napari 230 tomo_bb = tomo[bb] 231 232 v = napari.Viewer() 233 v.add_image(tomo_bb) 234 v.add_labels(az_bb.astype("uint8"), name="az-thinned") 235 v.add_labels(az_segmentation[bb], name="az", visible=False) 236 v.add_labels(mask_bb, visible=False) 237 v.title = f"{prop.label}: {prop.area}" 238 239 napari.run() 240 241 thinned_az[bb][az_bb] = 1 242 243 return thinned_az
def
az_evaluation( seg_paths: List[str], gt_paths: List[str], seg_key: str, gt_key: str, crop: bool = True, apply_cc: bool = True, min_component_size: int = 5000, iterations: int = 3, criterion: str = 'iou', threshold: Optional[float] = None, **extra_cols) -> pandas.core.frame.DataFrame:
67def az_evaluation( 68 seg_paths: List[str], 69 gt_paths: List[str], 70 seg_key: str, 71 gt_key: str, 72 crop: bool = True, 73 apply_cc: bool = True, 74 min_component_size: int = 5000, 75 iterations: int = 3, 76 criterion: str = "iou", 77 threshold: Optional[float] = None, 78 **extra_cols 79) -> pd.DataFrame: 80 """Evaluate active zone segmentations against ground-truth annotations. 81 82 This computes the dice score as well as false positives, false negatives and true positives 83 for each segmented tomogram. 84 85 Args: 86 seg_paths: The filepaths to the segmentations, stored as hd5 files. 87 gt_paths: The filepaths to the ground-truth annotatons, stored as hdf5 files. 88 seg_key: The internal path to the data in the segmentation hdf5 file. 89 gt_key: The internal path to the data in the ground-truth hdf5 file. 90 crop: Whether to crop the segmentation and ground-truth to the bounding box. 91 apply_cc: Whether to apply connected components before evaluation. 92 min_component_size: Minimum component size for filtering the segmentation before evaluation. 93 iterations: Post-processing iterations for expanding the AZ annotations. 94 criterion: The criterion for matching annotations and segmentations 95 threshold: Threshold applied to the segmentation. This is required if the segmentation is passed as 96 probability prediction instead of a binary segmentation. Possible values: 'iou', 'iop', 'iot'. 97 extra_cols: Additional columns for the result table. 98 99 Returns: 100 A data frame with the evaluation results per tomogram. 101 """ 102 assert len(seg_paths) == len(gt_paths) 103 104 results = {key: [] for key in extra_cols.keys()} 105 results.update({ 106 "tomo_name": [], 107 "tp": [], 108 "fp": [], 109 "fn": [], 110 "dice": [], 111 }) 112 113 i = 0 114 for seg_path, gt_path in tqdm(zip(seg_paths, gt_paths), total=len(seg_paths), desc="Run AZ Eval"): 115 with h5py.File(seg_path, "r") as f: 116 if seg_key not in f: 117 print("Segmentation", seg_key, "could not be found in", seg_path) 118 i += 1 119 continue 120 seg = f[seg_key][:].squeeze() 121 122 with h5py.File(gt_path, "r") as f: 123 gt = f[gt_key][:] 124 125 if threshold is not None: 126 seg = seg > threshold 127 128 if crop: 129 seg, gt = _crop(seg, gt) 130 131 result = _single_az_evaluation(seg, gt, apply_cc, min_component_size, iterations, criterion=criterion) 132 results["tomo_name"].append(os.path.basename(seg_path)) 133 for res in ("tp", "fp", "fn", "dice"): 134 results[res].append(result[res]) 135 for name, val in extra_cols.items(): 136 results[name].append(val[i]) 137 i += 1 138 139 return pd.DataFrame(results)
Evaluate active zone segmentations against ground-truth annotations.
This computes the dice score as well as false positives, false negatives and true positives for each segmented tomogram.
Arguments:
- seg_paths: The filepaths to the segmentations, stored as hd5 files.
- gt_paths: The filepaths to the ground-truth annotatons, stored as hdf5 files.
- seg_key: The internal path to the data in the segmentation hdf5 file.
- gt_key: The internal path to the data in the ground-truth hdf5 file.
- crop: Whether to crop the segmentation and ground-truth to the bounding box.
- apply_cc: Whether to apply connected components before evaluation.
- min_component_size: Minimum component size for filtering the segmentation before evaluation.
- iterations: Post-processing iterations for expanding the AZ annotations.
- criterion: The criterion for matching annotations and segmentations
- threshold: Threshold applied to the segmentation. This is required if the segmentation is passed as
- probability prediction instead of a binary segmentation. Possible values: 'iou', 'iop', 'iot'.
- extra_cols: Additional columns for the result table.
Returns:
A data frame with the evaluation results per tomogram.
def
thin_az( az_segmentation: numpy.ndarray, boundary_map: Union[Buffer, numpy._typing._array_like._SupportsArray[numpy.dtype[Any]], numpy._typing._nested_sequence._NestedSequence[numpy._typing._array_like._SupportsArray[numpy.dtype[Any]]], bool, int, float, complex, str, bytes, numpy._typing._nested_sequence._NestedSequence[bool | int | float | complex | str | bytes]], vesicles: Union[Buffer, numpy._typing._array_like._SupportsArray[numpy.dtype[Any]], numpy._typing._nested_sequence._NestedSequence[numpy._typing._array_like._SupportsArray[numpy.dtype[Any]]], bool, int, float, complex, str, bytes, numpy._typing._nested_sequence._NestedSequence[bool | int | float | complex | str | bytes]], tomo: Union[Buffer, numpy._typing._array_like._SupportsArray[numpy.dtype[Any]], numpy._typing._nested_sequence._NestedSequence[numpy._typing._array_like._SupportsArray[numpy.dtype[Any]]], bool, int, float, complex, str, bytes, numpy._typing._nested_sequence._NestedSequence[bool | int | float | complex | str | bytes], NoneType] = None, presyn_dist: int = 6, min_thinning_size: int = 2500, post_closing: int = 2, check: bool = False) -> numpy.ndarray:
171def thin_az( 172 az_segmentation: np.ndarray, 173 boundary_map: np.typing.ArrayLike, 174 vesicles: np.typing.ArrayLike, 175 tomo: Optional[np.typing.ArrayLike] = None, 176 presyn_dist: int = 6, 177 min_thinning_size: int = 2500, 178 post_closing: int = 2, 179 check: bool = False, 180) -> np.ndarray: 181 """Thin the active zone annotations by restricting them to a certain distance from the presynaptic mask. 182 183 Args: 184 az_segmentation: The active zone annotations. 185 boundary_map: The boundary / membrane predictions. 186 vesicles: The vesicle segmentation. 187 tomo: The tomogram data. Optional, will only be used for evaluation. 188 presyn_dist: The maximal distance to the presynaptic compartment, which is used for thinning. 189 min_thinning_size: The minimal size for a label component. 190 post_closing: Closing iterations to apply to the AZ annotations after thinning. 191 check: Whether to visually check the results. 192 193 Returns: 194 The thinned AZ annotations. 195 """ 196 az_segmentation = label(az_segmentation) 197 thinned_az = np.zeros(az_segmentation.shape, dtype="uint8") 198 props = regionprops(az_segmentation) 199 200 min_bb_shape = (32, 384, 384) 201 202 for prop in props: 203 az_id = prop.label 204 205 bb = tuple(slice(start, stop) for start, stop in zip(prop.bbox[:3], prop.bbox[3:])) 206 pad_width = [max(sh - (b.stop - b.start), 0) // 2 for b, sh in zip(bb, min_bb_shape)] 207 bb = tuple( 208 slice(max(b.start - pw, 0), min(b.stop + pw, sh)) for b, pw, sh in zip(bb, pad_width, az_segmentation.shape) 209 ) 210 211 # If this is a small component then we discard it. This is likely some artifact in the ground-truth. 212 if prop.area < min_thinning_size: 213 continue 214 215 # First, get everything for this bounding box. 216 az_bb = (az_segmentation[bb] == az_id) 217 vesicles_bb = vesicles[bb] 218 # Skip if we don't have a vesicle. 219 if vesicles[bb].max() == 0: 220 continue 221 222 mask_bb = _get_presynaptic_mask(boundary_map[bb], vesicles_bb) 223 224 # Apply post-processing to filter out only the parts of the AZ close to the presynaptic mask. 225 distances = np.stack([distance_transform_edt(mask_bb[z] == 0) for z in range(mask_bb.shape[0])]) 226 az_bb[distances > presyn_dist] = 0 227 az_bb = np.logical_or(binary_closing(az_bb, iterations=post_closing), az_bb) 228 229 if check: 230 import napari 231 tomo_bb = tomo[bb] 232 233 v = napari.Viewer() 234 v.add_image(tomo_bb) 235 v.add_labels(az_bb.astype("uint8"), name="az-thinned") 236 v.add_labels(az_segmentation[bb], name="az", visible=False) 237 v.add_labels(mask_bb, visible=False) 238 v.title = f"{prop.label}: {prop.area}" 239 240 napari.run() 241 242 thinned_az[bb][az_bb] = 1 243 244 return thinned_az
Thin the active zone annotations by restricting them to a certain distance from the presynaptic mask.
Arguments:
- az_segmentation: The active zone annotations.
- boundary_map: The boundary / membrane predictions.
- vesicles: The vesicle segmentation.
- tomo: The tomogram data. Optional, will only be used for evaluation.
- presyn_dist: The maximal distance to the presynaptic compartment, which is used for thinning.
- min_thinning_size: The minimal size for a label component.
- post_closing: Closing iterations to apply to the AZ annotations after thinning.
- check: Whether to visually check the results.
Returns:
The thinned AZ annotations.