synapse_net.ground_truth.az_evaluation
1import os 2from typing import List, Optional 3 4import h5py 5import pandas as pd 6import numpy as np 7 8from elf.evaluation.matching import _compute_scores, _compute_tps 9from elf.evaluation import dice_score 10from elf.segmentation.workflows import simple_multicut_workflow 11from scipy.ndimage import binary_dilation, binary_closing, distance_transform_edt, binary_opening 12from skimage.measure import label, regionprops, regionprops_table 13from skimage.morphology import local_maxima 14from skimage.segmentation import relabel_sequential, watershed 15from tqdm import tqdm 16 17 18def _expand_seg(az, iterations): 19 return binary_closing(binary_dilation(az, iterations=iterations), iterations=iterations) 20 21 22def _crop(seg, gt, return_bb=False): 23 bb_seg, bb_gt = np.where(seg), np.where(gt) 24 25 # Handle empty segmentations. 26 if bb_seg[0].size == 0: 27 bb = tuple(slice(bgt.min(), bgt.max() + 1) for bseg, bgt in zip(bb_seg, bb_gt)) 28 else: 29 bb = tuple(slice( 30 min(bseg.min(), bgt.min()), max(bseg.max(), bgt.max()) + 1 31 ) for bseg, bgt in zip(bb_seg, bb_gt)) 32 33 if return_bb: 34 return seg[bb], gt[bb], bb 35 else: 36 return seg[bb], gt[bb] 37 38 39def _postprocess(data, apply_cc, min_component_size, iterations=0): 40 if iterations > 0: 41 data = _expand_seg(data, iterations) 42 if apply_cc: 43 data = label(data) 44 ids, sizes = np.unique(data, return_counts=True) 45 filter_ids = ids[sizes < min_component_size] 46 data[np.isin(data, filter_ids)] = 0 47 data, _, _ = relabel_sequential(data) 48 return data 49 50 51def _single_az_evaluation(seg, gt, apply_cc, min_component_size, iterations, criterion): 52 assert seg.shape == gt.shape, f"{seg.shape}, {gt.shape}" 53 dice = dice_score(seg > 0, gt > 0) 54 55 seg = _postprocess(seg, apply_cc, min_component_size, iterations=iterations) 56 gt = _postprocess(gt, apply_cc, min_component_size=500) 57 58 n_true, n_matched, n_pred, scores = _compute_scores(seg, gt, criterion=criterion, ignore_label=0) 59 tp = _compute_tps(scores, n_matched, threshold=0.5) 60 fp = n_pred - tp 61 fn = n_true - tp 62 63 return {"tp": tp, "fp": fp, "fn": fn, "dice": dice} 64 65 66def az_evaluation( 67 seg_paths: List[str], 68 gt_paths: List[str], 69 seg_key: str, 70 gt_key: str, 71 crop: bool = True, 72 apply_cc: bool = True, 73 min_component_size: int = 5000, 74 iterations: int = 3, 75 criterion: str = "iou", 76 threshold: Optional[float] = None, 77 **extra_cols 78) -> pd.DataFrame: 79 """Evaluate active zone segmentations against ground-truth annotations. 80 81 This computes the dice score as well as false positives, false negatives and true positives 82 for each segmented tomogram. 83 84 Args: 85 seg_paths: The filepaths to the segmentations, stored as hd5 files. 86 gt_paths: The filepaths to the ground-truth annotatons, stored as hdf5 files. 87 seg_key: The internal path to the data in the segmentation hdf5 file. 88 gt_key: The internal path to the data in the ground-truth hdf5 file. 89 crop: Whether to crop the segmentation and ground-truth to the bounding box. 90 apply_cc: Whether to apply connected components before evaluation. 91 min_component_size: Minimum component size for filtering the segmentation before evaluation. 92 iterations: Post-processing iterations for expanding the AZ annotations. 93 criterion: The criterion for matching annotations and segmentations 94 threshold: Threshold applied to the segmentation. This is required if the segmentation is passed as 95 probability prediction instead of a binary segmentation. Possible values: 'iou', 'iop', 'iot'. 96 extra_cols: Additional columns for the result table. 97 98 Returns: 99 A data frame with the evaluation results per tomogram. 100 """ 101 assert len(seg_paths) == len(gt_paths) 102 103 results = {key: [] for key in extra_cols.keys()} 104 results.update({ 105 "tomo_name": [], 106 "tp": [], 107 "fp": [], 108 "fn": [], 109 "dice": [], 110 }) 111 112 i = 0 113 for seg_path, gt_path in tqdm(zip(seg_paths, gt_paths), total=len(seg_paths), desc="Run AZ Eval"): 114 with h5py.File(seg_path, "r") as f: 115 if seg_key not in f: 116 print("Segmentation", seg_key, "could not be found in", seg_path) 117 i += 1 118 continue 119 seg = f[seg_key][:].squeeze() 120 121 with h5py.File(gt_path, "r") as f: 122 gt = f[gt_key][:] 123 124 if threshold is not None: 125 seg = seg > threshold 126 127 if crop: 128 seg, gt = _crop(seg, gt) 129 130 result = _single_az_evaluation(seg, gt, apply_cc, min_component_size, iterations, criterion=criterion) 131 results["tomo_name"].append(os.path.basename(seg_path)) 132 for res in ("tp", "fp", "fn", "dice"): 133 results[res].append(result[res]) 134 for name, val in extra_cols.items(): 135 results[name].append(val[i]) 136 i += 1 137 138 return pd.DataFrame(results) 139 140 141def _get_presynaptic_mask(boundary_map, vesicles): 142 mask = np.zeros(vesicles.shape, dtype="bool") 143 144 def _compute_mask_2d(z): 145 distances = distance_transform_edt(boundary_map[z] < 0.25).astype("float32") 146 seeds = label(local_maxima(distances, allow_borders=True)) 147 overseg = watershed(boundary_map[z], markers=seeds) 148 seg = simple_multicut_workflow( 149 boundary_map[z], use_2dws=False, watershed=overseg, n_threads=1, beta=0.6 150 ) 151 152 def n_vesicles(mask, seg): 153 return len(np.unique(seg[mask])) - 1 154 155 props = pd.DataFrame(regionprops_table(seg, vesicles[z], properties=["label"], extra_properties=[n_vesicles])) 156 ids, n_ves = props.label.values, props.n_vesicles.values 157 presyn_id = ids[np.argmax(n_ves)] 158 159 mask[z] = seg == presyn_id 160 161 for z in range(mask.shape[0]): 162 _compute_mask_2d(z) 163 164 mask = binary_opening(mask, iterations=5) 165 166 return mask 167 168 169def thin_az( 170 az_segmentation: np.ndarray, 171 boundary_map: np.typing.ArrayLike, 172 vesicles: np.typing.ArrayLike, 173 tomo: Optional[np.typing.ArrayLike] = None, 174 presyn_dist: int = 6, 175 min_thinning_size: int = 2500, 176 post_closing: int = 2, 177 check: bool = False, 178) -> np.ndarray: 179 """Thin the active zone annotations by restricting them to a certain distance from the presynaptic mask. 180 181 Args: 182 az_segmentation: The active zone annotations. 183 boundary_map: The boundary / membrane predictions. 184 vesicles: The vesicle segmentation. 185 tomo: The tomogram data. Optional, will only be used for evaluation. 186 presyn_dist: The maximal distance to the presynaptic compartment, which is used for thinning. 187 min_thinning_size: The minimal size for a label component. 188 post_closing: Closing iterations to apply to the AZ annotations after thinning. 189 check: Whether to visually check the results. 190 191 Returns: 192 The thinned AZ annotations. 193 """ 194 az_segmentation = label(az_segmentation) 195 thinned_az = np.zeros(az_segmentation.shape, dtype="uint8") 196 props = regionprops(az_segmentation) 197 198 min_bb_shape = (32, 384, 384) 199 200 for prop in props: 201 az_id = prop.label 202 203 bb = tuple(slice(start, stop) for start, stop in zip(prop.bbox[:3], prop.bbox[3:])) 204 pad_width = [max(sh - (b.stop - b.start), 0) // 2 for b, sh in zip(bb, min_bb_shape)] 205 bb = tuple( 206 slice(max(b.start - pw, 0), min(b.stop + pw, sh)) for b, pw, sh in zip(bb, pad_width, az_segmentation.shape) 207 ) 208 209 # If this is a small component then we discard it. This is likely some artifact in the ground-truth. 210 if prop.area < min_thinning_size: 211 continue 212 213 # First, get everything for this bounding box. 214 az_bb = (az_segmentation[bb] == az_id) 215 vesicles_bb = vesicles[bb] 216 # Skip if we don't have a vesicle. 217 if vesicles[bb].max() == 0: 218 continue 219 220 mask_bb = _get_presynaptic_mask(boundary_map[bb], vesicles_bb) 221 222 # Apply post-processing to filter out only the parts of the AZ close to the presynaptic mask. 223 distances = np.stack([distance_transform_edt(mask_bb[z] == 0) for z in range(mask_bb.shape[0])]) 224 az_bb[distances > presyn_dist] = 0 225 az_bb = np.logical_or(binary_closing(az_bb, iterations=post_closing), az_bb) 226 227 if check: 228 import napari 229 tomo_bb = tomo[bb] 230 231 v = napari.Viewer() 232 v.add_image(tomo_bb) 233 v.add_labels(az_bb.astype("uint8"), name="az-thinned") 234 v.add_labels(az_segmentation[bb], name="az", visible=False) 235 v.add_labels(mask_bb, visible=False) 236 v.title = f"{prop.label}: {prop.area}" 237 238 napari.run() 239 240 thinned_az[bb][az_bb] = 1 241 242 return thinned_az
def
az_evaluation( seg_paths: List[str], gt_paths: List[str], seg_key: str, gt_key: str, crop: bool = True, apply_cc: bool = True, min_component_size: int = 5000, iterations: int = 3, criterion: str = 'iou', threshold: Optional[float] = None, **extra_cols) -> pandas.DataFrame:
67def az_evaluation( 68 seg_paths: List[str], 69 gt_paths: List[str], 70 seg_key: str, 71 gt_key: str, 72 crop: bool = True, 73 apply_cc: bool = True, 74 min_component_size: int = 5000, 75 iterations: int = 3, 76 criterion: str = "iou", 77 threshold: Optional[float] = None, 78 **extra_cols 79) -> pd.DataFrame: 80 """Evaluate active zone segmentations against ground-truth annotations. 81 82 This computes the dice score as well as false positives, false negatives and true positives 83 for each segmented tomogram. 84 85 Args: 86 seg_paths: The filepaths to the segmentations, stored as hd5 files. 87 gt_paths: The filepaths to the ground-truth annotatons, stored as hdf5 files. 88 seg_key: The internal path to the data in the segmentation hdf5 file. 89 gt_key: The internal path to the data in the ground-truth hdf5 file. 90 crop: Whether to crop the segmentation and ground-truth to the bounding box. 91 apply_cc: Whether to apply connected components before evaluation. 92 min_component_size: Minimum component size for filtering the segmentation before evaluation. 93 iterations: Post-processing iterations for expanding the AZ annotations. 94 criterion: The criterion for matching annotations and segmentations 95 threshold: Threshold applied to the segmentation. This is required if the segmentation is passed as 96 probability prediction instead of a binary segmentation. Possible values: 'iou', 'iop', 'iot'. 97 extra_cols: Additional columns for the result table. 98 99 Returns: 100 A data frame with the evaluation results per tomogram. 101 """ 102 assert len(seg_paths) == len(gt_paths) 103 104 results = {key: [] for key in extra_cols.keys()} 105 results.update({ 106 "tomo_name": [], 107 "tp": [], 108 "fp": [], 109 "fn": [], 110 "dice": [], 111 }) 112 113 i = 0 114 for seg_path, gt_path in tqdm(zip(seg_paths, gt_paths), total=len(seg_paths), desc="Run AZ Eval"): 115 with h5py.File(seg_path, "r") as f: 116 if seg_key not in f: 117 print("Segmentation", seg_key, "could not be found in", seg_path) 118 i += 1 119 continue 120 seg = f[seg_key][:].squeeze() 121 122 with h5py.File(gt_path, "r") as f: 123 gt = f[gt_key][:] 124 125 if threshold is not None: 126 seg = seg > threshold 127 128 if crop: 129 seg, gt = _crop(seg, gt) 130 131 result = _single_az_evaluation(seg, gt, apply_cc, min_component_size, iterations, criterion=criterion) 132 results["tomo_name"].append(os.path.basename(seg_path)) 133 for res in ("tp", "fp", "fn", "dice"): 134 results[res].append(result[res]) 135 for name, val in extra_cols.items(): 136 results[name].append(val[i]) 137 i += 1 138 139 return pd.DataFrame(results)
Evaluate active zone segmentations against ground-truth annotations.
This computes the dice score as well as false positives, false negatives and true positives for each segmented tomogram.
Arguments:
- seg_paths: The filepaths to the segmentations, stored as hd5 files.
- gt_paths: The filepaths to the ground-truth annotatons, stored as hdf5 files.
- seg_key: The internal path to the data in the segmentation hdf5 file.
- gt_key: The internal path to the data in the ground-truth hdf5 file.
- crop: Whether to crop the segmentation and ground-truth to the bounding box.
- apply_cc: Whether to apply connected components before evaluation.
- min_component_size: Minimum component size for filtering the segmentation before evaluation.
- iterations: Post-processing iterations for expanding the AZ annotations.
- criterion: The criterion for matching annotations and segmentations
- threshold: Threshold applied to the segmentation. This is required if the segmentation is passed as
- probability prediction instead of a binary segmentation. Possible values: 'iou', 'iop', 'iot'.
- extra_cols: Additional columns for the result table.
Returns:
A data frame with the evaluation results per tomogram.
def
thin_az( az_segmentation: numpy.ndarray, boundary_map: Union[numpy._typing._array_like._Buffer, numpy._typing._array_like._SupportsArray[numpy.dtype[Any]], numpy._typing._nested_sequence._NestedSequence[numpy._typing._array_like._SupportsArray[numpy.dtype[Any]]], complex, bytes, str, numpy._typing._nested_sequence._NestedSequence[complex | bytes | str]], vesicles: Union[numpy._typing._array_like._Buffer, numpy._typing._array_like._SupportsArray[numpy.dtype[Any]], numpy._typing._nested_sequence._NestedSequence[numpy._typing._array_like._SupportsArray[numpy.dtype[Any]]], complex, bytes, str, numpy._typing._nested_sequence._NestedSequence[complex | bytes | str]], tomo: Union[numpy._typing._array_like._Buffer, numpy._typing._array_like._SupportsArray[numpy.dtype[Any]], numpy._typing._nested_sequence._NestedSequence[numpy._typing._array_like._SupportsArray[numpy.dtype[Any]]], complex, bytes, str, numpy._typing._nested_sequence._NestedSequence[complex | bytes | str], NoneType] = None, presyn_dist: int = 6, min_thinning_size: int = 2500, post_closing: int = 2, check: bool = False) -> numpy.ndarray:
170def thin_az( 171 az_segmentation: np.ndarray, 172 boundary_map: np.typing.ArrayLike, 173 vesicles: np.typing.ArrayLike, 174 tomo: Optional[np.typing.ArrayLike] = None, 175 presyn_dist: int = 6, 176 min_thinning_size: int = 2500, 177 post_closing: int = 2, 178 check: bool = False, 179) -> np.ndarray: 180 """Thin the active zone annotations by restricting them to a certain distance from the presynaptic mask. 181 182 Args: 183 az_segmentation: The active zone annotations. 184 boundary_map: The boundary / membrane predictions. 185 vesicles: The vesicle segmentation. 186 tomo: The tomogram data. Optional, will only be used for evaluation. 187 presyn_dist: The maximal distance to the presynaptic compartment, which is used for thinning. 188 min_thinning_size: The minimal size for a label component. 189 post_closing: Closing iterations to apply to the AZ annotations after thinning. 190 check: Whether to visually check the results. 191 192 Returns: 193 The thinned AZ annotations. 194 """ 195 az_segmentation = label(az_segmentation) 196 thinned_az = np.zeros(az_segmentation.shape, dtype="uint8") 197 props = regionprops(az_segmentation) 198 199 min_bb_shape = (32, 384, 384) 200 201 for prop in props: 202 az_id = prop.label 203 204 bb = tuple(slice(start, stop) for start, stop in zip(prop.bbox[:3], prop.bbox[3:])) 205 pad_width = [max(sh - (b.stop - b.start), 0) // 2 for b, sh in zip(bb, min_bb_shape)] 206 bb = tuple( 207 slice(max(b.start - pw, 0), min(b.stop + pw, sh)) for b, pw, sh in zip(bb, pad_width, az_segmentation.shape) 208 ) 209 210 # If this is a small component then we discard it. This is likely some artifact in the ground-truth. 211 if prop.area < min_thinning_size: 212 continue 213 214 # First, get everything for this bounding box. 215 az_bb = (az_segmentation[bb] == az_id) 216 vesicles_bb = vesicles[bb] 217 # Skip if we don't have a vesicle. 218 if vesicles[bb].max() == 0: 219 continue 220 221 mask_bb = _get_presynaptic_mask(boundary_map[bb], vesicles_bb) 222 223 # Apply post-processing to filter out only the parts of the AZ close to the presynaptic mask. 224 distances = np.stack([distance_transform_edt(mask_bb[z] == 0) for z in range(mask_bb.shape[0])]) 225 az_bb[distances > presyn_dist] = 0 226 az_bb = np.logical_or(binary_closing(az_bb, iterations=post_closing), az_bb) 227 228 if check: 229 import napari 230 tomo_bb = tomo[bb] 231 232 v = napari.Viewer() 233 v.add_image(tomo_bb) 234 v.add_labels(az_bb.astype("uint8"), name="az-thinned") 235 v.add_labels(az_segmentation[bb], name="az", visible=False) 236 v.add_labels(mask_bb, visible=False) 237 v.title = f"{prop.label}: {prop.area}" 238 239 napari.run() 240 241 thinned_az[bb][az_bb] = 1 242 243 return thinned_az
Thin the active zone annotations by restricting them to a certain distance from the presynaptic mask.
Arguments:
- az_segmentation: The active zone annotations.
- boundary_map: The boundary / membrane predictions.
- vesicles: The vesicle segmentation.
- tomo: The tomogram data. Optional, will only be used for evaluation.
- presyn_dist: The maximal distance to the presynaptic compartment, which is used for thinning.
- min_thinning_size: The minimal size for a label component.
- post_closing: Closing iterations to apply to the AZ annotations after thinning.
- check: Whether to visually check the results.
Returns:
The thinned AZ annotations.