synapse_net.ground_truth.vesicles
import json
import os
import warnings
from pathlib import Path
from typing import Optional, Tuple

import mrcfile
import numpy as np
from elf.io import open_file
from skimage.measure import label

from ..imod import export_point_annotations, export_segmentation, get_label_names


def _check_volume(raw, vesicles, labels, title=None, **extra_segmentations):
    """Show the exported data in a napari viewer and block until it is closed.

    Args:
        raw: The raw volume, added as image layer. Skipped if None.
        vesicles: The vesicle instance segmentation, added as label layer. Skipped if None.
        labels: Dictionary mapping the ids in `vesicles` to label ids. If given (together
            with `vesicles`), an additional layer with the mapped label ids is shown.
        title: Optional title for the viewer window.
        extra_segmentations: Additional segmentations, added as label layers under
            their keyword name.
    """
    import napari
    from nifty.tools import takeDict

    if labels is None or vesicles is None:
        label_vol = None
    else:
        # Work on a copy: the background entry is only needed for the lookup and must
        # not leak into the caller's dictionary (which is serialized afterwards).
        relabeling = dict(labels)
        relabeling[0] = 0
        label_vol = takeDict(relabeling, vesicles)

    v = napari.Viewer()
    if raw is not None:
        v.add_image(raw)
    if vesicles is not None:
        v.add_labels(vesicles)
    if label_vol is not None:
        v.add_labels(label_vol)
    for name, seg in extra_segmentations.items():
        v.add_labels(seg, name=name)
    if title is not None:
        v.title = title
    napari.run()


def _export_segmentations(imod_path, data_path, object_ids):
    """Export the given imod objects and merge them into one instance segmentation.

    Each object is exported with `export_segmentation` and split into connected
    components. The components of later objects are offset by the current maximal id
    and overwrite earlier objects where they overlap.

    Args:
        imod_path: The path to the mod file.
        data_path: The path to the mrc file.
        object_ids: Iterable of object ids to export.

    Returns:
        The merged segmentation, or None if `object_ids` is empty.
    """
    merged = None
    for object_id in object_ids:
        seg = export_segmentation(imod_path, data_path, object_id=object_id, require_object=True)
        seg = label(seg)
        if merged is None:
            merged = seg
            continue
        label_offset = int(merged.max())
        mask = seg != 0
        merged[mask] = seg[mask] + label_offset
    return merged


def write_vesicle_training_volume(
    data_path: str,
    imod_path: str,
    output_path: str,
    original_path: Optional[str] = None,
    exclude_labels: Optional[Tuple[int]] = None,
    exclude_label_patterns: Optional[Tuple[str]] = None,
    contour_label_patterns: Optional[Tuple[str]] = None,
    visualize: bool = False,
    resolution: Optional[Tuple[float, float, float]] = None,
):
    """Extract vesicle annotations from IMOD and write them to an hdf5 file.

    By default this will export all point annotations from an imod file.
    The arguments `exclude_labels` and `exclude_label_patterns` can be used
    to exclude certain point annotations from the export.
    The argument `contour_label_patterns` can be used to also export selected
    contour annotations from the imod file.

    Args:
        data_path: The path to the mrc file.
        imod_path: The path to the mod file with vesicle annotations.
        output_path: The path to the hdf5 file to save the extracted annotations.
        original_path: The original path name. This parameter is optional, and the path name
            will be saved as an attribute in the output hdf5 file, in order to map back
            extracted to original input data.
        exclude_labels: An optional list of object ids in the mod file that should be excluded
            from the export.
        exclude_label_patterns: An optional list of object names in the mod file that
            should be excluded from the export.
        contour_label_patterns: An optional list of object names for contour annotations
            (= more complex object annotations) that should also be exported as vesicles
            from the imod file. This can be used in case some vesicles are annotated as
            objects with contours instead of just being point annotations.
            The name of each selected object must match exactly one pattern.
        visualize: Whether to visualize the exported data with napari before saving it.
            For debugging purposes.
        resolution: The voxel size of the data in nanometers. It will be used to scale the
            radius of the point annotations exported from imod. By default the resolution
            will be read from the mrc header, but can be over-ridden by passing this value
            in case of wrong resolution information in the header.
            Only the first entry is used for scaling the radii.
    """
    if resolution is None:
        with mrcfile.open(data_path, "r") as f:
            resolution = f.voxel_size.tolist()
        # The header value is divided by 10 to arrive at nanometers
        # (mrc voxel sizes are given in Angstrom).
        resolution = tuple(np.round(res / 10, 3) for res in resolution)
    assert len(resolution) == 3, f"Expected a resolution with 3 entries, got {resolution}"

    with open_file(data_path, "r") as f:
        vol = f["data"][:]

    vesicle_seg, labels, label_names, coords, radii = export_point_annotations(
        imod_path, vol.shape, exclude_labels=exclude_labels, exclude_label_patterns=exclude_label_patterns,
        resolution=resolution[0], return_coords_and_radii=True
    )

    if contour_label_patterns is not None:
        all_label_names, label_types = get_label_names(imod_path, return_types=True)
        contour_objects = {
            obj_id: name for obj_id, name in all_label_names.items()
            if label_types[obj_id] == "closed contours" and any(pattern in name for pattern in contour_label_patterns)
        }

        # New vesicle ids / label ids continue after the ones from the point annotations.
        # NOTE(review): with no point annotations the contour label ids start at 1 - confirm.
        next_seg_id = int(vesicle_seg.max()) + 1
        label_id_offset = int(max(labels.values(), default=0)) + 1

        for obj_id, name in contour_objects.items():
            # Export one object at a time, so that each component can be tied to the object name.
            obj_seg = _export_segmentations(imod_path, data_path, [obj_id])
            foreground = obj_seg != 0
            if foreground.all():
                warnings.warn(f"All foreground mesh for {imod_path}: {name} is skipped.")
                continue

            matches = [idx for idx, pattern in enumerate(contour_label_patterns) if pattern in name]
            assert len(matches) == 1, f"Object name {name} must match exactly one contour pattern"
            label_id = int(matches[0] + label_id_offset)

            # Every connected component of the object becomes its own vesicle.
            n_components = int(obj_seg.max())
            vesicle_seg[foreground] = obj_seg[foreground] + (next_seg_id - 1)
            for seg_id in range(next_seg_id, next_seg_id + n_components):
                # Plain python ints, so that the dictionary stays json serializable.
                labels[int(seg_id)] = label_id
            next_seg_id += n_components

    print("Extracted the following labels:", label_names)
    print("With counts:", dict(zip(*np.unique(list(labels.values()), return_counts=True))))
    if visualize:
        _check_volume(vol, vesicle_seg, labels)

    with open_file(output_path, "a") as f:
        f.create_dataset("raw", data=vol, compression="gzip")

        ds = f.create_dataset("labels/vesicles", data=vesicle_seg, compression="gzip")
        ds.attrs["labels"] = json.dumps(labels)
        ds.attrs["label_names"] = json.dumps(label_names)

        f.create_dataset("labels/imod/vesicles/coordinates", data=coords)
        f.create_dataset("labels/imod/vesicles/radii", data=radii)

        if original_path is not None:
            f.attrs["filename"] = original_path


def extract_vesicle_training_data(
    data_folder: str,
    gt_folder: str,
    output_folder: str,
    to_label_path: Optional[callable] = None,
    skip_no_labels: bool = False,
    exclude: Optional[Tuple[str]] = None,
    exclude_labels: Optional[Tuple[int]] = None,
    exclude_label_patterns: Optional[Tuple[str]] = None,
    contour_label_patterns: Optional[Tuple[str]] = None,
    visualize: bool = False,
    resolution: Optional[Tuple[float, float, float]] = None,
):
    """Extract all vesicle annotations from a folder hierarchy stored in mrc and imod files
    and write them to hdf5 files.

    This function calls `write_vesicle_training_volume` for each mrc/mod file pair it encounters.
    The output files will be stored with a simple naming pattern 'tomogram-00i.h5'.
    The running index is also advanced for files that are skipped because the output
    already exists or because no labels were found.
    The original filename (relative to `data_folder`) for each exported file is stored
    in the attribute 'filename' at the root level of the hdf5.

    Args:
        data_folder: The root folder containing the mrc files (extension .mrc or .rec).
        gt_folder: The root folder containing the mod files. Can be the same as `data_folder`.
        output_folder: The output folder where the hdf5 files with exported raw data and
            vesicle segmentations will be saved.
        to_label_path: A function that takes `gt_folder` and the path of the mrc file relative
            to `data_folder` and returns the path of the corresponding annotation file.
            If not given the file extension of the mrc file will be replaced with .imod.
        skip_no_labels: Whether to skip extracting mrc files for which a matching annotation
            file could not be found. If true a message is printed for these cases,
            otherwise an error is raised.
        exclude: An optional list of file names or directory paths (as yielded by `os.walk`)
            to be excluded from the export.
        exclude_labels: An optional list of object ids in the mod file that should be excluded
            from the export.
        exclude_label_patterns: An optional list of object names in the mod file that
            should be excluded from the export.
        contour_label_patterns: An optional list of object names for contour annotations
            (= more complex object annotations) that should also be exported as vesicles
            from the imod file. This can be used in case some vesicles are annotated as
            objects with contours instead of just being point annotations.
        visualize: Whether to visualize the exported data with napari before saving it.
            For debugging purposes.
        resolution: The voxel size of the data in nanometers. It will be used to scale the
            radius of the point annotations exported from imod. By default the resolution
            will be read from the mrc header, but can be over-ridden by passing this value
            in case of wrong resolution information in the header.

    Raises:
        RuntimeError: If no annotation file is found for a mrc file and `skip_no_labels` is False.
    """
    os.makedirs(output_folder, exist_ok=True)

    train_id = 0
    for root, dirs, files in os.walk(data_folder):
        # Sort in place for a reproducible traversal order (and thus reproducible ids).
        dirs.sort()
        files.sort()

        # Check if we exclude this directory. Its sub-directories are still visited.
        if exclude is not None and root in exclude:
            print("Skipping", root)
            continue

        for fname in files:
            # Check if we exclude this file.
            # TODO distinguish between directory and file to skip
            if exclude is not None and fname in exclude:
                print("Skipping", fname)
                continue

            if Path(fname).suffix not in (".mrc", ".rec"):
                continue

            output_path = os.path.join(output_folder, f"tomogram-{train_id:03}.h5")
            if os.path.exists(output_path):
                train_id += 1
                continue

            file_path = os.path.join(root, fname)
            relative_path = os.path.relpath(file_path, data_folder)

            if to_label_path is None:
                # Only swap the file extension; a plain str.replace would also rewrite
                # directory names that happen to contain the suffix.
                imod_path = os.path.join(gt_folder, os.path.splitext(relative_path)[0] + ".imod")
            else:
                imod_path = to_label_path(gt_folder, relative_path)

            if not os.path.exists(imod_path):
                if skip_no_labels:
                    print("Skipping", file_path, "because no matching labels were found at", imod_path)
                    train_id += 1
                    continue
                raise RuntimeError(f"Can't find labels for {file_path} at {imod_path}.")

            print("Processing", file_path, "with target", output_path)
            write_vesicle_training_volume(
                file_path, imod_path, output_path, relative_path,
                exclude_labels=exclude_labels,
                exclude_label_patterns=exclude_label_patterns,
                contour_label_patterns=contour_label_patterns,
                visualize=visualize,
                resolution=resolution,
            )
            train_id += 1
def write_vesicle_training_volume(
    data_path: str,
    imod_path: str,
    output_path: str,
    original_path: Optional[str] = None,
    exclude_labels: Optional[Tuple[int]] = None,
    exclude_label_patterns: Optional[Tuple[str]] = None,
    contour_label_patterns: Optional[Tuple[str]] = None,
    visualize: bool = False,
    resolution: Optional[Tuple[float, float, float]] = None,
):
    """Extract vesicle annotations from IMOD and write them to an hdf5 file.

    By default this will export all point annotations from an imod file.
    The arguments `exclude_labels` and `exclude_label_patterns` can be used
    to exclude certain point annotations from the export.
    The argument `contour_label_patterns` can be used to also export selected
    contour annotations from the imod file.

    Args:
        data_path: The path to the mrc file.
        imod_path: The path to the mod file with vesicle annotations.
        output_path: The path to the hdf5 file to save the extracted annotations.
        original_path: The original path name. This parameter is optional, and the path name
            will be saved as an attribute in the output hdf5 file, in order to map back
            extracted to original input data.
        exclude_labels: An optional list of object ids in the mod file that should be excluded
            from the export.
        exclude_label_patterns: An optional list of object names in the mod file that
            should be excluded from the export.
        contour_label_patterns: An optional list of object names for contour annotations
            (= more complex object annotations) that should also be exported as vesicles
            from the imod file. This can be used in case some vesicles are annotated as
            objects with contours instead of just being point annotations.
            The name of each selected object must match exactly one pattern.
        visualize: Whether to visualize the exported data with napari before saving it.
            For debugging purposes.
        resolution: The voxel size of the data in nanometers. It will be used to scale the
            radius of the point annotations exported from imod. By default the resolution
            will be read from the mrc header, but can be over-ridden by passing this value
            in case of wrong resolution information in the header.
            Only the first entry is used for scaling the radii.
    """
    if resolution is None:
        with mrcfile.open(data_path, "r") as f:
            resolution = f.voxel_size.tolist()
        # The header value is divided by 10 to arrive at nanometers
        # (mrc voxel sizes are given in Angstrom).
        resolution = tuple(np.round(res / 10, 3) for res in resolution)
    assert len(resolution) == 3, f"Expected a resolution with 3 entries, got {resolution}"

    with open_file(data_path, "r") as f:
        vol = f["data"][:]

    vesicle_seg, labels, label_names, coords, radii = export_point_annotations(
        imod_path, vol.shape, exclude_labels=exclude_labels, exclude_label_patterns=exclude_label_patterns,
        resolution=resolution[0], return_coords_and_radii=True
    )

    if contour_label_patterns is not None:
        all_label_names, label_types = get_label_names(imod_path, return_types=True)
        contour_objects = {
            obj_id: name for obj_id, name in all_label_names.items()
            if label_types[obj_id] == "closed contours" and any(pattern in name for pattern in contour_label_patterns)
        }

        # New vesicle ids / label ids continue after the ones from the point annotations.
        # NOTE(review): with no point annotations the contour label ids start at 1 - confirm.
        next_seg_id = int(vesicle_seg.max()) + 1
        label_id_offset = int(max(labels.values(), default=0)) + 1

        for obj_id, name in contour_objects.items():
            # Export one object at a time, so that each component can be tied to the object name.
            obj_seg = _export_segmentations(imod_path, data_path, [obj_id])
            foreground = obj_seg != 0
            if foreground.all():
                warnings.warn(f"All foreground mesh for {imod_path}: {name} is skipped.")
                continue

            matches = [idx for idx, pattern in enumerate(contour_label_patterns) if pattern in name]
            assert len(matches) == 1, f"Object name {name} must match exactly one contour pattern"
            label_id = int(matches[0] + label_id_offset)

            # Every connected component of the object becomes its own vesicle.
            n_components = int(obj_seg.max())
            vesicle_seg[foreground] = obj_seg[foreground] + (next_seg_id - 1)
            for seg_id in range(next_seg_id, next_seg_id + n_components):
                # Plain python ints, so that the dictionary stays json serializable.
                labels[int(seg_id)] = label_id
            next_seg_id += n_components

    print("Extracted the following labels:", label_names)
    print("With counts:", dict(zip(*np.unique(list(labels.values()), return_counts=True))))
    if visualize:
        _check_volume(vol, vesicle_seg, labels)

    with open_file(output_path, "a") as f:
        f.create_dataset("raw", data=vol, compression="gzip")

        ds = f.create_dataset("labels/vesicles", data=vesicle_seg, compression="gzip")
        ds.attrs["labels"] = json.dumps(labels)
        ds.attrs["label_names"] = json.dumps(label_names)

        f.create_dataset("labels/imod/vesicles/coordinates", data=coords)
        f.create_dataset("labels/imod/vesicles/radii", data=radii)

        if original_path is not None:
            f.attrs["filename"] = original_path
Extract vesicle annotations from IMOD and write them to an hdf5 file.
By default this will export all point annotations from an imod file.
The arguments `exclude_labels` and `exclude_label_patterns` can be used to exclude certain point annotations from the export.
The argument `contour_label_patterns` can be used to also export selected contour annotations from the imod file.
Arguments:
- data_path: The path to the mrc file.
- imod_path: The path to the mod file with vesicle annotations.
- output_path: The path to the hdf5 file to save the extracted annotations.
- original_path: The original path name. This parameter is optional, and the path name will be saved as an attribute in the output hdf5 file, in order to map back extracted to original input data.
- exclude_labels: An optional list of object ids in the mod file that should be excluded from the export.
- exclude_label_patterns: An optional list of object names in the mod file that should be excluded from the export.
- contour_label_patterns: An optional list of object names for contour annotations (= more complex object annotations) that should also be exported as vesicles from the imod file. This can be used in case some vesicles are annotated as objects with contours instead of just being point annotations.
- visualize: Whether to visualize the exported data with napari before saving it. For debugging purposes.
- resolution: The voxel size of the data in nanometers. It will be used to scale the radius of the point annotations exported from imod. By default the resolution will be read from the mrc header, but can be over-ridden by passing this value in case of wrong resolution information in the header.
def extract_vesicle_training_data(
    data_folder: str,
    gt_folder: str,
    output_folder: str,
    to_label_path: Optional[callable] = None,
    skip_no_labels: bool = False,
    exclude: Optional[Tuple[str]] = None,
    exclude_labels: Optional[Tuple[int]] = None,
    exclude_label_patterns: Optional[Tuple[str]] = None,
    contour_label_patterns: Optional[Tuple[str]] = None,
    visualize: bool = False,
    resolution: Optional[Tuple[float, float, float]] = None,
):
    """Extract all vesicle annotations from a folder hierarchy stored in mrc and imod files
    and write them to hdf5 files.

    This function calls `write_vesicle_training_volume` for each mrc/mod file pair it encounters.
    The output files will be stored with a simple naming pattern 'tomogram-00i.h5'.
    The running index is also advanced for files that are skipped because the output
    already exists or because no labels were found.
    The original filename (relative to `data_folder`) for each exported file is stored
    in the attribute 'filename' at the root level of the hdf5.

    Args:
        data_folder: The root folder containing the mrc files (extension .mrc or .rec).
        gt_folder: The root folder containing the mod files. Can be the same as `data_folder`.
        output_folder: The output folder where the hdf5 files with exported raw data and
            vesicle segmentations will be saved.
        to_label_path: A function that takes `gt_folder` and the path of the mrc file relative
            to `data_folder` and returns the path of the corresponding annotation file.
            If not given the file extension of the mrc file will be replaced with .imod.
        skip_no_labels: Whether to skip extracting mrc files for which a matching annotation
            file could not be found. If true a message is printed for these cases,
            otherwise an error is raised.
        exclude: An optional list of file names or directory paths (as yielded by `os.walk`)
            to be excluded from the export.
        exclude_labels: An optional list of object ids in the mod file that should be excluded
            from the export.
        exclude_label_patterns: An optional list of object names in the mod file that
            should be excluded from the export.
        contour_label_patterns: An optional list of object names for contour annotations
            (= more complex object annotations) that should also be exported as vesicles
            from the imod file. This can be used in case some vesicles are annotated as
            objects with contours instead of just being point annotations.
        visualize: Whether to visualize the exported data with napari before saving it.
            For debugging purposes.
        resolution: The voxel size of the data in nanometers. It will be used to scale the
            radius of the point annotations exported from imod. By default the resolution
            will be read from the mrc header, but can be over-ridden by passing this value
            in case of wrong resolution information in the header.

    Raises:
        RuntimeError: If no annotation file is found for a mrc file and `skip_no_labels` is False.
    """
    os.makedirs(output_folder, exist_ok=True)

    train_id = 0
    for root, dirs, files in os.walk(data_folder):
        # Sort in place for a reproducible traversal order (and thus reproducible ids).
        dirs.sort()
        files.sort()

        # Check if we exclude this directory. Its sub-directories are still visited.
        if exclude is not None and root in exclude:
            print("Skipping", root)
            continue

        for fname in files:
            # Check if we exclude this file.
            # TODO distinguish between directory and file to skip
            if exclude is not None and fname in exclude:
                print("Skipping", fname)
                continue

            if Path(fname).suffix not in (".mrc", ".rec"):
                continue

            output_path = os.path.join(output_folder, f"tomogram-{train_id:03}.h5")
            if os.path.exists(output_path):
                train_id += 1
                continue

            file_path = os.path.join(root, fname)
            relative_path = os.path.relpath(file_path, data_folder)

            if to_label_path is None:
                # Only swap the file extension; a plain str.replace would also rewrite
                # directory names that happen to contain the suffix.
                imod_path = os.path.join(gt_folder, os.path.splitext(relative_path)[0] + ".imod")
            else:
                imod_path = to_label_path(gt_folder, relative_path)

            if not os.path.exists(imod_path):
                if skip_no_labels:
                    print("Skipping", file_path, "because no matching labels were found at", imod_path)
                    train_id += 1
                    continue
                raise RuntimeError(f"Can't find labels for {file_path} at {imod_path}.")

            print("Processing", file_path, "with target", output_path)
            write_vesicle_training_volume(
                file_path, imod_path, output_path, relative_path,
                exclude_labels=exclude_labels,
                exclude_label_patterns=exclude_label_patterns,
                contour_label_patterns=contour_label_patterns,
                visualize=visualize,
                resolution=resolution,
            )
            train_id += 1
Extract all vesicle annotations from a folder hierarchy stored in mrc and imod files and write them to an hdf5 file.
This function calls `write_vesicle_training_volume` for each mrc/mod file pair it encounters.
The output files will be stored with a simple naming pattern 'tomogram-00i.h5'.
The original filename for each exported file is stored in the attribute 'filename' at
the root level of the hdf5.
Arguments:
- data_folder: The root folder containing the mrc files.
- gt_folder: The root folder containing the mod files. Can be the same as `data_folder`.
- output_folder: The output folder where the hdf5 files with exported raw data and vesicle segmentations will be saved.
- to_label_path: A function that takes the label root folder and the path of the mrc file relative to the data folder and returns the path of the corresponding annotation file. If not given the file extension of the mrc file will be replaced with .imod.
- skip_no_labels: Whether to skip extracting mrc files for which a matching annotation file could not be found. If true a message is printed for these cases, otherwise an error is raised.
- exclude: An optional list of file names or directory paths (as encountered while walking the data folder) to be excluded from the export.
- exclude_labels: An optional list of object ids in the mod file that should be excluded from the export.
- exclude_label_patterns: An optional list of object names in the mod file that should be excluded from the export.
- contour_label_patterns: An optional list of object names for contour annotations (= more complex object annotations) that should also be exported as vesicles from the imod file. This can be used in case some vesicles are annotated as objects with contours instead of just being point annotations.
- visualize: Whether to visualize the exported data with napari before saving it. For debugging purposes.
- resolution: The voxel size of the data in nanometers. It will be used to scale the radius of the point annotations exported from imod. By default the resolution will be read from the mrc header, but can be over-ridden by passing this value in case of wrong resolution information in the header.