synapse_net.ground_truth.vesicles

  1import json
  2import os
  3import warnings
  4from pathlib import Path
  5from typing import Optional, Tuple
  6
  7import mrcfile
  8import numpy as np
  9from elf.io import open_file
 10from skimage.measure import label
 11
 12from ..imod import export_point_annotations, export_segmentation, get_label_names
 13
 14
 15def _check_volume(raw, vesicles, labels, title=None, **extra_segmentations):
 16    import napari
 17    from nifty.tools import takeDict
 18
 19    if labels is None:
 20        label_vol = None
 21    else:
 22        labels[0] = 0
 23        label_vol = takeDict(labels, vesicles)
 24
 25    v = napari.Viewer()
 26    if raw is not None:
 27        v.add_image(raw)
 28    if vesicles is not None:
 29        v.add_labels(vesicles)
 30    if labels is not None:
 31        v.add_labels(label_vol)
 32    for name, seg in extra_segmentations.items():
 33        v.add_labels(seg, name=name)
 34    if title is not None:
 35        v.title = title
 36    napari.run()
 37
 38
 39def _export_segmentations(imod_path, data_path, object_ids):
 40    extra_seg = None
 41    for object_id in object_ids:
 42        seg = export_segmentation(imod_path, data_path, object_id=object_id, require_object=True)
 43        seg = label(seg)
 44        if extra_seg is None:
 45            extra_seg = seg
 46        else:
 47            label_offset = int(extra_seg.max())
 48            mask = seg != 0
 49            extra_seg[mask] = seg[mask] + label_offset
 50    return extra_seg
 51
 52
 53def write_vesicle_training_volume(
 54    data_path: str,
 55    imod_path: str,
 56    output_path: str,
 57    original_path: Optional[str] = None,
 58    exclude_labels: Optional[Tuple[int]] = None,
 59    exclude_label_patterns: Optional[Tuple[str]] = None,
 60    contour_label_patterns: Optional[Tuple[str]] = None,
 61    visualize: bool = False,
 62    resolution: Optional[Tuple[int, int, int]] = None,
 63):
 64    """Extract vesicle annotations from IMOD and write them to an hdf5 file.
 65
 66    By default this will export all point annotations from an imod file.
 67    The arguments `exclude_labels` and `exclude_label_patterns` can be used
 68    to exclude certain point annotations from the export.
 69    The argument `contour_label_patterns` can be used to also export selected
 70    contour annotations from the imod file.
 71
 72    Args:
 73        data_path: The path to the mrc file.
 74        imod_path: The path to the mod file with vesicle annotations.
 75        output_path: The path to the hdf5 file to save the extracted annotations.
 76        original_path: The orignal path name. This parameter is optional, and the path name
 77            will be saved as an attribute in the output hdf5 file, in order to map back
 78            extracted to original input data.
 79        exclude_labels: An optional list of object ids in the mod file that should be excluded
 80            from the export.
 81        exclude_label_patterns: An optional list of object names in the mode file that
 82            should be excluded from the export.
 83        contour_label_patterns: An optonal list of object names for contour annotations
 84            (= more complex object annotations) that should also be exported as vesicles
 85            from the imod file. This can be used in case some vesicles are annotated as
 86            objects with contours instead of just being point annotations.
 87        visualize: Whether to visualize the exported data with napari instead of saving it.
 88            For debugging purposes.
 89        resolution: The voxel size of the data in nanometers. It will be used to scale the
 90            radius of the point annotations exported from imod. By default the resolution
 91            will be read from the mrc header, but can be over-ridden by passing this value
 92            in case of wrong resolution information in the header.
 93    """
 94    if resolution is None:
 95        with mrcfile.open(data_path, "r") as f:
 96            resolution = f.voxel_size.tolist()
 97        resolution = tuple(np.round(res / 10, 3) for res in resolution)
 98    assert len(resolution) == 3
 99
100    with open_file(data_path, "r") as f:
101        vol = f["data"][:]
102
103    vesicle_seg, labels, label_names, coords, radii = export_point_annotations(
104        imod_path, vol.shape, exclude_labels=exclude_labels, exclude_label_patterns=exclude_label_patterns,
105        resolution=resolution[0], return_coords_and_radii=True
106    )
107
108    if contour_label_patterns is not None:
109        all_label_names, label_types = get_label_names(imod_path, return_types=True)
110        mesh_object_ids = {
111            obj_id: name for obj_id, name in all_label_names.items()
112            if label_types[obj_id] == "closed contours" and any(pattern in name for pattern in contour_label_patterns)
113        }
114
115        # TODO double check this
116        extra_seg = _export_segmentations(imod_path, data_path, mesh_object_ids)
117        # extra_seg = imod_meshes_to_segmentations(imod_path, vol.shape, mesh_object_ids)
118        seg_id_offset = vesicle_seg.max() + 1
119        label_id_offset = max(list(labels.values())) + 1
120
121        for i, (name, seg) in enumerate(extra_seg.items()):
122            seg_id = seg_id_offset + i
123            seg_mask = seg_id == 1
124            if seg_mask.all():
125                warnings.warn(f"All foreground mesh for {imod_path}: {name} is skipped.")
126                continue
127            vesicle_seg[seg_mask] = seg_id
128            label_id = [i for i, pattern in enumerate(contour_label_patterns) if pattern in name]
129            assert len(label_id) == 1
130            labels[int(seg_id)] = int(label_id[0] + label_id_offset)
131
132    print("Extracted the following labels:", label_names)
133    print("With counts:", {k: v for k, v in zip(*np.unique(list(labels.values()), return_counts=True))})
134    if visualize:
135        _check_volume(vol, vesicle_seg, labels)
136
137    with open_file(output_path, "a") as f:
138        f.create_dataset("raw", data=vol, compression="gzip")
139
140        ds = f.create_dataset("labels/vesicles", data=vesicle_seg, compression="gzip")
141        ds.attrs["labels"] = json.dumps(labels)
142        ds.attrs["label_names"] = json.dumps(label_names)
143
144        f.create_dataset("labels/imod/vesicles/coordinates", data=coords)
145        f.create_dataset("labels/imod/vesicles/radii", data=radii)
146
147        if original_path is not None:
148            f.attrs["filename"] = original_path
149
150
def extract_vesicle_training_data(
    data_folder: str,
    gt_folder: str,
    output_folder: str,
    to_label_path: Optional[callable] = None,
    skip_no_labels: bool = False,
    exclude: Optional[Tuple[str]] = None,
    exclude_labels: Optional[Tuple[int]] = None,
    exclude_label_patterns: Optional[Tuple[str]] = None,
    contour_label_patterns: Optional[Tuple[str]] = None,
    visualize: bool = False,
    resolution: Optional[Tuple[int, int, int]] = None,
):
    """Extract all vesicle annotations from a folder hierarchy stored in mrc and imod files
    and write them to hdf5 files.

    This function calls `write_vesicle_training_volume` for each tomogram (.mrc or .rec file)
    with a matching label file it encounters.
    The output files will be stored with a simple naming pattern 'tomogram-00i.h5'.
    The running index is also increased for tomograms that are skipped because the output
    already exists or because no labels were found.
    The original filename for each exported file is stored in the attribute 'filename' at
    the root level of the hdf5.

    Args:
        data_folder: The root folder containing the mrc files.
        gt_folder: The root folder containing the label files. Can be the same as `data_folder`.
        output_folder: The output folder where the hdf5 files with exported raw data and
            vesicle segmentations will be saved.
        to_label_path: A function that is called with `gt_folder` and the path of the mrc
            file relative to `data_folder` and returns the path of the corresponding label
            file. If not given, the file extension of the mrc file is replaced with .imod
            and the result is looked up in `gt_folder`.
        skip_no_labels: Whether to skip mrc files for which a matching label file
            could not be found. If true a message is printed for these cases,
            otherwise a RuntimeError is raised.
        exclude: An optional list of directory paths (as yielded by `os.walk`) and
            filenames to be excluded from the export.
        exclude_labels: An optional list of object ids in the mod file that should be excluded
            from the export.
        exclude_label_patterns: An optional list of object names in the mod file that
            should be excluded from the export.
        contour_label_patterns: An optional list of object names for contour annotations
            (= more complex object annotations) that should also be exported as vesicles
            from the imod file. This can be used in case some vesicles are annotated as
            objects with contours instead of just being point annotations.
        visualize: Passed on to `write_vesicle_training_volume` to show the exported
            data in napari. For debugging purposes.
        resolution: The voxel size of the data in nanometers. It will be used to scale the
            radius of the point annotations exported from imod. By default the resolution
            will be read from the mrc header, but can be over-ridden by passing this value
            in case of wrong resolution information in the header.

    Raises:
        RuntimeError: If no label file is found for a tomogram and `skip_no_labels` is false.
    """
    os.makedirs(output_folder, exist_ok=True)

    train_id = 0
    for root, dirs, files in os.walk(data_folder):
        # Sorting in place fixes the traversal order and thus the assignment of ids.
        dirs.sort()
        files.sort()

        # check if we exclude this directory
        if exclude is not None and root in exclude:
            print("Skipping", root)
            continue

        for fname in files:
            # check if we exclude this file
            # TODO distinguish between directory and file to skip
            if exclude is not None and fname in exclude:
                print("Skipping", fname)
                continue

            if Path(fname).suffix not in (".mrc", ".rec"):
                continue

            output_path = os.path.join(output_folder, f"tomogram-{train_id:03}.h5")
            if os.path.exists(output_path):
                train_id += 1
                continue

            file_path = os.path.join(root, fname)
            relative_path = os.path.relpath(file_path, data_folder)

            if to_label_path is None:
                # Swap only the trailing extension. A plain string replace would also
                # rewrite directory names that happen to contain the suffix.
                imod_path = os.path.join(gt_folder, os.path.splitext(relative_path)[0] + ".imod")
            else:
                imod_path = to_label_path(gt_folder, relative_path)

            if not os.path.exists(imod_path):
                if skip_no_labels:
                    print("Skipping", file_path, "because no matching labels were found at", imod_path)
                    train_id += 1
                    continue
                else:
                    raise RuntimeError(f"Can't find labels for {file_path} at {imod_path}.")

            print("Processing", file_path, "with target", output_path)
            write_vesicle_training_volume(
                file_path, imod_path, output_path, relative_path,
                exclude_labels=exclude_labels,
                exclude_label_patterns=exclude_label_patterns,
                contour_label_patterns=contour_label_patterns,
                visualize=visualize,
                resolution=resolution,
            )
            train_id += 1
def write_vesicle_training_volume( data_path: str, imod_path: str, output_path: str, original_path: Optional[str] = None, exclude_labels: Optional[Tuple[int]] = None, exclude_label_patterns: Optional[Tuple[str]] = None, contour_label_patterns: Optional[Tuple[str]] = None, visualize: bool = False, resolution: Optional[Tuple[int, int, int]] = None):
 54def write_vesicle_training_volume(
 55    data_path: str,
 56    imod_path: str,
 57    output_path: str,
 58    original_path: Optional[str] = None,
 59    exclude_labels: Optional[Tuple[int]] = None,
 60    exclude_label_patterns: Optional[Tuple[str]] = None,
 61    contour_label_patterns: Optional[Tuple[str]] = None,
 62    visualize: bool = False,
 63    resolution: Optional[Tuple[int, int, int]] = None,
 64):
 65    """Extract vesicle annotations from IMOD and write them to an hdf5 file.
 66
 67    By default this will export all point annotations from an imod file.
 68    The arguments `exclude_labels` and `exclude_label_patterns` can be used
 69    to exclude certain point annotations from the export.
 70    The argument `contour_label_patterns` can be used to also export selected
 71    contour annotations from the imod file.
 72
 73    Args:
 74        data_path: The path to the mrc file.
 75        imod_path: The path to the mod file with vesicle annotations.
 76        output_path: The path to the hdf5 file to save the extracted annotations.
 77        original_path: The orignal path name. This parameter is optional, and the path name
 78            will be saved as an attribute in the output hdf5 file, in order to map back
 79            extracted to original input data.
 80        exclude_labels: An optional list of object ids in the mod file that should be excluded
 81            from the export.
 82        exclude_label_patterns: An optional list of object names in the mode file that
 83            should be excluded from the export.
 84        contour_label_patterns: An optonal list of object names for contour annotations
 85            (= more complex object annotations) that should also be exported as vesicles
 86            from the imod file. This can be used in case some vesicles are annotated as
 87            objects with contours instead of just being point annotations.
 88        visualize: Whether to visualize the exported data with napari instead of saving it.
 89            For debugging purposes.
 90        resolution: The voxel size of the data in nanometers. It will be used to scale the
 91            radius of the point annotations exported from imod. By default the resolution
 92            will be read from the mrc header, but can be over-ridden by passing this value
 93            in case of wrong resolution information in the header.
 94    """
 95    if resolution is None:
 96        with mrcfile.open(data_path, "r") as f:
 97            resolution = f.voxel_size.tolist()
 98        resolution = tuple(np.round(res / 10, 3) for res in resolution)
 99    assert len(resolution) == 3
100
101    with open_file(data_path, "r") as f:
102        vol = f["data"][:]
103
104    vesicle_seg, labels, label_names, coords, radii = export_point_annotations(
105        imod_path, vol.shape, exclude_labels=exclude_labels, exclude_label_patterns=exclude_label_patterns,
106        resolution=resolution[0], return_coords_and_radii=True
107    )
108
109    if contour_label_patterns is not None:
110        all_label_names, label_types = get_label_names(imod_path, return_types=True)
111        mesh_object_ids = {
112            obj_id: name for obj_id, name in all_label_names.items()
113            if label_types[obj_id] == "closed contours" and any(pattern in name for pattern in contour_label_patterns)
114        }
115
116        # TODO double check this
117        extra_seg = _export_segmentations(imod_path, data_path, mesh_object_ids)
118        # extra_seg = imod_meshes_to_segmentations(imod_path, vol.shape, mesh_object_ids)
119        seg_id_offset = vesicle_seg.max() + 1
120        label_id_offset = max(list(labels.values())) + 1
121
122        for i, (name, seg) in enumerate(extra_seg.items()):
123            seg_id = seg_id_offset + i
124            seg_mask = seg_id == 1
125            if seg_mask.all():
126                warnings.warn(f"All foreground mesh for {imod_path}: {name} is skipped.")
127                continue
128            vesicle_seg[seg_mask] = seg_id
129            label_id = [i for i, pattern in enumerate(contour_label_patterns) if pattern in name]
130            assert len(label_id) == 1
131            labels[int(seg_id)] = int(label_id[0] + label_id_offset)
132
133    print("Extracted the following labels:", label_names)
134    print("With counts:", {k: v for k, v in zip(*np.unique(list(labels.values()), return_counts=True))})
135    if visualize:
136        _check_volume(vol, vesicle_seg, labels)
137
138    with open_file(output_path, "a") as f:
139        f.create_dataset("raw", data=vol, compression="gzip")
140
141        ds = f.create_dataset("labels/vesicles", data=vesicle_seg, compression="gzip")
142        ds.attrs["labels"] = json.dumps(labels)
143        ds.attrs["label_names"] = json.dumps(label_names)
144
145        f.create_dataset("labels/imod/vesicles/coordinates", data=coords)
146        f.create_dataset("labels/imod/vesicles/radii", data=radii)
147
148        if original_path is not None:
149            f.attrs["filename"] = original_path

Extract vesicle annotations from IMOD and write them to an hdf5 file.

By default this will export all point annotations from an imod file. The arguments exclude_labels and exclude_label_patterns can be used to exclude certain point annotations from the export. The argument contour_label_patterns can be used to also export selected contour annotations from the imod file.

Arguments:
  • data_path: The path to the mrc file.
  • imod_path: The path to the mod file with vesicle annotations.
  • output_path: The path to the hdf5 file to save the extracted annotations.
  • original_path: The original path name. This parameter is optional; if given, the path name will be saved as an attribute in the output hdf5 file, in order to map the extracted data back to the original input data.
  • exclude_labels: An optional list of object ids in the mod file that should be excluded from the export.
  • exclude_label_patterns: An optional list of object names in the mod file that should be excluded from the export.
  • contour_label_patterns: An optional list of object names for contour annotations (= more complex object annotations) that should also be exported as vesicles from the imod file. This can be used in case some vesicles are annotated as objects with contours instead of just being point annotations.
  • visualize: Whether to visualize the exported data with napari instead of saving it. For debugging purposes.
  • resolution: The voxel size of the data in nanometers. It will be used to scale the radius of the point annotations exported from imod. By default the resolution will be read from the mrc header, but can be over-ridden by passing this value in case of wrong resolution information in the header.
def extract_vesicle_training_data( data_folder: str, gt_folder: str, output_folder: str, to_label_path: Optional[<built-in function callable>] = None, skip_no_labels: bool = False, exclude: Optional[Tuple[str]] = None, exclude_labels: Optional[Tuple[int]] = None, exclude_label_patterns: Optional[Tuple[str]] = None, contour_label_patterns: Optional[Tuple[str]] = None, visualize: bool = False, resolution: Optional[Tuple[int, int, int]] = None):
def extract_vesicle_training_data(
    data_folder: str,
    gt_folder: str,
    output_folder: str,
    to_label_path: Optional[callable] = None,
    skip_no_labels: bool = False,
    exclude: Optional[Tuple[str]] = None,
    exclude_labels: Optional[Tuple[int]] = None,
    exclude_label_patterns: Optional[Tuple[str]] = None,
    contour_label_patterns: Optional[Tuple[str]] = None,
    visualize: bool = False,
    resolution: Optional[Tuple[int, int, int]] = None,
):
    """Extract all vesicle annotations from a folder hierarchy stored in mrc and imod files
    and write them to hdf5 files.

    This function calls `write_vesicle_training_volume` for each tomogram (.mrc or .rec file)
    with a matching label file it encounters.
    The output files will be stored with a simple naming pattern 'tomogram-00i.h5'.
    The running index is also increased for tomograms that are skipped because the output
    already exists or because no labels were found.
    The original filename for each exported file is stored in the attribute 'filename' at
    the root level of the hdf5.

    Args:
        data_folder: The root folder containing the mrc files.
        gt_folder: The root folder containing the label files. Can be the same as `data_folder`.
        output_folder: The output folder where the hdf5 files with exported raw data and
            vesicle segmentations will be saved.
        to_label_path: A function that is called with `gt_folder` and the path of the mrc
            file relative to `data_folder` and returns the path of the corresponding label
            file. If not given, the file extension of the mrc file is replaced with .imod
            and the result is looked up in `gt_folder`.
        skip_no_labels: Whether to skip mrc files for which a matching label file
            could not be found. If true a message is printed for these cases,
            otherwise a RuntimeError is raised.
        exclude: An optional list of directory paths (as yielded by `os.walk`) and
            filenames to be excluded from the export.
        exclude_labels: An optional list of object ids in the mod file that should be excluded
            from the export.
        exclude_label_patterns: An optional list of object names in the mod file that
            should be excluded from the export.
        contour_label_patterns: An optional list of object names for contour annotations
            (= more complex object annotations) that should also be exported as vesicles
            from the imod file. This can be used in case some vesicles are annotated as
            objects with contours instead of just being point annotations.
        visualize: Passed on to `write_vesicle_training_volume` to show the exported
            data in napari. For debugging purposes.
        resolution: The voxel size of the data in nanometers. It will be used to scale the
            radius of the point annotations exported from imod. By default the resolution
            will be read from the mrc header, but can be over-ridden by passing this value
            in case of wrong resolution information in the header.

    Raises:
        RuntimeError: If no label file is found for a tomogram and `skip_no_labels` is false.
    """
    os.makedirs(output_folder, exist_ok=True)

    train_id = 0
    for root, dirs, files in os.walk(data_folder):
        # Sorting in place fixes the traversal order and thus the assignment of ids.
        dirs.sort()
        files.sort()

        # check if we exclude this directory
        if exclude is not None and root in exclude:
            print("Skipping", root)
            continue

        for fname in files:
            # check if we exclude this file
            # TODO distinguish between directory and file to skip
            if exclude is not None and fname in exclude:
                print("Skipping", fname)
                continue

            if Path(fname).suffix not in (".mrc", ".rec"):
                continue

            output_path = os.path.join(output_folder, f"tomogram-{train_id:03}.h5")
            if os.path.exists(output_path):
                train_id += 1
                continue

            file_path = os.path.join(root, fname)
            relative_path = os.path.relpath(file_path, data_folder)

            if to_label_path is None:
                # Swap only the trailing extension. A plain string replace would also
                # rewrite directory names that happen to contain the suffix.
                imod_path = os.path.join(gt_folder, os.path.splitext(relative_path)[0] + ".imod")
            else:
                imod_path = to_label_path(gt_folder, relative_path)

            if not os.path.exists(imod_path):
                if skip_no_labels:
                    print("Skipping", file_path, "because no matching labels were found at", imod_path)
                    train_id += 1
                    continue
                else:
                    raise RuntimeError(f"Can't find labels for {file_path} at {imod_path}.")

            print("Processing", file_path, "with target", output_path)
            write_vesicle_training_volume(
                file_path, imod_path, output_path, relative_path,
                exclude_labels=exclude_labels,
                exclude_label_patterns=exclude_label_patterns,
                contour_label_patterns=contour_label_patterns,
                visualize=visualize,
                resolution=resolution,
            )
            train_id += 1

Extract all vesicle annotations from a folder hierarchy stored in mrc and imod files and write them to an hdf5 file.

This function calls write_vesicle_training_volume for each mrc/mod file pair it encounters. The output files will be stored with a simple naming pattern 'tomogram-00i.h5'. The original filename for each exported file is stored in the attribute 'filename' at the root level of the hdf5.

Arguments:
  • data_folder: The root folder containing the mrc files.
  • gt_folder: The root folder containing the mod files. Can be the same as data_folder.
  • output_folder: The output folder where the hdf5 files with exported raw data and vesicle segmentations will be saved.
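  • to_label_path: A function that is called with gt_folder and the path of the mrc file relative to data_folder and returns the path of the corresponding label file. If not given, the file extension of the mrc file is replaced with .imod and the result is looked up in gt_folder.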
  • skip_no_labels: Whether to skip extracting mrc files for which a matching label file could not be found. If true a message is printed for these cases, otherwise a RuntimeError is raised.
  • exclude: An optional list of filenames to be excluded from the export.
  • exclude_labels: An optional list of object ids in the mod file that should be excluded from the export.
  • exclude_label_patterns: An optional list of object names in the mod file that should be excluded from the export.
  • contour_label_patterns: An optional list of object names for contour annotations (= more complex object annotations) that should also be exported as vesicles from the imod file. This can be used in case some vesicles are annotated as objects with contours instead of just being point annotations.
  • visualize: Whether to visualize the exported data with napari instead of saving it. For debugging purposes.
  • resolution: The voxel size of the data in nanometers. It will be used to scale the radius of the point annotations exported from imod. By default the resolution will be read from the mrc header, but can be over-ridden by passing this value in case of wrong resolution information in the header.