synapse_net.file_utils

 1import os
 2from typing import Dict, List, Optional, Tuple, Union
 3
 4import mrcfile
 5import numpy as np
 6import pooch
 7
 8
 9def get_cache_dir() -> str:
10    """Get the cache directory of synapse net.
11
12    Returns:
13        The cache directory.
14    """
15    cache_dir = os.path.expanduser(pooch.os_cache("synapse-net"))
16    return cache_dir
17
18
def get_data_path(folder: str, n_tomograms: Optional[int] = 1) -> Union[str, List[str]]:
    """Get the path to all tomograms stored as .rec or .mrc files in a folder.

    Args:
        folder: The folder with tomograms.
        n_tomograms: The expected number of tomograms. Pass None to skip the
            count check and always get the full list back.

    Returns:
        The filepath (if n_tomograms is 1) or the list of filepaths of the
        tomograms in the folder, sorted by file name.

    Raises:
        AssertionError: If the number of tomograms found differs from n_tomograms.
    """
    # os.listdir returns entries in arbitrary order; sort them so the result
    # is reproducible across platforms and file systems.
    tomograms = [
        os.path.join(folder, fname)
        for fname in sorted(os.listdir(folder))
        if os.path.splitext(fname)[1] in (".rec", ".mrc")
    ]

    if n_tomograms is None:
        return tomograms

    # Raise explicitly rather than via `assert`, so the check is not stripped
    # when Python runs with -O. The exception type and message are unchanged.
    if len(tomograms) != n_tomograms:
        raise AssertionError(f"{folder}: {len(tomograms)}, {n_tomograms}")
    return tomograms[0] if n_tomograms == 1 else tomograms
40
41
42def _parse_voxel_size(voxel_size):
43    parsed_voxel_size = None
44    try:
45        # The voxel sizes are stored in Angsrrom in the MRC header, but we want them
46        # in nanometer. Hence we divide by a factor of 10 here.
47        parsed_voxel_size = {
48            "x": voxel_size.x / 10,
49            "y": voxel_size.y / 10,
50            "z": voxel_size.z / 10,
51        }
52    except Exception as e:
53        print(f"Failed to read voxel size: {e}")
54    return parsed_voxel_size
55
56
def read_voxel_size(path: str) -> Optional[Dict[str, float]]:
    """Read voxel size from mrc/rec file.

    The original unit of voxel size is Angstrom and we convert it to nanometers by dividing it by ten.

    Args:
        path: Path to mrc/rec file.

    Returns:
        Mapping from the axis name to voxel size. None if the voxel size could not be read.
    """
    # The return annotation uses Optional[...] like the rest of this module:
    # applying `|` to a typing generic fails at definition time before Python 3.10.
    with mrcfile.open(path, permissive=True) as mrc:
        return _parse_voxel_size(mrc.voxel_size)
71
72
def read_mrc(path: str) -> Tuple[np.ndarray, Optional[Dict[str, float]]]:
    """Read data and voxel size from mrc/rec file.

    Args:
        path: Path to mrc/rec file.

    Returns:
        The data read from the file, flipped along its second-to-last axis.
        The voxel size read from the file. None if the voxel size could not be read.

    Raises:
        AssertionError: If the data is neither 2D nor 3D.
    """
    with mrcfile.open(path, permissive=True) as mrc:
        voxel_size = _parse_voxel_size(mrc.voxel_size)
        data = np.asarray(mrc.data[:])
    assert data.ndim in (2, 3)

    # Flip (not transpose) the second-to-last axis, i.e. axis 1 for 3D data and
    # axis 0 for 2D data, to match python axis order.
    data = np.flip(data, axis=-2)
    return data, voxel_size
def get_cache_dir() -> str:
def get_cache_dir() -> str:
    """Return the directory that synapse net uses for cached files.

    The path is whatever `pooch.os_cache` reports for the application name
    "synapse-net", passed through `os.path.expanduser`.

    Returns:
        The cache directory.
    """
    return os.path.expanduser(pooch.os_cache("synapse-net"))

Get the cache directory of synapse net.

Returns:

The cache directory.

def get_data_path(folder: str, n_tomograms: Optional[int] = 1) -> Union[str, List[str]]:
def get_data_path(folder: str, n_tomograms: Optional[int] = 1) -> Union[str, List[str]]:
    """Get the path to all tomograms stored as .rec or .mrc files in a folder.

    Args:
        folder: The folder with tomograms.
        n_tomograms: The expected number of tomograms. Pass None to skip the
            count check and always get the full list back.

    Returns:
        The filepath (if n_tomograms is 1) or the list of filepaths of the
        tomograms in the folder, sorted by file name.

    Raises:
        AssertionError: If the number of tomograms found differs from n_tomograms.
    """
    # os.listdir returns entries in arbitrary order; sort them so the result
    # is reproducible across platforms and file systems.
    tomograms = [
        os.path.join(folder, fname)
        for fname in sorted(os.listdir(folder))
        if os.path.splitext(fname)[1] in (".rec", ".mrc")
    ]

    if n_tomograms is None:
        return tomograms

    # Raise explicitly rather than via `assert`, so the check is not stripped
    # when Python runs with -O. The exception type and message are unchanged.
    if len(tomograms) != n_tomograms:
        raise AssertionError(f"{folder}: {len(tomograms)}, {n_tomograms}")
    return tomograms[0] if n_tomograms == 1 else tomograms

Get the path to all tomograms stored as .rec or .mrc files in a folder.

Arguments:
  • folder: The folder with tomograms.
  • n_tomograms: The expected number of tomograms.
Returns:

The filepath or list of filepaths of the tomograms in the folder.

def read_voxel_size(path: str) -> Optional[Dict[str, float]]:
def read_voxel_size(path: str) -> Optional[Dict[str, float]]:
    """Read voxel size from mrc/rec file.

    The original unit of voxel size is Angstrom and we convert it to nanometers by dividing it by ten.

    Args:
        path: Path to mrc/rec file.

    Returns:
        Mapping from the axis name to voxel size. None if the voxel size could not be read.
    """
    # The return annotation uses Optional[...] like the rest of this module:
    # applying `|` to a typing generic fails at definition time before Python 3.10.
    with mrcfile.open(path, permissive=True) as mrc:
        return _parse_voxel_size(mrc.voxel_size)

Read voxel size from mrc/rec file.

The original unit of voxel size is Angstrom and we convert it to nanometers by dividing it by ten.

Arguments:
  • path: Path to mrc/rec file.
Returns:

Mapping from the axis name to voxel size. None if the voxel size could not be read.

def read_mrc(path: str) -> Tuple[numpy.ndarray, Dict[str, float]]:
def read_mrc(path: str) -> Tuple[np.ndarray, Optional[Dict[str, float]]]:
    """Read data and voxel size from mrc/rec file.

    Args:
        path: Path to mrc/rec file.

    Returns:
        The data read from the file, flipped along its second-to-last axis.
        The voxel size read from the file. None if the voxel size could not be read.

    Raises:
        AssertionError: If the data is neither 2D nor 3D.
    """
    with mrcfile.open(path, permissive=True) as mrc:
        voxel_size = _parse_voxel_size(mrc.voxel_size)
        data = np.asarray(mrc.data[:])
    assert data.ndim in (2, 3)

    # Flip (not transpose) the second-to-last axis, i.e. axis 1 for 3D data and
    # axis 0 for 2D data, to match python axis order.
    data = np.flip(data, axis=-2)
    return data, voxel_size

Read data and voxel size from mrc/rec file.

Arguments:
  • path: Path to mrc/rec file.
Returns:

The data read from the file. The voxel size read from the file.