bioimage_py.stats

Block-wise statistics (reductions).

View Source

1"""Block-wise statistics (reductions)."""
2from .stats import max, mean, mean_and_std, min, min_and_max, std
3from .unique import unique
4
5__all__ = ["max", "min", "mean", "std", "mean_and_std", "min_and_max", "unique"]

def max( input: 'SourceLike', num_workers: int = 1, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, mask: 'Optional[SourceLike]' = None, block_ids: Optional[Sequence[int]] = None, resume_from: Optional[str] = None) -> float: View Source

def max(
    input: SourceLike,
    num_workers: int = 1,
    block_shape: Optional[Tuple[int, ...]] = None,
    job_type: str = "local",
    job_config: Optional[RunnerConfig] = None,
    mask: Optional[SourceLike] = None,
    block_ids: Optional[Sequence[int]] = None,
    resume_from: Optional[str] = None,
) -> float:
    """Return the largest value of the data, optionally considering only values inside a mask.

    Args:
        input: Data to reduce; a numpy/zarr/n5 array or a `Source`.
        num_workers: How many workers run in parallel (threads with ``local``, tasks with the
            distributed backends).
        block_shape: Shape of each processing block. Falls back to the input chunk shape when
            omitted, so it must be given for unchunked data.
        job_type: Which backend executes the job: ``"local"``, ``"subprocess"`` or ``"slurm"``.
        job_config: Configuration for the backend (a `RunnerConfig` / `SlurmConfig`).
        mask: Optional binary mask. Values lying outside of it do not contribute to the result.
        block_ids: If given, only these block ids are processed (e.g. to re-run blocks that
            failed earlier).
        resume_from: Distributed backends only. The preserved temp folder of a failed run that
            should be resumed and merged (see ``runner.run``). Cannot be combined with
            ``block_ids``.

    Returns:
        The maximum value.

    Raises:
        ValueError: If the block-wise run yields no non-``None`` result (reported as "no values
            within the mask").
    """
    check_rerun_args(job_type, resume_from, block_ids)

    if check_direct(job_type, num_workers, block_shape, mask, block_ids):
        # Direct path: read the ``full_roi`` selection of the source and reduce it in one call.
        source = as_source(input)
        whole = source[full_roi(source.ndim)]
        return float(np.max(whole))

    # Block-wise path: the runner returns one partial result per processed block.
    partials = get_runner(job_type, job_config).run(
        _max_block,
        [input],
        num_workers=num_workers,
        block_shape=block_shape,
        mask=mask,
        block_ids=block_ids,
        resume_from=resume_from,
        has_return_val=True,
        name="max",
    )
    # ``None`` entries carry no value (presumably blocks without any values inside the mask).
    block_maxima = [value for value in partials if value is not None]
    if len(block_maxima) == 0:
        raise ValueError("No values within the mask; cannot compute a maximum.")
    # ``np.max`` is required here: the builtin ``max`` is shadowed by this very function.
    return float(np.max(block_maxima))

Compute the maximum value of the data, optionally restricted to a mask.

Args:

- input: The input data (a numpy/zarr/n5 array or a Source).
- num_workers: Number of parallel workers (threads for local, tasks for distributed backends).
- block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required for unchunked data.
- job_type: Execution backend: one of "local", "subprocess" or "slurm".
- job_config: Backend configuration (a RunnerConfig / SlurmConfig).
- mask: Optional binary mask; values outside the mask are excluded from the computation.
- block_ids: Restrict processing to these block ids (e.g. to re-run previously failed blocks).
- resume_from: Distributed only; the preserved temp folder of a failed run to resume and merge (see runner.run). Mutually exclusive with block_ids.

Returns: The maximum value.

def min( input: 'SourceLike', num_workers: int = 1, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, mask: 'Optional[SourceLike]' = None, block_ids: Optional[Sequence[int]] = None, resume_from: Optional[str] = None) -> float: View Source

def min(
    input: SourceLike,
    num_workers: int = 1,
    block_shape: Optional[Tuple[int, ...]] = None,
    job_type: str = "local",
    job_config: Optional[RunnerConfig] = None,
    mask: Optional[SourceLike] = None,
    block_ids: Optional[Sequence[int]] = None,
    resume_from: Optional[str] = None,
) -> float:
    """Return the smallest value of the data, optionally considering only values inside a mask.

    This is a thin wrapper around `min_and_max`: all arguments are forwarded unchanged and only
    the minimum of the returned ``(min, max)`` pair is kept.

    Args:
        input: Data to reduce; a numpy/zarr/n5 array or a `Source`.
        num_workers: How many workers run in parallel (threads with ``local``, tasks with the
            distributed backends).
        block_shape: Shape of each processing block. Falls back to the input chunk shape when
            omitted, so it must be given for unchunked data.
        job_type: Which backend executes the job: ``"local"``, ``"subprocess"`` or ``"slurm"``.
        job_config: Configuration for the backend (a `RunnerConfig` / `SlurmConfig`).
        mask: Optional binary mask. Values lying outside of it do not contribute to the result.
        block_ids: If given, only these block ids are processed (e.g. to re-run blocks that
            failed earlier).
        resume_from: Distributed backends only. The preserved temp folder of a failed run that
            should be resumed and merged (see ``runner.run``). Cannot be combined with
            ``block_ids``.

    Returns:
        The minimum value.
    """
    minimum, _ = min_and_max(
        input,
        num_workers=num_workers,
        block_shape=block_shape,
        job_type=job_type,
        job_config=job_config,
        mask=mask,
        block_ids=block_ids,
        resume_from=resume_from,
    )
    return minimum

Compute the minimum value of the data, optionally restricted to a mask.

Returns: The minimum value.

def mean( input: 'SourceLike', num_workers: int = 1, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, mask: 'Optional[SourceLike]' = None, block_ids: Optional[Sequence[int]] = None, resume_from: Optional[str] = None) -> float: View Source

def mean(
    input: SourceLike,
    num_workers: int = 1,
    block_shape: Optional[Tuple[int, ...]] = None,
    job_type: str = "local",
    job_config: Optional[RunnerConfig] = None,
    mask: Optional[SourceLike] = None,
    block_ids: Optional[Sequence[int]] = None,
    resume_from: Optional[str] = None,
) -> float:
    """Return the mean of the data, optionally considering only values inside a mask.

    This is a thin wrapper around `mean_and_std`: all arguments are forwarded unchanged and only
    the mean of the returned ``(mean, std)`` pair is kept.

    Args:
        input: Data to reduce; a numpy/zarr/n5 array or a `Source`.
        num_workers: How many workers run in parallel (threads with ``local``, tasks with the
            distributed backends).
        block_shape: Shape of each processing block. Falls back to the input chunk shape when
            omitted, so it must be given for unchunked data.
        job_type: Which backend executes the job: ``"local"``, ``"subprocess"`` or ``"slurm"``.
        job_config: Configuration for the backend (a `RunnerConfig` / `SlurmConfig`).
        mask: Optional binary mask. Values lying outside of it do not contribute to the result.
        block_ids: If given, only these block ids are processed (e.g. to re-run blocks that
            failed earlier).
        resume_from: Distributed backends only. The preserved temp folder of a failed run that
            should be resumed and merged (see ``runner.run``). Cannot be combined with
            ``block_ids``.

    Returns:
        The mean value.
    """
    average, _ = mean_and_std(
        input,
        num_workers=num_workers,
        block_shape=block_shape,
        job_type=job_type,
        job_config=job_config,
        mask=mask,
        block_ids=block_ids,
        resume_from=resume_from,
    )
    return average

Compute the mean of the data, optionally restricted to a mask.

Returns: The mean value.

def std( input: 'SourceLike', num_workers: int = 1, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, mask: 'Optional[SourceLike]' = None, block_ids: Optional[Sequence[int]] = None, resume_from: Optional[str] = None) -> float: View Source

def std(
    input: SourceLike,
    num_workers: int = 1,
    block_shape: Optional[Tuple[int, ...]] = None,
    job_type: str = "local",
    job_config: Optional[RunnerConfig] = None,
    mask: Optional[SourceLike] = None,
    block_ids: Optional[Sequence[int]] = None,
    resume_from: Optional[str] = None,
) -> float:
    """Return the standard deviation of the data, optionally considering only masked values.

    This is a thin wrapper around `mean_and_std`: all arguments are forwarded unchanged and only
    the standard deviation of the returned ``(mean, std)`` pair is kept.

    Args:
        input: Data to reduce; a numpy/zarr/n5 array or a `Source`.
        num_workers: How many workers run in parallel (threads with ``local``, tasks with the
            distributed backends).
        block_shape: Shape of each processing block. Falls back to the input chunk shape when
            omitted, so it must be given for unchunked data.
        job_type: Which backend executes the job: ``"local"``, ``"subprocess"`` or ``"slurm"``.
        job_config: Configuration for the backend (a `RunnerConfig` / `SlurmConfig`).
        mask: Optional binary mask. Values lying outside of it do not contribute to the result.
        block_ids: If given, only these block ids are processed (e.g. to re-run blocks that
            failed earlier).
        resume_from: Distributed backends only. The preserved temp folder of a failed run that
            should be resumed and merged (see ``runner.run``). Cannot be combined with
            ``block_ids``.

    Returns:
        The standard deviation.
    """
    _, deviation = mean_and_std(
        input,
        num_workers=num_workers,
        block_shape=block_shape,
        job_type=job_type,
        job_config=job_config,
        mask=mask,
        block_ids=block_ids,
        resume_from=resume_from,
    )
    return deviation

Compute the standard deviation of the data, optionally restricted to a mask.

Returns: The standard deviation.

def mean_and_std( input: 'SourceLike', num_workers: int = 1, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, mask: 'Optional[SourceLike]' = None, block_ids: Optional[Sequence[int]] = None, resume_from: Optional[str] = None) -> Tuple[float, float]: View Source

def mean_and_std(
    input: SourceLike,
    num_workers: int = 1,
    block_shape: Optional[Tuple[int, ...]] = None,
    job_type: str = "local",
    job_config: Optional[RunnerConfig] = None,
    mask: Optional[SourceLike] = None,
    block_ids: Optional[Sequence[int]] = None,
    resume_from: Optional[str] = None,
) -> Tuple[float, float]:
    """Return the (mean, standard deviation) of the data, optionally restricted to a mask.

    In the block-wise path every block contributes a ``(mean, variance, count)`` triple. The
    triples are merged with the parallel-variance formula, which is mathematically exact (up to
    floating-point rounding) rather than an approximation.

    Args:
        input: Data to reduce; a numpy/zarr/n5 array or a `Source`.
        num_workers: How many workers run in parallel (threads with ``local``, tasks with the
            distributed backends).
        block_shape: Shape of each processing block. Falls back to the input chunk shape when
            omitted, so it must be given for unchunked data.
        job_type: Which backend executes the job: ``"local"``, ``"subprocess"`` or ``"slurm"``.
        job_config: Configuration for the backend (a `RunnerConfig` / `SlurmConfig`).
        mask: Optional binary mask. Values lying outside of it do not contribute to the result.
        block_ids: If given, only these block ids are processed (e.g. to re-run blocks that
            failed earlier).
        resume_from: Distributed backends only. The preserved temp folder of a failed run that
            should be resumed and merged (see ``runner.run``). Cannot be combined with
            ``block_ids``.

    Returns:
        The mean and standard deviation, as a ``(mean, std)`` tuple.

    Raises:
        ValueError: If the block-wise run yields no non-``None`` result (reported as "no values
            within the mask").
    """
    check_rerun_args(job_type, resume_from, block_ids)

    if check_direct(job_type, num_workers, block_shape, mask, block_ids):
        # Direct path: read the ``full_roi`` selection once and let numpy do both reductions.
        source = as_source(input)
        whole = source[full_roi(source.ndim)]
        return float(np.mean(whole)), float(np.std(whole))

    # Block-wise path: the runner returns one partial result per processed block.
    partials = get_runner(job_type, job_config).run(
        _mean_and_std_block,
        [input],
        num_workers=num_workers,
        block_shape=block_shape,
        mask=mask,
        block_ids=block_ids,
        resume_from=resume_from,
        has_return_val=True,
        name="mean_and_std",
    )
    # ``None`` entries carry no statistics (presumably blocks without values inside the mask).
    triples = [triple for triple in partials if triple is not None]
    if len(triples) == 0:
        raise ValueError("No values within the mask; cannot compute mean/std.")

    block_means = np.array([triple[0] for triple in triples])
    block_vars = np.array([triple[1] for triple in triples])
    block_counts = np.array([triple[2] for triple in triples], dtype="float64")
    total_count = block_counts.sum()

    # Global mean = count-weighted average of the block means.
    mean_val = float((block_counts * block_means).sum() / total_count)
    # Parallel variance combination (mirrors elf): each block variance is corrected by the
    # squared shift of its block mean from the global mean before the weighted average.
    shifted_vars = block_vars + (block_means - mean_val) ** 2
    var_val = float((block_counts * shifted_vars).sum() / total_count)
    return mean_val, sqrt(var_val)

Compute the (mean, standard deviation) of the data, optionally restricted to a mask.

Per-block (mean, variance, count) triples are combined with the parallel-variance formula, so the result is mathematically exact, up to floating-point rounding (it is not an approximation).

Returns: The mean and standard deviation, as a (mean, std) tuple.

def min_and_max( input: 'SourceLike', num_workers: int = 1, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, mask: 'Optional[SourceLike]' = None, block_ids: Optional[Sequence[int]] = None, resume_from: Optional[str] = None) -> Tuple[float, float]: View Source

def min_and_max(
    input: SourceLike,
    num_workers: int = 1,
    block_shape: Optional[Tuple[int, ...]] = None,
    job_type: str = "local",
    job_config: Optional[RunnerConfig] = None,
    mask: Optional[SourceLike] = None,
    block_ids: Optional[Sequence[int]] = None,
    resume_from: Optional[str] = None,
) -> Tuple[float, float]:
    """Return the (minimum, maximum) of the data, optionally restricted to a mask.

    Args:
        input: Data to reduce; a numpy/zarr/n5 array or a `Source`.
        num_workers: How many workers run in parallel (threads with ``local``, tasks with the
            distributed backends).
        block_shape: Shape of each processing block. Falls back to the input chunk shape when
            omitted, so it must be given for unchunked data.
        job_type: Which backend executes the job: ``"local"``, ``"subprocess"`` or ``"slurm"``.
        job_config: Configuration for the backend (a `RunnerConfig` / `SlurmConfig`).
        mask: Optional binary mask. Values lying outside of it do not contribute to the result.
        block_ids: If given, only these block ids are processed (e.g. to re-run blocks that
            failed earlier).
        resume_from: Distributed backends only. The preserved temp folder of a failed run that
            should be resumed and merged (see ``runner.run``). Cannot be combined with
            ``block_ids``.

    Returns:
        The minimum and maximum values, as a ``(min, max)`` tuple.

    Raises:
        ValueError: If the block-wise run yields no non-``None`` result (reported as "no values
            within the mask").
    """
    check_rerun_args(job_type, resume_from, block_ids)

    if check_direct(job_type, num_workers, block_shape, mask, block_ids):
        # Direct path: read the ``full_roi`` selection once and reduce it twice with numpy.
        source = as_source(input)
        whole = source[full_roi(source.ndim)]
        return float(np.min(whole)), float(np.max(whole))

    # Block-wise path: the runner returns one ``(min, max)`` pair per processed block.
    partials = get_runner(job_type, job_config).run(
        _min_and_max_block,
        [input],
        num_workers=num_workers,
        block_shape=block_shape,
        mask=mask,
        block_ids=block_ids,
        resume_from=resume_from,
        has_return_val=True,
        name="min_and_max",
    )
    # ``None`` entries carry no extrema (presumably blocks without values inside the mask).
    pairs = [pair for pair in partials if pair is not None]
    if len(pairs) == 0:
        raise ValueError("No values within the mask; cannot compute min/max.")

    # The global extrema are the extrema of the per-block extrema.
    block_minima = np.array([pair[0] for pair in pairs])
    block_maxima = np.array([pair[1] for pair in pairs])
    return float(block_minima.min()), float(block_maxima.max())

Compute the (minimum, maximum) of the data, optionally restricted to a mask.

Returns: The minimum and maximum values, as a (min, max) tuple.

def unique( input: 'SourceLike', return_counts: bool = False, num_workers: int = 1, block_shape: Optional[Tuple[int, ...]] = None, job_type: str = 'local', job_config: Optional[bioimage_py.runner.RunnerConfig] = None, mask: 'Optional[SourceLike]' = None, block_ids: Optional[Sequence[int]] = None, resume_from: Optional[str] = None) -> Union[numpy.ndarray, Tuple[numpy.ndarray, numpy.ndarray]]: View Source

 67def unique(
 68    input: SourceLike,
 69    return_counts: bool = False,
 70    num_workers: int = 1,
 71    block_shape: Optional[Tuple[int, ...]] = None,
 72    job_type: str = "local",
 73    job_config: Optional[RunnerConfig] = None,
 74    mask: Optional[SourceLike] = None,
 75    block_ids: Optional[Sequence[int]] = None,
 76    resume_from: Optional[str] = None,
 77) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
 78    """Compute the unique values of the data, optionally with their counts.
 79
 80    Args:
 81        input: The input data (a numpy/zarr/n5 array or a `Source`).
 82        return_counts: Whether to also return the number of occurrences of each unique value.
 83        num_workers: Number of parallel workers (threads for ``local``, tasks for distributed
 84            backends).
 85        block_shape: Shape of the processing blocks. Defaults to the input chunk shape;
 86            required for unchunked data.
 87        job_type: Execution backend: one of ``"local"``, ``"subprocess"`` or ``"slurm"``.
 88        job_config: Backend configuration (a `RunnerConfig` / `SlurmConfig`).
 89        mask: Optional binary mask; values outside the mask are excluded from the computation.
 90        block_ids: Restrict processing to these block ids (e.g. to re-run previously failed blocks).
 91        resume_from: Distributed only; the preserved temp folder of a failed run to resume and
 92            merge (see ``runner.run``). Mutually exclusive with ``block_ids``.
 93
 94    Returns:
 95        The sorted unique values. If ``return_counts`` is set, a ``(values, counts)`` tuple, where
 96        ``counts`` is an ``int64`` array aligned with ``values``.
 97    """
 98    check_rerun_args(job_type, resume_from, block_ids)
 99    src = as_source(input)
100    if check_direct(job_type, num_workers, block_shape, mask, block_ids):
101        d = src[full_roi(src.ndim)]
102        if return_counts:
103            values, counts = np.unique(d, return_counts=True)
104            return values, counts.astype("int64")
105        return np.unique(d)
106    runner = get_runner(job_type, job_config)
107    results = runner.run(_make_unique_block(return_counts), [input], num_workers=num_workers,
108                         block_shape=block_shape, mask=mask, block_ids=block_ids,
109                         resume_from=resume_from, has_return_val=True, name="unique")
110    return _merge_unique(results, return_counts, np.dtype(src.dtype))

Compute the unique values of the data, optionally with their counts.

Args:

- input: The input data (a numpy/zarr/n5 array or a Source).
- return_counts: Whether to also return the number of occurrences of each unique value.
- num_workers: Number of parallel workers (threads for local, tasks for distributed backends).
- block_shape: Shape of the processing blocks. Defaults to the input chunk shape; required for unchunked data.
- job_type: Execution backend: one of "local", "subprocess" or "slurm".
- job_config: Backend configuration (a RunnerConfig / SlurmConfig).
- mask: Optional binary mask; values outside the mask are excluded from the computation.
- block_ids: Restrict processing to these block ids (e.g. to re-run previously failed blocks).
- resume_from: Distributed only; the preserved temp folder of a failed run to resume and merge (see runner.run). Mutually exclusive with block_ids.

Returns: The sorted unique values. If return_counts is set, a (values, counts) tuple, where counts is an int64 array aligned with values.