micro_sam.evaluation.evaluation

Evaluation functionality for segmentation predictions from micro_sam.evaluation.automatic_mask_generation and micro_sam.evaluation.inference.

  1"""Evaluation functionality for segmentation predictions from `micro_sam.evaluation.automatic_mask_generation`
  2and `micro_sam.evaluation.inference`.
  3"""
  4
  5import os
  6from glob import glob
  7from tqdm import tqdm
  8from pathlib import Path
  9from natsort import natsorted
 10from typing import List, Optional, Union
 11
 12import numpy as np
 13import pandas as pd
 14import imageio.v3 as imageio
 15from skimage.measure import label
 16
 17from elf.evaluation import mean_segmentation_accuracy
 18
 19from ..util import load_image_data
 20from ..automatic_segmentation import _has_extension
 21
 22
 23def _run_evaluation(gt_paths, prediction_paths, verbose=True, thresholds=None):
 24    assert len(gt_paths) == len(prediction_paths)
 25    msas, sa50s, sa75s = [], [], []
 26
 27    for gt_path, pred_path in tqdm(
 28        zip(gt_paths, prediction_paths), desc="Evaluate predictions", total=len(gt_paths), disable=not verbose
 29    ):
 30
 31        if isinstance(gt_path, np.ndarray):
 32            gt = gt_path
 33        else:
 34            assert os.path.exists(gt_path), gt_path
 35            gt = imageio.imread(gt_path)
 36            gt = label(gt)
 37
 38        if isinstance(pred_path, np.ndarray):
 39            pred = pred_path
 40        else:
 41            assert os.path.exists(pred_path), pred_path
 42            pred = imageio.imread(pred_path)
 43
 44        msa, scores = mean_segmentation_accuracy(pred, gt, thresholds=thresholds, return_accuracies=True)
 45        msas.append(msa)
 46        if thresholds is None:
 47            sa50, sa75 = scores[0], scores[5]
 48            sa50s.append(sa50), sa75s.append(sa75)
 49
 50    if thresholds is None:
 51        return msas, sa50s, sa75s
 52    else:
 53        return msas
 54
 55
 56def run_evaluation(
 57    gt_paths: List[Union[np.ndarray, os.PathLike, str]],
 58    prediction_paths: List[Union[np.ndarray, os.PathLike, str]],
 59    save_path: Optional[Union[os.PathLike, str]] = None,
 60    verbose: bool = True,
 61    thresholds: Optional[List[float]] = None,
 62) -> pd.DataFrame:
 63    """Run evaluation for instance segmentation predictions.
 64
 65    Args:
 66        gt_paths: The list of paths to ground-truth images.
 67        prediction_paths: The list of paths with the instance segmentations to evaluate.
 68        save_path: Optional path for saving the results.
 69        verbose: Whether to print the progress.
 70        thresholds: The choice of overlap thresholds.
 71
 72    Returns:
 73        A DataFrame that contains the evaluation results.
 74    """
 75    assert len(gt_paths) == len(prediction_paths)
 76    # If a save_path is given and it already exists, load it instead of re-running the evaluation.
 77    if save_path is not None and os.path.exists(save_path):
 78        return pd.read_csv(save_path)
 79
 80    scores = _run_evaluation(
 81        gt_paths=gt_paths, prediction_paths=prediction_paths, verbose=verbose, thresholds=thresholds
 82    )
 83    if thresholds is None:
 84        msas, sa50s, sa75s = scores
 85    else:
 86        msas = scores
 87
 88    results = {"mSA": [np.mean(msas)]}
 89    if thresholds is None:
 90        results["SA50"] = [np.mean(sa50s)]
 91        results["SA75"] = [np.mean(sa75s)]
 92
 93    results = pd.DataFrame.from_dict(results)
 94
 95    if save_path is not None:
 96        os.makedirs(Path(save_path).parent, exist_ok=True)
 97        results.to_csv(save_path, index=False)
 98
 99    return results
100
101
102def run_evaluation_for_iterative_prompting(
103    gt_paths: List[Union[os.PathLike, str]],
104    prediction_root: Union[os.PathLike, str],
105    experiment_folder: Union[os.PathLike, str],
106    start_with_box_prompt: bool = False,
107    overwrite_results: bool = False,
108    use_masks: bool = False,
109) -> pd.DataFrame:
110    """Run evaluation for iterative prompt-based segmentation predictions.
111
112    Args:
113        gt_paths: The list of paths to ground-truth images.
114        prediction_root: The folder with the iterative prompt-based instance segmentations to evaluate.
115        experiment_folder: The folder where all the experiment results are stored.
116        start_with_box_prompt: Whether to evaluate on experiments with iterative prompting starting with box.
117        overwrite_results: Whether to overwrite the results to update them with the new evaluation run.
118        use_masks: Whether to use masks for iterative prompting.
119
120    Returns:
121        A DataFrame that contains the evaluation results.
122    """
123    assert os.path.exists(prediction_root), prediction_root
124
125    # Save the results in the experiment folder
126    result_folder = os.path.join(
127        experiment_folder, "results", "iterative_prompting_" + ("with" if use_masks else "without") + "_mask"
128    )
129    os.makedirs(result_folder, exist_ok=True)
130
131    csv_path = os.path.join(
132        result_folder,
133        "iterative_prompts_start_box.csv" if start_with_box_prompt else "iterative_prompts_start_point.csv"
134    )
135
136    # Overwrite the previously saved results
137    if overwrite_results and os.path.exists(csv_path):
138        os.remove(csv_path)
139
140    # If the results have been computed already, there is no need to re-run the evaluation.
141    if os.path.exists(csv_path):
142        print(f"Results with iterative prompting for interactive segmentation are already stored at '{csv_path}'.")
143        return pd.read_csv(csv_path)
144
145    list_of_results = []
146    prediction_folders = sorted(glob(os.path.join(prediction_root, "iteration*")))
147    for pred_folder in prediction_folders:
148        print("Evaluating", os.path.split(pred_folder)[-1])
149        pred_paths = sorted(glob(os.path.join(pred_folder, "*")))
150        result = run_evaluation(gt_paths=gt_paths, prediction_paths=pred_paths, save_path=None)
151        list_of_results.append(result)
152
153    res_df = pd.concat(list_of_results, ignore_index=True)
154    res_df.to_csv(csv_path)
155
156    return res_df
157def main():
158    """@private"""
159    import argparse
160
161    parser = argparse.ArgumentParser(description="Evaluate segmentations from the Segment Anything model on custom data.")
162
163    # labels and predictions for quantitative evaluation.
164    parser.add_argument(
165        "--labels", required=True, type=str, nargs="+",
166        help="Filepath(s) to ground-truth labels or the directory where the label data is stored."
167    )
168    parser.add_argument(
169        "--predictions", required=True, type=str, nargs="+",
170        help="Filepath(s) to predicted labels or the directory where the predicted label data is stored."
171    )
172    parser.add_argument(
173        "--label_key", type=str, default=None,
174        help="The key for accessing the ground-truth label data, either a pattern / wildcard (if a directory is given) or a key for 'elf.io.open_file' (if a single file is given)."
175    )
176    parser.add_argument(
177        "--prediction_key", type=str, default=None,
178        help="The key for accessing the predicted label data, either a pattern / wildcard (if a directory is given) or a key for 'elf.io.open_file' (if a single file is given)."
179    )
180    parser.add_argument(
181        "-o", "--output_path", type=str, default=None,
182        help="The filepath where the evaluation results will be stored. Currently, results are stored in a 'csv' file."
183    )
184    parser.add_argument(
185        "--threshold", default=None, type=float, nargs="+",
186        help="The choice of overlap threshold(s) for calculating the segmentation accuracy. By default, "
187        "np.arange(0.5, 1., 0.05) is used to provide the mean segmentation accurcy score over all values.",
188    )
189    parser.add_argument(
190        "-v", "--verbose", action="store_true", help="Whether to allow verbosity of evaluation."
191    )
192
193    # TODO: We can extend this in the future for other metrics, e.g. dice score.
194    # NOTE: This argument is not exposed to the user atm.
195    # parser.add_argument(
196    #     "--metric", type=str, default="segmentation_accuracy", choices=("segmentation_accuracy"),
197    #     help="The choice of metric for evaluation. By default, it computes segmentation accuracy "
198    #     "for instance segmentation."
199    # )
200
201    args = parser.parse_args()
202
203    # Check whether the inputs are as expected.
204    def _get_inputs_from_paths(paths, key):
205        fpaths = []
206        for path in paths:
207            if _has_extension(path):  # It is a single filepath; load it via 'elf' if a key is given, otherwise keep the path.
208                fpaths.append(path if key is None else load_image_data(path=path, key=key))
209            else:  # Otherwise, the path is a directory; fetch all inputs matching the given pattern.
210                assert key is not None, \
211                    f"You must provide a wildcard / pattern as the filepath '{os.path.abspath(path)}' is a directory."
212                fpaths.extend(natsorted(glob(os.path.join(path, key))))
213
214        return fpaths
215
216    labels = _get_inputs_from_paths(args.labels, args.label_key)
217    predictions = _get_inputs_from_paths(args.predictions, args.prediction_key)
218    assert labels and len(labels) == len(predictions)
219
220    # Check whether the output path is a csv file, if it was passed.
221    output_path = args.output_path
222    if output_path is not None:
223        if not _has_extension(output_path):  # If it is a directory, store this in "<OUTPUT_PATH>/results.csv"
224            os.makedirs(output_path, exist_ok=True)
225            output_path = os.path.join(output_path, "results.csv")
226
227        if not output_path.endswith(".csv"):  # If it is a filepath missing extension / with a different extension.
228            output_path = str(Path(output_path).with_suffix(".csv"))  # Limit supports to csv files for now.
229
230    # Run evaluation on labels and predictions.
231    results = run_evaluation(
232        gt_paths=labels,
233        prediction_paths=predictions,
234        save_path=output_path,
235        verbose=args.verbose,
236        thresholds=args.threshold,
237    )
238
239    print("The evaluation results for the predictions are:")
240    print(results)
241
242    if args.verbose and output_path is not None:
243        print(f"The evaluation results have been stored at '{os.path.abspath(output_path)}'.")
def run_evaluation( gt_paths: List[Union[numpy.ndarray, os.PathLike, str]], prediction_paths: List[Union[numpy.ndarray, os.PathLike, str]], save_path: Union[str, os.PathLike, NoneType] = None, verbose: bool = True, thresholds: Optional[List[float]] = None) -> pandas.core.frame.DataFrame:

Run evaluation for instance segmentation predictions.

Arguments:
  • gt_paths: The list of paths to ground-truth images.
  • prediction_paths: The list of paths with the instance segmentations to evaluate.
  • save_path: Optional path for saving the results.
  • verbose: Whether to print the progress.
  • thresholds: The choice of overlap thresholds.
Returns:
  A DataFrame that contains the evaluation results.
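
A minimal usage sketch (the folder layout and file names below are hypothetical; any two path lists with matching order work):

    import os
    from glob import glob

    from natsort import natsorted

    from micro_sam.evaluation.evaluation import run_evaluation

    # Hypothetical layout: one tif per image, ground-truth and prediction files share names.
    gt_paths = natsorted(glob(os.path.join("data", "ground_truth", "*.tif")))
    prediction_paths = natsorted(glob(os.path.join("data", "predictions", "*.tif")))

    # With the default thresholds (np.arange(0.5, 1., 0.05)) this returns a one-row
    # DataFrame with the mSA, SA50 and SA75 scores and also writes it to 'results.csv'.
    results = run_evaluation(gt_paths, prediction_paths, save_path="results.csv")
    print(results)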

def run_evaluation_for_iterative_prompting( gt_paths: List[Union[os.PathLike, str]], prediction_root: Union[os.PathLike, str], experiment_folder: Union[os.PathLike, str], start_with_box_prompt: bool = False, overwrite_results: bool = False, use_masks: bool = False) -> pandas.core.frame.DataFrame:

Run evaluation for iterative prompt-based segmentation predictions.

Arguments:
  • gt_paths: The list of paths to ground-truth images.
  • prediction_root: The folder with the iterative prompt-based instance segmentations to evaluate.
  • experiment_folder: The folder where all the experiment results are stored.
  • start_with_box_prompt: Whether to evaluate on experiments with iterative prompting starting with box.
  • overwrite_results: Whether to overwrite the results to update them with the new evaluation run.
  • use_masks: Whether to use masks for iterative prompting.
Returns:
  A DataFrame that contains the evaluation results.
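
A usage sketch for evaluating iterative-prompting results; the paths below are placeholders, and the assumed layout is a 'prediction_root' with one 'iteration*' subfolder per prompting iteration (typically produced via micro_sam.evaluation.inference):

    import os
    from glob import glob

    from natsort import natsorted

    from micro_sam.evaluation.evaluation import run_evaluation_for_iterative_prompting

    gt_paths = natsorted(glob(os.path.join("data", "ground_truth", "*.tif")))

    # One result row is computed per 'iteration*' subfolder; the collected scores are
    # also stored as a csv inside '<experiment_folder>/results/'.
    results = run_evaluation_for_iterative_prompting(
        gt_paths=gt_paths,
        prediction_root="experiment/iterative_prompting_without_mask",  # placeholder path
        experiment_folder="experiment",
        start_with_box_prompt=False,
        use_masks=False,
    )
    print(results)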