micro_sam.evaluation.evaluation

Evaluation functionality for segmentation predictions from micro_sam.evaluation.automatic_mask_generation and micro_sam.evaluation.inference.

  1"""Evaluation functionality for segmentation predictions from `micro_sam.evaluation.automatic_mask_generation`
  2and `micro_sam.evaluation.inference`.
  3"""
  4
  5import os
  6from glob import glob
  7from tqdm import tqdm
  8from pathlib import Path
  9from natsort import natsorted
 10from typing import List, Optional, Union
 11
 12import numpy as np
 13import pandas as pd
 14import imageio.v3 as imageio
 15from skimage.measure import label
 16
 17from elf.evaluation import mean_segmentation_accuracy
 18
 19from ..util import load_image_data
 20
 21
 22def _run_evaluation(gt_paths, prediction_paths, verbose=True, thresholds=None):
 23    assert len(gt_paths) == len(prediction_paths)
 24    msas, sa50s, sa75s = [], [], []
 25
 26    for gt_path, pred_path in tqdm(
 27        zip(gt_paths, prediction_paths), desc="Evaluate predictions", total=len(gt_paths), disable=not verbose
 28    ):
 29
 30        if isinstance(gt_path, np.ndarray):
 31            gt = gt_path
 32        else:
 33            assert os.path.exists(gt_path), gt_path
 34            gt = imageio.imread(gt_path)
 35            gt = label(gt)
 36
 37        if isinstance(pred_path, np.ndarray):
 38            pred = pred_path
 39        else:
 40            assert os.path.exists(pred_path), pred_path
 41            pred = imageio.imread(pred_path)
 42
 43        msa, scores = mean_segmentation_accuracy(pred, gt, thresholds=thresholds, return_accuracies=True)
 44        msas.append(msa)
 45        if thresholds is None:
 46            sa50, sa75 = scores[0], scores[5]
 47            sa50s.append(sa50), sa75s.append(sa75)
 48
 49    if thresholds is None:
 50        return msas, sa50s, sa75s
 51    else:
 52        return msas
 53
 54
 55def run_evaluation(
 56    gt_paths: List[Union[np.ndarray, os.PathLike, str]],
 57    prediction_paths: List[Union[np.ndarray, os.PathLike, str]],
 58    save_path: Optional[Union[os.PathLike, str]] = None,
 59    verbose: bool = True,
 60    thresholds: Optional[List[float]] = None,
 61) -> pd.DataFrame:
 62    """Run evaluation for instance segmentation predictions.
 63
 64    Args:
 65        gt_paths: The list of paths to ground-truth images.
 66        prediction_paths: The list of paths with the instance segmentations to evaluate.
 67        save_path: Optional path for saving the results.
 68        verbose: Whether to print the progress.
 69        thresholds: The choice of overlap thresholds.
 70
 71    Returns:
 72        A DataFrame that contains the evaluation results.
 73    """
 74    assert len(gt_paths) == len(prediction_paths)
 75    # if a save_path is given and it already exists then just load it instead of running the eval
 76    if save_path is not None and os.path.exists(save_path):
 77        return pd.read_csv(save_path)
 78
 79    scores = _run_evaluation(
 80        gt_paths=gt_paths, prediction_paths=prediction_paths, verbose=verbose, thresholds=thresholds
 81    )
 82    if thresholds is None:
 83        msas, sa50s, sa75s = scores
 84    else:
 85        msas = scores
 86
 87    results = {"mSA": [np.mean(msas)]}
 88    if thresholds is None:
 89        results["SA50"] = [np.mean(sa50s)]
 90        results["SA75"] = [np.mean(sa75s)]
 91
 92    results = pd.DataFrame.from_dict(results)
 93
 94    if save_path is not None:
 95        os.makedirs(Path(save_path).parent, exist_ok=True)
 96        results.to_csv(save_path, index=False)
 97
 98    return results
 99
100
101def run_evaluation_for_iterative_prompting(
102    gt_paths: List[Union[os.PathLike, str]],
103    prediction_root: Union[os.PathLike, str],
104    experiment_folder: Union[os.PathLike, str],
105    start_with_box_prompt: bool = False,
106    overwrite_results: bool = False,
107    use_masks: bool = False,
108) -> pd.DataFrame:
109    """Run evaluation for iterative prompt-based segmentation predictions.
110
111    Args:
112        gt_paths: The list of paths to ground-truth images.
113        prediction_root: The folder with the iterative prompt-based instance segmentations to evaluate.
114        experiment_folder: The folder where all the experiment results are stored.
115        start_with_box_prompt: Whether to evaluate on experiments with iterative prompting starting with box.
116        overwrite_results: Whether to overwrite the results to update them with the new evaluation run.
117        use_masks: Whether to use masks for iterative prompting.
118
119    Returns:
120        A DataFrame that contains the evaluation results.
121    """
122    assert os.path.exists(prediction_root), prediction_root
123
124    # Save the results in the experiment folder
125    result_folder = os.path.join(
126        experiment_folder, "results", "iterative_prompting_" + ("with" if use_masks else "without") + "_mask"
127    )
128    os.makedirs(result_folder, exist_ok=True)
129
130    csv_path = os.path.join(
131        result_folder,
132        "iterative_prompts_start_box.csv" if start_with_box_prompt else "iterative_prompts_start_point.csv"
133    )
134
135    # Overwrite the previously saved results
136    if overwrite_results and os.path.exists(csv_path):
137        os.remove(csv_path)
138
139    # If the results have been computed already, it's not needed to re-run it again.
140    if os.path.exists(csv_path):
141        print(f"Results with iterative prompting for interactive segmentation are already stored at '{csv_path}'.")
142        return
143
144    list_of_results = []
145    prediction_folders = sorted(glob(os.path.join(prediction_root, "iteration*")))
146    for pred_folder in prediction_folders:
147        print("Evaluating", os.path.split(pred_folder)[-1])
148        pred_paths = sorted(glob(os.path.join(pred_folder, "*")))
149        result = run_evaluation(gt_paths=gt_paths, prediction_paths=pred_paths, save_path=None)
150        list_of_results.append(result)
151
152    res_df = pd.concat(list_of_results, ignore_index=True)
153    res_df.to_csv(csv_path)
154
155    return res_df
156
157
158def main():
159    """@private"""
160    import argparse
161
162    parser = argparse.ArgumentParser(description="Evaluating segmentations from Segment Anything model on custom data.")
163
164    # labels and predictions for quantitative evaluation.
165    parser.add_argument(
166        "--labels", required=True, type=str, nargs="+",
167        help="Filepath(s) to ground-truth labels or the directory where the label data is stored."
168    )
169    parser.add_argument(
170        "--predictions", required=True, type=str, nargs="+",
171        help="Filepath to predicted labels or the directory where the predicted label data is stored."
172    )
173    parser.add_argument(
174        "--label_key", type=str, default=None,
175        help="The key for accessing predicted label data, either a pattern / wildcard or with 'elf.io.open_file'. "
176    )
177    parser.add_argument(
178        "--prediction_key", type=str, default=None,
179        help="The key for accessing ground-truth label data, either a pattern / wildcard or with 'elf.io.open_file'. "
180    )
181    parser.add_argument(
182        "-o", "--output_path", type=str, default=None,
183        help="The filepath to store the evaluation results. The current support stores results in a 'csv' file."
184    )
185    parser.add_argument(
186        "--threshold", default=None, type=float, nargs="+",
187        help="The choice of overlap threshold(s) for calculating the segmentation accuracy. By default, "
188        "np.arange(0.5, 1., 0.05) is used to provide the mean segmentation accurcy score over all values.",
189    )
190    parser.add_argument(
191        "-v", "--verbose", action="store_true", help="Whether to allow verbosity of evaluation."
192    )
193
194    # TODO: We can extend this in future for other metrics, eg. dice score, etc.
195    # NOTE: This argument is not exposed to the user atm.
196    # parser.add_argument(
197    #     "--metric", type=str, default="segmentation_accuracy", choices=("segmentation_accuracy"),
198    #     help="The choice of metric for evaluation. By default, it computes segmentation accuracy "
199    #     "for instance segmentation."
200    # )
201
202    args = parser.parse_args()
203
204    # Check whether the inputs are as expected.
205    def _get_inputs_from_paths(paths, key):
206        fpaths = []
207        for path in paths:
208            if os.path.isfile(path):  # it is just one filepath and we check whether we can access it via 'elf'.
209                fpaths.append(path if key is None else load_image_data(path=path, key=key))
210            else:  # otherwise, path is a directory, fetch all inputs provided with a pattern.
211                assert key is not None, \
212                    f"You must provide a wildcard / pattern as the filepath '{os.path.abspath(path)}' is a directory."
213                fpaths.extend(natsorted(glob(os.path.join(path, key))))
214
215        return fpaths
216
217    labels = _get_inputs_from_paths(args.labels, args.label_key)
218    predictions = _get_inputs_from_paths(args.predictions, args.prediction_key)
219    assert labels and len(labels) == len(predictions)
220
221    # Check whether output path is a csv or not, if passed.
222    output_path = args.output_path
223    if output_path is not None:
224        if not os.path.isfile(output_path):  # If it is a directory, store this in "<OUTPUT_PATH>/results.csv"
225            os.makedirs(output_path, exist_ok=True)
226            output_path = os.path.join(output_path, "results.csv")
227
228        if not output_path.endswith(".csv"):  # If it is a filepath missing extension / with a different extension.
229            output_path = str(Path(output_path).with_suffix(".csv"))  # Limit supports to csv files for now.
230
231    # Run evaluation on labels and predictions.
232    results = run_evaluation(
233        gt_paths=labels,
234        prediction_paths=predictions,
235        save_path=output_path,
236        verbose=args.verbose,
237        thresholds=args.threshold,
238    )
239
240    print("The evaluation results for the predictions are:")
241    print(results)
242
243    if args.verbose and output_path is not None:
244        print(f"The evaluation results have been stored at '{os.path.abspath(output_path)}'.")
def run_evaluation( gt_paths: List[Union[numpy.ndarray, os.PathLike, str]], prediction_paths: List[Union[numpy.ndarray, os.PathLike, str]], save_path: Union[str, os.PathLike, NoneType] = None, verbose: bool = True, thresholds: Optional[List[float]] = None) -> pandas.core.frame.DataFrame:

Run evaluation for instance segmentation predictions.

Arguments:
  • gt_paths: The list of paths to ground-truth images.
  • prediction_paths: The list of paths with the instance segmentations to evaluate.
  • save_path: Optional path for saving the results.
  • verbose: Whether to print the progress.
  • thresholds: The choice of overlap thresholds.
Returns:
  • A DataFrame that contains the evaluation results.
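
A minimal usage sketch. The folder layout and file names below are illustrative, not part of the API; labels and predictions are assumed to be stored as matching tif files in corresponding order.

import os
from glob import glob

from natsort import natsorted

from micro_sam.evaluation.evaluation import run_evaluation

# Hypothetical layout: one ground-truth label image per prediction, sorted consistently.
gt_paths = natsorted(glob(os.path.join("data", "labels", "*.tif")))
prediction_paths = natsorted(glob(os.path.join("data", "predictions", "*.tif")))

# Computes mSA, SA50 and SA75 over the default overlap thresholds and caches the
# result table at save_path (an existing csv at that path is re-loaded instead).
results = run_evaluation(
    gt_paths=gt_paths,
    prediction_paths=prediction_paths,
    save_path=os.path.join("results", "evaluation.csv"),
)
print(results)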

def run_evaluation_for_iterative_prompting( gt_paths: List[Union[str, os.PathLike]], prediction_root: Union[os.PathLike, str], experiment_folder: Union[os.PathLike, str], start_with_box_prompt: bool = False, overwrite_results: bool = False, use_masks: bool = False) -> pandas.core.frame.DataFrame:

Run evaluation for iterative prompt-based segmentation predictions.

Arguments:
  • gt_paths: The list of paths to ground-truth images.
  • prediction_root: The folder with the iterative prompt-based instance segmentations to evaluate.
  • experiment_folder: The folder where all the experiment results are stored.
  • start_with_box_prompt: Whether to evaluate on experiments with iterative prompting starting with box.
  • overwrite_results: Whether to overwrite the results to update them with the new evaluation run.
  • use_masks: Whether to use masks for iterative prompting.
Returns:
  • A DataFrame that contains the evaluation results.
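
A minimal usage sketch, assuming `prediction_root` contains one subfolder of predictions per prompting iteration (matching the "iteration*" pattern used above). The folder names are illustrative.

from glob import glob

from natsort import natsorted

from micro_sam.evaluation.evaluation import run_evaluation_for_iterative_prompting

gt_paths = natsorted(glob("data/labels/*.tif"))

# With these arguments the per-iteration results are written to
# "experiment/results/iterative_prompting_without_mask/iterative_prompts_start_point.csv".
res_df = run_evaluation_for_iterative_prompting(
    gt_paths=gt_paths,
    prediction_root="experiment/start_with_point",  # contains subfolders matching "iteration*"
    experiment_folder="experiment",
    start_with_box_prompt=False,
    use_masks=False,
)
print(res_df)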