micro_sam.evaluation.evaluation

Evaluation functionality for segmentation predictions from micro_sam.evaluation.automatic_mask_generation and micro_sam.evaluation.inference.
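A minimal usage sketch with toy in-memory arrays (not real data): run_evaluation also accepts numpy arrays in place of file paths, so the instance segmentation metrics can be computed directly on label images held in memory.

    import numpy as np
    from micro_sam.evaluation.evaluation import run_evaluation

    # Toy instance segmentations: 0 is background, positive integers are instance ids.
    gt = np.zeros((64, 64), dtype="uint32")
    gt[5:20, 5:20] = 1
    gt[30:50, 30:50] = 2

    pred = np.zeros((64, 64), dtype="uint32")
    pred[6:21, 6:21] = 1
    pred[31:49, 31:49] = 2

    results = run_evaluation(gt_paths=[gt], prediction_paths=[pred], save_path=None)
    print(results)  # A one-row DataFrame with mSA, SA50, SA75, Precision, Recall and F1 Score.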

  1"""Evaluation functionality for segmentation predictions from `micro_sam.evaluation.automatic_mask_generation`
  2and `micro_sam.evaluation.inference`.
  3"""
  4
  5import os
  6from glob import glob
  7from tqdm import tqdm
  8from pathlib import Path
  9from natsort import natsorted
 10from typing import List, Optional, Union
 11
 12import numpy as np
 13import pandas as pd
 14import imageio.v3 as imageio
 15from skimage.measure import label
 16
 17from elf.evaluation import mean_segmentation_accuracy, matching
 18
 19from ..util import load_image_data
 20
 21
 22def _run_evaluation(gt_paths, prediction_paths, verbose=True, thresholds=None):
 23    assert len(gt_paths) == len(prediction_paths)
 24
 25    msas, sa50s, sa75s, precisions, recalls, f1s = [], [], [], [], [], []
 26    for gt_path, pred_path in tqdm(
 27        zip(gt_paths, prediction_paths), desc="Evaluate predictions", total=len(gt_paths), disable=not verbose
 28    ):
 29
 30        if isinstance(gt_path, np.ndarray):
 31            gt = gt_path
 32        else:
 33            assert os.path.exists(gt_path), gt_path
 34            gt = imageio.imread(gt_path)
 35            gt = label(gt)
 36
 37        if isinstance(pred_path, np.ndarray):
 38            pred = pred_path
 39        else:
 40            assert os.path.exists(pred_path), pred_path
 41            pred = imageio.imread(pred_path)
 42
 43        assert gt.shape == pred.shape, f"Expected {gt.shape}, got {pred.shape}"
 44        msa, scores = mean_segmentation_accuracy(pred, gt, thresholds=thresholds, return_accuracies=True)
 45        stats = matching(pred, gt)
 46        msas.append(msa)
 47        if thresholds is None:
 48            sa50, sa75 = scores[0], scores[5]  # Default thresholds np.arange(0.5, 1.0, 0.05): index 0 is SA50, index 5 is SA75.
 49            sa50s.append(sa50), sa75s.append(sa75)
 50        precisions.append(stats["precision"])
 51        recalls.append(stats["recall"])
 52        f1s.append(stats["f1"])
 53
 54    if thresholds is None:
 55        return (msas, sa50s, sa75s), (precisions, recalls, f1s)
 56    else:
 57        return msas, (precisions, recalls, f1s)
 58
 59
 60def run_evaluation(
 61    gt_paths: List[Union[np.ndarray, os.PathLike, str]],
 62    prediction_paths: List[Union[np.ndarray, os.PathLike, str]],
 63    save_path: Optional[Union[os.PathLike, str]] = None,
 64    verbose: bool = True,
 65    thresholds: Optional[List[float]] = None,
 66) -> pd.DataFrame:
 67    """Run evaluation for instance segmentation predictions.
 68
 69    Args:
 70        gt_paths: The list of paths to ground-truth images.
 71        prediction_paths: The list of paths with the instance segmentations to evaluate.
 72        save_path: Optional path for saving the results.
 73        verbose: Whether to print the progress.
 74        thresholds: The choice of overlap thresholds.
 75
 76    Returns:
 77        A DataFrame that contains the evaluation results.
 78    """
 79    assert len(gt_paths) == len(prediction_paths)
 80    # If a save_path is given and it already exists, load the results from it instead of re-running the evaluation.
 81    if save_path is not None and os.path.exists(save_path):
 82        return pd.read_csv(save_path)
 83
 84    scores = _run_evaluation(
 85        gt_paths=gt_paths, prediction_paths=prediction_paths, verbose=verbose, thresholds=thresholds
 86    )
 87
 88    sas, other_scores = scores  # Separate all scores into two categories.
 89    precisions, recalls, f1s = other_scores
 90
 91    if thresholds is None:
 92        msas, sa50s, sa75s = sas
 93    else:
 94        msas = sas
 95
 96    results = {"mSA": [np.mean(msas)]}
 97    if thresholds is None:
 98        results["SA50"] = [np.mean(sa50s)]
 99        results["SA75"] = [np.mean(sa75s)]
100    results["Precision"] = [np.mean(precisions)]
101    results["Recall"] = [np.mean(recalls)]
102    results["F1 Score"] = [np.mean(f1s)]
103
104    results = pd.DataFrame.from_dict(results)
105
106    if save_path is not None:
107        os.makedirs(Path(save_path).parent, exist_ok=True)
108        results.to_csv(save_path, index=False)
109
110    return results
111
112
113def run_evaluation_for_iterative_prompting(
114    gt_paths: List[Union[os.PathLike, str]],
115    prediction_root: Union[os.PathLike, str],
116    experiment_folder: Union[os.PathLike, str],
117    start_with_box_prompt: bool = False,
118    overwrite_results: bool = False,
119    use_masks: bool = False,
120) -> pd.DataFrame:
121    """Run evaluation for iterative prompt-based segmentation predictions.
122
123    Args:
124        gt_paths: The list of paths to ground-truth images.
125        prediction_root: The folder with the iterative prompt-based instance segmentations to evaluate.
126        experiment_folder: The folder where all the experiment results are stored.
127        start_with_box_prompt: Whether to evaluate on experiments with iterative prompting starting with box.
128        overwrite_results: Whether to overwrite the results to update them with the new evaluation run.
129        use_masks: Whether to use masks for iterative prompting.
130
131    Returns:
132        A DataFrame that contains the evaluation results.
133    """
134    assert os.path.exists(prediction_root), prediction_root
135
136    # Save the results in the experiment folder
137    result_folder = os.path.join(
138        experiment_folder, "results", "iterative_prompting_" + ("with" if use_masks else "without") + "_mask"
139    )
140    os.makedirs(result_folder, exist_ok=True)
141
142    csv_path = os.path.join(
143        result_folder,
144        "iterative_prompts_start_box.csv" if start_with_box_prompt else "iterative_prompts_start_point.csv"
145    )
146
147    # Overwrite the previously saved results
148    if overwrite_results and os.path.exists(csv_path):
149        os.remove(csv_path)
150
151    # If the results have already been computed, there is no need to re-run the evaluation.
152    if os.path.exists(csv_path):
153        print(f"Results with iterative prompting for interactive segmentation are already stored at '{csv_path}'.")
154        return pd.read_csv(csv_path)
155
156    list_of_results = []
157    prediction_folders = sorted(glob(os.path.join(prediction_root, "iteration*")))
158    for pred_folder in prediction_folders:
159        print("Evaluating", os.path.split(pred_folder)[-1])
160        pred_paths = sorted(glob(os.path.join(pred_folder, "*")))
161        result = run_evaluation(gt_paths=gt_paths, prediction_paths=pred_paths, save_path=None)
162        list_of_results.append(result)
163
164    res_df = pd.concat(list_of_results, ignore_index=True)
165    res_df.to_csv(csv_path)
166
167    return res_df
168
169
170def main():
171    """@private"""
172    import argparse
173
174    parser = argparse.ArgumentParser(description="Evaluate segmentations from the Segment Anything model on custom data.")
175
176    # labels and predictions for quantitative evaluation.
177    parser.add_argument(
178        "--labels", required=True, type=str, nargs="+",
179        help="Filepath(s) to ground-truth labels or the directory where the label data is stored."
180    )
181    parser.add_argument(
182        "--predictions", required=True, type=str, nargs="+",
183        help="Filepath to predicted labels or the directory where the predicted label data is stored."
184    )
185    parser.add_argument(
186        "--label_key", type=str, default=None,
187        help="The key for accessing ground-truth label data, either a pattern / wildcard or a key for 'elf.io.open_file'."
188    )
189    parser.add_argument(
190        "--prediction_key", type=str, default=None,
191        help="The key for accessing predicted label data, either a pattern / wildcard or a key for 'elf.io.open_file'."
192    )
193    parser.add_argument(
194        "-o", "--output_path", type=str, default=None,
195        help="The filepath to store the evaluation results. Currently, the results are stored in a 'csv' file."
196    )
197    parser.add_argument(
198        "--threshold", default=None, type=float, nargs="+",
199        help="The choice of overlap threshold(s) for calculating the segmentation accuracy. By default, "
200        "np.arange(0.5, 1., 0.05) is used to provide the mean segmentation accuracy score over all values.",
201    )
202    parser.add_argument(
203        "-v", "--verbose", action="store_true", help="Whether to run the evaluation verbosely."
204    )
205
206    # TODO: We can extend this in the future to other metrics, e.g. the dice score.
207    # NOTE: This argument is not exposed to the user atm.
208    # parser.add_argument(
209    #     "--metric", type=str, default="segmentation_accuracy", choices=("segmentation_accuracy"),
210    #     help="The choice of metric for evaluation. By default, it computes segmentation accuracy "
211    #     "for instance segmentation."
212    # )
213
214    args = parser.parse_args()
215
216    # Check whether the inputs are as expected.
217    def _get_inputs_from_paths(paths, key):
218        fpaths = []
219        for path in paths:
220            if os.path.isfile(path):  # It is a single filepath; check whether we can access it via 'elf'.
221                fpaths.append(path if key is None else load_image_data(path=path, key=key))
222            else:  # Otherwise, the path is a directory; fetch all inputs matching the provided pattern.
223                assert key is not None, \
224                    f"You must provide a wildcard / pattern as the filepath '{os.path.abspath(path)}' is a directory."
225                fpaths.extend(natsorted(glob(os.path.join(path, key))))
226
227        return fpaths
228
229    labels = _get_inputs_from_paths(args.labels, args.label_key)
230    predictions = _get_inputs_from_paths(args.predictions, args.prediction_key)
231    assert labels and len(labels) == len(predictions)
232
233    # If an output path is passed, check whether it points to a csv file.
234    output_path = args.output_path
235    if output_path is not None:
236        if not os.path.isfile(output_path):  # If it is a directory, store this in "<OUTPUT_PATH>/results.csv"
237            os.makedirs(output_path, exist_ok=True)
238            output_path = os.path.join(output_path, "results.csv")
239
240        if not output_path.endswith(".csv"):  # If it is a filepath missing extension / with a different extension.
241            output_path = str(Path(output_path).with_suffix(".csv"))  # Limit support to csv files for now.
242
243    # Run evaluation on labels and predictions.
244    results = run_evaluation(
245        gt_paths=labels,
246        prediction_paths=predictions,
247        save_path=output_path,
248        verbose=args.verbose,
249        thresholds=args.threshold,
250    )
251
252    print("The evaluation results for the predictions are:")
253    print(results)
254
255    if args.verbose and output_path is not None:
256        print(f"The evaluation results have been stored at '{os.path.abspath(output_path)}'.")
def run_evaluation( gt_paths: List[Union[numpy.ndarray, os.PathLike, str]], prediction_paths: List[Union[numpy.ndarray, os.PathLike, str]], save_path: Union[str, os.PathLike, NoneType] = None, verbose: bool = True, thresholds: Optional[List[float]] = None) -> pandas.core.frame.DataFrame:

Run evaluation for instance segmentation predictions.

Arguments:
  • gt_paths: The list of paths to ground-truth images.
  • prediction_paths: The list of paths with the instance segmentations to evaluate.
  • save_path: Optional path for saving the results.
  • verbose: Whether to print the progress.
  • thresholds: The choice of overlap thresholds.
Returns:
  A DataFrame that contains the evaluation results.
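A usage sketch for evaluating predictions stored as label image files; the folder names, file pattern and output path below are hypothetical placeholders.

    from glob import glob
    from natsort import natsorted

    from micro_sam.evaluation.evaluation import run_evaluation

    # Pair ground truth and predictions via naturally sorted filenames.
    gt_paths = natsorted(glob("data/ground_truth/*.tif"))
    prediction_paths = natsorted(glob("data/predictions/*.tif"))

    results = run_evaluation(
        gt_paths=gt_paths,
        prediction_paths=prediction_paths,
        save_path="results/instance_segmentation.csv",  # Loaded instead of re-computed if it already exists.
        verbose=True,
    )
    print(results)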

def run_evaluation_for_iterative_prompting( gt_paths: List[Union[str, os.PathLike]], prediction_root: Union[os.PathLike, str], experiment_folder: Union[os.PathLike, str], start_with_box_prompt: bool = False, overwrite_results: bool = False, use_masks: bool = False) -> pandas.core.frame.DataFrame:

Run evaluation for iterative prompt-based segmentation predictions.

Arguments:
  • gt_paths: The list of paths to ground-truth images.
  • prediction_root: The folder with the iterative prompt-based instance segmentations to evaluate.
  • experiment_folder: The folder where all the experiment results are stored.
  • start_with_box_prompt: Whether to evaluate on experiments with iterative prompting starting with box.
  • overwrite_results: Whether to overwrite the results to update them with the new evaluation run.
  • use_masks: Whether to use masks for iterative prompting.
Returns:
  A DataFrame that contains the evaluation results.
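A usage sketch, assuming a hypothetical prediction_root that contains one subfolder per prompting iteration (matching the 'iteration*' pattern the function globs for); all paths below are placeholders.

    from glob import glob
    from natsort import natsorted

    from micro_sam.evaluation.evaluation import run_evaluation_for_iterative_prompting

    gt_paths = natsorted(glob("data/ground_truth/*.tif"))

    res_df = run_evaluation_for_iterative_prompting(
        gt_paths=gt_paths,
        prediction_root="experiment/predictions/start_with_point",  # Contains iteration00, iteration01, ...
        experiment_folder="experiment",
        start_with_box_prompt=False,  # The predictions start from point prompts.
        use_masks=False,
    )
    # One row per iteration; also written to
    # 'experiment/results/iterative_prompting_without_mask/iterative_prompts_start_point.csv'.
    print(res_df)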