micro_sam.evaluation.evaluation
Evaluation functionality for segmentation predictions from micro_sam.evaluation.automatic_mask_generation and micro_sam.evaluation.inference.
1"""Evaluation functionality for segmentation predictions from `micro_sam.evaluation.automatic_mask_generation` 2and `micro_sam.evaluation.inference`. 3""" 4 5import os 6from glob import glob 7from tqdm import tqdm 8from pathlib import Path 9from natsort import natsorted 10from typing import List, Optional, Union 11 12import numpy as np 13import pandas as pd 14import imageio.v3 as imageio 15from skimage.measure import label 16 17from elf.evaluation import mean_segmentation_accuracy 18 19from ..util import load_image_data 20 21 22def _run_evaluation(gt_paths, prediction_paths, verbose=True, thresholds=None): 23 assert len(gt_paths) == len(prediction_paths) 24 msas, sa50s, sa75s = [], [], [] 25 26 for gt_path, pred_path in tqdm( 27 zip(gt_paths, prediction_paths), desc="Evaluate predictions", total=len(gt_paths), disable=not verbose 28 ): 29 30 if isinstance(gt_path, np.ndarray): 31 gt = gt_path 32 else: 33 assert os.path.exists(gt_path), gt_path 34 gt = imageio.imread(gt_path) 35 gt = label(gt) 36 37 if isinstance(pred_path, np.ndarray): 38 pred = pred_path 39 else: 40 assert os.path.exists(pred_path), pred_path 41 pred = imageio.imread(pred_path) 42 43 msa, scores = mean_segmentation_accuracy(pred, gt, thresholds=thresholds, return_accuracies=True) 44 msas.append(msa) 45 if thresholds is None: 46 sa50, sa75 = scores[0], scores[5] 47 sa50s.append(sa50), sa75s.append(sa75) 48 49 if thresholds is None: 50 return msas, sa50s, sa75s 51 else: 52 return msas 53 54 55def run_evaluation( 56 gt_paths: List[Union[np.ndarray, os.PathLike, str]], 57 prediction_paths: List[Union[np.ndarray, os.PathLike, str]], 58 save_path: Optional[Union[os.PathLike, str]] = None, 59 verbose: bool = True, 60 thresholds: Optional[List[float]] = None, 61) -> pd.DataFrame: 62 """Run evaluation for instance segmentation predictions. 63 64 Args: 65 gt_paths: The list of paths to ground-truth images. 66 prediction_paths: The list of paths with the instance segmentations to evaluate. 67 save_path: Optional path for saving the results. 68 verbose: Whether to print the progress. 69 thresholds: The choice of overlap thresholds. 70 71 Returns: 72 A DataFrame that contains the evaluation results. 73 """ 74 assert len(gt_paths) == len(prediction_paths) 75 # if a save_path is given and it already exists then just load it instead of running the eval 76 if save_path is not None and os.path.exists(save_path): 77 return pd.read_csv(save_path) 78 79 scores = _run_evaluation( 80 gt_paths=gt_paths, prediction_paths=prediction_paths, verbose=verbose, thresholds=thresholds 81 ) 82 if thresholds is None: 83 msas, sa50s, sa75s = scores 84 else: 85 msas = scores 86 87 results = {"mSA": [np.mean(msas)]} 88 if thresholds is None: 89 results["SA50"] = [np.mean(sa50s)] 90 results["SA75"] = [np.mean(sa75s)] 91 92 results = pd.DataFrame.from_dict(results) 93 94 if save_path is not None: 95 os.makedirs(Path(save_path).parent, exist_ok=True) 96 results.to_csv(save_path, index=False) 97 98 return results 99 100 101def run_evaluation_for_iterative_prompting( 102 gt_paths: List[Union[os.PathLike, str]], 103 prediction_root: Union[os.PathLike, str], 104 experiment_folder: Union[os.PathLike, str], 105 start_with_box_prompt: bool = False, 106 overwrite_results: bool = False, 107 use_masks: bool = False, 108) -> pd.DataFrame: 109 """Run evaluation for iterative prompt-based segmentation predictions. 110 111 Args: 112 gt_paths: The list of paths to ground-truth images. 
113 prediction_root: The folder with the iterative prompt-based instance segmentations to evaluate. 114 experiment_folder: The folder where all the experiment results are stored. 115 start_with_box_prompt: Whether to evaluate on experiments with iterative prompting starting with box. 116 overwrite_results: Whether to overwrite the results to update them with the new evaluation run. 117 use_masks: Whether to use masks for iterative prompting. 118 119 Returns: 120 A DataFrame that contains the evaluation results. 121 """ 122 assert os.path.exists(prediction_root), prediction_root 123 124 # Save the results in the experiment folder 125 result_folder = os.path.join( 126 experiment_folder, "results", "iterative_prompting_" + ("with" if use_masks else "without") + "_mask" 127 ) 128 os.makedirs(result_folder, exist_ok=True) 129 130 csv_path = os.path.join( 131 result_folder, 132 "iterative_prompts_start_box.csv" if start_with_box_prompt else "iterative_prompts_start_point.csv" 133 ) 134 135 # Overwrite the previously saved results 136 if overwrite_results and os.path.exists(csv_path): 137 os.remove(csv_path) 138 139 # If the results have been computed already, it's not needed to re-run it again. 140 if os.path.exists(csv_path): 141 print(f"Results with iterative prompting for interactive segmentation are already stored at '{csv_path}'.") 142 return 143 144 list_of_results = [] 145 prediction_folders = sorted(glob(os.path.join(prediction_root, "iteration*"))) 146 for pred_folder in prediction_folders: 147 print("Evaluating", os.path.split(pred_folder)[-1]) 148 pred_paths = sorted(glob(os.path.join(pred_folder, "*"))) 149 result = run_evaluation(gt_paths=gt_paths, prediction_paths=pred_paths, save_path=None) 150 list_of_results.append(result) 151 152 res_df = pd.concat(list_of_results, ignore_index=True) 153 res_df.to_csv(csv_path) 154 155 return res_df 156 157 158def main(): 159 """@private""" 160 import argparse 161 162 parser = argparse.ArgumentParser(description="Evaluating segmentations from Segment Anything model on custom data.") 163 164 # labels and predictions for quantitative evaluation. 165 parser.add_argument( 166 "--labels", required=True, type=str, nargs="+", 167 help="Filepath(s) to ground-truth labels or the directory where the label data is stored." 168 ) 169 parser.add_argument( 170 "--predictions", required=True, type=str, nargs="+", 171 help="Filepath to predicted labels or the directory where the predicted label data is stored." 172 ) 173 parser.add_argument( 174 "--label_key", type=str, default=None, 175 help="The key for accessing predicted label data, either a pattern / wildcard or with 'elf.io.open_file'. " 176 ) 177 parser.add_argument( 178 "--prediction_key", type=str, default=None, 179 help="The key for accessing ground-truth label data, either a pattern / wildcard or with 'elf.io.open_file'. " 180 ) 181 parser.add_argument( 182 "-o", "--output_path", type=str, default=None, 183 help="The filepath to store the evaluation results. The current support stores results in a 'csv' file." 184 ) 185 parser.add_argument( 186 "--threshold", default=None, type=float, nargs="+", 187 help="The choice of overlap threshold(s) for calculating the segmentation accuracy. By default, " 188 "np.arange(0.5, 1., 0.05) is used to provide the mean segmentation accurcy score over all values.", 189 ) 190 parser.add_argument( 191 "-v", "--verbose", action="store_true", help="Whether to allow verbosity of evaluation." 192 ) 193 194 # TODO: We can extend this in future for other metrics, eg. 
dice score, etc. 195 # NOTE: This argument is not exposed to the user atm. 196 # parser.add_argument( 197 # "--metric", type=str, default="segmentation_accuracy", choices=("segmentation_accuracy"), 198 # help="The choice of metric for evaluation. By default, it computes segmentation accuracy " 199 # "for instance segmentation." 200 # ) 201 202 args = parser.parse_args() 203 204 # Check whether the inputs are as expected. 205 def _get_inputs_from_paths(paths, key): 206 fpaths = [] 207 for path in paths: 208 if os.path.isfile(path): # it is just one filepath and we check whether we can access it via 'elf'. 209 fpaths.append(path if key is None else load_image_data(path=path, key=key)) 210 else: # otherwise, path is a directory, fetch all inputs provided with a pattern. 211 assert key is not None, \ 212 f"You must provide a wildcard / pattern as the filepath '{os.path.abspath(path)}' is a directory." 213 fpaths.extend(natsorted(glob(os.path.join(path, key)))) 214 215 return fpaths 216 217 labels = _get_inputs_from_paths(args.labels, args.label_key) 218 predictions = _get_inputs_from_paths(args.predictions, args.prediction_key) 219 assert labels and len(labels) == len(predictions) 220 221 # Check whether output path is a csv or not, if passed. 222 output_path = args.output_path 223 if output_path is not None: 224 if not os.path.isfile(output_path): # If it is a directory, store this in "<OUTPUT_PATH>/results.csv" 225 os.makedirs(output_path, exist_ok=True) 226 output_path = os.path.join(output_path, "results.csv") 227 228 if not output_path.endswith(".csv"): # If it is a filepath missing extension / with a different extension. 229 output_path = str(Path(output_path).with_suffix(".csv")) # Limit supports to csv files for now. 230 231 # Run evaluation on labels and predictions. 232 results = run_evaluation( 233 gt_paths=labels, 234 prediction_paths=predictions, 235 save_path=output_path, 236 verbose=args.verbose, 237 thresholds=args.threshold, 238 ) 239 240 print("The evaluation results for the predictions are:") 241 print(results) 242 243 if args.verbose and output_path is not None: 244 print(f"The evaluation results have been stored at '{os.path.abspath(output_path)}'.")
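A minimal usage sketch, not part of the module source: since _run_evaluation checks for np.ndarray inputs, run_evaluation also accepts in-memory label images instead of file paths. The arrays below are synthetic placeholders.

import numpy as np
from micro_sam.evaluation.evaluation import run_evaluation

# Two tiny synthetic label images (0 = background, 1..n = instance ids).
gt = np.zeros((64, 64), dtype="uint32")
gt[8:24, 8:24] = 1
gt[40:56, 40:56] = 2

pred = np.zeros((64, 64), dtype="uint32")
pred[9:25, 9:25] = 1    # slightly shifted instance
pred[40:56, 40:56] = 2  # exact match

# Returns a one-row DataFrame with the columns mSA, SA50 and SA75.
results = run_evaluation(gt_paths=[gt], prediction_paths=[pred], verbose=False)
print(results)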
def run_evaluation(
    gt_paths: List[Union[np.ndarray, os.PathLike, str]],
    prediction_paths: List[Union[np.ndarray, os.PathLike, str]],
    save_path: Optional[Union[os.PathLike, str]] = None,
    verbose: bool = True,
    thresholds: Optional[List[float]] = None,
) -> pd.DataFrame:
Run evaluation for instance segmentation predictions.
Arguments:
- gt_paths: The list of paths to ground-truth images.
- prediction_paths: The list of paths with the instance segmentations to evaluate.
- save_path: Optional path for saving the results.
- verbose: Whether to print the progress.
- thresholds: The choice of overlap thresholds.
Returns:
A DataFrame that contains the evaluation results.
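A file-based usage sketch for run_evaluation; the paths and threshold values below are placeholders, not values prescribed by micro_sam.

from glob import glob
from natsort import natsorted
from micro_sam.evaluation.evaluation import run_evaluation

# Placeholder locations for ground-truth labels and instance segmentation predictions.
gt_paths = natsorted(glob("data/labels/*.tif"))
pred_paths = natsorted(glob("data/predictions/*.tif"))

# With the default thresholds=None the DataFrame contains mSA, SA50 and SA75;
# with explicit thresholds only the mSA column is reported.
results = run_evaluation(
    gt_paths=gt_paths,
    prediction_paths=pred_paths,
    save_path="results/instance_segmentation.csv",  # loaded instead of recomputed if it already exists
    thresholds=[0.5, 0.75, 0.9],
)
print(results)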
def run_evaluation_for_iterative_prompting(
    gt_paths: List[Union[os.PathLike, str]],
    prediction_root: Union[os.PathLike, str],
    experiment_folder: Union[os.PathLike, str],
    start_with_box_prompt: bool = False,
    overwrite_results: bool = False,
    use_masks: bool = False,
) -> pd.DataFrame:
Run evaluation for iterative prompt-based segmentation predictions.
Arguments:
- gt_paths: The list of paths to ground-truth images.
- prediction_root: The folder with the iterative prompt-based instance segmentations to evaluate.
- experiment_folder: The folder where all the experiment results are stored.
- start_with_box_prompt: Whether to evaluate on experiments with iterative prompting starting with box.
- overwrite_results: Whether to overwrite the results to update them with the new evaluation run.
- use_masks: Whether to use masks for iterative prompting.
Returns:
A DataFrame that contains the evaluation results.
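A usage sketch for run_evaluation_for_iterative_prompting, assuming a placeholder layout in which prediction_root contains per-iteration subfolders matching "iteration*" (e.g. iteration00, iteration01, ...), each holding one prediction file per ground-truth image; the folder names below are hypothetical.

from glob import glob
from natsort import natsorted
from micro_sam.evaluation.evaluation import run_evaluation_for_iterative_prompting

gt_paths = natsorted(glob("data/labels/*.tif"))

# Predictions are globbed and sorted per iteration folder, so their filenames
# should sort into the same order as gt_paths.
res_df = run_evaluation_for_iterative_prompting(
    gt_paths=gt_paths,
    prediction_root="experiment/start_with_point",  # contains iteration00, iteration01, ...
    experiment_folder="experiment",
    start_with_box_prompt=False,
)
# One row per iteration; the table is also written to
# experiment/results/iterative_prompting_without_mask/iterative_prompts_start_point.csv
print(res_df)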