micro_sam.evaluation.evaluation
Evaluation functionality for segmentation predictions from micro_sam.evaluation.automatic_mask_generation
and micro_sam.evaluation.inference.
1"""Evaluation functionality for segmentation predictions from `micro_sam.evaluation.automatic_mask_generation` 2and `micro_sam.evaluation.inference`. 3""" 4 5import os 6from glob import glob 7from tqdm import tqdm 8from pathlib import Path 9from natsort import natsorted 10from typing import List, Optional, Union 11 12import numpy as np 13import pandas as pd 14import imageio.v3 as imageio 15from skimage.measure import label 16 17from elf.evaluation import mean_segmentation_accuracy, matching 18 19from ..util import load_image_data 20 21 22def _run_evaluation(gt_paths, prediction_paths, verbose=True, thresholds=None): 23 assert len(gt_paths) == len(prediction_paths) 24 25 msas, sa50s, sa75s, precisions, recalls, f1s = [], [], [], [], [], [] 26 for gt_path, pred_path in tqdm( 27 zip(gt_paths, prediction_paths), desc="Evaluate predictions", total=len(gt_paths), disable=not verbose 28 ): 29 30 if isinstance(gt_path, np.ndarray): 31 gt = gt_path 32 else: 33 assert os.path.exists(gt_path), gt_path 34 gt = imageio.imread(gt_path) 35 gt = label(gt) 36 37 if isinstance(pred_path, np.ndarray): 38 pred = pred_path 39 else: 40 assert os.path.exists(pred_path), pred_path 41 pred = imageio.imread(pred_path) 42 43 assert gt.shape == pred.shape, f"Expected {gt.shape}, got {pred.shape}" 44 msa, scores = mean_segmentation_accuracy(pred, gt, thresholds=thresholds, return_accuracies=True) 45 stats = matching(pred, gt) 46 msas.append(msa) 47 if thresholds is None: 48 sa50, sa75 = scores[0], scores[5] 49 sa50s.append(sa50), sa75s.append(sa75) 50 precisions.append(stats["precision"]) 51 recalls.append(stats["recall"]) 52 f1s.append(stats["f1"]) 53 54 if thresholds is None: 55 return (msas, sa50s, sa75s), (precisions, recalls, f1s) 56 else: 57 return msas, (precisions, recalls, f1s) 58 59 60def run_evaluation( 61 gt_paths: List[Union[np.ndarray, os.PathLike, str]], 62 prediction_paths: List[Union[np.ndarray, os.PathLike, str]], 63 save_path: Optional[Union[os.PathLike, str]] = None, 64 verbose: bool = True, 65 thresholds: Optional[List[float]] = None, 66) -> pd.DataFrame: 67 """Run evaluation for instance segmentation predictions. 68 69 Args: 70 gt_paths: The list of paths to ground-truth images. 71 prediction_paths: The list of paths with the instance segmentations to evaluate. 72 save_path: Optional path for saving the results. 73 verbose: Whether to print the progress. 74 thresholds: The choice of overlap thresholds. 75 76 Returns: 77 A DataFrame that contains the evaluation results. 78 """ 79 assert len(gt_paths) == len(prediction_paths) 80 # if a save_path is given and it already exists then just load it instead of running the eval 81 if save_path is not None and os.path.exists(save_path): 82 return pd.read_csv(save_path) 83 84 scores = _run_evaluation( 85 gt_paths=gt_paths, prediction_paths=prediction_paths, verbose=verbose, thresholds=thresholds 86 ) 87 88 sas, other_scores = scores # Separate all score into two categories. 
89 precisions, recalls, f1s = other_scores 90 91 if thresholds is None: 92 msas, sa50s, sa75s = sas 93 else: 94 msas = sas 95 96 results = {"mSA": [np.mean(msas)]} 97 if thresholds is None: 98 results["SA50"] = [np.mean(sa50s)] 99 results["SA75"] = [np.mean(sa75s)] 100 results["Precision"] = [np.mean(precisions)] 101 results["Recall"] = [np.mean(recalls)] 102 results["F1 Score"] = [np.mean(f1s)] 103 104 results = pd.DataFrame.from_dict(results) 105 106 if save_path is not None: 107 os.makedirs(Path(save_path).parent, exist_ok=True) 108 results.to_csv(save_path, index=False) 109 110 return results 111 112 113def run_evaluation_for_iterative_prompting( 114 gt_paths: List[Union[os.PathLike, str]], 115 prediction_root: Union[os.PathLike, str], 116 experiment_folder: Union[os.PathLike, str], 117 start_with_box_prompt: bool = False, 118 overwrite_results: bool = False, 119 use_masks: bool = False, 120) -> pd.DataFrame: 121 """Run evaluation for iterative prompt-based segmentation predictions. 122 123 Args: 124 gt_paths: The list of paths to ground-truth images. 125 prediction_root: The folder with the iterative prompt-based instance segmentations to evaluate. 126 experiment_folder: The folder where all the experiment results are stored. 127 start_with_box_prompt: Whether to evaluate on experiments with iterative prompting starting with box. 128 overwrite_results: Whether to overwrite the results to update them with the new evaluation run. 129 use_masks: Whether to use masks for iterative prompting. 130 131 Returns: 132 A DataFrame that contains the evaluation results. 133 """ 134 assert os.path.exists(prediction_root), prediction_root 135 136 # Save the results in the experiment folder 137 result_folder = os.path.join( 138 experiment_folder, "results", "iterative_prompting_" + ("with" if use_masks else "without") + "_mask" 139 ) 140 os.makedirs(result_folder, exist_ok=True) 141 142 csv_path = os.path.join( 143 result_folder, 144 "iterative_prompts_start_box.csv" if start_with_box_prompt else "iterative_prompts_start_point.csv" 145 ) 146 147 # Overwrite the previously saved results 148 if overwrite_results and os.path.exists(csv_path): 149 os.remove(csv_path) 150 151 # If the results have been computed already, it's not needed to re-run it again. 152 if os.path.exists(csv_path): 153 print(f"Results with iterative prompting for interactive segmentation are already stored at '{csv_path}'.") 154 return 155 156 list_of_results = [] 157 prediction_folders = sorted(glob(os.path.join(prediction_root, "iteration*"))) 158 for pred_folder in prediction_folders: 159 print("Evaluating", os.path.split(pred_folder)[-1]) 160 pred_paths = sorted(glob(os.path.join(pred_folder, "*"))) 161 result = run_evaluation(gt_paths=gt_paths, prediction_paths=pred_paths, save_path=None) 162 list_of_results.append(result) 163 164 res_df = pd.concat(list_of_results, ignore_index=True) 165 res_df.to_csv(csv_path) 166 167 return res_df 168 169 170def main(): 171 """@private""" 172 import argparse 173 174 parser = argparse.ArgumentParser(description="Evaluating segmentations from Segment Anything model on custom data.") 175 176 # labels and predictions for quantitative evaluation. 177 parser.add_argument( 178 "--labels", required=True, type=str, nargs="+", 179 help="Filepath(s) to ground-truth labels or the directory where the label data is stored." 
180 ) 181 parser.add_argument( 182 "--predictions", required=True, type=str, nargs="+", 183 help="Filepath to predicted labels or the directory where the predicted label data is stored." 184 ) 185 parser.add_argument( 186 "--label_key", type=str, default=None, 187 help="The key for accessing predicted label data, either a pattern / wildcard or with 'elf.io.open_file'. " 188 ) 189 parser.add_argument( 190 "--prediction_key", type=str, default=None, 191 help="The key for accessing ground-truth label data, either a pattern / wildcard or with 'elf.io.open_file'. " 192 ) 193 parser.add_argument( 194 "-o", "--output_path", type=str, default=None, 195 help="The filepath to store the evaluation results. The current support stores results in a 'csv' file." 196 ) 197 parser.add_argument( 198 "--threshold", default=None, type=float, nargs="+", 199 help="The choice of overlap threshold(s) for calculating the segmentation accuracy. By default, " 200 "np.arange(0.5, 1., 0.05) is used to provide the mean segmentation accurcy score over all values.", 201 ) 202 parser.add_argument( 203 "-v", "--verbose", action="store_true", help="Whether to allow verbosity of evaluation." 204 ) 205 206 # TODO: We can extend this in future for other metrics, eg. dice score, etc. 207 # NOTE: This argument is not exposed to the user atm. 208 # parser.add_argument( 209 # "--metric", type=str, default="segmentation_accuracy", choices=("segmentation_accuracy"), 210 # help="The choice of metric for evaluation. By default, it computes segmentation accuracy " 211 # "for instance segmentation." 212 # ) 213 214 args = parser.parse_args() 215 216 # Check whether the inputs are as expected. 217 def _get_inputs_from_paths(paths, key): 218 fpaths = [] 219 for path in paths: 220 if os.path.isfile(path): # it is just one filepath and we check whether we can access it via 'elf'. 221 fpaths.append(path if key is None else load_image_data(path=path, key=key)) 222 else: # otherwise, path is a directory, fetch all inputs provided with a pattern. 223 assert key is not None, \ 224 f"You must provide a wildcard / pattern as the filepath '{os.path.abspath(path)}' is a directory." 225 fpaths.extend(natsorted(glob(os.path.join(path, key)))) 226 227 return fpaths 228 229 labels = _get_inputs_from_paths(args.labels, args.label_key) 230 predictions = _get_inputs_from_paths(args.predictions, args.prediction_key) 231 assert labels and len(labels) == len(predictions) 232 233 # Check whether output path is a csv or not, if passed. 234 output_path = args.output_path 235 if output_path is not None: 236 if not os.path.isfile(output_path): # If it is a directory, store this in "<OUTPUT_PATH>/results.csv" 237 os.makedirs(output_path, exist_ok=True) 238 output_path = os.path.join(output_path, "results.csv") 239 240 if not output_path.endswith(".csv"): # If it is a filepath missing extension / with a different extension. 241 output_path = str(Path(output_path).with_suffix(".csv")) # Limit supports to csv files for now. 242 243 # Run evaluation on labels and predictions. 244 results = run_evaluation( 245 gt_paths=labels, 246 prediction_paths=predictions, 247 save_path=output_path, 248 verbose=args.verbose, 249 thresholds=args.threshold, 250 ) 251 252 print("The evaluation results for the predictions are:") 253 print(results) 254 255 if args.verbose and output_path is not None: 256 print(f"The evaluation results have been stored at '{os.path.abspath(output_path)}'.")
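The heavy lifting is done by `mean_segmentation_accuracy` and `matching` from `elf.evaluation`: the former yields the mean segmentation accuracy (and, with the default thresholds `np.arange(0.5, 1., 0.05)`, the per-threshold scores from which SA50 and SA75 are taken), the latter yields precision, recall and F1 score for matched instances. Below is a minimal sketch of what `_run_evaluation` computes for a single image pair; the tiny synthetic label arrays are made up purely for illustration.

import numpy as np
from elf.evaluation import mean_segmentation_accuracy, matching

# Tiny synthetic instance segmentations: 0 is background, positive integers are object ids.
gt = np.zeros((8, 8), dtype="uint32")
gt[1:4, 1:4] = 1
gt[5:8, 5:8] = 2

pred = np.zeros((8, 8), dtype="uint32")
pred[1:4, 1:5] = 1  # Slightly larger than the first ground-truth object.
pred[5:7, 5:8] = 2  # Slightly smaller than the second one.

# Mean segmentation accuracy over the default overlap thresholds, plus the per-threshold
# accuracies (index 0 corresponds to SA50, index 5 to SA75).
msa, scores = mean_segmentation_accuracy(pred, gt, return_accuracies=True)

# Precision, recall and F1 score from matching predicted to ground-truth instances.
stats = matching(pred, gt)

print(msa, scores[0], scores[5])
print(stats["precision"], stats["recall"], stats["f1"])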
def run_evaluation(
    gt_paths: List[Union[numpy.ndarray, os.PathLike, str]],
    prediction_paths: List[Union[numpy.ndarray, os.PathLike, str]],
    save_path: Union[str, os.PathLike, NoneType] = None,
    verbose: bool = True,
    thresholds: Optional[List[float]] = None
) -> pandas.core.frame.DataFrame:
Run evaluation for instance segmentation predictions.
Arguments:
- gt_paths: The list of paths to ground-truth images.
- prediction_paths: The list of paths with the instance segmentations to evaluate.
- save_path: Optional path for saving the results.
- verbose: Whether to print the progress.
- thresholds: The choice of overlap thresholds.
Returns:
A DataFrame that contains the evaluation results.
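A minimal usage sketch follows; the file locations are hypothetical placeholders, and per the signature, in-memory numpy arrays can be passed in place of file paths.

from glob import glob
from natsort import natsorted
from micro_sam.evaluation.evaluation import run_evaluation

# Hypothetical locations of ground-truth labels and predicted instance segmentations.
gt_paths = natsorted(glob("data/ground_truth/*.tif"))
prediction_paths = natsorted(glob("data/predictions/*.tif"))

# Averages mSA, SA50, SA75, precision, recall and F1 score over all image pairs and writes
# them to the csv; if the csv already exists, it is loaded instead of re-running the evaluation.
results = run_evaluation(
    gt_paths=gt_paths,
    prediction_paths=prediction_paths,
    save_path="results/evaluation.csv",
)
print(results)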
def run_evaluation_for_iterative_prompting(
    gt_paths: List[Union[str, os.PathLike]],
    prediction_root: Union[os.PathLike, str],
    experiment_folder: Union[os.PathLike, str],
    start_with_box_prompt: bool = False,
    overwrite_results: bool = False,
    use_masks: bool = False
) -> pandas.core.frame.DataFrame:
Run evaluation for iterative prompt-based segmentation predictions.
Arguments:
- gt_paths: The list of paths to ground-truth images.
- prediction_root: The folder with the iterative prompt-based instance segmentations to evaluate.
- experiment_folder: The folder where all the experiment results are stored.
- start_with_box_prompt: Whether to evaluate on experiments with iterative prompting starting with box.
- overwrite_results: Whether to overwrite the results to update them with the new evaluation run.
- use_masks: Whether to use masks for iterative prompting.
Returns:
A DataFrame that contains the evaluation results.
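A usage sketch, assuming a prediction root with one `iteration0`, `iteration1`, ... subfolder per prompting iteration, as produced by the iterative-prompting inference in `micro_sam.evaluation.inference`; all paths below are placeholders.

from glob import glob
from natsort import natsorted
from micro_sam.evaluation.evaluation import run_evaluation_for_iterative_prompting

gt_paths = natsorted(glob("data/ground_truth/*.tif"))

# Evaluates every iteration* subfolder and returns one row per prompting iteration. The csv is
# stored under '<experiment_folder>/results/iterative_prompting_without_mask/iterative_prompts_start_point.csv'.
res_df = run_evaluation_for_iterative_prompting(
    gt_paths=gt_paths,
    prediction_root="experiments/start_with_point",  # Contains iteration0/, iteration1/, ...
    experiment_folder="experiments",
    start_with_box_prompt=False,
    use_masks=False,
)
print(res_df)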