diff --git a/datasets/toy/configs.py b/datasets/toy/configs.py index b30a08e..cb985e9 100644 --- a/datasets/toy/configs.py +++ b/datasets/toy/configs.py @@ -1,490 +1,490 @@ #!/usr/bin/env python # Copyright 2019 Division of Medical Image Computing, German Cancer Research Center (DKFZ). # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== import sys import os sys.path.append(os.path.dirname(os.path.realpath(__file__))) import numpy as np from default_configs import DefaultConfigs from collections import namedtuple boxLabel = namedtuple('boxLabel', ["name", "color"]) Label = namedtuple("Label", ['id', 'name', 'shape', 'radius', 'color', 'regression', 'ambiguities', 'gt_distortion']) binLabel = namedtuple("binLabel", ['id', 'name', 'color', 'bin_vals']) class Configs(DefaultConfigs): def __init__(self, server_env=None): super(Configs, self).__init__(server_env) ######################### # Prepro # ######################### self.pp_rootdir = os.path.join('/home/gregor/datasets/toy', "cyl1ps_dev") self.pp_npz_dir = self.pp_rootdir+"_npz" self.pre_crop_size = [320,320,8] #y,x,z; determines pp data shape (2D easily implementable, but only 3D for now) self.min_2d_radius = 6 #in pixels self.n_train_samples, self.n_test_samples = 1200, 1000 # not actually real one-hot encoding (ohe) but contains more info: roi-overlap only within classes. self.pp_create_ohe_seg = False self.pp_empty_samples_ratio = 0.1 self.pp_place_radii_mid_bin = True self.pp_only_distort_2d = True # outer-most intensity of blurred radii, relative to inner-object intensity. <1 for decreasing, > 1 for increasing. # e.g.: setting 0.1 means blurred edge has min intensity 10% as large as inner-object intensity. self.pp_blur_min_intensity = 0.2 self.max_instances_per_sample = 1 #how many max instances over all classes per sample (img if 2d, vol if 3d) self.max_instances_per_class = self.max_instances_per_sample # how many max instances per image per class self.noise_scale = 0. # std-dev of gaussian noise self.ambigs_sampling = "gaussian" #"gaussian" or "uniform" """ radius_calib: gt distort for calibrating uncertainty. Range of gt distortion is inferable from image by distinguishing it from the rest of the object. blurring width around edge will be shifted so that symmetric rel to orig radius. blurring scale: if self.ambigs_sampling is uniform, distribution's non-zero range (b-a) will be sqrt(12)*scale since uniform dist has variance (b-a)²/12. b,a will be placed symmetrically around unperturbed radius. if sampling is gaussian, then scale parameter sets one std dev, i.e., blurring width will be orig_radius * std_dev * 2. """ self.ambiguities = { #set which classes to apply which ambs to below in class labels #choose out of: 'outer_radius', 'inner_radius', 'radii_relations'. #kind #probability #scale (gaussian std, relative to unperturbed value) #"outer_radius": (1., 0.5), #"outer_radius_xy": (1., 0.5), #"inner_radius": (0.5, 0.1), #"radii_relations": (0.5, 0.1), "radius_calib": (1., 1./6) } # shape choices: 'cylinder', 'block' # id, name, shape, radius, color, regression, ambiguities, gt_distortion self.pp_classes = [Label(1, 'cylinder', 'cylinder', ((6,6,1),(40,40,8)), (*self.blue, 1.), "radius_2d", (), ()), #Label(2, 'block', 'block', ((6,6,1),(40,40,8)), (*self.aubergine,1.), "radii_2d", (), ('radius_calib',)) ] ######################### # I/O # ######################### self.data_sourcedir = '/home/gregor/datasets/toy/cyl1ps_dev' if server_env: self.data_sourcedir = '/datasets/data_ramien/toy/cyl1ps_dev_npz' self.test_data_sourcedir = os.path.join(self.data_sourcedir, 'test') self.data_sourcedir = os.path.join(self.data_sourcedir, "train") self.info_df_name = 'info_df.pickle' # one out of ['mrcnn', 'retina_net', 'retina_unet', 'detection_unet', 'ufrcnn', 'detection_fpn']. self.model = 'mrcnn' self.model_path = 'models/{}.py'.format(self.model if not 'retina' in self.model else 'retina_net') self.model_path = os.path.join(self.source_dir, self.model_path) ######################### # Architecture # ######################### # one out of [2, 3]. dimension the model operates in. self.dim = 2 # 'class', 'regression', 'regression_bin', 'regression_ken_gal' # currently only tested mode is a single-task at a time (i.e., only one task in below list) # but, in principle, tasks could be combined (e.g., object classes and regression per class) self.prediction_tasks = ['class', ] self.start_filts = 48 if self.dim == 2 else 18 self.end_filts = self.start_filts * 4 if self.dim == 2 else self.start_filts * 2 self.res_architecture = 'resnet50' # 'resnet101' , 'resnet50' self.norm = 'instance_norm' # one of None, 'instance_norm', 'batch_norm' self.relu = 'relu' # one of 'xavier_uniform', 'xavier_normal', or 'kaiming_normal', None (=default = 'kaiming_uniform') self.weight_init = None self.regression_n_features = 1 # length of regressor target vector ######################### # Data Loader # ######################### self.num_epochs = 32 self.num_train_batches = 120 if self.dim == 2 else 180 self.batch_size = 8 if self.dim == 2 else 4 self.n_cv_splits = 4 # select modalities from preprocessed data self.channels = [0] self.n_channels = len(self.channels) # which channel (mod) to show as bg in plotting, will be extra added to batch if not in self.channels self.plot_bg_chan = 0 self.crop_margin = [20, 20, 1] # has to be smaller than respective patch_size//2 self.patch_size_2D = self.pre_crop_size[:2] self.patch_size_3D = self.pre_crop_size[:2]+[8] # patch_size to be used for training. pre_crop_size is the patch_size before data augmentation. self.patch_size = self.patch_size_2D if self.dim == 2 else self.patch_size_3D # ratio of free sampled batch elements before class balancing is triggered # (>0 to include "empty"/background patches.) self.batch_random_ratio = 0.2 self.balance_target = "class_targets" if 'class' in self.prediction_tasks else "rg_bin_targets" self.observables_patient = [] self.observables_rois = [] self.seed = 3 #for generating folds ############################# # Colors, Classes, Legends # ############################# self.plot_frequency = 4 binary_bin_labels = [binLabel(1, 'r<=25', (*self.green, 1.), (1,25)), binLabel(2, 'r>25', (*self.red, 1.), (25,))] quintuple_bin_labels = [binLabel(1, 'r2-10', (*self.green, 1.), (2,10)), binLabel(2, 'r10-20', (*self.yellow, 1.), (10,20)), binLabel(3, 'r20-30', (*self.orange, 1.), (20,30)), binLabel(4, 'r30-40', (*self.bright_red, 1.), (30,40)), binLabel(5, 'r>40', (*self.red, 1.), (40,))] # choose here if to do 2-way or 5-way regression-bin classification task_spec_bin_labels = quintuple_bin_labels self.class_labels = [ # regression: regression-task label, either value or "(x,y,z)_radius" or "radii". # ambiguities: name of above defined ambig to apply to image data (not gt); need to be iterables! # gt_distortion: name of ambig to apply to gt only; needs to be iterable! # #id #name #shape #radius #color #regression #ambiguities #gt_distortion Label( 0, 'bg', None, (0, 0, 0), (*self.white, 0.), (0, 0, 0), (), ())] if "class" in self.prediction_tasks: self.class_labels += self.pp_classes else: self.class_labels += [Label(1, 'object', 'object', ('various',), (*self.orange, 1.), ('radius_2d',), ("various",), ('various',))] if any(['regression' in task for task in self.prediction_tasks]): self.bin_labels = [binLabel(0, 'bg', (*self.white, 1.), (0,))] self.bin_labels += task_spec_bin_labels self.bin_id2label = {label.id: label for label in self.bin_labels} bins = [(min(label.bin_vals), max(label.bin_vals)) for label in self.bin_labels] self.bin_id2rg_val = {ix: [np.mean(bin)] for ix, bin in enumerate(bins)} self.bin_edges = [(bins[i][1] + bins[i + 1][0]) / 2 for i in range(len(bins) - 1)] self.bin_dict = {label.id: label.name for label in self.bin_labels if label.id != 0} if self.class_specific_seg: self.seg_labels = self.class_labels self.box_type2label = {label.name: label for label in self.box_labels} self.class_id2label = {label.id: label for label in self.class_labels} self.class_dict = {label.id: label.name for label in self.class_labels if label.id != 0} self.seg_id2label = {label.id: label for label in self.seg_labels} self.cmap = {label.id: label.color for label in self.seg_labels} self.plot_prediction_histograms = True self.plot_stat_curves = False self.has_colorchannels = False self.plot_class_ids = True self.num_classes = len(self.class_dict) self.num_seg_classes = len(self.seg_labels) ######################### # Data Augmentation # ######################### self.do_aug = True self.da_kwargs = { 'mirror': True, 'mirror_axes': tuple(np.arange(0, self.dim, 1)), 'do_elastic_deform': False, 'alpha': (500., 1500.), 'sigma': (40., 45.), 'do_rotation': False, 'angle_x': (0., 2 * np.pi), 'angle_y': (0., 0), 'angle_z': (0., 0), 'do_scale': False, 'scale': (0.8, 1.1), 'random_crop': False, 'rand_crop_dist': (self.patch_size[0] / 2. - 3, self.patch_size[1] / 2. - 3), 'border_mode_data': 'constant', 'border_cval_data': 0, 'order_data': 1 } if self.dim == 3: self.da_kwargs['do_elastic_deform'] = False self.da_kwargs['angle_x'] = (0, 0.0) self.da_kwargs['angle_y'] = (0, 0.0) # must be 0!! self.da_kwargs['angle_z'] = (0., 2 * np.pi) ######################### # Schedule / Selection # ######################### # decide whether to validate on entire patient volumes (like testing) or sampled patches (like training) # the former is morge accurate, while the latter is faster (depending on volume size) self.val_mode = 'val_sampling' # one of 'val_sampling' , 'val_patient' if self.val_mode == 'val_patient': self.max_val_patients = 220 # if 'all' iterates over entire val_set once. if self.val_mode == 'val_sampling': self.num_val_batches = 35 if self.dim==2 else 25 self.save_n_models = 2 self.min_save_thresh = 1 if self.dim == 2 else 1 # =wait time in epochs if "class" in self.prediction_tasks: self.model_selection_criteria = {name + "_ap": 1. for name in self.class_dict.values()} elif any("regression" in task for task in self.prediction_tasks): self.model_selection_criteria = {name + "_ap": 0.2 for name in self.class_dict.values()} self.model_selection_criteria.update({name + "_avp": 0.8 for name in self.class_dict.values()}) self.lr_decay_factor = 0.5 self.scheduling_patience = np.ceil(1800 / (self.num_train_batches * self.batch_size)) self.weight_decay = 1e-5 self.clip_norm = None # number or None ######################### # Testing / Plotting # ######################### self.test_aug_axes = (0,1,(0,1)) # None or list: choices are 0,1,(0,1) self.held_out_test_set = True self.max_test_patients = "all" # number or "all" for all self.test_against_exact_gt = True # only True implemented self.val_against_exact_gt = False # True is an unrealistic --> irrelevant scenario. self.report_score_level = ['rois'] # 'patient' or 'rois' (incl) self.patient_class_of_interest = 1 self.patient_bin_of_interest = 2 self.eval_bins_separately = False#"additionally" if not 'class' in self.prediction_tasks else False self.metrics = ['ap', 'auc', 'dice'] if any(['regression' in task for task in self.prediction_tasks]): self.metrics += ['avp', 'rg_MAE_weighted', 'rg_MAE_weighted_tp', 'rg_bin_accuracy_weighted', 'rg_bin_accuracy_weighted_tp'] if 'aleatoric' in self.model: self.metrics += ['rg_uncertainty', 'rg_uncertainty_tp', 'rg_uncertainty_tp_weighted'] self.evaluate_fold_means = True self.ap_match_ious = [0.5] # threshold(s) for considering a prediction as true positive self.min_det_thresh = 0.3 self.model_max_iou_resolution = 0.2 # aggregation method for test and val_patient predictions. # wbc = weighted box clustering as in https://arxiv.org/pdf/1811.08661.pdf, # nms = standard non-maximum suppression, or None = no clustering self.clustering = 'wbc' # iou thresh (exclusive!) for regarding two preds as concerning the same ROI self.clustering_iou = self.model_max_iou_resolution # has to be larger than desired possible overlap iou of model predictions self.merge_2D_to_3D_preds = False self.merge_3D_iou = self.model_max_iou_resolution self.n_test_plots = 1 # per fold and rank self.test_n_epochs = self.save_n_models # should be called n_test_ens, since is number of models to ensemble over during testing # is multiplied by (1 + nr of test augs) ######################### # Assertions # ######################### if not 'class' in self.prediction_tasks: assert self.num_classes == 1 ######################### # Add model specifics # ######################### {'mrcnn': self.add_mrcnn_configs, 'mrcnn_aleatoric': self.add_mrcnn_configs, 'retina_net': self.add_mrcnn_configs, 'retina_unet': self.add_mrcnn_configs, 'detection_unet': self.add_det_unet_configs, 'detection_fpn': self.add_det_fpn_configs }[self.model]() def rg_val_to_bin_id(self, rg_val): #only meant for isotropic radii!! # only 2D radii (x and y dims) or 1D (x or y) are expected return np.round(np.digitize(rg_val, self.bin_edges).mean()) def add_det_fpn_configs(self): self.learning_rate = [1 * 1e-4] * self.num_epochs self.dynamic_lr_scheduling = True self.scheduling_criterion = 'torch_loss' self.scheduling_mode = 'min' if "loss" in self.scheduling_criterion else 'max' self.n_roi_candidates = 4 if self.dim == 2 else 6 # max number of roi candidates to identify per image (slice in 2D, volume in 3D) # loss mode: either weighted cross entropy ('wce'), batch-wise dice loss ('dice), or the sum of both ('dice_wce') self.seg_loss_mode = 'wce' self.wce_weights = [1] * self.num_seg_classes if 'dice' in self.seg_loss_mode else [0.1, 1] self.fp_dice_weight = 1 if self.dim == 2 else 1 # if <1, false positive predictions in foreground are penalized less. self.detection_min_confidence = 0.05 # how to determine score of roi: 'max' or 'median' self.score_det = 'max' def add_det_unet_configs(self): self.learning_rate = [1 * 1e-4] * self.num_epochs self.dynamic_lr_scheduling = True self.scheduling_criterion = "torch_loss" self.scheduling_mode = 'min' if "loss" in self.scheduling_criterion else 'max' # max number of roi candidates to identify per image (slice in 2D, volume in 3D) self.n_roi_candidates = 4 if self.dim == 2 else 6 # loss mode: either weighted cross entropy ('wce'), batch-wise dice loss ('dice), or the sum of both ('dice_wce') self.seg_loss_mode = 'wce' self.wce_weights = [1] * self.num_seg_classes if 'dice' in self.seg_loss_mode else [0.1, 1] # if <1, false positive predictions in foreground are penalized less. self.fp_dice_weight = 1 if self.dim == 2 else 1 self.detection_min_confidence = 0.05 # how to determine score of roi: 'max' or 'median' self.score_det = 'max' self.init_filts = 32 self.kernel_size = 3 # ks for horizontal, normal convs self.kernel_size_m = 2 # ks for max pool self.pad = "same" # "same" or integer, padding of horizontal convs def add_mrcnn_configs(self): self.learning_rate = [1e-4] * self.num_epochs self.dynamic_lr_scheduling = True # with scheduler set in exec self.scheduling_criterion = max(self.model_selection_criteria, key=self.model_selection_criteria.get) self.scheduling_mode = 'min' if "loss" in self.scheduling_criterion else 'max' # number of classes for network heads: n_foreground_classes + 1 (background) self.head_classes = self.num_classes + 1 if 'class' in self.prediction_tasks else 2 # feed +/- n neighbouring slices into channel dimension. set to None for no context. self.n_3D_context = None if self.n_3D_context is not None and self.dim == 2: self.n_channels *= (self.n_3D_context * 2 + 1) self.detect_while_training = True # disable the re-sampling of mask proposals to original size for speed-up. # since evaluation is detection-driven (box-matching) and not instance segmentation-driven (iou-matching), # mask outputs are optional. self.return_masks_in_train = True self.return_masks_in_val = True self.return_masks_in_test = True # feature map strides per pyramid level are inferred from architecture. anchor scales are set accordingly. self.backbone_strides = {'xy': [4, 8, 16, 32], 'z': [1, 2, 4, 8]} # anchor scales are chosen according to expected object sizes in data set. Default uses only one anchor scale # per pyramid level. (outer list are pyramid levels (corresponding to BACKBONE_STRIDES), inner list are scales per level.) self.rpn_anchor_scales = {'xy': [[4], [8], [16], [32]], 'z': [[1], [2], [4], [8]]} # choose which pyramid levels to extract features from: P2: 0, P3: 1, P4: 2, P5: 3. self.pyramid_levels = [0, 1, 2, 3] # number of feature maps in rpn. typically lowered in 3D to save gpu-memory. self.n_rpn_features = 512 if self.dim == 2 else 64 # anchor ratios and strides per position in feature maps. self.rpn_anchor_ratios = [0.5, 1., 2.] self.rpn_anchor_stride = 1 # Threshold for first stage (RPN) non-maximum suppression (NMS): LOWER == HARDER SELECTION self.rpn_nms_threshold = max(0.8, self.model_max_iou_resolution) # loss sampling settings. - self.rpn_train_anchors_per_image = 4 + self.rpn_train_anchors_per_image = 64 self.train_rois_per_image = 6 # per batch_instance self.roi_positive_ratio = 0.5 self.anchor_matching_iou = 0.8 # k negative example candidates are drawn from a pool of size k*shem_poolsize (stochastic hard-example mining), # where k<=#positive examples. self.shem_poolsize = 2 self.pool_size = (7, 7) if self.dim == 2 else (7, 7, 3) self.mask_pool_size = (14, 14) if self.dim == 2 else (14, 14, 5) self.mask_shape = (28, 28) if self.dim == 2 else (28, 28, 10) self.rpn_bbox_std_dev = np.array([0.1, 0.1, 0.1, 0.2, 0.2, 0.2]) self.bbox_std_dev = np.array([0.1, 0.1, 0.1, 0.2, 0.2, 0.2]) self.window = np.array([0, 0, self.patch_size[0], self.patch_size[1], 0, self.patch_size_3D[2]]) self.scale = np.array([self.patch_size[0], self.patch_size[1], self.patch_size[0], self.patch_size[1], self.patch_size_3D[2], self.patch_size_3D[2]]) # y1,x1,y2,x2,z1,z2 if self.dim == 2: self.rpn_bbox_std_dev = self.rpn_bbox_std_dev[:4] self.bbox_std_dev = self.bbox_std_dev[:4] self.window = self.window[:4] self.scale = self.scale[:4] self.plot_y_max = 1.5 self.n_plot_rpn_props = 5 if self.dim == 2 else 30 # per batch_instance (slice in 2D / patient in 3D) # pre-selection in proposal-layer (stage 1) for NMS-speedup. applied per batch element. self.pre_nms_limit = 2000 if self.dim == 2 else 4000 # n_proposals to be selected after NMS per batch element. too high numbers blow up memory if "detect_while_training" is True, # since proposals of the entire batch are forwarded through second stage as one "batch". self.roi_chunk_size = 1300 if self.dim == 2 else 500 self.post_nms_rois_training = 200 * (self.head_classes-1) if self.dim == 2 else 400 self.post_nms_rois_inference = 200 * (self.head_classes-1) # Final selection of detections (refine_detections) self.model_max_instances_per_batch_element = 9 if self.dim == 2 else 18 # per batch element and class. self.detection_nms_threshold = self.model_max_iou_resolution # needs to be > 0, otherwise all predictions are one cluster. self.model_min_confidence = 0.2 # iou for nms in box refining (directly after heads), should be >0 since ths>=x in mrcnn.py if self.dim == 2: self.backbone_shapes = np.array( [[int(np.ceil(self.patch_size[0] / stride)), int(np.ceil(self.patch_size[1] / stride))] for stride in self.backbone_strides['xy']]) else: self.backbone_shapes = np.array( [[int(np.ceil(self.patch_size[0] / stride)), int(np.ceil(self.patch_size[1] / stride)), int(np.ceil(self.patch_size[2] / stride_z))] for stride, stride_z in zip(self.backbone_strides['xy'], self.backbone_strides['z'] )]) if self.model == 'retina_net' or self.model == 'retina_unet': # whether to use focal loss or SHEM for loss-sample selection self.focal_loss = False # implement extra anchor-scales according to https://arxiv.org/abs/1708.02002 self.rpn_anchor_scales['xy'] = [[ii[0], ii[0] * (2 ** (1 / 3)), ii[0] * (2 ** (2 / 3))] for ii in self.rpn_anchor_scales['xy']] self.rpn_anchor_scales['z'] = [[ii[0], ii[0] * (2 ** (1 / 3)), ii[0] * (2 ** (2 / 3))] for ii in self.rpn_anchor_scales['z']] self.n_anchors_per_pos = len(self.rpn_anchor_ratios) * 3 # pre-selection of detections for NMS-speedup. per entire batch. self.pre_nms_limit = (500 if self.dim == 2 else 6250) * self.batch_size # anchor matching iou is lower than in Mask R-CNN according to https://arxiv.org/abs/1708.02002 self.anchor_matching_iou = 0.7 if self.model == 'retina_unet': self.operate_stride1 = True diff --git a/datasets/toy_mdt/configs.py b/datasets/toy_mdt/configs.py index 7ca0d8e..3c8748d 100644 --- a/datasets/toy_mdt/configs.py +++ b/datasets/toy_mdt/configs.py @@ -1,363 +1,355 @@ #!/usr/bin/env python # Copyright 2018 Division of Medical Image Computing, German Cancer Research Center (DKFZ). # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== import sys import os sys.path.append(os.path.dirname(os.path.realpath(__file__))) import numpy as np from collections import namedtuple from default_configs import DefaultConfigs Label = namedtuple("Label", ['id', 'name', 'color']) class Configs(DefaultConfigs): def __init__(self, server_env=None): ######################### # Preprocessing # ######################### self.root_dir = '/home/gregor/datasets/toy_mdt' ######################### # I/O # ######################### # one out of [2, 3]. dimension the model operates in. self.dim = 2 DefaultConfigs.__init__(self, server_env, self.dim) # one out of ['mrcnn', 'retina_net', 'retina_unet', 'detection_unet', 'ufrcnn']. - self.model = 'retina_net' + self.model = 'detection_fpn' self.model_path = 'models/{}.py'.format(self.model if not 'retina' in self.model else 'retina_net') self.model_path = os.path.join(self.source_dir, self.model_path) # int [0 < dataset_size]. select n patients from dataset for prototyping. self.select_prototype_subset = None self.held_out_test_set = True self.n_train_data = 2500 # choose one of the 3 toy experiments described in https://arxiv.org/pdf/1811.08661.pdf # one of ['donuts_shape', 'donuts_pattern', 'circles_scale']. toy_mode = 'donuts_shape_noise' # path to preprocessed data. self.info_df_name = 'info_df.pickle' self.pp_name = os.path.join(toy_mode, 'train') self.data_sourcedir = os.path.join(self.root_dir, self.pp_name) self.pp_test_name = os.path.join(toy_mode, 'test') self.test_data_sourcedir = os.path.join(self.root_dir, self.pp_test_name) # settings for deployment in cloud. if server_env: # path to preprocessed data. pp_root_dir = '/datasets/datasets_ramien/toy_exp/data' self.pp_name = os.path.join(toy_mode, 'train') - self.pp_data_path = os.path.join(pp_root_dir, self.pp_name) + self.data_sourcedir = os.path.join(pp_root_dir, self.pp_name) self.pp_test_name = os.path.join(toy_mode, 'test') self.test_data_sourcedir = os.path.join(pp_root_dir, self.pp_test_name) self.select_prototype_subset = None ######################### # Data Loader # ######################### # select modalities from preprocessed data self.channels = [0] self.n_channels = len(self.channels) self.plot_bg_chan = 0 # patch_size to be used for training. pre_crop_size is the patch_size before data augmentation. self.pre_crop_size_2D = [320, 320] self.patch_size_2D = [320, 320] self.patch_size = self.patch_size_2D if self.dim == 2 else self.patch_size_3D self.pre_crop_size = self.pre_crop_size_2D if self.dim == 2 else self.pre_crop_size_3D # ratio of free sampled batch elements before class balancing is triggered # (>0 to include "empty"/background patches.) self.batch_random_ratio = 0.2 # set 2D network to operate in 3D images. self.merge_2D_to_3D_preds = False # feed +/- n neighbouring slices into channel dimension. set to None for no context. self.n_3D_context = None if self.n_3D_context is not None and self.dim == 2: self.n_channels *= (self.n_3D_context * 2 + 1) ######################### # Architecture # ######################### self.start_filts = 48 if self.dim == 2 else 18 self.end_filts = self.start_filts * 4 if self.dim == 2 else self.start_filts * 2 self.res_architecture = 'resnet50' # 'resnet101' , 'resnet50' self.norm = None # one of None, 'instance_norm', 'batch_norm' # one of 'xavier_uniform', 'xavier_normal', or 'kaiming_normal', None (=default = 'kaiming_uniform') self.weight_init = None # compatibility self.regression_n_features = 1 + self.num_classes = 2 # excluding bg + self.num_seg_classes = 3 # incl bg ######################### # Schedule / Selection # ######################### - self.num_epochs = 23 + self.num_epochs = 32 self.num_train_batches = 100 if self.dim == 2 else 200 self.batch_size = 20 if self.dim == 2 else 8 self.do_validation = True # decide whether to validate on entire patient volumes (like testing) or sampled patches (like training) # the former is morge accurate, while the latter is faster (depending on volume size) self.val_mode = 'val_patient' # one of 'val_sampling' , 'val_patient' if self.val_mode == 'val_patient': self.max_val_patients = "all" # if 'None' iterates over entire val_set once. if self.val_mode == 'val_sampling': self.num_val_batches = 50 # set dynamic_lr_scheduling to True to apply LR scheduling with below settings. self.dynamic_lr_scheduling = True self.lr_decay_factor = 0.5 - self.scheduling_patience = np.ceil(3600 / (self.num_train_batches * self.batch_size)) + self.scheduling_patience = np.ceil(2400 / (self.num_train_batches * self.batch_size)) self.scheduling_criterion = 'donuts_ap' self.scheduling_mode = 'min' if "loss" in self.scheduling_criterion else 'max' self.weight_decay = 0 self.clip_norm = None ######################### # Testing / Plotting # ######################### # set the top-n-epochs to be saved for temporal averaging in testing. self.save_n_models = 5 self.test_n_epochs = 5 self.test_aug_axes = (0, 1, (0, 1)) self.n_test_plots = 2 self.clustering = "wbc" self.clustering_iou = 1e-5 # set a minimum epoch number for saving in case of instabilities in the first phase of training. self.min_save_thresh = 0 if self.dim == 2 else 0 self.report_score_level = ['patient', 'rois'] # choose list from 'patient', 'rois' self.class_labels = [Label(0, 'bg', (*self.white, 0.)), Label(1, 'circles', (*self.orange, .9)), Label(2, 'donuts', (*self.blue, .9)),] if self.class_specific_seg: self.seg_labels = self.class_labels self.box_type2label = {label.name: label for label in self.box_labels} self.class_id2label = {label.id: label for label in self.class_labels} self.class_dict = {label.id: label.name for label in self.class_labels if label.id != 0} self.seg_id2label = {label.id: label for label in self.seg_labels} self.cmap = {label.id: label.color for label in self.seg_labels} self.patient_class_of_interest = 2 # patient metrics are only plotted for one class. self.ap_match_ious = [0.1] # list of ious to be evaluated for ap-scoring. self.model_selection_criteria = {name + "_ap": 1. for name in self.class_dict.values()}# criteria to average over for saving epochs. self.min_det_thresh = 0.1 # minimum confidence value to select predictions for evaluation. self.plot_prediction_histograms = True self.plot_stat_curves = False self.plot_class_ids = True ######################### # Data Augmentation # ######################### self.do_aug = False self.da_kwargs={ 'do_elastic_deform': True, 'alpha':(0., 1500.), 'sigma':(30., 50.), 'do_rotation':True, 'angle_x': (0., 2 * np.pi), 'angle_y': (0., 0), 'angle_z': (0., 0), 'do_scale': True, 'scale':(0.8, 1.1), 'random_crop':False, 'rand_crop_dist': (self.patch_size[0] / 2. - 3, self.patch_size[1] / 2. - 3), 'border_mode_data': 'constant', 'border_cval_data': 0, 'order_data': 1 } if self.dim == 3: self.da_kwargs['do_elastic_deform'] = False self.da_kwargs['angle_x'] = (0, 0.0) self.da_kwargs['angle_y'] = (0, 0.0) #must be 0!! self.da_kwargs['angle_z'] = (0., 2 * np.pi) ######################### # Add model specifics # ######################### - {'detection_unet': self.add_det_unet_configs, + {'detection_fpn': self.add_det_fpn_configs, 'mrcnn': self.add_mrcnn_configs, 'retina_net': self.add_mrcnn_configs, 'retina_unet': self.add_mrcnn_configs, }[self.model]() - def add_det_unet_configs(self): + def add_det_fpn_configs(self): - self.learning_rate = [1e-4] * self.num_epochs + self.learning_rate = [1 * 1e-3] * self.num_epochs + self.dynamic_lr_scheduling = True + self.scheduling_criterion = 'torch_loss' + self.scheduling_mode = 'min' if "loss" in self.scheduling_criterion else 'max' - # aggregation from pixel perdiction to object scores (connected component). One of ['max', 'median'] - self.aggregation_operation = 'max' + self.n_roi_candidates = 4 if self.dim == 2 else 6 + # max number of roi candidates to identify per image (slice in 2D, volume in 3D) - # max number of roi candidates to identify per image (slice in 2D, volume in 3D) - self.n_roi_candidates = 3 if self.dim == 2 else 8 + # loss mode: either weighted cross entropy ('wce'), batch-wise dice loss ('dice), or the sum of both ('dice_wce') + self.seg_loss_mode = 'wce' + self.wce_weights = [1] * self.num_seg_classes if 'dice' in self.seg_loss_mode else [0.1, 1., 1.] - # loss mode: either weighted cross entropy ('wce'), batch-wise dice loss ('dice), or the sum of both ('dice_wce') - self.seg_loss_mode = 'wce' + self.fp_dice_weight = 1 if self.dim == 2 else 1 + # if <1, false positive predictions in foreground are penalized less. - # if <1, false positive predictions in foreground are penalized less. - self.fp_dice_weight = 1 if self.dim == 2 else 1 + self.detection_min_confidence = 0.05 + # how to determine score of roi: 'max' or 'median' + self.score_det = 'max' - self.wce_weights = [1, 1, 1] - self.detection_min_confidence = self.min_det_thresh - # if 'True', loss distinguishes all classes, else only foreground vs. background (class agnostic). - self.num_seg_classes = 3 - self.head_classes = self.num_seg_classes def add_mrcnn_configs(self): # learning rate is a list with one entry per epoch. self.learning_rate = [1e-3] * self.num_epochs # disable mask head loss. (e.g. if no pixelwise annotations available) self.frcnn_mode = False # disable the re-sampling of mask proposals to original size for speed-up. # since evaluation is detection-driven (box-matching) and not instance segmentation-driven (iou-matching), # mask-outputs are optional. self.return_masks_in_val = True self.return_masks_in_test = False # set number of proposal boxes to plot after each epoch. self.n_plot_rpn_props = 0 if self.dim == 2 else 0 # number of classes for head networks: n_foreground_classes + 1 (background) - self.head_classes = 3 - - # seg_classes hier refers to the first stage classifier (RPN) - self.num_seg_classes = 2 # foreground vs. background + self.head_classes = self.num_classes + 1 # feature map strides per pyramid level are inferred from architecture. self.backbone_strides = {'xy': [4, 8, 16, 32], 'z': [1, 2, 4, 8]} # anchor scales are chosen according to expected object sizes in data set. Default uses only one anchor scale # per pyramid level. (outer list are pyramid levels (corresponding to BACKBONE_STRIDES), inner list are scales per level.) self.rpn_anchor_scales = {'xy': [[8], [16], [32], [64]], 'z': [[2], [4], [8], [16]]} # choose which pyramid levels to extract features from: P2: 0, P3: 1, P4: 2, P5: 3. self.pyramid_levels = [0, 1, 2, 3] # number of feature maps in rpn. typically lowered in 3D to save gpu-memory. self.n_rpn_features = 512 if self.dim == 2 else 128 # anchor ratios and strides per position in feature maps. self.rpn_anchor_ratios = [0.5, 1., 2.] self.rpn_anchor_stride = 1 # Threshold for first stage (RPN) non-maximum suppression (NMS): LOWER == HARDER SELECTION self.rpn_nms_threshold = 0.7 if self.dim == 2 else 0.7 # loss sampling settings. self.rpn_train_anchors_per_image = 64 #per batch element self.train_rois_per_image = 2 #per batch element self.roi_positive_ratio = 0.5 self.anchor_matching_iou = 0.7 # factor of top-k candidates to draw from per negative sample (stochastic-hard-example-mining). # poolsize to draw top-k candidates from will be shem_poolsize * n_negative_samples. self.shem_poolsize = 10 self.pool_size = (7, 7) if self.dim == 2 else (7, 7, 3) self.mask_pool_size = (14, 14) if self.dim == 2 else (14, 14, 5) self.mask_shape = (28, 28) if self.dim == 2 else (28, 28, 10) self.rpn_bbox_std_dev = np.array([0.1, 0.1, 0.1, 0.2, 0.2, 0.2]) self.bbox_std_dev = np.array([0.1, 0.1, 0.1, 0.2, 0.2, 0.2]) self.window = np.array([0, 0, self.patch_size[0], self.patch_size[1]]) self.scale = np.array([self.patch_size[0], self.patch_size[1], self.patch_size[0], self.patch_size[1]]) if self.dim == 2: self.rpn_bbox_std_dev = self.rpn_bbox_std_dev[:4] self.bbox_std_dev = self.bbox_std_dev[:4] self.window = self.window[:4] self.scale = self.scale[:4] # pre-selection in proposal-layer (stage 1) for NMS-speedup. applied per batch element. self.pre_nms_limit = 3000 if self.dim == 2 else 6000 # n_proposals to be selected after NMS per batch element. too high numbers blow up memory if "detect_while_training" is True, # since proposals of the entire batch are forwarded through second stage in as one "batch". self.roi_chunk_size = 800 if self.dim == 2 else 600 self.post_nms_rois_training = 500 if self.dim == 2 else 75 self.post_nms_rois_inference = 500 # Final selection of detections (refine_detections) self.model_max_instances_per_batch_element = 10 if self.dim == 2 else 30 # per batch element and class. self.detection_nms_threshold = 1e-5 # needs to be > 0, otherwise all predictions are one cluster. self.model_min_confidence = 0.1 if self.dim == 2: self.backbone_shapes = np.array( [[int(np.ceil(self.patch_size[0] / stride)), int(np.ceil(self.patch_size[1] / stride))] for stride in self.backbone_strides['xy']]) else: self.backbone_shapes = np.array( [[int(np.ceil(self.patch_size[0] / stride)), int(np.ceil(self.patch_size[1] / stride)), int(np.ceil(self.patch_size[2] / stride_z))] for stride, stride_z in zip(self.backbone_strides['xy'], self.backbone_strides['z'] )]) - if self.model == 'ufrcnn': - self.operate_stride1 = True - self.num_seg_classes = 3 - self.frcnn_mode = True - if self.model == 'retina_net' or self.model == 'retina_unet' or self.model == 'prob_detector': + if self.model == 'retina_net' or self.model == 'retina_unet': # implement extra anchor-scales according to retina-net publication. self.rpn_anchor_scales['xy'] = [[ii[0], ii[0] * (2 ** (1 / 3)), ii[0] * (2 ** (2 / 3))] for ii in self.rpn_anchor_scales['xy']] self.rpn_anchor_scales['z'] = [[ii[0], ii[0] * (2 ** (1 / 3)), ii[0] * (2 ** (2 / 3))] for ii in self.rpn_anchor_scales['z']] self.n_anchors_per_pos = len(self.rpn_anchor_ratios) * 3 self.n_rpn_features = 256 if self.dim == 2 else 64 # pre-selection of detections for NMS-speedup. per entire batch. self.pre_nms_limit = 10000 if self.dim == 2 else 50000 # anchor matching iou is lower than in Mask R-CNN according to https://arxiv.org/abs/1708.02002 self.anchor_matching_iou = 0.5 - # if 'True', seg loss distinguishes all classes, else only foreground vs. background (class agnostic). - self.num_seg_classes = 3 - if self.model == 'retina_unet': self.operate_stride1 = True diff --git a/predictor.py b/predictor.py index d8063f2..a5cff11 100644 --- a/predictor.py +++ b/predictor.py @@ -1,1006 +1,1007 @@ #!/usr/bin/env python # Copyright 2019 Division of Medical Image Computing, German Cancer Research Center (DKFZ). # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== import os from multiprocessing import Pool import pickle import time import numpy as np import torch from scipy.stats import norm from collections import OrderedDict import plotting as plg import utils.model_utils as mutils import utils.exp_utils as utils def get_mirrored_patch_crops(patch_crops, org_img_shape): mirrored_patch_crops = [] mirrored_patch_crops.append([[org_img_shape[2] - ii[1], org_img_shape[2] - ii[0], ii[2], ii[3]] if len(ii) == 4 else [org_img_shape[2] - ii[1], org_img_shape[2] - ii[0], ii[2], ii[3], ii[4], ii[5]] for ii in patch_crops]) mirrored_patch_crops.append([[ii[0], ii[1], org_img_shape[3] - ii[3], org_img_shape[3] - ii[2]] if len(ii) == 4 else [ii[0], ii[1], org_img_shape[3] - ii[3], org_img_shape[3] - ii[2], ii[4], ii[5]] for ii in patch_crops]) mirrored_patch_crops.append([[org_img_shape[2] - ii[1], org_img_shape[2] - ii[0], org_img_shape[3] - ii[3], org_img_shape[3] - ii[2]] if len(ii) == 4 else [org_img_shape[2] - ii[1], org_img_shape[2] - ii[0], org_img_shape[3] - ii[3], org_img_shape[3] - ii[2], ii[4], ii[5]] for ii in patch_crops]) return mirrored_patch_crops def get_mirrored_patch_crops_ax_dep(patch_crops, org_img_shape, mirror_axes): mirrored_patch_crops = [] for ax_ix, axes in enumerate(mirror_axes): if isinstance(axes, (int, float)) and int(axes) == 0: mirrored_patch_crops.append([[org_img_shape[2] - ii[1], org_img_shape[2] - ii[0], ii[2], ii[3]] if len(ii) == 4 else [org_img_shape[2] - ii[1], org_img_shape[2] - ii[0], ii[2], ii[3], ii[4], ii[5]] for ii in patch_crops]) elif isinstance(axes, (int, float)) and int(axes) == 1: mirrored_patch_crops.append([[ii[0], ii[1], org_img_shape[3] - ii[3], org_img_shape[3] - ii[2]] if len(ii) == 4 else [ii[0], ii[1], org_img_shape[3] - ii[3], org_img_shape[3] - ii[2], ii[4], ii[5]] for ii in patch_crops]) elif hasattr(axes, "__iter__") and (tuple(axes) == (0, 1) or tuple(axes) == (1, 0)): mirrored_patch_crops.append([[org_img_shape[2] - ii[1], org_img_shape[2] - ii[0], org_img_shape[3] - ii[3], org_img_shape[3] - ii[2]] if len(ii) == 4 else [org_img_shape[2] - ii[1], org_img_shape[2] - ii[0], org_img_shape[3] - ii[3], org_img_shape[3] - ii[2], ii[4], ii[5]] for ii in patch_crops]) else: raise Exception("invalid mirror axes {} in get mirrored patch crops".format(axes)) return mirrored_patch_crops def apply_wbc_to_patient(inputs): """ wrapper around prediction box consolidation: weighted box clustering (wbc). processes a single patient. loops over batch elements in patient results (1 in 3D, slices in 2D) and foreground classes, aggregates and stores results in new list. :return. patient_results_list: list over batch elements. each element is a list over boxes, where each box is one dictionary: [[box_0, ...], [box_n,...]]. batch elements are slices for 2D predictions, and a dummy batch dimension of 1 for 3D predictions. :return. pid: string. patient id. """ regress_flag, in_patient_results_list, pid, class_dict, clustering_iou, n_ens = inputs out_patient_results_list = [[] for _ in range(len(in_patient_results_list))] for bix, b in enumerate(in_patient_results_list): for cl in list(class_dict.keys()): boxes = [(ix, box) for ix, box in enumerate(b) if (box['box_type'] == 'det' and box['box_pred_class_id'] == cl)] box_coords = np.array([b[1]['box_coords'] for b in boxes]) box_scores = np.array([b[1]['box_score'] for b in boxes]) box_center_factor = np.array([b[1]['box_patch_center_factor'] for b in boxes]) box_n_overlaps = np.array([b[1]['box_n_overlaps'] for b in boxes]) try: box_patch_id = np.array([b[1]['patch_id'] for b in boxes]) except KeyError: #backward compatibility for already saved pred results ... omg box_patch_id = np.array([b[1]['ens_ix'] for b in boxes]) box_regressions = np.array([b[1]['regression'] for b in boxes]) if regress_flag else None box_rg_bins = np.array([b[1]['rg_bin'] if 'rg_bin' in b[1].keys() else float('NaN') for b in boxes]) box_rg_uncs = np.array([b[1]['rg_uncertainty'] if 'rg_uncertainty' in b[1].keys() else float('NaN') for b in boxes]) if 0 not in box_scores.shape: keep_scores, keep_coords, keep_n_missing, keep_regressions, keep_rg_bins, keep_rg_uncs = \ weighted_box_clustering(box_coords, box_scores, box_center_factor, box_n_overlaps, box_rg_bins, box_rg_uncs, box_regressions, box_patch_id, clustering_iou, n_ens) for boxix in range(len(keep_scores)): clustered_box = {'box_type': 'det', 'box_coords': keep_coords[boxix], 'box_score': keep_scores[boxix], 'cluster_n_missing': keep_n_missing[boxix], 'box_pred_class_id': cl} if regress_flag: clustered_box.update({'regression': keep_regressions[boxix], 'rg_uncertainty': keep_rg_uncs[boxix], 'rg_bin': keep_rg_bins[boxix]}) out_patient_results_list[bix].append(clustered_box) # add gt boxes back to new output list. out_patient_results_list[bix].extend([box for box in b if box['box_type'] == 'gt']) return [out_patient_results_list, pid] def weighted_box_clustering(box_coords, scores, box_pc_facts, box_n_ovs, box_rg_bins, box_rg_uncs, box_regress, box_patch_id, thresh, n_ens): """Consolidates overlapping predictions resulting from patch overlaps, test data augmentations and temporal ensembling. clusters predictions together with iou > thresh (like in NMS). Output score and coordinate for one cluster are the average weighted by individual patch center factors (how trustworthy is this candidate measured by how centered its position within the patch is) and the size of the corresponding box. The number of expected predictions at a position is n_data_aug * n_temp_ens * n_overlaps_at_position (1 prediction per unique patch). Missing predictions at a cluster position are defined as the number of unique patches in the cluster, which did not contribute any predict any boxes. :param dets: (n_dets, (y1, x1, y2, x2, (z1), (z2), scores, box_pc_facts, box_n_ovs). :param box_coords: y1, x1, y2, x2, (z1), (z2). :param scores: confidence scores. :param box_pc_facts: patch-center factors from position on patch tiles. :param box_n_ovs: number of patch overlaps at box position. :param box_rg_bins: regression bin predictions. :param box_rg_uncs: (n_dets,) regression uncertainties (from model mrcnn_aleatoric). :param box_regress: (n_dets, n_regression_features). :param box_patch_id: ensemble index. :param thresh: threshold for iou_matching. :param n_ens: number of models, that are ensembled. (-> number of expected predictions per position). :return: keep_scores: (n_keep) new scores of boxes to be kept. :return: keep_coords: (n_keep, (y1, x1, y2, x2, (z1), (z2)) new coordinates of boxes to be kept. """ dim = 2 if box_coords.shape[1] == 4 else 3 y1 = box_coords[:,0] x1 = box_coords[:,1] y2 = box_coords[:,2] x2 = box_coords[:,3] areas = (y2 - y1 + 1) * (x2 - x1 + 1) if dim == 3: z1 = box_coords[:, 4] z2 = box_coords[:, 5] areas *= (z2 - z1 + 1) # order is the sorted index. maps order to index o[1] = 24 (rank1, ix 24) order = scores.argsort()[::-1] keep_scores = [] keep_coords = [] keep_n_missing = [] keep_regress = [] keep_rg_bins = [] keep_rg_uncs = [] while order.size > 0: i = order[0] # highest scoring element yy1 = np.maximum(y1[i], y1[order]) xx1 = np.maximum(x1[i], x1[order]) yy2 = np.minimum(y2[i], y2[order]) xx2 = np.minimum(x2[i], x2[order]) w = np.maximum(0, xx2 - xx1 + 1) h = np.maximum(0, yy2 - yy1 + 1) inter = w * h if dim == 3: zz1 = np.maximum(z1[i], z1[order]) zz2 = np.minimum(z2[i], z2[order]) d = np.maximum(0, zz2 - zz1 + 1) inter *= d # overlap between currently highest scoring box and all boxes. ovr = inter / (areas[i] + areas[order] - inter) ovr_fl = inter.astype('float64') / (areas[i] + areas[order] - inter.astype('float64')) assert np.all(ovr==ovr_fl), "ovr {}\n ovr_float {}".format(ovr, ovr_fl) # get all the predictions that match the current box to build one cluster. matches = np.nonzero(ovr > thresh)[0] match_n_ovs = box_n_ovs[order[matches]] match_pc_facts = box_pc_facts[order[matches]] match_patch_id = box_patch_id[order[matches]] match_ov_facts = ovr[matches] match_areas = areas[order[matches]] match_scores = scores[order[matches]] # weight all scores in cluster by patch factors, and size. match_score_weights = match_ov_facts * match_areas * match_pc_facts match_scores *= match_score_weights # for the weighted average, scores have to be divided by the number of total expected preds at the position # of the current cluster. 1 Prediction per patch is expected. therefore, the number of ensembled models is # multiplied by the mean overlaps of patches at this position (boxes of the cluster might partly be # in areas of different overlaps). n_expected_preds = n_ens * np.mean(match_n_ovs) # the number of missing predictions is obtained as the number of patches, # which did not contribute any prediction to the current cluster. n_missing_preds = np.max((0, n_expected_preds - np.unique(match_patch_id).shape[0])) # missing preds are given the mean weighting # (expected prediction is the mean over all predictions in cluster). denom = np.sum(match_score_weights) + n_missing_preds * np.mean(match_score_weights) # compute weighted average score for the cluster avg_score = np.sum(match_scores) / denom # compute weighted average of coordinates for the cluster. now only take existing # predictions into account. avg_coords = [np.sum(y1[order[matches]] * match_scores) / np.sum(match_scores), np.sum(x1[order[matches]] * match_scores) / np.sum(match_scores), np.sum(y2[order[matches]] * match_scores) / np.sum(match_scores), np.sum(x2[order[matches]] * match_scores) / np.sum(match_scores)] if dim == 3: avg_coords.append(np.sum(z1[order[matches]] * match_scores) / np.sum(match_scores)) avg_coords.append(np.sum(z2[order[matches]] * match_scores) / np.sum(match_scores)) if box_regress is not None: # compute wt. avg. of regression vectors (component-wise average) avg_regress = np.sum(box_regress[order[matches]] * match_scores[:, np.newaxis], axis=0) / np.sum( match_scores) avg_rg_bins = np.round(np.sum(box_rg_bins[order[matches]] * match_scores) / np.sum(match_scores)) avg_rg_uncs = np.sum(box_rg_uncs[order[matches]] * match_scores) / np.sum(match_scores) else: avg_regress = np.array(float('NaN')) avg_rg_bins = np.array(float('NaN')) avg_rg_uncs = np.array(float('NaN')) # some clusters might have very low scores due to high amounts of missing predictions. # filter out the with a conservative threshold, to speed up evaluation. if avg_score > 0.01: keep_scores.append(avg_score) keep_coords.append(avg_coords) keep_n_missing.append((n_missing_preds / n_expected_preds * 100)) # relative keep_regress.append(avg_regress) keep_rg_uncs.append(avg_rg_uncs) keep_rg_bins.append(avg_rg_bins) # get index of all elements that were not matched and discard all others. inds = np.nonzero(ovr <= thresh)[0] inds_where = np.where(ovr<=thresh)[0] assert np.all(inds == inds_where), "inds_nonzero {} \ninds_where {}".format(inds, inds_where) order = order[inds] return keep_scores, keep_coords, keep_n_missing, keep_regress, keep_rg_bins, keep_rg_uncs def apply_nms_to_patient(inputs): in_patient_results_list, pid, class_dict, iou_thresh = inputs out_patient_results_list = [] # collect box predictions over batch dimension (slices) and store slice info as slice_ids. for batch in in_patient_results_list: batch_el_boxes = [] for cl in list(class_dict.keys()): det_boxes = [box for box in batch if (box['box_type'] == 'det' and box['box_pred_class_id'] == cl)] box_coords = np.array([box['box_coords'] for box in det_boxes]) box_scores = np.array([box['box_score'] for box in det_boxes]) if 0 not in box_scores.shape: keep_ix = mutils.nms_numpy(box_coords, box_scores, iou_thresh) else: keep_ix = [] batch_el_boxes += [det_boxes[ix] for ix in keep_ix] batch_el_boxes += [box for box in batch if box['box_type'] == 'gt'] out_patient_results_list.append(batch_el_boxes) assert len(in_patient_results_list) == len(out_patient_results_list), "batch dim needs to be maintained, in: {}, out {}".format(len(in_patient_results_list), len(out_patient_results_list)) return [out_patient_results_list, pid] def nms_2to3D(dets, thresh): """ Merges 2D boxes to 3D cubes. For this purpose, boxes of all slices are regarded as lying in one slice. An adaptation of Non-maximum suppression is applied where clusters are found (like in NMS) with the extra constraint that suppressed boxes have to have 'connected' z coordinates w.r.t the core slice (cluster center, highest scoring box, the prevailing box). 'connected' z-coordinates are determined as the z-coordinates with predictions until the first coordinate for which no prediction is found. example: a cluster of predictions was found overlap > iou thresh in xy (like NMS). The z-coordinate of the highest scoring box is 50. Other predictions have 23, 46, 48, 49, 51, 52, 53, 56, 57. Only the coordinates connected with 50 are clustered to one cube: 48, 49, 51, 52, 53. (46 not because nothing was found in 47, so 47 is a 'hole', which interrupts the connection). Only the boxes corresponding to these coordinates are suppressed. All others are kept for building of further clusters. This algorithm works better with a certain min_confidence of predictions, because low confidence (e.g. noisy/cluttery) predictions can break the relatively strong assumption of defining cubes' z-boundaries at the first 'hole' in the cluster. :param dets: (n_detections, (y1, x1, y2, x2, scores, slice_id) :param thresh: iou matchin threshold (like in NMS). :return: keep: (n_keep,) 1D tensor of indices to be kept. :return: keep_z: (n_keep, [z1, z2]) z-coordinates to be added to boxes, which are kept in order to form cubes. """ y1 = dets[:, 0] x1 = dets[:, 1] y2 = dets[:, 2] x2 = dets[:, 3] assert np.all(y1 <= y2) and np.all(x1 <= x2), """"the definition of the coordinates is crucially important here: where maximum is taken needs to be the lower coordinate""" scores = dets[:, -2] slice_id = dets[:, -1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) order = scores.argsort()[::-1] keep = [] keep_z = [] while order.size > 0: # order is the sorted index. maps order to index: order[1] = 24 means (rank1, ix 24) i = order[0] # highest scoring element yy1 = np.maximum(y1[i], y1[order]) # highest scoring element still in >order<, is compared to itself: okay? xx1 = np.maximum(x1[i], x1[order]) yy2 = np.minimum(y2[i], y2[order]) xx2 = np.minimum(x2[i], x2[order]) h = np.maximum(0.0, yy2 - yy1 + 1) w = np.maximum(0.0, xx2 - xx1 + 1) inter = h * w iou = inter / (areas[i] + areas[order] - inter) matches = np.argwhere( iou > thresh) # get all the elements that match the current box and have a lower score slice_ids = slice_id[order[matches]] core_slice = slice_id[int(i)] upper_holes = [ii for ii in np.arange(core_slice, np.max(slice_ids)) if ii not in slice_ids] lower_holes = [ii for ii in np.arange(np.min(slice_ids), core_slice) if ii not in slice_ids] max_valid_slice_id = np.min(upper_holes) if len(upper_holes) > 0 else np.max(slice_ids) min_valid_slice_id = np.max(lower_holes) if len(lower_holes) > 0 else np.min(slice_ids) z_matches = matches[(slice_ids <= max_valid_slice_id) & (slice_ids >= min_valid_slice_id)] # expand by one z voxel since box content is surrounded w/o overlap, i.e., z-content computed as z2-z1 z1 = np.min(slice_id[order[z_matches]]) - 1 z2 = np.max(slice_id[order[z_matches]]) + 1 keep.append(i) keep_z.append([z1, z2]) order = np.delete(order, z_matches, axis=0) return keep, keep_z def apply_2d_3d_merging_to_patient(inputs): """ wrapper around 2Dto3D merging operation. Processes a single patient. Takes 2D patient results (slices in batch dimension) and returns 3D patient results (dummy batch dimension of 1). Applies an adaption of Non-Maximum Surpression (Detailed methodology is described in nms_2to3D). :return. results_dict_boxes: list over batch elements (1 in 3D). each element is a list over boxes, where each box is one dictionary: [[box_0, ...], [box_n,...]]. :return. pid: string. patient id. """ in_patient_results_list, pid, class_dict, merge_3D_iou = inputs out_patient_results_list = [] for cl in list(class_dict.keys()): det_boxes, slice_ids = [], [] # collect box predictions over batch dimension (slices) and store slice info as slice_ids. for batch_ix, batch in enumerate(in_patient_results_list): batch_element_det_boxes = [(ix, box) for ix, box in enumerate(batch) if (box['box_type'] == 'det' and box['box_pred_class_id'] == cl)] det_boxes += batch_element_det_boxes slice_ids += [batch_ix] * len(batch_element_det_boxes) box_coords = np.array([batch[1]['box_coords'] for batch in det_boxes]) box_scores = np.array([batch[1]['box_score'] for batch in det_boxes]) slice_ids = np.array(slice_ids) if 0 not in box_scores.shape: keep_ix, keep_z = nms_2to3D( np.concatenate((box_coords, box_scores[:, None], slice_ids[:, None]), axis=1), merge_3D_iou) else: keep_ix, keep_z = [], [] # store kept predictions in new results list and add corresponding z-dimension info to coordinates. for kix, kz in zip(keep_ix, keep_z): keep_box = det_boxes[kix][1] keep_box['box_coords'] = list(keep_box['box_coords']) + kz out_patient_results_list.append(keep_box) gt_boxes = [box for b in in_patient_results_list for box in b if box['box_type'] == 'gt'] if len(gt_boxes) > 0: assert np.all([len(box["box_coords"]) == 6 for box in gt_boxes]), "expanded preds to 3D but GT is 2D." out_patient_results_list += gt_boxes return [[out_patient_results_list], pid] # additional list wrapping is extra batch dim. class Predictor: """ Prediction pipeline: - receives a patched patient image (n_patches, c, y, x, (z)) from patient data loader. - forwards patches through model in chunks of batch_size. (method: batch_tiling_forward) - unmolds predictions (boxes and segmentations) to original patient coordinates. (method: spatial_tiling_forward) Ensembling (mode == 'test'): - for inference, forwards 4 mirrored versions of image to through model and unmolds predictions afterwards accordingly (method: data_aug_forward) - for inference, loads multiple parameter-sets of the trained model corresponding to different epochs. for each parameter-set loops over entire test set, runs prediction pipeline for each patient. (method: predict_test_set) Consolidation of predictions: - consolidates a patient's predictions (boxes, segmentations) collected over patches, data_aug- and temporal ensembling, performs clustering and weighted averaging (external function: apply_wbc_to_patient) to obtain consistent outptus. - for 2D networks, consolidates box predictions to 3D cubes via clustering (adaption of non-maximum surpression). (external function: apply_2d_3d_merging_to_patient) Ground truth handling: - dissmisses any ground truth boxes returned by the model (happens in validation mode, patch-based groundtruth) - if provided by data loader, adds patient-wise ground truth to the final predictions to be passed to the evaluator. """ def __init__(self, cf, net, logger, mode): self.cf = cf self.batch_size = cf.batch_size self.logger = logger self.mode = mode self.net = net self.n_ens = 1 self.rank_ix = '0' self.regress_flag = any(['regression' in task for task in self.cf.prediction_tasks]) if self.cf.merge_2D_to_3D_preds: assert self.cf.dim == 2, "Merge 2Dto3D only valid for 2D preds, but current dim is {}.".format(self.cf.dim) if self.mode == 'test': last_state_path = os.path.join(self.cf.fold_dir, 'last_state.pth') try: self.model_index = torch.load(last_state_path)["model_index"] self.model_index = self.model_index[self.model_index["rank"] <= self.cf.test_n_epochs] except FileNotFoundError: raise FileNotFoundError('no last_state/model_index file in fold directory. ' 'seems like you are trying to run testing without prior training...') self.n_ens = cf.test_n_epochs if self.cf.test_aug_axes is not None: self.n_ens *= (len(self.cf.test_aug_axes)+1) self.example_plot_dir = os.path.join(cf.test_dir, "example_plots") os.makedirs(self.example_plot_dir, exist_ok=True) def batch_tiling_forward(self, batch): """ calls the actual network forward method. in patch-based prediction, the batch dimension might be overladed with n_patches >> batch_size, which would exceed gpu memory. In this case, batches are processed in chunks of batch_size. validation mode calls the train method to monitor losses (returned ground truth objects are discarded). test mode calls the test forward method, no ground truth required / involved. :return. results_dict: stores the results for one patient. dictionary with keys: - 'boxes': list over batch elements. each element is a list over boxes, where each box is one dictionary: [[box_0, ...], [box_n,...]]. batch elements are slices for 2D predictions, and a dummy batch dimension of 1 for 3D predictions. - 'seg_preds': pixel-wise predictions. (b, 1, y, x, (z)) - loss / class_loss (only in validation mode) """ img = batch['data'] if img.shape[0] <= self.batch_size: if self.mode == 'val': # call training method to monitor losses results_dict = self.net.train_forward(batch, is_validation=True) # discard returned ground-truth boxes (also training info boxes). results_dict['boxes'] = [[box for box in b if box['box_type'] == 'det'] for b in results_dict['boxes']] elif self.mode == 'test': results_dict = self.net.test_forward(batch, return_masks=self.cf.return_masks_in_test) else: # needs batch tiling split_ixs = np.split(np.arange(img.shape[0]), np.arange(img.shape[0])[::self.batch_size]) chunk_dicts = [] for chunk_ixs in split_ixs[1:]: # first split is elements before 0, so empty b = {k: batch[k][chunk_ixs] for k in batch.keys() if (isinstance(batch[k], np.ndarray) and batch[k].shape[0] == img.shape[0])} if self.mode == 'val': chunk_dicts += [self.net.train_forward(b, is_validation=True)] else: chunk_dicts += [self.net.test_forward(b, return_masks=self.cf.return_masks_in_test)] results_dict = {} # flatten out batch elements from chunks ([chunk, chunk] -> [b, b, b, b, ...]) results_dict['boxes'] = [item for d in chunk_dicts for item in d['boxes']] results_dict['seg_preds'] = np.array([item for d in chunk_dicts for item in d['seg_preds']]) if self.mode == 'val': # if hasattr(self.cf, "losses_to_monitor"): # loss_names = self.cf.losses_to_monitor # else: # loss_names = {name for dic in chunk_dicts for name in dic if 'loss' in name} # estimate patient loss by mean over batch_chunks. Most similar to training loss. results_dict['torch_loss'] = torch.mean(torch.cat([d['torch_loss'] for d in chunk_dicts])) results_dict['class_loss'] = np.mean([d['class_loss'] for d in chunk_dicts]) # discard returned ground-truth boxes (also training info boxes). results_dict['boxes'] = [[box for box in b if box['box_type'] == 'det'] for b in results_dict['boxes']] return results_dict def spatial_tiling_forward(self, batch, patch_crops = None, n_aug='0'): """ forwards batch to batch_tiling_forward method and receives and returns a dictionary with results. if patch-based prediction, the results received from batch_tiling_forward will be on a per-patch-basis. this method uses the provided patch_crops to re-transform all predictions to whole-image coordinates. Patch-origin information of all box-predictions will be needed for consolidation, hence it is stored as 'patch_id', which is a unique string for each patch (also takes current data aug and temporal epoch instances into account). all box predictions get additional information about the amount overlapping patches at the respective position (used for consolidation). :return. results_dict: stores the results for one patient. dictionary with keys: - 'boxes': list over batch elements. each element is a list over boxes, where each box is one dictionary: [[box_0, ...], [box_n,...]]. batch elements are slices for 2D predictions, and a dummy batch dimension of 1 for 3D predictions. - 'seg_preds': pixel-wise predictions. (b, 1, y, x, (z)) - monitor_values (only in validation mode) returned dict is a flattened version with 1 batch instance (3D) or slices (2D) """ if patch_crops is not None: #print("patch_crops not None, applying patch center factor") patches_dict = self.batch_tiling_forward(batch) results_dict = {'boxes': [[] for _ in range(batch['original_img_shape'][0])]} #bc of ohe--> channel dim of seg has size num_classes out_seg_shape = list(batch['original_img_shape']) out_seg_shape[1] = patches_dict["seg_preds"].shape[1] out_seg_preds = np.zeros(out_seg_shape, dtype=np.float16) patch_overlap_map = np.zeros_like(out_seg_preds, dtype='uint8') for pix, pc in enumerate(patch_crops): if self.cf.dim == 3: out_seg_preds[:, :, pc[0]:pc[1], pc[2]:pc[3], pc[4]:pc[5]] += patches_dict['seg_preds'][pix] patch_overlap_map[:, :, pc[0]:pc[1], pc[2]:pc[3], pc[4]:pc[5]] += 1 elif self.cf.dim == 2: out_seg_preds[pc[4]:pc[5], :, pc[0]:pc[1], pc[2]:pc[3], ] += patches_dict['seg_preds'][pix] patch_overlap_map[pc[4]:pc[5], :, pc[0]:pc[1], pc[2]:pc[3], ] += 1 out_seg_preds[patch_overlap_map > 0] /= patch_overlap_map[patch_overlap_map > 0] results_dict['seg_preds'] = out_seg_preds for pix, pc in enumerate(patch_crops): patch_boxes = patches_dict['boxes'][pix] for box in patch_boxes: # add unique patch id for consolidation of predictions. box['patch_id'] = self.rank_ix + '_' + n_aug + '_' + str(pix) # boxes from the edges of a patch have a lower prediction quality, than the ones at patch-centers. # hence they will be down-weighted for consolidation, using the 'box_patch_center_factor', which is # obtained by a gaussian distribution over positions in the patch and average over spatial dimensions. # Also the info 'box_n_overlaps' is stored for consolidation, which represents the amount of # overlapping patches at the box's position. c = box['box_coords'] #box_centers = np.array([(c[ii] + c[ii+2])/2 for ii in range(len(c)//2)]) box_centers = [(c[ii] + c[ii + 2]) / 2 for ii in range(2)] if self.cf.dim == 3: box_centers.append((c[4] + c[5]) / 2) box['box_patch_center_factor'] = np.mean( [norm.pdf(bc, loc=pc, scale=pc * 0.8) * np.sqrt(2 * np.pi) * pc * 0.8 for bc, pc in zip(box_centers, np.array(self.cf.patch_size) / 2)]) if self.cf.dim == 3: c += np.array([pc[0], pc[2], pc[0], pc[2], pc[4], pc[4]]) int_c = [int(np.floor(ii)) if ix%2 == 0 else int(np.ceil(ii)) for ix, ii in enumerate(c)] box['box_n_overlaps'] = np.mean(patch_overlap_map[:, :, int_c[1]:int_c[3], int_c[0]:int_c[2], int_c[4]:int_c[5]]) results_dict['boxes'][0].append(box) else: c += np.array([pc[0], pc[2], pc[0], pc[2]]) int_c = [int(np.floor(ii)) if ix % 2 == 0 else int(np.ceil(ii)) for ix, ii in enumerate(c)] box['box_n_overlaps'] = np.mean( patch_overlap_map[pc[4], :, int_c[1]:int_c[3], int_c[0]:int_c[2]]) results_dict['boxes'][pc[4]].append(box) if self.mode == 'val': results_dict['torch_loss'] = patches_dict['torch_loss'] results_dict['class_loss'] = patches_dict['class_loss'] else: results_dict = self.batch_tiling_forward(batch) for b in results_dict['boxes']: for box in b: box['box_patch_center_factor'] = 1 box['box_n_overlaps'] = 1 box['patch_id'] = self.rank_ix + '_' + n_aug return results_dict def data_aug_forward(self, batch): """ in val_mode: passes batch through to spatial_tiling method without data_aug. in test_mode: if cf.test_aug is set in configs, createst 4 mirrored versions of the input image, passes all of them to the next processing step (spatial_tiling method) and re-transforms returned predictions to original image version. :return. results_dict: stores the results for one patient. dictionary with keys: - 'boxes': list over batch elements. each element is a list over boxes, where each box is one dictionary: [[box_0, ...], [box_n,...]]. batch elements are slices for 2D predictions, and a dummy batch dimension of 1 for 3D predictions. - 'seg_preds': pixel-wise predictions. (b, 1, y, x, (z)) - loss / class_loss (only in validation mode) """ patch_crops = batch['patch_crop_coords'] if self.patched_patient else None results_list = [self.spatial_tiling_forward(batch, patch_crops)] org_img_shape = batch['original_img_shape'] if self.mode == 'test' and self.cf.test_aug_axes is not None: if isinstance(self.cf.test_aug_axes, (int, float)): self.cf.test_aug_axes = (self.cf.test_aug_axes,) #assert np.all(np.array(self.cf.test_aug_axes)= coords[0], [coords, chunk_dict['boxes'][ix][boxix]['box_coords']] assert coords[3] >= coords[1], [coords, chunk_dict['boxes'][ix][boxix]['box_coords']] chunk_dict['boxes'][ix][boxix]['box_coords'] = coords # re-transform segmentation predictions. chunk_dict['seg_preds'] = np.flip(chunk_dict['seg_preds'], axis=axis) elif hasattr(sp_axis, "__iter__") and tuple(sp_axis)==(0,1) or tuple(sp_axis)==(1,0): #NEED: mirrored patch crops are given as [(y-axis), (x-axis), (y-,x-axis)], obey this order! # mirroring along two axes at same time batch['data'] = np.flip(np.flip(img, axis=axis[0]), axis=axis[1]).copy() chunk_dict = self.spatial_tiling_forward(batch, mirrored_patch_crops[n_aug], n_aug=str(n_aug)) # re-transform coordinates. for ix in range(len(chunk_dict['boxes'])): for boxix in range(len(chunk_dict['boxes'][ix])): coords = chunk_dict['boxes'][ix][boxix]['box_coords'].copy() coords[sp_axis[0]] = org_img_shape[axis[0]] - chunk_dict['boxes'][ix][boxix]['box_coords'][sp_axis[0]+2] coords[sp_axis[0]+2] = org_img_shape[axis[0]] - chunk_dict['boxes'][ix][boxix]['box_coords'][sp_axis[0]] coords[sp_axis[1]] = org_img_shape[axis[1]] - chunk_dict['boxes'][ix][boxix]['box_coords'][sp_axis[1]+2] coords[sp_axis[1]+2] = org_img_shape[axis[1]] - chunk_dict['boxes'][ix][boxix]['box_coords'][sp_axis[1]] assert coords[2] >= coords[0], [coords, chunk_dict['boxes'][ix][boxix]['box_coords']] assert coords[3] >= coords[1], [coords, chunk_dict['boxes'][ix][boxix]['box_coords']] chunk_dict['boxes'][ix][boxix]['box_coords'] = coords # re-transform segmentation predictions. chunk_dict['seg_preds'] = np.flip(np.flip(chunk_dict['seg_preds'], axis=axis[0]), axis=axis[1]).copy() else: raise Exception("Invalid axis type {} in test augs".format(type(axis))) results_list.append(chunk_dict) batch['data'] = img # aggregate all boxes/seg_preds per batch element from data_aug predictions. results_dict = {} results_dict['boxes'] = [[item for d in results_list for item in d['boxes'][batch_instance]] for batch_instance in range(org_img_shape[0])] # results_dict['seg_preds'] = np.array([[item for d in results_list for item in d['seg_preds'][batch_instance]] # for batch_instance in range(org_img_shape[0])]) results_dict['seg_preds'] = np.stack([dic['seg_preds'] for dic in results_list], axis=1) # needs segs probs in seg_preds entry: results_dict['seg_preds'] = np.sum(results_dict['seg_preds'], axis=1) #add up seg probs from different augs per class if self.mode == 'val': results_dict['torch_loss'] = results_list[0]['torch_loss'] results_dict['class_loss'] = results_list[0]['class_loss'] return results_dict def load_saved_predictions(self): """loads raw predictions saved by self.predict_test_set. aggregates and/or merges 2D boxes to 3D cubes for evaluation (if model predicts 2D but evaluation is run in 3D), according to settings config. :return: list_of_results_per_patient: list over patient results. each entry is a dict with keys: - 'boxes': list over batch elements. each element is a list over boxes, where each box is one dictionary: [[box_0, ...], [box_n,...]]. batch elements are slices for 2D predictions (if not merged to 3D), and a dummy batch dimension of 1 for 3D predictions. - 'batch_dices': dice scores as recorded in raw prediction results. - 'seg_preds': not implemented yet. could replace dices by seg preds to have raw seg info available, however would consume critically large memory amount. todo evaluation of instance/semantic segmentation. """ results_file = 'pred_results.pkl' if not self.cf.held_out_test_set else 'pred_results_held_out.pkl' if not self.cf.held_out_test_set or self.cf.eval_test_fold_wise: self.logger.info("loading saved predictions of fold {}".format(self.cf.fold)) with open(os.path.join(self.cf.fold_dir, results_file), 'rb') as handle: results_list = pickle.load(handle) box_results_list = [(res_dict["boxes"], pid) for res_dict, pid in results_list] da_factor = len(self.cf.test_aug_axes)+1 if self.cf.test_aug_axes is not None else 1 self.n_ens = self.cf.test_n_epochs * da_factor self.logger.info('loaded raw test set predictions with n_patients = {} and n_ens = {}'.format( len(results_list), self.n_ens)) else: self.logger.info("loading saved predictions of hold-out test set") fold_dirs = sorted([os.path.join(self.cf.exp_dir, f) for f in os.listdir(self.cf.exp_dir) if os.path.isdir(os.path.join(self.cf.exp_dir, f)) and f.startswith("fold")]) results_list = [] folds_loaded = 0 for fold in range(self.cf.n_cv_splits): fold_dir = os.path.join(self.cf.exp_dir, 'fold_{}'.format(fold)) if fold_dir in fold_dirs: with open(os.path.join(fold_dir, results_file), 'rb') as handle: fold_list = pickle.load(handle) results_list += fold_list folds_loaded += 1 else: self.logger.info("Skipping fold {} since no saved predictions found.".format(fold)) box_results_list = [] for res_dict, pid in results_list: #without filtering gt out: box_results_list.append((res_dict['boxes'], pid)) #it's usually not right to filter out gts here, is it? da_factor = len(self.cf.test_aug_axes)+1 if self.cf.test_aug_axes is not None else 1 self.n_ens = self.cf.test_n_epochs * da_factor * folds_loaded # -------------- aggregation of boxes via clustering ----------------- if self.cf.clustering == "wbc": self.logger.info('applying WBC to test-set predictions with iou {} and n_ens {} over {} patients'.format( self.cf.clustering_iou, self.n_ens, len(box_results_list))) mp_inputs = [[self.regress_flag, ii[0], ii[1], self.cf.class_dict, self.cf.clustering_iou, self.n_ens] for ii in box_results_list] del box_results_list pool = Pool(processes=self.cf.n_workers) box_results_list = pool.map(apply_wbc_to_patient, mp_inputs, chunksize=1) pool.close() pool.join() del mp_inputs elif self.cf.clustering == "nms": self.logger.info('applying standard NMS to test-set predictions with iou {} over {} patients.'.format( self.cf.clustering_iou, len(box_results_list))) pool = Pool(processes=self.cf.n_workers) mp_inputs = [[ii[0], ii[1], self.cf.class_dict, self.cf.clustering_iou] for ii in box_results_list] del box_results_list box_results_list = pool.map(apply_nms_to_patient, mp_inputs, chunksize=1) pool.close() pool.join() del mp_inputs if self.cf.merge_2D_to_3D_preds: self.logger.info('applying 2Dto3D merging to test-set predictions with iou = {}.'.format(self.cf.merge_3D_iou)) pool = Pool(processes=self.cf.n_workers) mp_inputs = [[ii[0], ii[1], self.cf.class_dict, self.cf.merge_3D_iou] for ii in box_results_list] box_results_list = pool.map(apply_2d_3d_merging_to_patient, mp_inputs, chunksize=1) pool.close() pool.join() del mp_inputs for ix in range(len(results_list)): assert np.all(results_list[ix][1] == box_results_list[ix][1]), "pid mismatch between loaded and aggregated results" results_list[ix][0]["boxes"] = box_results_list[ix][0] return results_list # holds (results_dict, pid) def predict_patient(self, batch): """ predicts one patient. called either directly via loop over validation set in exec.py (mode=='val') or from self.predict_test_set (mode=='test). in val mode: adds 3D ground truth info to predictions and runs consolidation and 2Dto3D merging of predictions. in test mode: returns raw predictions (ground truth addition, consolidation, 2D to 3D merging are done in self.predict_test_set, because patient predictions across several epochs might be needed to be collected first, in case of temporal ensembling). :return. results_dict: stores the results for one patient. dictionary with keys: - 'boxes': list over batch elements. each element is a list over boxes, where each box is one dictionary: [[box_0, ...], [box_n,...]]. batch elements are slices for 2D predictions (if not merged to 3D), and a dummy batch dimension of 1 for 3D predictions. - 'seg_preds': pixel-wise predictions. (b, 1, y, x, (z)) - loss / class_loss (only in validation mode) """ #if self.mode=="test": # self.logger.info('predicting patient {} for fold {} '.format(np.unique(batch['pid']), self.cf.fold)) # True if patient is provided in patches and predictions need to be tiled. self.patched_patient = 'patch_crop_coords' in list(batch.keys()) # forward batch through prediction pipeline. results_dict = self.data_aug_forward(batch) #has seg probs in entry 'seg_preds' if self.mode == 'val': for b in range(batch['patient_bb_target'].shape[0]): for t in range(len(batch['patient_bb_target'][b])): gt_box = {'box_type': 'gt', 'box_coords': batch['patient_bb_target'][b][t], 'class_targets': batch['patient_class_targets'][b][t]} for name in self.cf.roi_items: gt_box.update({name : batch['patient_'+name][b][t]}) results_dict['boxes'][b].append(gt_box) if 'dice' in self.cf.metrics: if self.patched_patient: assert 'patient_seg' in batch.keys(), "Results_dict preds are in original patient shape." results_dict['batch_dices'] = mutils.dice_per_batch_and_class( results_dict['seg_preds'], batch["patient_seg"] if self.patched_patient else batch['seg'], self.cf.num_seg_classes, convert_to_ohe=True) if self.patched_patient and self.cf.clustering == "wbc": wbc_input = [self.regress_flag, results_dict['boxes'], 'dummy_pid', self.cf.class_dict, self.cf.clustering_iou, self.n_ens] results_dict['boxes'] = apply_wbc_to_patient(wbc_input)[0] elif self.patched_patient: nms_inputs = [results_dict['boxes'], 'dummy_pid', self.cf.class_dict, self.cf.clustering_iou] results_dict['boxes'] = apply_nms_to_patient(nms_inputs)[0] if self.cf.merge_2D_to_3D_preds: results_dict['2D_boxes'] = results_dict['boxes'] merge_dims_inputs = [results_dict['boxes'], 'dummy_pid', self.cf.class_dict, self.cf.merge_3D_iou] results_dict['boxes'] = apply_2d_3d_merging_to_patient(merge_dims_inputs)[0] return results_dict def predict_test_set(self, batch_gen, return_results=True): """ wrapper around test method, which loads multiple (or one) epoch parameters (temporal ensembling), loops through the test set and collects predictions per patient. Also flattens the results per patient and epoch and adds optional ground truth boxes for evaluation. Saves out the raw result list for later analysis and optionally consolidates and returns predictions immediately. :return: (optionally) list_of_results_per_patient: list over patient results. each entry is a dict with keys: - 'boxes': list over batch elements. each element is a list over boxes, where each box is one dictionary: [[box_0, ...], [box_n,...]]. batch elements are slices for 2D predictions (if not merged to 3D), and a dummy batch dimension of 1 for 3D predictions. - 'seg_preds': not implemented yet. todo evaluation of instance/semantic segmentation. """ # -------------- raw predicting ----------------- dict_of_patients_results = OrderedDict() set_of_result_types = set() self.model_index = self.model_index.sort_values(by="rank") # get paths of all parameter sets to be loaded for temporal ensembling. (or just one for no temp. ensembling). weight_paths = [os.path.join(self.cf.fold_dir, file_name) for file_name in self.model_index["file_name"]] for rank_ix, weight_path in enumerate(weight_paths): self.logger.info(('tmp ensembling over rank_ix:{} epoch:{}'.format(rank_ix, weight_path))) self.net.load_state_dict(torch.load(weight_path)) self.net.eval() self.rank_ix = str(rank_ix) with torch.no_grad(): plot_batches = np.random.choice(np.arange(batch_gen['n_test']), size=min(batch_gen['n_test'], self.cf.n_test_plots), replace=False) for i in range(batch_gen['n_test']): batch = next(batch_gen['test']) pid = np.unique(batch['pid']) assert len(pid)==1 pid = pid[0] if not pid in dict_of_patients_results.keys(): # store batch info in patient entry of results dict. dict_of_patients_results[pid] = {} dict_of_patients_results[pid]['results_dicts'] = [] dict_of_patients_results[pid]['patient_bb_target'] = batch['patient_bb_target'] for name in self.cf.roi_items: dict_of_patients_results[pid]["patient_"+name] = batch["patient_"+name] stime = time.time() results_dict = self.predict_patient(batch) #only holds "boxes", "seg_preds" # needs ohe seg probs in seg_preds entry: results_dict['seg_preds'] = np.argmax(results_dict['seg_preds'], axis=1)[:,np.newaxis] - self.logger.info("predicting patient {} with weight rank {} (progress: {}/{}) took {:.2f}s".format( - str(pid), rank_ix, (rank_ix)*batch_gen['n_test']+(i+1), len(weight_paths)*batch_gen['n_test'], time.time()-stime)) + print("\rpredicting patient {} with weight rank {} (progress: {}/{}) took {:.2f}s".format( + str(pid), rank_ix, (rank_ix)*batch_gen['n_test']+(i+1), len(weight_paths)*batch_gen['n_test'], + time.time()-stime), end="", flush=True) if i in plot_batches and (not self.patched_patient or 'patient_data' in batch.keys()): try: # view qualitative results of random test case self.logger.time("test_plot") out_file = os.path.join(self.example_plot_dir, 'batch_example_test_{}_rank_{}.png'.format(self.cf.fold, rank_ix)) utils.split_off_process(plg.view_batch, self.cf, batch, results_dict, has_colorchannels=self.cf.has_colorchannels, show_gt_labels=True, show_seg_ids='dice' in self.cf.metrics, get_time="test-example plot", out_file=out_file) except Exception as e: self.logger.info("WARNING: error in view_batch: {}".format(e)) if 'dice' in self.cf.metrics: if self.patched_patient: assert 'patient_seg' in batch.keys(), "Results_dict preds are in original patient shape." results_dict['batch_dices'] = mutils.dice_per_batch_and_class( results_dict['seg_preds'], batch["patient_seg"] if self.patched_patient else batch['seg'], self.cf.num_seg_classes, convert_to_ohe=True) dict_of_patients_results[pid]['results_dicts'].append({k:v for k,v in results_dict.items() if k in ["boxes", "batch_dices"]}) # collect result types to know which ones to look for when saving set_of_result_types.update(dict_of_patients_results[pid]['results_dicts'][-1].keys()) # -------------- re-order, save raw results ----------------- self.logger.info('finished predicting test set. starting aggregation of predictions.') results_per_patient = [] for pid, p_dict in dict_of_patients_results.items(): # dict_of_patients_results[pid]['results_list'] has length batch['n_test'] results_dict = {} # collect all boxes/seg_preds of same batch_instance over temporal instances. b_size = len(p_dict['results_dicts'][0]["boxes"]) for res_type in [rtype for rtype in set_of_result_types if rtype in ["boxes", "batch_dices"]]:#, "seg_preds"]]: if not 'batch' in res_type: #assume it's results on batch-element basis results_dict[res_type] = [[item for rank_dict in p_dict['results_dicts'] for item in rank_dict[res_type][batch_instance]] for batch_instance in range(b_size)] else: results_dict[res_type] = [] for dict in p_dict['results_dicts']: if 'dice' in res_type: item = dict[res_type] #dict['batch_dices'] has shape (num_seg_classes,) assert len(item) == self.cf.num_seg_classes, \ "{}, {}".format(len(item), self.cf.num_seg_classes) else: raise NotImplementedError results_dict[res_type].append(item) # rdict[dice] shape (n_rank_epochs (n_saved_ranks), nsegclasses) # calc mean over test epochs so inline with shape from sampling results_dict[res_type] = np.mean(results_dict[res_type], axis=0) #maybe error type with other than dice if not hasattr(self.cf, "eval_test_separately") or not self.cf.eval_test_separately: # add unpatched 2D or 3D (if dim==3 or merge_2D_to_3D) ground truth boxes for evaluation. for b in range(p_dict['patient_bb_target'].shape[0]): for targ in range(len(p_dict['patient_bb_target'][b])): gt_box = {'box_type': 'gt', 'box_coords':p_dict['patient_bb_target'][b][targ], 'class_targets': p_dict['patient_class_targets'][b][targ]} for name in self.cf.roi_items: gt_box.update({name: p_dict["patient_"+name][b][targ]}) results_dict['boxes'][b].append(gt_box) results_per_patient.append([results_dict, pid]) out_string = 'pred_results_held_out' if self.cf.held_out_test_set else 'pred_results' with open(os.path.join(self.cf.fold_dir, '{}.pkl'.format(out_string)), 'wb') as handle: pickle.dump(results_per_patient, handle) if return_results: # -------------- results processing, clustering, etc. ----------------- final_patient_box_results = [ (res_dict["boxes"], pid) for res_dict,pid in results_per_patient ] if self.cf.clustering == "wbc": self.logger.info('applying WBC to test-set predictions with iou = {} and n_ens = {}.'.format( self.cf.clustering_iou, self.n_ens)) mp_inputs = [[self.regress_flag, ii[0], ii[1], self.cf.class_dict, self.cf.clustering_iou, self.n_ens] for ii in final_patient_box_results] del final_patient_box_results pool = Pool(processes=self.cf.n_workers) final_patient_box_results = pool.map(apply_wbc_to_patient, mp_inputs, chunksize=1) pool.close() pool.join() del mp_inputs elif self.cf.clustering == "nms": self.logger.info('applying standard NMS to test-set predictions with iou = {}.'.format(self.cf.clustering_iou)) pool = Pool(processes=self.cf.n_workers) mp_inputs = [[ii[0], ii[1], self.cf.class_dict, self.cf.clustering_iou] for ii in final_patient_box_results] del final_patient_box_results final_patient_box_results = pool.map(apply_nms_to_patient, mp_inputs, chunksize=1) pool.close() pool.join() del mp_inputs if self.cf.merge_2D_to_3D_preds: self.logger.info('applying 2D-to-3D merging to test-set predictions with iou = {}.'.format(self.cf.merge_3D_iou)) mp_inputs = [[ii[0], ii[1], self.cf.class_dict, self.cf.merge_3D_iou] for ii in final_patient_box_results] del final_patient_box_results pool = Pool(processes=self.cf.n_workers) final_patient_box_results = pool.map(apply_2d_3d_merging_to_patient, mp_inputs, chunksize=1) pool.close() pool.join() del mp_inputs # final_patient_box_results holds [avg_boxes, pid] if wbc for ix in range(len(results_per_patient)): assert results_per_patient[ix][1] == final_patient_box_results[ix][1], "should be same pid" results_per_patient[ix][0]["boxes"] = final_patient_box_results[ix][0] # results_per_patient = [(res_dict["boxes"] = boxes, pid) for (boxes,pid) in final_patient_box_results] return results_per_patient # holds list of (results_dict, pid)