diff --git a/datasets/toy/configs.py b/datasets/toy/configs.py index 6ae0db0..e67a249 100644 --- a/datasets/toy/configs.py +++ b/datasets/toy/configs.py @@ -1,490 +1,490 @@ #!/usr/bin/env python # Copyright 2019 Division of Medical Image Computing, German Cancer Research Center (DKFZ). # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== import sys import os sys.path.append(os.path.dirname(os.path.realpath(__file__))) import numpy as np from default_configs import DefaultConfigs from collections import namedtuple boxLabel = namedtuple('boxLabel', ["name", "color"]) Label = namedtuple("Label", ['id', 'name', 'shape', 'radius', 'color', 'regression', 'ambiguities', 'gt_distortion']) binLabel = namedtuple("binLabel", ['id', 'name', 'color', 'bin_vals']) class Configs(DefaultConfigs): def __init__(self, server_env=None): super(Configs, self).__init__(server_env) ######################### # Prepro # ######################### self.pp_rootdir = os.path.join('/mnt/HDD2TB/Documents/data/toy', "cyl1ps_dev_exact") self.pp_npz_dir = self.pp_rootdir+"_npz" self.pre_crop_size = [320,320,8] #y,x,z; determines pp data shape (2D easily implementable, but only 3D for now) self.min_2d_radius = 6 #in pixels self.n_train_samples, self.n_test_samples = 80, 80 # not actually real one-hot encoding (ohe) but contains more info: roi-overlap only within classes. 
self.pp_create_ohe_seg = False self.pp_empty_samples_ratio = 0.1 self.pp_place_radii_mid_bin = True self.pp_only_distort_2d = True # outer-most intensity of blurred radii, relative to inner-object intensity. <1 for decreasing, > 1 for increasing. # e.g.: setting 0.1 means blurred edge has min intensity 10% as large as inner-object intensity. self.pp_blur_min_intensity = 0.2 self.max_instances_per_sample = 3 #how many max instances over all classes per sample (img if 2d, vol if 3d) self.max_instances_per_class = self.max_instances_per_sample # how many max instances per image per class self.noise_scale = 0. # std-dev of gaussian noise self.ambigs_sampling = "gaussian" #"gaussian" or "uniform" """ radius_calib: gt distort for calibrating uncertainty. Range of gt distortion is inferable from image by distinguishing it from the rest of the object. blurring width around edge will be shifted so that symmetric rel to orig radius. blurring scale: if self.ambigs_sampling is uniform, distribution's non-zero range (b-a) will be sqrt(12)*scale since uniform dist has variance (b-a)²/12. b,a will be placed symmetrically around unperturbed radius. if sampling is gaussian, then scale parameter sets one std dev, i.e., blurring width will be orig_radius * std_dev * 2. """ self.ambiguities = { #set which classes to apply which ambs to below in class labels #choose out of: 'outer_radius', 'inner_radius', 'radii_relations'. 
#kind #probability #scale (gaussian std, relative to unperturbed value) #"outer_radius": (1., 0.5), #"outer_radius_xy": (1., 0.5), #"inner_radius": (0.5, 0.1), #"radii_relations": (0.5, 0.1), "radius_calib": (1., 1./6) } # shape choices: 'cylinder', 'block' # id, name, shape, radius, color, regression, ambiguities, gt_distortion self.pp_classes = [Label(1, 'cylinder', 'cylinder', ((6,6,1),(40,40,8)), (*self.blue, 1.), "radius_2d", (), ()), #Label(2, 'block', 'block', ((6,6,1),(40,40,8)), (*self.aubergine,1.), "radii_2d", (), ('radius_calib',)) ] ######################### # I/O # ######################### self.data_sourcedir = '/mnt/HDD2TB/Documents/data/toy/cyl1ps_exact' if server_env: self.data_sourcedir = '/datasets/data_ramien/toy/cyl1ps_exact_npz' self.test_data_sourcedir = os.path.join(self.data_sourcedir, 'test') self.data_sourcedir = os.path.join(self.data_sourcedir, "train") self.info_df_name = 'info_df.pickle' # one out of ['mrcnn', 'retina_net', 'retina_unet', 'detection_unet', 'ufrcnn', 'detection_fpn']. - self.model = 'mrcnn' + self.model = 'retina_unet' self.model_path = 'models/{}.py'.format(self.model if not 'retina' in self.model else 'retina_net') self.model_path = os.path.join(self.source_dir, self.model_path) ######################### # Architecture # ######################### # one out of [2, 3]. dimension the model operates in. 
self.dim = 2 # 'class', 'regression', 'regression_bin', 'regression_ken_gal' # currently only tested mode is a single-task at a time (i.e., only one task in below list) # but, in principle, tasks could be combined (e.g., object classes and regression per class) self.prediction_tasks = ['class',] self.start_filts = 48 if self.dim == 2 else 18 self.end_filts = self.start_filts * 4 if self.dim == 2 else self.start_filts * 2 self.res_architecture = 'resnet50' # 'resnet101' , 'resnet50' self.norm = 'instance_norm' # one of None, 'instance_norm', 'batch_norm' self.relu = 'relu' # one of 'xavier_uniform', 'xavier_normal', or 'kaiming_normal', None (=default = 'kaiming_uniform') self.weight_init = None self.regression_n_features = 1 # length of regressor target vector ######################### # Data Loader # ######################### self.num_epochs = 32 self.num_train_batches = 120 if self.dim == 2 else 80 - self.batch_size = 16 if self.dim == 2 else 8 + self.batch_size = 12 if self.dim == 2 else 8 self.n_cv_splits = 4 # select modalities from preprocessed data self.channels = [0] self.n_channels = len(self.channels) # which channel (mod) to show as bg in plotting, will be extra added to batch if not in self.channels self.plot_bg_chan = 0 self.crop_margin = [20, 20, 1] # has to be smaller than respective patch_size//2 self.patch_size_2D = self.pre_crop_size[:2] self.patch_size_3D = self.pre_crop_size[:2]+[8] # patch_size to be used for training. pre_crop_size is the patch_size before data augmentation. self.patch_size = self.patch_size_2D if self.dim == 2 else self.patch_size_3D # ratio of free sampled batch elements before class balancing is triggered # (>0 to include "empty"/background patches.) 
self.batch_random_ratio = 0.2 self.balance_target = "class_targets" if 'class' in self.prediction_tasks else "rg_bin_targets" self.observables_patient = [] self.observables_rois = [] self.seed = 3 #for generating folds ############################# # Colors, Classes, Legends # ############################# self.plot_frequency = 1 binary_bin_labels = [binLabel(1, 'r<=25', (*self.green, 1.), (1,25)), binLabel(2, 'r>25', (*self.red, 1.), (25,))] quintuple_bin_labels = [binLabel(1, 'r2-10', (*self.green, 1.), (2,10)), binLabel(2, 'r10-20', (*self.yellow, 1.), (10,20)), binLabel(3, 'r20-30', (*self.orange, 1.), (20,30)), binLabel(4, 'r30-40', (*self.bright_red, 1.), (30,40)), binLabel(5, 'r>40', (*self.red, 1.), (40,))] # choose here if to do 2-way or 5-way regression-bin classification task_spec_bin_labels = quintuple_bin_labels self.class_labels = [ # regression: regression-task label, either value or "(x,y,z)_radius" or "radii". # ambiguities: name of above defined ambig to apply to image data (not gt); need to be iterables! # gt_distortion: name of ambig to apply to gt only; needs to be iterable! 
# #id #name #shape #radius #color #regression #ambiguities #gt_distortion Label( 0, 'bg', None, (0, 0, 0), (*self.white, 0.), (0, 0, 0), (), ())] if "class" in self.prediction_tasks: self.class_labels += self.pp_classes else: self.class_labels += [Label(1, 'object', 'object', ('various',), (*self.orange, 1.), ('radius_2d',), ("various",), ('various',))] if any(['regression' in task for task in self.prediction_tasks]): self.bin_labels = [binLabel(0, 'bg', (*self.white, 1.), (0,))] self.bin_labels += task_spec_bin_labels self.bin_id2label = {label.id: label for label in self.bin_labels} bins = [(min(label.bin_vals), max(label.bin_vals)) for label in self.bin_labels] self.bin_id2rg_val = {ix: [np.mean(bin)] for ix, bin in enumerate(bins)} self.bin_edges = [(bins[i][1] + bins[i + 1][0]) / 2 for i in range(len(bins) - 1)] self.bin_dict = {label.id: label.name for label in self.bin_labels if label.id != 0} if self.class_specific_seg: self.seg_labels = self.class_labels self.box_type2label = {label.name: label for label in self.box_labels} self.class_id2label = {label.id: label for label in self.class_labels} self.class_dict = {label.id: label.name for label in self.class_labels if label.id != 0} self.seg_id2label = {label.id: label for label in self.seg_labels} self.cmap = {label.id: label.color for label in self.seg_labels} self.plot_prediction_histograms = True self.plot_stat_curves = False self.has_colorchannels = False self.plot_class_ids = True self.num_classes = len(self.class_dict) self.num_seg_classes = len(self.seg_labels) ######################### # Data Augmentation # ######################### self.do_aug = True self.da_kwargs = { 'mirror': True, 'mirror_axes': tuple(np.arange(0, self.dim, 1)), 'do_elastic_deform': False, 'alpha': (500., 1500.), 'sigma': (40., 45.), 'do_rotation': False, 'angle_x': (0., 2 * np.pi), 'angle_y': (0., 0), 'angle_z': (0., 0), 'do_scale': False, 'scale': (0.8, 1.1), 'random_crop': False, 'rand_crop_dist': (self.patch_size[0] / 2. 
- 3, self.patch_size[1] / 2. - 3), 'border_mode_data': 'constant', 'border_cval_data': 0, 'order_data': 1 } if self.dim == 3: self.da_kwargs['do_elastic_deform'] = False self.da_kwargs['angle_x'] = (0, 0.0) self.da_kwargs['angle_y'] = (0, 0.0) # must be 0!! self.da_kwargs['angle_z'] = (0., 2 * np.pi) ######################### # Schedule / Selection # ######################### # decide whether to validate on entire patient volumes (like testing) or sampled patches (like training) # the former is morge accurate, while the latter is faster (depending on volume size) self.val_mode = 'val_sampling' # one of 'val_sampling' , 'val_patient' if self.val_mode == 'val_patient': self.max_val_patients = 220 # if 'all' iterates over entire val_set once. if self.val_mode == 'val_sampling': self.num_val_batches = 25 if self.dim==2 else 15 self.save_n_models = 2 self.min_save_thresh = 1 if self.dim == 2 else 1 # =wait time in epochs if "class" in self.prediction_tasks: self.model_selection_criteria = {name + "_ap": 1. for name in self.class_dict.values()} elif any("regression" in task for task in self.prediction_tasks): self.model_selection_criteria = {name + "_ap": 0.2 for name in self.class_dict.values()} self.model_selection_criteria.update({name + "_avp": 0.8 for name in self.class_dict.values()}) self.lr_decay_factor = 0.5 self.scheduling_patience = int(self.num_epochs / 5) self.weight_decay = 1e-5 self.clip_norm = None # number or None ######################### # Testing / Plotting # ######################### self.test_aug_axes = (0,1,(0,1)) # None or list: choices are 0,1,(0,1) self.held_out_test_set = True self.max_test_patients = "all" # number or "all" for all self.test_against_exact_gt = not 'exact' in self.data_sourcedir self.val_against_exact_gt = False # True is an unrealistic --> irrelevant scenario. 
self.report_score_level = ['rois'] # 'patient' or 'rois' (incl) self.patient_class_of_interest = 1 self.patient_bin_of_interest = 2 self.eval_bins_separately = False#"additionally" if not 'class' in self.prediction_tasks else False self.metrics = ['ap', 'auc', 'dice'] if any(['regression' in task for task in self.prediction_tasks]): self.metrics += ['avp', 'rg_MAE_weighted', 'rg_MAE_weighted_tp', 'rg_bin_accuracy_weighted', 'rg_bin_accuracy_weighted_tp'] if 'aleatoric' in self.model: self.metrics += ['rg_uncertainty', 'rg_uncertainty_tp', 'rg_uncertainty_tp_weighted'] self.evaluate_fold_means = True self.ap_match_ious = [0.5] # threshold(s) for considering a prediction as true positive self.min_det_thresh = 0.3 self.model_max_iou_resolution = 0.2 # aggregation method for test and val_patient predictions. # wbc = weighted box clustering as in https://arxiv.org/pdf/1811.08661.pdf, # nms = standard non-maximum suppression, or None = no clustering self.clustering = 'wbc' # iou thresh (exclusive!) 
for regarding two preds as concerning the same ROI self.clustering_iou = self.model_max_iou_resolution # has to be larger than desired possible overlap iou of model predictions self.merge_2D_to_3D_preds = False self.merge_3D_iou = self.model_max_iou_resolution self.n_test_plots = 1 # per fold and rank self.test_n_epochs = self.save_n_models # should be called n_test_ens, since is number of models to ensemble over during testing # is multiplied by (1 + nr of test augs) ######################### # Assertions # ######################### if not 'class' in self.prediction_tasks: assert self.num_classes == 1 ######################### # Add model specifics # ######################### {'mrcnn': self.add_mrcnn_configs, 'mrcnn_aleatoric': self.add_mrcnn_configs, 'retina_net': self.add_mrcnn_configs, 'retina_unet': self.add_mrcnn_configs, 'detection_unet': self.add_det_unet_configs, 'detection_fpn': self.add_det_fpn_configs }[self.model]() def rg_val_to_bin_id(self, rg_val): #only meant for isotropic radii!! # only 2D radii (x and y dims) or 1D (x or y) are expected return np.round(np.digitize(rg_val, self.bin_edges).mean()) def add_det_fpn_configs(self): - self.learning_rate = [5 * 1e-4] * self.num_epochs + self.learning_rate = [1 * 1e-4] * self.num_epochs self.dynamic_lr_scheduling = True self.scheduling_criterion = 'torch_loss' self.scheduling_mode = 'min' if "loss" in self.scheduling_criterion else 'max' self.n_roi_candidates = 4 if self.dim == 2 else 6 # max number of roi candidates to identify per image (slice in 2D, volume in 3D) # loss mode: either weighted cross entropy ('wce'), batch-wise dice loss ('dice), or the sum of both ('dice_wce') self.seg_loss_mode = 'wce' self.wce_weights = [1] * self.num_seg_classes if 'dice' in self.seg_loss_mode else [0.1, 1] self.fp_dice_weight = 1 if self.dim == 2 else 1 # if <1, false positive predictions in foreground are penalized less. 
self.detection_min_confidence = 0.05 # how to determine score of roi: 'max' or 'median' self.score_det = 'max' def add_det_unet_configs(self): - self.learning_rate = [5 * 1e-4] * self.num_epochs + self.learning_rate = [1 * 1e-4] * self.num_epochs self.dynamic_lr_scheduling = True self.scheduling_criterion = "torch_loss" self.scheduling_mode = 'min' if "loss" in self.scheduling_criterion else 'max' # max number of roi candidates to identify per image (slice in 2D, volume in 3D) self.n_roi_candidates = 4 if self.dim == 2 else 6 # loss mode: either weighted cross entropy ('wce'), batch-wise dice loss ('dice), or the sum of both ('dice_wce') self.seg_loss_mode = 'wce' self.wce_weights = [1] * self.num_seg_classes if 'dice' in self.seg_loss_mode else [0.1, 1] # if <1, false positive predictions in foreground are penalized less. self.fp_dice_weight = 1 if self.dim == 2 else 1 self.detection_min_confidence = 0.05 # how to determine score of roi: 'max' or 'median' self.score_det = 'max' self.init_filts = 32 self.kernel_size = 3 # ks for horizontal, normal convs self.kernel_size_m = 2 # ks for max pool self.pad = "same" # "same" or integer, padding of horizontal convs def add_mrcnn_configs(self): self.learning_rate = [1e-4] * self.num_epochs self.dynamic_lr_scheduling = True # with scheduler set in exec self.scheduling_criterion = max(self.model_selection_criteria, key=self.model_selection_criteria.get) self.scheduling_mode = 'min' if "loss" in self.scheduling_criterion else 'max' # number of classes for network heads: n_foreground_classes + 1 (background) self.head_classes = self.num_classes + 1 if 'class' in self.prediction_tasks else 2 # feed +/- n neighbouring slices into channel dimension. set to None for no context. self.n_3D_context = None if self.n_3D_context is not None and self.dim == 2: self.n_channels *= (self.n_3D_context * 2 + 1) self.detect_while_training = True # disable the re-sampling of mask proposals to original size for speed-up. 
# since evaluation is detection-driven (box-matching) and not instance segmentation-driven (iou-matching), # mask outputs are optional. self.return_masks_in_train = True self.return_masks_in_val = True self.return_masks_in_test = True # feature map strides per pyramid level are inferred from architecture. anchor scales are set accordingly. self.backbone_strides = {'xy': [4, 8, 16, 32], 'z': [1, 2, 4, 8]} # anchor scales are chosen according to expected object sizes in data set. Default uses only one anchor scale # per pyramid level. (outer list are pyramid levels (corresponding to BACKBONE_STRIDES), inner list are scales per level.) self.rpn_anchor_scales = {'xy': [[4], [8], [16], [32]], 'z': [[1], [2], [4], [8]]} # choose which pyramid levels to extract features from: P2: 0, P3: 1, P4: 2, P5: 3. self.pyramid_levels = [0, 1, 2, 3] # number of feature maps in rpn. typically lowered in 3D to save gpu-memory. self.n_rpn_features = 512 if self.dim == 2 else 64 # anchor ratios and strides per position in feature maps. self.rpn_anchor_ratios = [0.5, 1., 2.] self.rpn_anchor_stride = 1 # Threshold for first stage (RPN) non-maximum suppression (NMS): LOWER == HARDER SELECTION self.rpn_nms_threshold = max(0.8, self.model_max_iou_resolution) # loss sampling settings. self.rpn_train_anchors_per_image = 4 self.train_rois_per_image = 6 # per batch_instance self.roi_positive_ratio = 0.5 self.anchor_matching_iou = 0.8 # k negative example candidates are drawn from a pool of size k*shem_poolsize (stochastic hard-example mining), # where k<=#positive examples. 
self.shem_poolsize = 2 self.pool_size = (7, 7) if self.dim == 2 else (7, 7, 3) self.mask_pool_size = (14, 14) if self.dim == 2 else (14, 14, 5) self.mask_shape = (28, 28) if self.dim == 2 else (28, 28, 10) self.rpn_bbox_std_dev = np.array([0.1, 0.1, 0.1, 0.2, 0.2, 0.2]) self.bbox_std_dev = np.array([0.1, 0.1, 0.1, 0.2, 0.2, 0.2]) self.window = np.array([0, 0, self.patch_size[0], self.patch_size[1], 0, self.patch_size_3D[2]]) self.scale = np.array([self.patch_size[0], self.patch_size[1], self.patch_size[0], self.patch_size[1], self.patch_size_3D[2], self.patch_size_3D[2]]) # y1,x1,y2,x2,z1,z2 if self.dim == 2: self.rpn_bbox_std_dev = self.rpn_bbox_std_dev[:4] self.bbox_std_dev = self.bbox_std_dev[:4] self.window = self.window[:4] self.scale = self.scale[:4] self.plot_y_max = 1.5 self.n_plot_rpn_props = 5 if self.dim == 2 else 30 # per batch_instance (slice in 2D / patient in 3D) # pre-selection in proposal-layer (stage 1) for NMS-speedup. applied per batch element. self.pre_nms_limit = 2000 if self.dim == 2 else 4000 # n_proposals to be selected after NMS per batch element. too high numbers blow up memory if "detect_while_training" is True, # since proposals of the entire batch are forwarded through second stage as one "batch". self.roi_chunk_size = 1300 if self.dim == 2 else 500 self.post_nms_rois_training = 200 * (self.head_classes-1) if self.dim == 2 else 400 self.post_nms_rois_inference = 200 * (self.head_classes-1) # Final selection of detections (refine_detections) self.model_max_instances_per_batch_element = 9 if self.dim == 2 else 18 # per batch element and class. self.detection_nms_threshold = self.model_max_iou_resolution # needs to be > 0, otherwise all predictions are one cluster. 
self.model_min_confidence = 0.2 # iou for nms in box refining (directly after heads), should be >0 since ths>=x in mrcnn.py if self.dim == 2: self.backbone_shapes = np.array( [[int(np.ceil(self.patch_size[0] / stride)), int(np.ceil(self.patch_size[1] / stride))] for stride in self.backbone_strides['xy']]) else: self.backbone_shapes = np.array( [[int(np.ceil(self.patch_size[0] / stride)), int(np.ceil(self.patch_size[1] / stride)), int(np.ceil(self.patch_size[2] / stride_z))] for stride, stride_z in zip(self.backbone_strides['xy'], self.backbone_strides['z'] )]) if self.model == 'retina_net' or self.model == 'retina_unet': # whether to use focal loss or SHEM for loss-sample selection self.focal_loss = False # implement extra anchor-scales according to https://arxiv.org/abs/1708.02002 self.rpn_anchor_scales['xy'] = [[ii[0], ii[0] * (2 ** (1 / 3)), ii[0] * (2 ** (2 / 3))] for ii in self.rpn_anchor_scales['xy']] self.rpn_anchor_scales['z'] = [[ii[0], ii[0] * (2 ** (1 / 3)), ii[0] * (2 ** (2 / 3))] for ii in self.rpn_anchor_scales['z']] self.n_anchors_per_pos = len(self.rpn_anchor_ratios) * 3 # pre-selection of detections for NMS-speedup. per entire batch. self.pre_nms_limit = (500 if self.dim == 2 else 6250) * self.batch_size # anchor matching iou is lower than in Mask R-CNN according to https://arxiv.org/abs/1708.02002 self.anchor_matching_iou = 0.7 if self.model == 'retina_unet': self.operate_stride1 = True diff --git a/models/detection_fpn.py b/models/detection_fpn.py index b59e51d..4cf58ef 100644 --- a/models/detection_fpn.py +++ b/models/detection_fpn.py @@ -1,176 +1,176 @@ #!/usr/bin/env python # Copyright 2019 Division of Medical Image Computing, German Cancer Research Center (DKFZ). # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """ Unet-like Backbone architecture, with non-parametric heuristics for box detection on semantic segmentation outputs. """ import numpy as np import torch import torch.nn as nn import torch.nn.functional as F from torch.autograd import Variable from scipy.ndimage.measurements import label as lb import utils.exp_utils as utils import utils.model_utils as mutils class net(nn.Module): def __init__(self, cf, logger): super(net, self).__init__() self.cf = cf self.logger = logger backbone = utils.import_module('bbone', cf.backbone_path) self.logger.info("loaded backbone from {}".format(self.cf.backbone_path)) conv_gen = backbone.ConvGenerator(cf.dim) # set operate_stride1=True to generate a unet-like FPN.) self.fpn = backbone.FPN(cf, conv=conv_gen, relu_enc=cf.relu, operate_stride1=True) - self.conv_final = conv_gen(cf.end_filts, cf.num_seg_classes, ks=1, pad=0, norm=cf.norm, relu=None) + self.conv_final = conv_gen(cf.end_filts, cf.num_seg_classes, ks=1, pad=0, norm=None, relu=None) #initialize parameters if self.cf.weight_init=="custom": logger.info("Tried to use custom weight init which is not defined. Using pytorch default.") elif self.cf.weight_init: mutils.initialize_weights(self) else: logger.info("using default pytorch weight init") def forward(self, x): """ forward pass of network. :param x: input image. shape (b, c, y, x, (z)) :return: seg_logits: shape (b, n_classes, y, x, (z)) :return: out_box_coords: list over n_classes. 
elements are arrays(b, n_rois, (y1, x1, y2, x2, (z1), (z2))) :return: out_max_scores: list over n_classes. elements are arrays(b, n_rois) """ out_features = self.fpn(x)[0] #take only pyramid output of stride 1 seg_logits = self.conv_final(out_features) out_box_coords, out_max_scores = [], [] smax = F.softmax(seg_logits.detach(), dim=1).cpu().data.numpy() for cl in range(1, len(self.cf.class_dict.keys()) + 1): hard_mask = np.copy(smax).argmax(1) hard_mask[hard_mask != cl] = 0 hard_mask[hard_mask == cl] = 1 # perform connected component analysis on argmaxed predictions, # draw boxes around components and return coordinates. box_coords, rois = mutils.get_coords(hard_mask, self.cf.n_roi_candidates, self.cf.dim) # for each object, choose the highest softmax score (in the respective class) # of all pixels in the component as object score. max_scores = [[] for _ in range(x.shape[0])] for bix, broi in enumerate(rois): for nix, nroi in enumerate(broi): score_det = np.max if self.cf.score_det=="max" else np.median #score determination max_scores[bix].append(score_det(smax[bix, cl][nroi > 0])) out_box_coords.append(box_coords) out_max_scores.append(max_scores) return seg_logits, out_box_coords, out_max_scores def train_forward(self, batch, **kwargs): """ train method (also used for validation monitoring). wrapper around forward pass of network. prepares input data for processing, computes losses, and stores outputs in a dictionary. :param batch: dictionary containing 'data', 'seg', etc. :param kwargs: :return: results_dict: dictionary with keys: 'boxes': list over batch elements. each batch element is a list of boxes. each box is a dictionary: [[{box_0}, ... {box_n}], [{box_0}, ... {box_n}], ...] 'seg_preds': pixel-wise class predictions (b, 1, y, x, (z)) with values [0, n_classes] 'torch_loss': 1D torch tensor for backprop. 'class_loss': classification loss for monitoring. here: dummy array, since no classification conducted. 
""" img = torch.from_numpy(batch['data']).cuda().float() seg = torch.from_numpy(batch['seg']).cuda().long() seg_ohe = torch.from_numpy(mutils.get_one_hot_encoding(batch['seg'], self.cf.num_seg_classes)).cuda() results_dict = {} seg_logits, box_coords, max_scores = self.forward(img) # no extra class loss applied in this model. pass dummy tensor for monitoring. results_dict['class_loss'] = np.nan results_dict['boxes'] = [[] for _ in range(img.shape[0])] for cix in range(len(self.cf.class_dict.keys())): for bix in range(img.shape[0]): for rix in range(len(max_scores[cix][bix])): if max_scores[cix][bix][rix] > self.cf.detection_min_confidence: results_dict['boxes'][bix].append({'box_coords': np.copy(box_coords[cix][bix][rix]), 'box_score': max_scores[cix][bix][rix], 'box_pred_class_id': cix + 1, # add 0 for background. 'box_type': 'det'}) for bix in range(img.shape[0]): for tix in range(len(batch['bb_target'][bix])): gt_box = {'box_coords': batch['bb_target'][bix][tix], 'box_type': 'gt'} for name in self.cf.roi_items: gt_box.update({name: batch[name][bix][tix]}) results_dict['boxes'][bix].append(gt_box) # compute segmentation loss as either weighted cross entropy, dice loss, or the sum of both. 
loss = torch.tensor([0.], dtype=torch.float, requires_grad=False).cuda() seg_pred = F.softmax(seg_logits, dim=1) if self.cf.seg_loss_mode == 'dice' or self.cf.seg_loss_mode == 'dice_wce': loss += 1 - mutils.batch_dice(seg_pred, seg_ohe.float(), false_positive_weight=float(self.cf.fp_dice_weight)) if self.cf.seg_loss_mode == 'wce' or self.cf.seg_loss_mode == 'dice_wce': loss += F.cross_entropy(seg_logits, seg[:, 0], weight=torch.FloatTensor(self.cf.wce_weights).cuda()) results_dict['torch_loss'] = loss seg_pred = seg_pred.argmax(dim=1).unsqueeze(dim=1).cpu().data.numpy() results_dict['seg_preds'] = seg_pred if 'dice' in self.cf.metrics: results_dict['batch_dices'] = mutils.dice_per_batch_and_class(seg_pred, batch["seg"], self.cf.num_seg_classes, convert_to_ohe=True) #self.logger.info("loss: {0:.2f}".format(loss.item())) return results_dict def test_forward(self, batch, **kwargs): """ test method. wrapper around forward pass of network without usage of any ground truth information. prepares input data for processing and stores outputs in a dictionary. :param batch: dictionary containing 'data' :param kwargs: :return: results_dict: dictionary with keys: 'boxes': list over batch elements. each batch element is a list of boxes. each box is a dictionary: [[{box_0}, ... {box_n}], [{box_0}, ... {box_n}], ...] 
'seg_preds': pixel-wise class predictions (b, 1, y, x, (z)) with values [0, n_classes] """ img = torch.FloatTensor(batch['data']).cuda() seg_logits, box_coords, max_scores = self.forward(img) results_dict = {} results_dict['boxes'] = [[] for _ in range(img.shape[0])] for cix in range(len(box_coords)): for bix in range(img.shape[0]): for rix in range(len(max_scores[cix][bix])): if max_scores[cix][bix][rix] > self.cf.detection_min_confidence: results_dict['boxes'][bix].append({'box_coords': np.copy(box_coords[cix][bix][rix]), 'box_score': max_scores[cix][bix][rix], 'box_pred_class_id': cix + 1, 'box_type': 'det'}) results_dict['seg_preds'] = F.softmax(seg_logits, dim=1).cpu().data.numpy() return results_dict diff --git a/models/detection_unet.py b/models/detection_unet.py index 20394ba..dd7e293 100644 --- a/models/detection_unet.py +++ b/models/detection_unet.py @@ -1,545 +1,545 @@ import warnings import os import shutil import time import math import numpy as np import torch import torch.nn as nn import torch.nn.functional as F import utils.exp_utils as utils import utils.model_utils as mutils ''' Use nn.DataParallel to use more than one GPU ''' def center_crop_2D_image_batched(img, crop_size): # from batch generator tools from https://github.com/MIC-DKFZ/batchgenerators # dim 0 is batch, dim 1 is channel, dim 2 and 3 are x y center = np.array(img.shape[2:]) / 2. if not hasattr(crop_size, "__iter__"): center_crop = [int(crop_size)] * (len(img.shape) - 2) else: center_crop = np.array(crop_size) assert len(center_crop) == (len( img.shape) - 2), "If you provide a list/tuple as center crop make sure it has the same len as your data has dims (2d)" return img[:, :, int(center[0] - center_crop[0] / 2.):int(center[0] + center_crop[0] / 2.), int(center[1] - center_crop[1] / 2.):int(center[1] + center_crop[1] / 2.)] def center_crop_3D_image_batched(img, crop_size): # dim 0 is batch, dim 1 is channel, dim 2, 3 and 4 are x y z center = np.array(img.shape[2:]) / 2. 
def centercrop_vol(tensor, size):
    """Center-crop the trailing spatial dimensions of ``tensor`` to ``size``.

    Fixes two defects of the previous version: the result of the crop was
    discarded (the input was always returned unchanged), and the 3D case was
    routed to the 2D crop helper. The start/stop arithmetic per axis is the
    same as in ``center_crop_2D_image_batched`` /
    ``center_crop_3D_image_batched``: ``int(center - target / 2.)`` to
    ``int(center + target / 2.)`` with ``center = orig_len / 2.``.

    :param tensor: array-like supporting ``.shape`` and slice indexing whose
        last ``len(size)`` dimensions should be center-cropped.
    :param size: 2- or 3-int tuple of target (height, width(,depth)).
    :return: view of ``tensor`` with the last ``len(size)`` dimensions cropped
        to ``size``. An axis whose target length is not smaller than its
        current length is left uncropped.
    :raises Exception: if ``size`` does not have exactly 2 or 3 entries.
    """
    dim = len(size)
    if dim not in (2, 3):
        raise Exception("invalid size argument {} encountered in centercrop".format(size))

    crop = []
    for orig_len, target_len in zip(tensor.shape[-dim:], size):
        if target_len >= orig_len:
            # Nothing to remove on this axis. A naive start index would go
            # negative here and silently select the wrong region.
            crop.append(slice(None))
            continue
        center = orig_len / 2.
        crop.append(slice(int(center - target_len / 2.), int(center + target_len / 2.)))

    # Ellipsis keeps all leading (batch, channel, ...) dimensions untouched.
    return tensor[(Ellipsis,) + tuple(crop)]
= dims[0], dims[1] return np.array([(h-1)*s+F, (w-1)*s+F]) def dimcalc_Unet_std(init_dims, F=3, F_pool=2, F_up=2, s=1, s_pool=2, s_up=2, pad=0): r"""Calculate theoretic dimensions of feature maps throughout layers of this U-net. """ dims = np.array(init_dims) print("init dims: ", dims) def down(dims): for i in range(2): dims = dimcalc_conv2D(dims, F=F, s=s, pad=pad) dims = dimcalc_conv2D(dims, F=F_pool, s=s_pool) return dims.astype(int) def up(dims): for i in range(2): dims = dimcalc_conv2D(dims, F=F, s=s, pad=pad) dims = dimcalc_transconv2D(dims, F=F_up,s=s_up) return dims.astype(int) stage = 1 for i in range(4): dims = down(dims) print("stage ", stage, ": ", dims) stage+=1 for i in range(4): dims = up(dims) print("stage ", stage, ": ", dims) stage+=1 for i in range(2): dims = dimcalc_conv2D(dims,F=F,s=s, pad=pad).astype(int) print("final output size: ", dims) return dims def dimcalc_Unet(init_dims, F=3, F_pool=2, F_up=2, s=1, s_pool=2, s_up=2, pad=0): r"""Calculate theoretic dimensions of feature maps throughout layers of this U-net. """ dims = np.array(init_dims) print("init dims: ", dims) def down(dims): for i in range(3): dims = dimcalc_conv2D(dims, F=F, s=s, pad=pad) dims = dimcalc_conv2D(dims, F=F_pool, s=s_pool) return dims.astype(int) def up(dims): dims = dimcalc_transconv2D(dims, F=F_up,s=s_up) for i in range(3): dims = dimcalc_conv2D(dims, F=F, s=s, pad=pad) return dims.astype(int) stage = 1 for i in range(6): dims = down(dims) print("stage ", stage, ": ", dims) stage+=1 for i in range(3): dims = dimcalc_conv2D(dims, F=F, s=s, pad=pad) for i in range(6): dims = up(dims) print("stage ", stage, ": ", dims) stage+=1 dims = dims.astype(int) print("final output size: ", dims) return dims class horiz_conv(nn.Module): def __init__(self, in_chans, out_chans, kernel_size, c_gen, norm, pad=0, relu="relu", bottleneck=True): super(horiz_conv, self).__init__() #TODO maybe make res-block? 
class up(nn.Module):
    """Decoder block: upsample, reduce channels, concatenate the skip input, convolve.

    :param in_chans: number of channels of the incoming (lower-resolution) feature map.
    :param out_chans: number of channels produced by this block.
    :param kernel_size: kernel size handed to the horizontal conv block.
    :param interpol: factory called as ``interpol(stride, mode)`` to build the upsampling op.
    :param c_gen: conv generator; its ``dim`` attribute (2 or 3) selects the interpolation mode.
    :param norm: normalization setting forwarded to the convs.
    :param pad: padding forwarded to the horizontal conv block.
    :param relu: activation setting forwarded to the horizontal conv block.
    :param stride_ip: upsampling factor handed to ``interpol``.
    """

    def __init__(self, in_chans, out_chans, kernel_size, interpol, c_gen, norm, pad=0, relu="relu", stride_ip=2):
        super().__init__()
        self.dim = c_gen.dim
        # TODO check if fits with spatial dims order in data
        mode = "bilinear" if self.dim == 2 else "trilinear"
        self.upsample = interpol(stride_ip, mode)
        # ks=1 conv without activation maps in_chans -> out_chans before the concat
        self.reduce_chans = c_gen(in_chans, out_chans, ks=1, norm=norm, relu=None)
        # the concat doubles the channel count, hence out_chans*2 as input
        self.horiz = horiz_conv(out_chans*2, out_chans, kernel_size, c_gen, norm=norm, pad=pad, relu=relu)

    def forward(self, x, skip_inp):
        """Upsample ``x``, merge it with ``skip_inp`` along the channel axis and convolve.

        :param x: feature map coming from the deeper stage.
        :param skip_inp: skip-connection feature map from the encoder.
        :return: output of the horizontal conv block applied to the concatenation.
        """
        # TODO maybe add highway weights in skips?
        upsampled = self.reduce_chans(self.upsample(x))
        targ_size = upsampled.size()[-self.dim:]  # spatial extent of the upsampled map
        cropped_skip = centercrop_vol(skip_inp, targ_size)
        assert targ_size == cropped_skip.size()[-self.dim:], "corresp. skip and forward dimensions don't match"
        merged = torch.cat((upsampled, cropped_skip), dim=1)
        return self.horiz(merged)
math:: \left[ (h-F)//s+1 \right] \times \left[ (w-F)//s+1 \right] \times k :Pooling: input :math:`h \times w \times d` is converted to .. math:: \left[ (h-F)//s+1 \right] \times \left[ (w-F)//s+1 \right] \times d, pooling filters have no depths => orig depths preserved. :Up-Conv.: input :math:`h \times w \times d` is converted to .. math:: \left[ (h-1)s + F \right] \times \left[ (w-1)s + F \right] \times k """ def down(self, in_chans, out_chans, kernel_size, kernel_size_m, pad=0, relu="relu",maintain_z=False): """generate encoder block :param in_chans: :param out_chans: :param kernel_size: :param pad: :return: """ if maintain_z and self.dim==3: stride_pool = (2,2,1) if not hasattr(kernel_size_m, "__iter__"): kernel_size_m = [kernel_size_m]*self.dim kernel_size_m = (*kernel_size_m[:-1], 1) else: stride_pool = 2 module = nn.Sequential( nn.MaxPool2d(kernel_size_m, stride=stride_pool) if self.dim == 2 else nn.MaxPool3d( kernel_size_m, stride=stride_pool), #--> needs stride 2 in z in upsampling as well! 
horiz_conv(in_chans, out_chans, kernel_size, self.c_gen, self.norm, pad, relu=relu) ) return module def up(self, in_chans, out_chans, kernel_size, pad=0, relu="relu", maintain_z=False): """generate decoder block :param in_chans: :param out_chans: :param kernel_size: :param pad: :param relu: :return: """ if maintain_z and self.dim==3: stride_ip = (2,2,1) else: stride_ip = 2 module = up(in_chans, out_chans, kernel_size, self.Interpolator, self.c_gen, norm=self.norm, pad=pad, relu=relu, stride_ip=stride_ip) return module def __init__(self, cf, logger): super(net, self).__init__() self.cf = cf self.dim = cf.dim self.norm = cf.norm self.logger = logger backbone = utils.import_module('bbone', cf.backbone_path) self.c_gen = backbone.ConvGenerator(cf.dim) self.Interpolator = backbone.Interpolate #down = DownBlockGen(cf.dim) #up = UpBlockGen(cf.dim, backbone.Interpolate) down = self.down up = self.up pad = cf.pad if pad=="same": pad = (cf.kernel_size-1)//2 self.dims = "not yet recorded" self.is_cuda = False self.init = horiz_conv(len(cf.channels), cf.init_filts, cf.kernel_size, self.c_gen, self.norm, pad=pad, relu=cf.relu) self.down1 = down(cf.init_filts, cf.init_filts*2, cf.kernel_size, cf.kernel_size_m, pad=pad, relu=cf.relu) self.down2 = down(cf.init_filts*2, cf.init_filts*4, cf.kernel_size, cf.kernel_size_m, pad=pad, relu=cf.relu) self.down3 = down(cf.init_filts*4, cf.init_filts*6, cf.kernel_size, cf.kernel_size_m, pad=pad, relu=cf.relu) self.down4 = down(cf.init_filts*6, cf.init_filts*8, cf.kernel_size, cf.kernel_size_m, pad=pad, relu=cf.relu, maintain_z=True) self.down5 = down(cf.init_filts*8, cf.init_filts*12, cf.kernel_size, cf.kernel_size_m, pad=pad, relu=cf.relu, maintain_z=True) #self.down6 = down(cf.init_filts*10, cf.init_filts*14, cf.kernel_size, cf.kernel_size_m, pad=pad, relu=cf.relu) #self.up1 = up(cf.init_filts*14, cf.init_filts*10, cf.kernel_size, pad=pad, relu=cf.relu) self.up2 = up(cf.init_filts*12, cf.init_filts*8, cf.kernel_size, pad=pad, relu=cf.relu, 
def forward(self, x):
    r'''Forward application of network-function.

    Runs the encoder/decoder path, computes segmentation logits and derives box candidates
    per foreground class from connected components of the argmaxed prediction.

    :param x: input to the network, expected as torch.tensor of dims
    .. math:: batch\_size \times channels \times height \times width
    requires_grad should be True for training
    :return: seg_logits: raw output of the segmentation conv.
    :return: out_box_coords: list over foreground classes of the box coordinates
        returned by mutils.get_coords.
    :return: out_scores: list over foreground classes; each entry is a list over batch elements
        holding one score per component.

    Note: the per-stage dimensions are not recorded here, so ``self.dims`` keeps whatever
    value it was initialised with.
    '''
    x1 = self.init(x)

    # ---downwards---
    x2 = self.down1(x1)
    x3 = self.down2(x2)
    x4 = self.down3(x3)
    x5 = self.down4(x4)

    # ---bottom---
    x = self.down5(x5)

    # ---upwards---
    x = self.up2(x, x5)
    x = self.up3(x, x4)
    x = self.up4(x, x3)
    x = self.up5(x, x2)
    x = self.up6(x, x1)

    # ---final---
    seg_logits = self.seg(x)

    out_box_coords, out_scores = [], []
    seg_probs = F.softmax(seg_logits.detach(), dim=1).cpu().data.numpy()
    assert seg_logits.shape[1]==self.cf.num_seg_classes

    # both are identical for every class / roi, so compute them once outside the loops
    class_map = seg_probs.argmax(1)
    score_det = np.max if self.cf.score_det == "max" else np.median  # score determination

    for cl in range(1, seg_logits.shape[1]):
        # binary mask (1 where the argmax equals this class, 0 elsewhere)
        hard_mask = (class_map == cl).astype(class_map.dtype)
        # perform connected component analysis on argmaxed predictions,
        # draw boxes around components and return coordinates.
        box_coords, rois = mutils.get_coords(hard_mask, self.cf.n_roi_candidates, self.cf.dim)

        # for each object, reduce the softmax scores (in the respective class)
        # of all pixels in the component to one object score.
        scores = [[] for _ in range(seg_logits.shape[0])]
        for b_inst, brois in enumerate(rois):
            for nroi in brois:
                scores[b_inst].append(score_det(seg_probs[b_inst, cl][nroi > 0]))
        out_box_coords.append(box_coords)
        out_scores.append(scores)

    return seg_logits, out_box_coords, out_scores
{box_n}], [{box_0}, ... {box_n}], ...] 'seg_preds': pixel-wise class predictions (b, 1, y, x, (z)) with values [0, n_classes] 'torch_loss': 1D torch tensor for backprop. 'class_loss': classification loss for monitoring. here: dummy array, since no classification conducted. """ img = torch.from_numpy(batch["data"]).float().cuda() seg = torch.from_numpy(batch["seg"]).long().cuda() seg_ohe = torch.from_numpy(mutils.get_one_hot_encoding(batch['seg'], self.cf.num_seg_classes)).float().cuda() results_dict = {} seg_logits, box_coords, scores = self.forward(img) # no extra class loss applied in this model. pass dummy tensor for monitoring. results_dict['class_loss'] = np.nan results_dict['boxes'] = [[] for _ in range(img.shape[0])] for cix in range(len(self.cf.class_dict.keys())): for bix in range(img.shape[0]): for rix in range(len(scores[cix][bix])): if scores[cix][bix][rix] > self.cf.detection_min_confidence: results_dict['boxes'][bix].append({'box_coords': np.copy(box_coords[cix][bix][rix]), 'box_score': scores[cix][bix][rix], 'box_pred_class_id': cix + 1, # add 0 for background. 'box_type': 'det', }) for bix in range(img.shape[0]): #bix = batch-element index for tix in range(len(batch['bb_target'][bix])): #target index gt_box = {'box_coords': batch['bb_target'][bix][tix], 'box_type': 'gt'} for name in self.cf.roi_items: gt_box.update({name: batch[name][bix][tix]}) results_dict['boxes'][bix].append(gt_box) # compute segmentation loss as either weighted cross entropy, dice loss, or the sum of both. 
seg_pred = F.softmax(seg_logits, 1) loss = torch.tensor([0.], dtype=torch.float, requires_grad=False).cuda() if self.cf.seg_loss_mode == 'dice' or self.cf.seg_loss_mode == 'dice_wce': loss += 1 - mutils.batch_dice(seg_pred, seg_ohe.float(), false_positive_weight=float(self.cf.fp_dice_weight)) if self.cf.seg_loss_mode == 'wce' or self.cf.seg_loss_mode == 'dice_wce': loss += F.cross_entropy(seg_logits, seg[:, 0], weight=torch.FloatTensor(self.cf.wce_weights).cuda(), reduction='mean') results_dict['torch_loss'] = loss seg_pred = seg_pred.argmax(dim=1).unsqueeze(dim=1).cpu().data.numpy() results_dict['seg_preds'] = seg_pred if 'dice' in self.cf.metrics: results_dict['batch_dices'] = mutils.dice_per_batch_and_class(seg_pred, batch["seg"], self.cf.num_seg_classes, convert_to_ohe=True) #print("batch dice scores ", results_dict['batch_dices'] ) # self.logger.info("loss: {0:.2f}".format(loss.item())) return results_dict def test_forward(self, batch, **kwargs): """ test method. wrapper around forward pass of network without usage of any ground truth information. prepares input data for processing and stores outputs in a dictionary. :param batch: dictionary containing 'data' :param kwargs: :return: results_dict: dictionary with keys: 'boxes': list over batch elements. each batch element is a list of boxes. each box is a dictionary: [[{box_0}, ... {box_n}], [{box_0}, ... {box_n}], ...] 
def cuda(self, device=None):
    r"""Moves all model parameters and buffers to the GPU.

    This also makes associated parameters and buffers different objects. So
    it should be called before constructing optimizer if the module will
    live on GPU while being optimized.

    If the instance carries a ``loss_f`` attribute with a ``cuda`` method, it is moved as
    well; a missing attribute is not an error. ``self.is_cuda`` is set to True.

    Arguments:
        device (int, optional): if specified, all parameters will be
            copied to that device

    Returns:
        Module: self
    """
    try:
        self.loss_f = self.loss_f.cuda(device)
    except AttributeError:
        # no loss function attached (or it has no .cuda()): nothing to move.
        # Any other failure (e.g. CUDA errors) is deliberately not swallowed.
        pass
    self.is_cuda = True
    return self._apply(lambda t: t.cuda(device))
Returns: Module: self """ self.is_cuda = False return self._apply(lambda t: t.cpu()) \ No newline at end of file diff --git a/models/mrcnn.py b/models/mrcnn.py index e0b7982..c7dcdff 100644 --- a/models/mrcnn.py +++ b/models/mrcnn.py @@ -1,752 +1,752 @@ #!/usr/bin/env python # Copyright 2019 Division of Medical Image Computing, German Cancer Research Center (DKFZ). # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """ Parts are based on https://github.com/multimodallearning/pytorch-mask-rcnn published under MIT license. """ import os from multiprocessing import Pool import time import numpy as np import torch import torch.nn as nn import torch.nn.functional as F import torch.utils import utils.model_utils as mutils import utils.exp_utils as utils class RPN(nn.Module): """ Region Proposal Network. 
""" def __init__(self, cf, conv): super(RPN, self).__init__() self.dim = conv.dim self.conv_shared = conv(cf.end_filts, cf.n_rpn_features, ks=3, stride=cf.rpn_anchor_stride, pad=1, relu=cf.relu) self.conv_class = conv(cf.n_rpn_features, 2 * len(cf.rpn_anchor_ratios), ks=1, stride=1, relu=None) self.conv_bbox = conv(cf.n_rpn_features, 2 * self.dim * len(cf.rpn_anchor_ratios), ks=1, stride=1, relu=None) def forward(self, x): """ :param x: input feature maps (b, in_channels, y, x, (z)) :return: rpn_class_logits (b, 2, n_anchors) :return: rpn_probs_logits (b, 2, n_anchors) :return: rpn_bbox (b, 2 * dim, n_anchors) """ # Shared convolutional base of the RPN. x = self.conv_shared(x) # Anchor Score. (batch, anchors per location * 2, y, x, (z)). rpn_class_logits = self.conv_class(x) # Reshape to (batch, 2, anchors) axes = (0, 2, 3, 1) if self.dim == 2 else (0, 2, 3, 4, 1) rpn_class_logits = rpn_class_logits.permute(*axes) rpn_class_logits = rpn_class_logits.contiguous() rpn_class_logits = rpn_class_logits.view(x.size()[0], -1, 2) # Softmax on last dimension (fg vs. bg). rpn_probs = F.softmax(rpn_class_logits, dim=2) # Bounding box refinement. (batch, anchors_per_location * (y, x, (z), log(h), log(w), (log(d)), y, x, (z)) rpn_bbox = self.conv_bbox(x) # Reshape to (batch, 2*dim, anchors) rpn_bbox = rpn_bbox.permute(*axes) rpn_bbox = rpn_bbox.contiguous() rpn_bbox = rpn_bbox.view(x.size()[0], -1, self.dim * 2) return [rpn_class_logits, rpn_probs, rpn_bbox] class Classifier(nn.Module): """ Head network for classification and bounding box refinement. Performs RoiAlign, processes resulting features through a shared convolutional base and finally branches off the classifier- and regression head. 
""" def __init__(self, cf, conv): super(Classifier, self).__init__() self.cf = cf self.dim = conv.dim self.in_channels = cf.end_filts self.pool_size = cf.pool_size self.pyramid_levels = cf.pyramid_levels # instance_norm does not work with spatial dims (1, 1, (1)) norm = cf.norm if cf.norm != 'instance_norm' else None self.conv1 = conv(cf.end_filts, cf.end_filts * 4, ks=self.pool_size, stride=1, norm=norm, relu=cf.relu) self.conv2 = conv(cf.end_filts * 4, cf.end_filts * 4, ks=1, stride=1, norm=norm, relu=cf.relu) self.linear_bbox = nn.Linear(cf.end_filts * 4, cf.head_classes * 2 * self.dim) if 'regression' in self.cf.prediction_tasks: self.linear_regressor = nn.Linear(cf.end_filts * 4, cf.head_classes * cf.regression_n_features) self.rg_n_feats = cf.regression_n_features #classify into bins of regression values elif 'regression_bin' in self.cf.prediction_tasks: self.linear_regressor = nn.Linear(cf.end_filts * 4, cf.head_classes * len(cf.bin_labels)) self.rg_n_feats = len(cf.bin_labels) else: self.linear_regressor = lambda x: torch.zeros((x.shape[0], cf.head_classes * 1), dtype=torch.float32).fill_(float('NaN')).cuda() self.rg_n_feats = 1 #cf.regression_n_features if 'class' in self.cf.prediction_tasks: self.linear_class = nn.Linear(cf.end_filts * 4, cf.head_classes) else: assert cf.head_classes == 2, "#head classes {} needs to be 2 (bg/fg) when not predicting classes".format(cf.head_classes) self.linear_class = lambda x: torch.zeros((x.shape[0], cf.head_classes), dtype=torch.float64).cuda() def forward(self, x, rois): """ :param x: input feature maps (b, in_channels, y, x, (z)) :param rois: normalized box coordinates as proposed by the RPN to be forwarded through the second stage (n_proposals, (y1, x1, y2, x2, (z1), (z2), batch_ix). Proposals of all batch elements have been merged to one vector, while the origin info has been stored for re-allocation. 
:return: mrcnn_class_logits (n_proposals, n_head_classes) :return: mrcnn_bbox (n_proposals, n_head_classes, 2 * dim) predicted corrections to be applied to proposals for refinement. """ x = mutils.pyramid_roi_align(x, rois, self.pool_size, self.pyramid_levels, self.dim) x = self.conv1(x) x = self.conv2(x) x = x.view(-1, self.in_channels * 4) mrcnn_bbox = self.linear_bbox(x) mrcnn_bbox = mrcnn_bbox.view(mrcnn_bbox.size()[0], -1, self.dim * 2) mrcnn_class_logits = self.linear_class(x) mrcnn_regress = self.linear_regressor(x) mrcnn_regress = mrcnn_regress.view(mrcnn_regress.size()[0], -1, self.rg_n_feats) return [mrcnn_bbox, mrcnn_class_logits, mrcnn_regress] class Mask(nn.Module): """ Head network for proposal-based mask segmentation. Performs RoiAlign, some convolutions and applies sigmoid on the output logits to allow for overlapping classes. """ def __init__(self, cf, conv): super(Mask, self).__init__() self.pool_size = cf.mask_pool_size self.pyramid_levels = cf.pyramid_levels self.dim = conv.dim self.conv1 = conv(cf.end_filts, cf.end_filts, ks=3, stride=1, pad=1, norm=cf.norm, relu=cf.relu) self.conv2 = conv(cf.end_filts, cf.end_filts, ks=3, stride=1, pad=1, norm=cf.norm, relu=cf.relu) self.conv3 = conv(cf.end_filts, cf.end_filts, ks=3, stride=1, pad=1, norm=cf.norm, relu=cf.relu) self.conv4 = conv(cf.end_filts, cf.end_filts, ks=3, stride=1, pad=1, norm=cf.norm, relu=cf.relu) if conv.dim == 2: - self.deconv = nn.ConvTranspose2d(cf.end_filts, cf.end_filts, kernel_size=2, stride=2) + self.deconv = nn.ConvTranspose2d(cf.end_filts, cf.end_filts, kernel_size=2, stride=2) # todo why no norm here? 
def compute_rpn_class_loss(rpn_class_logits, rpn_match, shem_poolsize):
    """
    Cross-entropy loss of the RPN fg/bg classifier on positive anchors and on hard negative
    anchors drawn via stochastic-hard-example-mining (SHEM).

    :param rpn_match: (n_anchors). [-1, 0, 1] for negative, neutral, and positive matched anchors.
    :param rpn_class_logits: (n_anchors, 2). logits from RPN classifier.
    :param shem_poolsize: int. factor of top-k candidates to draw from per negative sample
        (stochastic-hard-example-mining).
    :return: loss: torch tensor, mean of positive and negative loss. Created on the device of
        ``rpn_class_logits``.
    :return: np_neg_ix: 1D array containing indices of the neg_roi_logits, which have been sampled for training.
    """
    # all helper tensors follow the logits' device instead of the default cuda device
    device = rpn_class_logits.device

    # Filter out neutral anchors
    pos_indices = torch.nonzero(rpn_match == 1)
    neg_indices = torch.nonzero(rpn_match == -1)

    # loss for positive samples
    if 0 not in pos_indices.size():
        pos_indices = pos_indices.squeeze(1)
        pos_targets = torch.ones(pos_indices.shape[0], dtype=torch.long, device=device)
        pos_loss = F.cross_entropy(rpn_class_logits[pos_indices], pos_targets)
    else:
        pos_loss = torch.zeros(1, dtype=torch.float32, device=device)

    # loss for negative samples: draw hard negative examples (SHEM)
    # that match the number of positive samples, but at least 1.
    if 0 not in neg_indices.size():
        neg_indices = neg_indices.squeeze(1)
        roi_logits_neg = rpn_class_logits[neg_indices]
        negative_count = max(1, pos_indices.numel())
        roi_probs_neg = F.softmax(roi_logits_neg, dim=1)
        neg_ix = mutils.shem(roi_probs_neg, negative_count, shem_poolsize)
        neg_targets = torch.zeros(neg_ix.shape[0], dtype=torch.long, device=device)
        neg_loss = F.cross_entropy(roi_logits_neg[neg_ix], neg_targets)
        np_neg_ix = neg_ix.cpu().data.numpy()
    else:
        neg_loss = torch.zeros(1, dtype=torch.float32, device=device)
        np_neg_ix = np.array([]).astype('int32')

    loss = (pos_loss + neg_loss) / 2
    return loss, np_neg_ix
def compute_mrcnn_bbox_loss(mrcnn_pred_deltas, mrcnn_target_deltas, target_class_ids):
    """
    Smooth-L1 loss between predicted and target box deltas of the positive rois; for every
    positive roi only the deltas predicted for its target class are used.

    :param mrcnn_target_deltas: (n_sampled_rois, (dy, dx, (dz), log(dh), log(dw), (log(dd)))
    :param mrcnn_pred_deltas: (n_sampled_rois, n_classes, (dy, dx, (dz), log(dh), log(dw), (log(dd)))
    :param target_class_ids: (n_sampled_rois)
    :return: loss: torch 1D tensor. A zero tensor of shape (1,) on the device of
        ``mrcnn_pred_deltas`` if there is no roi with class id > 0.
    """
    positive_rois = torch.nonzero(target_class_ids > 0)
    if 0 in positive_rois.size():
        # no positive roi: zero loss on the predictions' device (works on CPU and any GPU)
        return torch.zeros(1, dtype=torch.float32, device=mrcnn_pred_deltas.device)

    positive_roi_ix = positive_rois[:, 0]
    positive_roi_class_ids = target_class_ids[positive_roi_ix].long()
    # targets are constants w.r.t. the graph
    target_bbox = mrcnn_target_deltas[positive_roi_ix, :].detach()
    # pick, per positive roi, the deltas of its own target class
    pred_bbox = mrcnn_pred_deltas[positive_roi_ix, positive_roi_class_ids, :]
    return F.smooth_l1_loss(pred_bbox, target_bbox)
def compute_mrcnn_regression_loss(tasks, pred, target, target_class_ids):
    """regression loss is a distance metric between target vector and predicted regression vector.

    Only rois with class id > 0 contribute, and for each of them only the prediction of its
    target class is used. With "regression_bin" in ``tasks`` the loss is a cross entropy over
    bins, otherwise a smooth-L1 loss.

    :param tasks: collection of prediction task names.
    :param pred: (n_sampled_rois, n_classes, [n_rg_feats if real regression or 1 if rg_bin task)
    :param target: (n_sampled_rois, [n_rg_feats or n_rg_bins])
    :param target_class_ids: (n_sampled_rois)
    :return: differentiable loss, torch 1D tensor. A zero tensor of shape (1,) on the device
        of ``pred`` if ``target`` is empty or no roi has class id > 0.
    """
    positive_rois = torch.nonzero(target_class_ids > 0)
    if 0 in target.shape or 0 in positive_rois.shape:
        # nothing to regress: zero loss on the predictions' device (works on CPU and any GPU)
        return torch.zeros(1, dtype=torch.float32, device=pred.device)

    positive_roi_ix = positive_rois[:, 0]
    positive_roi_class_ids = target_class_ids[positive_roi_ix].long()
    target = target[positive_roi_ix].detach()
    # pick, per positive roi, the prediction of its own target class
    pred = pred[positive_roi_ix, positive_roi_class_ids]
    if "regression_bin" in tasks:
        # bin classification: targets are bin indices
        return F.cross_entropy(pred, target.long())
    return F.smooth_l1_loss(pred, target)
objectness scores as roi scores, otherwise class head scores. :param cf: :param batch_rpn_proposals: :param mrcnn_cl_logits: :return: """ if not 'class' in tasks: scores = batch_rpn_proposals[:, :, -1].view(-1, 1) scores = torch.cat((1 - scores, scores), dim=1) else: scores = F.softmax(mrcnn_cl_logits, dim=1) return scores ############################################################ # MaskRCNN Class ############################################################ class net(nn.Module): def __init__(self, cf, logger): super(net, self).__init__() self.cf = cf self.logger = logger self.build() loss_order = ['rpn_class', 'rpn_bbox', 'mrcnn_bbox', 'mrcnn_mask', 'mrcnn_class', 'mrcnn_rg'] if hasattr(cf, "mrcnn_loss_weights"): # bring into right order self.loss_weights = np.array([cf.mrcnn_loss_weights[k] for k in loss_order]) else: self.loss_weights = np.array([1.]*len(loss_order)) if self.cf.weight_init=="custom": logger.info("Tried to use custom weight init which is not defined. Using pytorch default.") elif self.cf.weight_init: mutils.initialize_weights(self) else: logger.info("using default pytorch weight init") def build(self): """Build Mask R-CNN architecture.""" # Image size must be dividable by 2 multiple times. h, w = self.cf.patch_size[:2] if h / 2**5 != int(h / 2**5) or w / 2**5 != int(w / 2**5): raise Exception("Image size must be divisible by 2 at least 5 times " "to avoid fractions when downscaling and upscaling." "For example, use 256, 288, 320, 384, 448, 512, ... etc.,i.e.," "any number x*32 will do!") # instantiate abstract multi-dimensional conv generator and load backbone module. 
backbone = utils.import_module('bbone', self.cf.backbone_path) self.logger.info("loaded backbone from {}".format(self.cf.backbone_path)) conv = backbone.ConvGenerator(self.cf.dim) # build Anchors, FPN, RPN, Classifier / Bbox-Regressor -head, Mask-head self.np_anchors = mutils.generate_pyramid_anchors(self.logger, self.cf) self.anchors = torch.from_numpy(self.np_anchors).float().cuda() self.fpn = backbone.FPN(self.cf, conv, relu_enc=self.cf.relu, operate_stride1=False).cuda() self.rpn = RPN(self.cf, conv) self.classifier = Classifier(self.cf, conv) self.mask = Mask(self.cf, conv) def forward(self, img, is_training=True): """ :param img: input images (b, c, y, x, (z)). :return: rpn_pred_logits: (b, n_anchors, 2) :return: rpn_pred_deltas: (b, n_anchors, (y, x, (z), log(h), log(w), (log(d)))) :return: batch_proposal_boxes: (b, n_proposals, (y1, x1, y2, x2, (z1), (z2), batch_ix)) only for monitoring/plotting. :return: detections: (n_final_detections, (y1, x1, y2, x2, (z1), (z2), batch_ix, pred_class_id, pred_score) :return: detection_masks: (n_final_detections, n_classes, y, x, (z)) raw molded masks as returned by mask-head. """ # extract features. fpn_outs = self.fpn(img) rpn_feature_maps = [fpn_outs[i] for i in self.cf.pyramid_levels] self.mrcnn_feature_maps = rpn_feature_maps # loop through pyramid layers and apply RPN. layer_outputs = [ self.rpn(p_feats) for p_feats in rpn_feature_maps ] # concatenate layer outputs. # convert from list of lists of level outputs to list of lists of outputs across levels. # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]] outputs = list(zip(*layer_outputs)) outputs = [torch.cat(list(o), dim=1) for o in outputs] rpn_pred_logits, rpn_pred_probs, rpn_pred_deltas = outputs # # # generate proposals: apply predicted deltas to anchors and filter by foreground scores from RPN classifier. 
proposal_count = self.cf.post_nms_rois_training if is_training else self.cf.post_nms_rois_inference batch_normed_props, batch_unnormed_props = mutils.refine_proposals(rpn_pred_probs, rpn_pred_deltas, proposal_count, self.anchors, self.cf) # merge batch dimension of proposals while storing allocation info in coordinate dimension. batch_ixs = torch.arange( batch_normed_props.shape[0]).cuda().unsqueeze(1).repeat(1,batch_normed_props.shape[1]).view(-1).float() rpn_rois = batch_normed_props[:, :, :-1].view(-1, batch_normed_props[:, :, :-1].shape[2]) self.rpn_rois_batch_info = torch.cat((rpn_rois, batch_ixs.unsqueeze(1)), dim=1) # this is the first of two forward passes in the second stage, where no activations are stored for backprop. # here, all proposals are forwarded (with virtual_batch_size = batch_size * post_nms_rois.) # for inference/monitoring as well as sampling of rois for the loss functions. # processed in chunks of roi_chunk_size to re-adjust to gpu-memory. chunked_rpn_rois = self.rpn_rois_batch_info.split(self.cf.roi_chunk_size) bboxes_list, class_logits_list, regressions_list = [], [], [] with torch.no_grad(): for chunk in chunked_rpn_rois: chunk_bboxes, chunk_class_logits, chunk_regressions = self.classifier(self.mrcnn_feature_maps, chunk) bboxes_list.append(chunk_bboxes) class_logits_list.append(chunk_class_logits) regressions_list.append(chunk_regressions) mrcnn_bbox = torch.cat(bboxes_list, 0) mrcnn_class_logits = torch.cat(class_logits_list, 0) mrcnn_regressions = torch.cat(regressions_list, 0) self.mrcnn_roi_scores = compute_roi_scores(self.cf.prediction_tasks, batch_normed_props, mrcnn_class_logits) # refine classified proposals, filter and return final detections. # returns (cf.max_inst_per_batch_element, n_coords+1+...) detections = mutils.refine_detections(self.cf, batch_ixs, rpn_rois, mrcnn_bbox, self.mrcnn_roi_scores, mrcnn_regressions) # forward remaining detections through mask-head to generate corresponding masks. 
scale = [img.shape[2]] * 4 + [img.shape[-1]] * 2 scale = torch.from_numpy(np.array(scale[:self.cf.dim * 2] + [1])[None]).float().cuda() # first self.cf.dim * 2 entries on axis 1 are always the box coords, +1 is batch_ix detection_boxes = detections[:, :self.cf.dim * 2 + 1] / scale with torch.no_grad(): detection_masks = self.mask(self.mrcnn_feature_maps, detection_boxes) return [rpn_pred_logits, rpn_pred_deltas, batch_unnormed_props, detections, detection_masks] def loss_samples_forward(self, batch_gt_boxes, batch_gt_masks, batch_gt_class_ids, batch_gt_regressions=None): """ this is the second forward pass through the second stage (features from stage one are re-used). samples few rois in loss_example_mining and forwards only those for loss computation. :param batch_gt_class_ids: list over batch elements. Each element is a list over the corresponding roi target labels. :param batch_gt_boxes: list over batch elements. Each element is a list over the corresponding roi target coordinates. :param batch_gt_masks: (b, n(b), c, y, x (,z)) list over batch elements. Each element holds n_gt_rois(b) (i.e., dependent on the batch element) binary masks of shape (c, y, x, (z)). :return: sample_logits: (n_sampled_rois, n_classes) predicted class scores. :return: sample_deltas: (n_sampled_rois, n_classes, 2 * dim) predicted corrections to be applied to proposals for refinement. :return: sample_mask: (n_sampled_rois, n_classes, y, x, (z)) predicted masks per class and proposal. :return: sample_target_class_ids: (n_sampled_rois) target class labels of sampled proposals. :return: sample_target_deltas: (n_sampled_rois, 2 * dim) target deltas of sampled proposals for box refinement. :return: sample_target_masks: (n_sampled_rois, y, x, (z)) target masks of sampled proposals. :return: sample_proposals: (n_sampled_rois, 2 * dim) RPN output for sampled proposals. only for monitoring/plotting. """ # sample rois for loss and get corresponding targets for all Mask R-CNN head network losses. 
sample_ics, sample_target_deltas, sample_target_mask, sample_target_class_ids, sample_target_regressions = \ mutils.loss_example_mining(self.cf, self.rpn_rois_batch_info, batch_gt_boxes, batch_gt_masks, self.mrcnn_roi_scores, batch_gt_class_ids, batch_gt_regressions) # re-use feature maps and RPN output from first forward pass. sample_proposals = self.rpn_rois_batch_info[sample_ics] if not 0 in sample_proposals.size(): sample_deltas, sample_logits, sample_regressions = self.classifier(self.mrcnn_feature_maps, sample_proposals) sample_mask = self.mask(self.mrcnn_feature_maps, sample_proposals) else: sample_logits = torch.FloatTensor().cuda() sample_deltas = torch.FloatTensor().cuda() sample_regressions = torch.FloatTensor().cuda() sample_mask = torch.FloatTensor().cuda() return [sample_deltas, sample_mask, sample_logits, sample_regressions, sample_proposals, sample_target_deltas, sample_target_mask, sample_target_class_ids, sample_target_regressions] def get_results(self, img_shape, detections, detection_masks, box_results_list=None, return_masks=True): """ Restores batch dimension of merged detections, unmolds detections, creates and fills results dict. :param img_shape: :param detections: shape (n_final_detections, len(info)), where info=( y1, x1, y2, x2, (z1,z2), batch_ix, pred_class_id, pred_score ) :param detection_masks: (n_final_detections, n_classes, y, x, (z)) raw molded masks as returned by mask-head. :param box_results_list: None or list of output boxes for monitoring/plotting. each element is a list of boxes per batch element. :param return_masks: boolean. If True, full resolution masks are returned for all proposals (speed trade-off). :return: results_dict: dictionary with keys: 'boxes': list over batch elements. each batch element is a list of boxes. each box is a dictionary: [[{box_0}, ... {box_n}], [{box_0}, ... {box_n}], ...] 'seg_preds': pixel-wise class predictions (b, 1, y, x, (z)) with values [0, 1] only fg. vs. bg for now. 
class-specific return of masks will come with implementation of instance segmentation evaluation. """ detections = detections.cpu().data.numpy() if self.cf.dim == 2: detection_masks = detection_masks.permute(0, 2, 3, 1).cpu().data.numpy() else: detection_masks = detection_masks.permute(0, 2, 3, 4, 1).cpu().data.numpy() # det masks shape now (n_dets, y,x(,z), n_classes) # restore batch dimension of merged detections using the batch_ix info. batch_ixs = detections[:, self.cf.dim*2] detections = [detections[batch_ixs == ix] for ix in range(img_shape[0])] mrcnn_mask = [detection_masks[batch_ixs == ix] for ix in range(img_shape[0])] # mrcnn_mask: shape (b_size, variable, variable, n_classes), variable bc depends on single instance mask size if box_results_list == None: # for test_forward, where no previous list exists. box_results_list = [[] for _ in range(img_shape[0])] # seg_logits == seg_probs in mrcnn since mask head finishes with sigmoid (--> image space = [0,1]) seg_probs = [] # loop over batch and unmold detections. for ix in range(img_shape[0]): # final masks are one-hot encoded (b, n_classes, y, x, (z)) final_masks = np.zeros((self.cf.num_classes + 1, *img_shape[2:])) #+1 for bg, 0.5 bc mask head classifies only bg/fg with logits between 0,1--> bg is <0.5 if self.cf.num_classes + 1 != self.cf.num_seg_classes: self.logger.warning("n of roi-classifier head classes {} doesnt match cf.num_seg_classes {}".format( self.cf.num_classes + 1, self.cf.num_seg_classes)) if not 0 in detections[ix].shape: boxes = detections[ix][:, :self.cf.dim*2].astype(np.int32) class_ids = detections[ix][:, self.cf.dim*2 + 1].astype(np.int32) scores = detections[ix][:, self.cf.dim*2 + 2] masks = mrcnn_mask[ix][np.arange(boxes.shape[0]), ..., class_ids] regressions = detections[ix][:,self.cf.dim*2+3:] # Filter out detections with zero area. Often only happens in early # stages of training when the network weights are still a bit random. 
if self.cf.dim == 2: exclude_ix = np.where((boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) <= 0)[0] else: exclude_ix = np.where( (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 5] - boxes[:, 4]) <= 0)[0] if exclude_ix.shape[0] > 0: boxes = np.delete(boxes, exclude_ix, axis=0) masks = np.delete(masks, exclude_ix, axis=0) class_ids = np.delete(class_ids, exclude_ix, axis=0) scores = np.delete(scores, exclude_ix, axis=0) regressions = np.delete(regressions, exclude_ix, axis=0) # Resize masks to original image size and set boundary threshold. if return_masks: for i in range(masks.shape[0]): #masks per this batch instance/element/image # Convert neural network mask to full size mask if self.cf.dim == 2: full_mask = mutils.unmold_mask_2D(masks[i], boxes[i], img_shape[2:]) else: full_mask = mutils.unmold_mask_3D(masks[i], boxes[i], img_shape[2:]) # take the maximum seg_logits per class of instances in that class, i.e., a pixel in a class # has the max seg_logit value over all instances of that class in one sample final_masks[class_ids[i]] = np.max((final_masks[class_ids[i]], full_mask), axis=0) final_masks[0] = np.full(final_masks[0].shape, 0.49999999) #effectively min_det_thres at 0.5 per pixel # add final predictions to results. 
if not 0 in boxes.shape: for ix2, coords in enumerate(boxes): box = {'box_coords': coords, 'box_type': 'det', 'box_score': scores[ix2], 'box_pred_class_id': class_ids[ix2]} #if (hasattr(self.cf, "convert_cl_to_rg") and self.cf.convert_cl_to_rg): if "regression_bin" in self.cf.prediction_tasks: # in this case, regression preds are actually the rg_bin_ids --> map to rg value the bin represents box['rg_bin'] = regressions[ix2].argmax() box['regression'] = self.cf.bin_id2rg_val[box['rg_bin']] else: box['regression'] = regressions[ix2] if hasattr(self.cf, "rg_val_to_bin_id") and \ any(['regression' in task for task in self.cf.prediction_tasks]): box.update({'rg_bin': self.cf.rg_val_to_bin_id(regressions[ix2])}) box_results_list[ix].append(box) # if no detections were made--> keep full bg mask (zeros). seg_probs.append(final_masks) # create and fill results dictionary. results_dict = {} results_dict['boxes'] = box_results_list results_dict['seg_preds'] = np.array(seg_probs) return results_dict def train_forward(self, batch, is_validation=False): """ train method (also used for validation monitoring). wrapper around forward pass of network. prepares input data for processing, computes losses, and stores outputs in a dictionary. :param batch: dictionary containing 'data', 'seg', etc. batch['roi_masks']: (b, n(b), c, h(n), w(n) (z(n))) list like roi_labels but with arrays (masks) inplace of integers. c==channels of the raw segmentation. :return: results_dict: dictionary with keys: 'boxes': list over batch elements. each batch element is a list of boxes. each box is a dictionary: [[{box_0}, ... {box_n}], [{box_0}, ... {box_n}], ...] 'seg_preds': pixel-wise class predictions (b, 1, y, x, (z)) with values [0, n_classes]. 'torch_loss': 1D torch tensor for backprop. 'class_loss': classification loss for monitoring. 
""" img = batch['data'] gt_boxes = batch['bb_target'] #axes = (0, 2, 3, 1) if self.cf.dim == 2 else (0, 2, 3, 4, 1) #gt_masks = [np.transpose(batch['roi_masks'][ii], axes=axes) for ii in range(len(batch['roi_masks']))] gt_masks = batch['roi_masks'] gt_class_ids = batch['class_targets'] if 'regression' in self.cf.prediction_tasks: gt_regressions = batch["regression_targets"] elif 'regression_bin' in self.cf.prediction_tasks: gt_regressions = batch["rg_bin_targets"] else: gt_regressions = None img = torch.from_numpy(img).cuda().float() batch_rpn_class_loss = torch.FloatTensor([0]).cuda() batch_rpn_bbox_loss = torch.FloatTensor([0]).cuda() # list of output boxes for monitoring/plotting. each element is a list of boxes per batch element. box_results_list = [[] for _ in range(img.shape[0])] #forward passes. 1. general forward pass, where no activations are saved in second stage (for performance # monitoring and loss sampling). 2. second stage forward pass of sampled rois with stored activations for backprop. rpn_class_logits, rpn_pred_deltas, proposal_boxes, detections, detection_masks = self.forward(img) mrcnn_pred_deltas, mrcnn_pred_mask, mrcnn_class_logits, mrcnn_regressions, sample_proposals, \ mrcnn_target_deltas, target_mask, target_class_ids, target_regressions = \ self.loss_samples_forward(gt_boxes, gt_masks, gt_class_ids, gt_regressions) # loop over batch for b in range(img.shape[0]): if len(gt_boxes[b]) > 0: # add gt boxes to output list for tix in range(len(gt_boxes[b])): gt_box = {'box_type': 'gt', 'box_coords': batch['bb_target'][b][tix]} for name in self.cf.roi_items: gt_box.update({name: batch[name][b][tix]}) box_results_list[b].append(gt_box) # match gt boxes with anchors to generate targets for RPN losses. rpn_match, rpn_target_deltas = mutils.gt_anchor_matching(self.cf, self.np_anchors, gt_boxes[b]) # add positive anchors used for loss to output list for monitoring. 
pos_anchors = mutils.clip_boxes_numpy(self.np_anchors[np.argwhere(rpn_match == 1)][:, 0], img.shape[2:]) for p in pos_anchors: box_results_list[b].append({'box_coords': p, 'box_type': 'pos_anchor'}) else: rpn_match = np.array([-1]*self.np_anchors.shape[0]) rpn_target_deltas = np.array([0]) rpn_match_gpu = torch.from_numpy(rpn_match).cuda() rpn_target_deltas = torch.from_numpy(rpn_target_deltas).float().cuda() # compute RPN losses. rpn_class_loss, neg_anchor_ix = compute_rpn_class_loss(rpn_class_logits[b], rpn_match_gpu, self.cf.shem_poolsize) rpn_bbox_loss = compute_rpn_bbox_loss(rpn_pred_deltas[b], rpn_target_deltas, rpn_match_gpu) batch_rpn_class_loss += rpn_class_loss /img.shape[0] batch_rpn_bbox_loss += rpn_bbox_loss /img.shape[0] # add negative anchors used for loss to output list for monitoring. # neg_anchor_ix = neg_ix come from shem and mark positions in roi_probs_neg = rpn_class_logits[neg_indices] # with neg_indices = rpn_match == -1 neg_anchors = mutils.clip_boxes_numpy(self.np_anchors[rpn_match == -1][neg_anchor_ix], img.shape[2:]) for n in neg_anchors: box_results_list[b].append({'box_coords': n, 'box_type': 'neg_anchor'}) # add highest scoring proposals to output list for monitoring. rpn_proposals = proposal_boxes[b][proposal_boxes[b, :, -1].argsort()][::-1] for r in rpn_proposals[:self.cf.n_plot_rpn_props, :-1]: box_results_list[b].append({'box_coords': r, 'box_type': 'prop'}) # add positive and negative roi samples used for mrcnn losses to output list for monitoring. if not 0 in sample_proposals.shape: rois = mutils.clip_to_window(self.cf.window, sample_proposals).cpu().data.numpy() for ix, r in enumerate(rois): box_results_list[int(r[-1])].append({'box_coords': r[:-1] * self.cf.scale, 'box_type': 'pos_class' if target_class_ids[ix] > 0 else 'neg_class'}) # compute mrcnn losses. 
mrcnn_class_loss = compute_mrcnn_class_loss(self.cf.prediction_tasks, mrcnn_class_logits, target_class_ids) mrcnn_bbox_loss = compute_mrcnn_bbox_loss(mrcnn_pred_deltas, mrcnn_target_deltas, target_class_ids) mrcnn_regressions_loss = compute_mrcnn_regression_loss(self.cf.prediction_tasks, mrcnn_regressions, target_regressions, target_class_ids) # mrcnn can be run without pixelwise annotations available (Faster R-CNN mode). # In this case, the mask_loss is taken out of training. if self.cf.frcnn_mode: mrcnn_mask_loss = torch.FloatTensor([0]).cuda() else: mrcnn_mask_loss = compute_mrcnn_mask_loss(mrcnn_pred_mask, target_mask, target_class_ids) loss = batch_rpn_class_loss + batch_rpn_bbox_loss +\ mrcnn_bbox_loss + mrcnn_mask_loss + mrcnn_class_loss + mrcnn_regressions_loss # run unmolding of predictions for monitoring and merge all results to one dictionary. return_masks = self.cf.return_masks_in_val if is_validation else self.cf.return_masks_in_train results_dict = self.get_results(img.shape, detections, detection_masks, box_results_list, return_masks=return_masks) #results_dict['seg_preds'] = results_dict['seg_preds'].argmax(axis=1).astype('uint8')[:,np.newaxis] if 'dice' in self.cf.metrics: results_dict['batch_dices'] = mutils.dice_per_batch_and_class( results_dict['seg_preds'], batch["seg"], self.cf.num_seg_classes, convert_to_ohe=True) results_dict['torch_loss'] = loss results_dict['class_loss'] = mrcnn_class_loss.item() results_dict['bbox_loss'] = mrcnn_bbox_loss.item() results_dict['mask_loss'] = mrcnn_mask_loss.item() results_dict['rg_loss'] = mrcnn_regressions_loss.item() results_dict['rpn_class_loss'] = rpn_class_loss.item() results_dict['rpn_bbox_loss'] = rpn_bbox_loss.item() return results_dict def test_forward(self, batch, return_masks=True): """ test method. wrapper around forward pass of network without usage of any ground truth information. prepares input data for processing and stores outputs in a dictionary. 
:param batch: dictionary containing 'data' :param return_masks: boolean. If True, full resolution masks are returned for all proposals (speed trade-off). :return: results_dict: dictionary with keys: 'boxes': list over batch elements. each batch element is a list of boxes. each box is a dictionary: [[{box_0}, ... {box_n}], [{box_0}, ... {box_n}], ...] 'seg_preds': pixel-wise class predictions (b, 1, y, x, (z)) with values [0, n_classes] """ img = batch['data'] img = torch.from_numpy(img).float().cuda() _, _, _, detections, detection_masks = self.forward(img) results_dict = self.get_results(img.shape, detections, detection_masks, return_masks=return_masks) return results_dict \ No newline at end of file