diff --git a/datasets/toy/configs.py b/datasets/toy/configs.py
index 6ae0db0..e67a249 100644
--- a/datasets/toy/configs.py
+++ b/datasets/toy/configs.py
@@ -1,490 +1,490 @@
 #!/usr/bin/env python
 # Copyright 2019 Division of Medical Image Computing, German Cancer Research Center (DKFZ).
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 
 import sys
 import os
 sys.path.append(os.path.dirname(os.path.realpath(__file__)))
 import numpy as np
 from default_configs import DefaultConfigs
 from collections import namedtuple
 
 # Lightweight record types used to describe labels in this config file.
 # boxLabel: a box type given by a display name and a color.
 boxLabel = namedtuple('boxLabel', ["name", "color"])
 # Label: one object class; instantiated in Configs.__init__ for self.pp_classes and self.class_labels.
 Label = namedtuple("Label", ['id', 'name', 'shape', 'radius', 'color', 'regression', 'ambiguities', 'gt_distortion'])
 # binLabel: one regression bin; bin_vals holds the bin's bound(s), e.g. (2,10) or (40,) in Configs.__init__.
 binLabel = namedtuple("binLabel", ['id', 'name', 'color', 'bin_vals'])
 
 class Configs(DefaultConfigs):
 
     def __init__(self, server_env=None):
         # Assemble the complete toy-experiment configuration on top of DefaultConfigs.
         # The sections below set, in order: preprocessing, I/O paths, architecture, data loading,
         # labels/colors, data augmentation, schedule/model selection and testing/plotting. The final
         # statement calls the model-specific add_*_configs method selected by self.model.
         # server_env: forwarded to DefaultConfigs.__init__; when truthy, data_sourcedir is switched
         # to the '/datasets/...' path in the I/O section.
         # NOTE(review): attributes read but not set here (self.blue, self.white, ..., self.source_dir,
         # self.class_specific_seg, self.seg_labels, self.box_labels) presumably come from
         # DefaultConfigs -- confirm there.
         super(Configs, self).__init__(server_env)
 
         #########################
         #         Prepro        #
         #########################
 
         self.pp_rootdir = os.path.join('/mnt/HDD2TB/Documents/data/toy', "cyl1ps_dev_exact")
         self.pp_npz_dir = self.pp_rootdir+"_npz"
 
         self.pre_crop_size = [320,320,8] #y,x,z; determines pp data shape (2D easily implementable, but only 3D for now)
         self.min_2d_radius = 6 #in pixels
         self.n_train_samples, self.n_test_samples = 80, 80
 
         # not actually real one-hot encoding (ohe) but contains more info: roi-overlap only within classes.
         self.pp_create_ohe_seg = False
         self.pp_empty_samples_ratio = 0.1
 
         self.pp_place_radii_mid_bin = True
         self.pp_only_distort_2d = True
         # outer-most intensity of blurred radii, relative to inner-object intensity. <1 for decreasing, > 1 for increasing.
         # e.g.: setting 0.1 means blurred edge has min intensity 10% as large as inner-object intensity.
         self.pp_blur_min_intensity = 0.2
 
         self.max_instances_per_sample = 3 #how many max instances over all classes per sample (img if 2d, vol if 3d)
         self.max_instances_per_class = self.max_instances_per_sample  # how many max instances per image per class
         self.noise_scale = 0.  # std-dev of gaussian noise
 
         self.ambigs_sampling = "gaussian" #"gaussian" or "uniform"
         """ radius_calib: gt distort for calibrating uncertainty. Range of gt distortion is inferable from
             image by distinguishing it from the rest of the object.
             blurring width around edge will be shifted so that symmetric rel to orig radius.
             blurring scale: if self.ambigs_sampling is uniform, distribution's non-zero range (b-a) will be sqrt(12)*scale
             since uniform dist has variance (b-a)²/12. b,a will be placed symmetrically around unperturbed radius.
             if sampling is gaussian, then scale parameter sets one std dev, i.e., blurring width will be orig_radius * std_dev * 2.
         """
         self.ambiguities = {
              #set which classes to apply which ambs to below in class labels
              #choose out of: 'outer_radius', 'inner_radius', 'radii_relations'.
              #kind              #probability   #scale (gaussian std, relative to unperturbed value)
             #"outer_radius":     (1.,            0.5),
             #"outer_radius_xy":  (1.,            0.5),
             #"inner_radius":     (0.5,            0.1),
             #"radii_relations":  (0.5,            0.1),
             "radius_calib":     (1.,            1./6)
         }
 
         # shape choices: 'cylinder', 'block'
         #                        id,    name,       shape,      radius,                 color,              regression,     ambiguities,    gt_distortion
         self.pp_classes = [Label(1,     'cylinder', 'cylinder', ((6,6,1),(40,40,8)),    (*self.blue, 1.),   "radius_2d",    (),             ()),
                            #Label(2,      'block',      'block',        ((6,6,1),(40,40,8)),  (*self.aubergine,1.),  "radii_2d", (), ('radius_calib',))
             ]
 
 
         #########################
         #         I/O           #
         #########################
 
         self.data_sourcedir = '/mnt/HDD2TB/Documents/data/toy/cyl1ps_exact'
 
         if server_env:
             self.data_sourcedir = '/datasets/data_ramien/toy/cyl1ps_exact_npz'
 
 
         # the held-out test data lives in the 'test' subfolder; data_sourcedir itself is then
         # re-pointed to the 'train' subfolder.
         self.test_data_sourcedir = os.path.join(self.data_sourcedir, 'test')
         self.data_sourcedir = os.path.join(self.data_sourcedir, "train")
 
         self.info_df_name = 'info_df.pickle'
 
         # one out of ['mrcnn', 'retina_net', 'retina_unet', 'detection_unet', 'ufrcnn', 'detection_fpn'].
         # NOTE(review): 'ufrcnn' has no entry in the dispatch dict at the end of this method, so
         # selecting it would raise a KeyError there.
-        self.model = 'mrcnn'
+        self.model = 'retina_unet'
         # every model whose name contains 'retina' is loaded from models/retina_net.py.
         self.model_path = 'models/{}.py'.format(self.model if not 'retina' in self.model else 'retina_net')
         self.model_path = os.path.join(self.source_dir, self.model_path)
 
 
         #########################
         #      Architecture     #
         #########################
 
         # one out of [2, 3]. dimension the model operates in.
         self.dim = 2
 
         # 'class', 'regression', 'regression_bin', 'regression_ken_gal'
         # currently only tested mode is a single-task at a time (i.e., only one task in below list)
         # but, in principle, tasks could be combined (e.g., object classes and regression per class)
         self.prediction_tasks = ['class',]
 
         self.start_filts = 48 if self.dim == 2 else 18
         self.end_filts = self.start_filts * 4 if self.dim == 2 else self.start_filts * 2
         self.res_architecture = 'resnet50' # 'resnet101' , 'resnet50'
         self.norm = 'instance_norm' # one of None, 'instance_norm', 'batch_norm'
         self.relu = 'relu'
         # one of 'xavier_uniform', 'xavier_normal', or 'kaiming_normal', None (=default = 'kaiming_uniform')
         self.weight_init = None
 
         self.regression_n_features = 1  # length of regressor target vector
 
 
         #########################
         #      Data Loader      #
         #########################
 
         self.num_epochs = 32
         self.num_train_batches = 120 if self.dim == 2 else 80
-        self.batch_size = 16 if self.dim == 2 else 8
+        self.batch_size = 12 if self.dim == 2 else 8
 
         self.n_cv_splits = 4
         # select modalities from preprocessed data
         self.channels = [0]
         self.n_channels = len(self.channels)
 
         # which channel (mod) to show as bg in plotting, will be extra added to batch if not in self.channels
         self.plot_bg_chan = 0
         self.crop_margin = [20, 20, 1]  # has to be smaller than respective patch_size//2
         self.patch_size_2D = self.pre_crop_size[:2]
         self.patch_size_3D = self.pre_crop_size[:2]+[8]
 
         # patch_size to be used for training. pre_crop_size is the patch_size before data augmentation.
         self.patch_size = self.patch_size_2D if self.dim == 2 else self.patch_size_3D
 
         # ratio of free sampled batch elements before class balancing is triggered
         # (>0 to include "empty"/background patches.)
         self.batch_random_ratio = 0.2
         self.balance_target = "class_targets" if 'class' in self.prediction_tasks else "rg_bin_targets"
 
         self.observables_patient = []
         self.observables_rois = []
 
         self.seed = 3 #for generating folds
 
         #############################
         # Colors, Classes, Legends  #
         #############################
         self.plot_frequency = 1
 
         binary_bin_labels = [binLabel(1,  'r<=25',      (*self.green, 1.),      (1,25)),
                              binLabel(2,  'r>25',       (*self.red, 1.),        (25,))]
         quintuple_bin_labels = [binLabel(1,  'r2-10',   (*self.green, 1.),      (2,10)),
                                 binLabel(2,  'r10-20',  (*self.yellow, 1.),     (10,20)),
                                 binLabel(3,  'r20-30',  (*self.orange, 1.),     (20,30)),
                                 binLabel(4,  'r30-40',  (*self.bright_red, 1.), (30,40)),
                                 binLabel(5,  'r>40',    (*self.red, 1.), (40,))]
 
         # choose here if to do 2-way or 5-way regression-bin classification
         task_spec_bin_labels = quintuple_bin_labels
 
         self.class_labels = [
             # regression: regression-task label, either value or "(x,y,z)_radius" or "radii".
             # ambiguities: name of above defined ambig to apply to image data (not gt); need to be iterables!
             # gt_distortion: name of ambig to apply to gt only; needs to be iterable!
             #      #id  #name   #shape  #radius     #color              #regression #ambiguities    #gt_distortion
             Label(  0,  'bg',   None,   (0, 0, 0),  (*self.white, 0.),  (0, 0, 0),  (),             ())]
         # with a 'class' task the preprocessing classes are used as they are; otherwise all objects
         # are collapsed into one generic 'object' class.
         if "class" in self.prediction_tasks:
             self.class_labels += self.pp_classes
         else:
             self.class_labels += [Label(1, 'object', 'object', ('various',), (*self.orange, 1.), ('radius_2d',), ("various",), ('various',))]
 
 
         if any(['regression' in task for task in self.prediction_tasks]):
             # bin 0 is background, followed by the task-specific bins chosen above.
             self.bin_labels = [binLabel(0,  'bg',       (*self.white, 1.),      (0,))]
             self.bin_labels += task_spec_bin_labels
             self.bin_id2label = {label.id: label for label in self.bin_labels}
             # (lower, upper) bound per bin; both are equal for single-valued bin_vals such as (40,).
             bins = [(min(label.bin_vals), max(label.bin_vals)) for label in self.bin_labels]
             # representative regression value per bin: the mean of its two bounds, keyed by list position.
             self.bin_id2rg_val = {ix: [np.mean(bin)] for ix, bin in enumerate(bins)}
             # edge between neighbouring bins: midpoint of one bin's upper and the next bin's lower bound.
             self.bin_edges = [(bins[i][1] + bins[i + 1][0]) / 2 for i in range(len(bins) - 1)]
             self.bin_dict = {label.id: label.name for label in self.bin_labels if label.id != 0}
 
         if self.class_specific_seg:
           self.seg_labels = self.class_labels
 
         # lookup tables; class_dict deliberately leaves out the background label (id 0).
         self.box_type2label = {label.name: label for label in self.box_labels}
         self.class_id2label = {label.id: label for label in self.class_labels}
         self.class_dict = {label.id: label.name for label in self.class_labels if label.id != 0}
 
         self.seg_id2label = {label.id: label for label in self.seg_labels}
         self.cmap = {label.id: label.color for label in self.seg_labels}
 
         self.plot_prediction_histograms = True
         self.plot_stat_curves = False
         self.has_colorchannels = False
         self.plot_class_ids = True
 
         # num_classes counts foreground classes only (class_dict has no background entry).
         self.num_classes = len(self.class_dict)
         self.num_seg_classes = len(self.seg_labels)
 
         #########################
         #   Data Augmentation   #
         #########################
         self.do_aug = True
         self.da_kwargs = {
             'mirror': True,
             'mirror_axes': tuple(np.arange(0, self.dim, 1)),
             'do_elastic_deform': False,
             'alpha': (500., 1500.),
             'sigma': (40., 45.),
             'do_rotation': False,
             'angle_x': (0., 2 * np.pi),
             'angle_y': (0., 0),
             'angle_z': (0., 0),
             'do_scale': False,
             'scale': (0.8, 1.1),
             'random_crop': False,
             'rand_crop_dist': (self.patch_size[0] / 2. - 3, self.patch_size[1] / 2. - 3),
             'border_mode_data': 'constant',
             'border_cval_data': 0,
             'order_data': 1
         }
 
         # in 3D the rotation range is moved from angle_x to angle_z.
         if self.dim == 3:
             self.da_kwargs['do_elastic_deform'] = False
             self.da_kwargs['angle_x'] = (0, 0.0)
             self.da_kwargs['angle_y'] = (0, 0.0)  # must be 0!!
             self.da_kwargs['angle_z'] = (0., 2 * np.pi)
 
         #########################
         #  Schedule / Selection #
         #########################
 
         # decide whether to validate on entire patient volumes (like testing) or sampled patches (like training)
         # the former is more accurate, while the latter is faster (depending on volume size)
         self.val_mode = 'val_sampling' # one of 'val_sampling' , 'val_patient'
         if self.val_mode == 'val_patient':
             self.max_val_patients = 220  # if 'all' iterates over entire val_set once.
         if self.val_mode == 'val_sampling':
             self.num_val_batches = 25 if self.dim==2 else 15
 
         self.save_n_models = 2
         self.min_save_thresh = 1 if self.dim == 2 else 1  # =wait time in epochs
         # model selection criteria: metric name -> weight, one "<class name>_ap" (and, for regression
         # tasks, "<class name>_avp") entry per foreground class.
         if "class" in self.prediction_tasks:
             self.model_selection_criteria = {name + "_ap": 1. for name in self.class_dict.values()}
         elif any("regression" in task for task in self.prediction_tasks):
             self.model_selection_criteria = {name + "_ap": 0.2 for name in self.class_dict.values()}
             self.model_selection_criteria.update({name + "_avp": 0.8 for name in self.class_dict.values()})
 
         self.lr_decay_factor = 0.5
         self.scheduling_patience = int(self.num_epochs / 5)
         self.weight_decay = 1e-5
         self.clip_norm = None  # number or None
 
         #########################
         #   Testing / Plotting  #
         #########################
 
         self.test_aug_axes = (0,1,(0,1)) # None or list: choices are 0,1,(0,1)
         self.held_out_test_set = True
         self.max_test_patients = "all"  # number or "all" for all
 
         # True only if the data path does not contain 'exact'; both paths set in the I/O section
         # contain it, so this currently evaluates to False.
         self.test_against_exact_gt = not 'exact' in self.data_sourcedir
         self.val_against_exact_gt = False # True is an unrealistic --> irrelevant scenario.
         self.report_score_level = ['rois']  # 'patient' or 'rois' (incl)
         self.patient_class_of_interest = 1
         self.patient_bin_of_interest = 2
 
         self.eval_bins_separately = False#"additionally" if not 'class' in self.prediction_tasks else False
         self.metrics = ['ap', 'auc', 'dice']
         if any(['regression' in task for task in self.prediction_tasks]):
             self.metrics += ['avp', 'rg_MAE_weighted', 'rg_MAE_weighted_tp',
                              'rg_bin_accuracy_weighted', 'rg_bin_accuracy_weighted_tp']
         if 'aleatoric' in self.model:
             self.metrics += ['rg_uncertainty', 'rg_uncertainty_tp', 'rg_uncertainty_tp_weighted']
         self.evaluate_fold_means = True
 
         self.ap_match_ious = [0.5]  # threshold(s) for considering a prediction as true positive
         self.min_det_thresh = 0.3
 
         self.model_max_iou_resolution = 0.2
 
         # aggregation method for test and val_patient predictions.
         # wbc = weighted box clustering as in https://arxiv.org/pdf/1811.08661.pdf,
         # nms = standard non-maximum suppression, or None = no clustering
         self.clustering = 'wbc'
         # iou thresh (exclusive!) for regarding two preds as concerning the same ROI
         self.clustering_iou = self.model_max_iou_resolution  # has to be larger than desired possible overlap iou of model predictions
 
         self.merge_2D_to_3D_preds = False
         self.merge_3D_iou = self.model_max_iou_resolution
         self.n_test_plots = 1  # per fold and rank
 
         self.test_n_epochs = self.save_n_models  # should be called n_test_ens, since is number of models to ensemble over during testing
         # is multiplied by (1 + nr of test augs)
 
         #########################
         #   Assertions          #
         #########################
         if not 'class' in self.prediction_tasks:
             assert self.num_classes == 1
 
         #########################
         #   Add model specifics #
         #########################
 
         # dispatch on the model name and immediately call the matching method; a model name that
         # is not a key of this dict raises a KeyError.
         {'mrcnn': self.add_mrcnn_configs, 'mrcnn_aleatoric': self.add_mrcnn_configs,
          'retina_net': self.add_mrcnn_configs, 'retina_unet': self.add_mrcnn_configs,
          'detection_unet': self.add_det_unet_configs, 'detection_fpn': self.add_det_fpn_configs
          }[self.model]()
 
     def rg_val_to_bin_id(self, rg_val):
         """Map one or several regression values to a single bin id.
 
         Every value is assigned to a bin via self.bin_edges; the bin indices are then averaged
         and the average is rounded. Only meant for isotropic radii: 2D radii (x and y dims) or
         1D radii (x or y) are expected.
         :param rg_val: scalar or array-like of regression values.
         :return: rounded mean bin index (numpy float).
         """
         bin_ids = np.digitize(rg_val, self.bin_edges)
         mean_bin = bin_ids.mean()
         return np.round(mean_bin)
 
 
     def add_det_fpn_configs(self):
 
       # Settings specific to the 'detection_fpn' model; called through the dispatch dict at the
       # end of __init__.
 
       # one learning-rate entry per epoch.
-      self.learning_rate = [5 * 1e-4] * self.num_epochs
+      self.learning_rate = [1 * 1e-4] * self.num_epochs
       self.dynamic_lr_scheduling = True
       self.scheduling_criterion = 'torch_loss'
       # the criterion name contains "loss", so this resolves to 'min'.
       self.scheduling_mode = 'min' if "loss" in self.scheduling_criterion else 'max'
 
       self.n_roi_candidates = 4 if self.dim == 2 else 6
       # max number of roi candidates to identify per image (slice in 2D, volume in 3D)
 
       # loss mode: either weighted cross entropy ('wce'), batch-wise dice loss ('dice'), or the sum of both ('dice_wce')
       self.seg_loss_mode = 'wce'
       # with 'wce' the substring 'dice' is absent, so the weights are [0.1, 1]
       # (presumably background, foreground -- TODO confirm against the loss call).
       self.wce_weights = [1] * self.num_seg_classes if 'dice' in self.seg_loss_mode else [0.1, 1]
 
       # both branches are currently 1, i.e. the value does not depend on self.dim.
       self.fp_dice_weight = 1 if self.dim == 2 else 1
       # if <1, false positive predictions in foreground are penalized less.
 
       self.detection_min_confidence = 0.05
       # how to determine score of roi: 'max' or 'median'
       self.score_det = 'max'
 
     def add_det_unet_configs(self):
 
       # Settings specific to the 'detection_unet' model; called through the dispatch dict at the
       # end of __init__.
 
       # one learning-rate entry per epoch.
-      self.learning_rate = [5 * 1e-4] * self.num_epochs
+      self.learning_rate = [1 * 1e-4] * self.num_epochs
       self.dynamic_lr_scheduling = True
       self.scheduling_criterion = "torch_loss"
       # the criterion name contains "loss", so this resolves to 'min'.
       self.scheduling_mode = 'min' if "loss" in self.scheduling_criterion else 'max'
 
       # max number of roi candidates to identify per image (slice in 2D, volume in 3D)
       self.n_roi_candidates = 4 if self.dim == 2 else 6
 
       # loss mode: either weighted cross entropy ('wce'), batch-wise dice loss ('dice'), or the sum of both ('dice_wce')
       self.seg_loss_mode = 'wce'
       # with 'wce' the substring 'dice' is absent, so the weights are [0.1, 1]
       # (presumably background, foreground -- TODO confirm against the loss call).
       self.wce_weights = [1] * self.num_seg_classes if 'dice' in self.seg_loss_mode else [0.1, 1]
       # if <1, false positive predictions in foreground are penalized less.
       # both branches are currently 1, i.e. the value does not depend on self.dim.
       self.fp_dice_weight = 1 if self.dim == 2 else 1
 
       self.detection_min_confidence = 0.05
       # how to determine score of roi: 'max' or 'median'
       self.score_det = 'max'
 
       # U-Net building-block settings.
       self.init_filts = 32
       self.kernel_size = 3  # ks for horizontal, normal convs
       self.kernel_size_m = 2  # ks for max pool
       self.pad = "same"  # "same" or integer, padding of horizontal convs
 
     def add_mrcnn_configs(self):
       """Set the hyper-parameters of the models that are dispatched to this method.
 
       __init__ calls it for 'mrcnn', 'mrcnn_aleatoric', 'retina_net' and 'retina_unet'. The two
       retina models additionally receive the overrides in the last section.
       """
       is_2d = self.dim == 2
 
       # --- optimisation schedule ---
       # one learning rate per epoch; the scheduler monitors the selection criterion with the largest weight.
       self.learning_rate = [1e-4] * self.num_epochs
       self.dynamic_lr_scheduling = True  # with scheduler set in exec
       criteria = self.model_selection_criteria
       self.scheduling_criterion = max(criteria, key=criteria.get)
       self.scheduling_mode = 'max' if "loss" not in self.scheduling_criterion else 'min'
 
       # --- network heads ---
       # number of classes for network heads: n_foreground_classes + 1 (background); 2 without a class task.
       if 'class' in self.prediction_tasks:
         self.head_classes = self.num_classes + 1
       else:
         self.head_classes = 2
 
       # feed +/- n neighbouring slices into channel dimension. set to None for no context.
       self.n_3D_context = None
       if is_2d and self.n_3D_context is not None:
         self.n_channels *= (self.n_3D_context * 2 + 1)
 
       self.detect_while_training = True
       # re-sampling of mask proposals to original size can be disabled for speed-up: evaluation is
       # detection-driven (box-matching), not instance segmentation-driven (iou-matching), so mask
       # outputs are optional.
       self.return_masks_in_train = True
       self.return_masks_in_val = True
       self.return_masks_in_test = True
 
       # --- feature pyramid and anchors ---
       # feature map strides per pyramid level are inferred from architecture. anchor scales are set accordingly.
       self.backbone_strides = {'xy': [4, 8, 16, 32], 'z': [1, 2, 4, 8]}
       # anchor scales follow the expected object sizes in the data set; the default is one scale per
       # pyramid level (outer list: pyramid levels matching the backbone strides, inner list: scales per level).
       self.rpn_anchor_scales = {'xy': [[4], [8], [16], [32]], 'z': [[1], [2], [4], [8]]}
       # pyramid levels to extract features from: P2: 0, P3: 1, P4: 2, P5: 3.
       self.pyramid_levels = [0, 1, 2, 3]
       # number of feature maps in rpn. typically lowered in 3D to save gpu-memory.
       self.n_rpn_features = 512 if is_2d else 64
 
       # anchor ratios and strides per position in feature maps.
       self.rpn_anchor_ratios = [0.5, 1., 2.]
       self.rpn_anchor_stride = 1
       # threshold for first stage (RPN) non-maximum suppression (NMS): LOWER == HARDER SELECTION
       self.rpn_nms_threshold = max(0.8, self.model_max_iou_resolution)
 
       # --- loss sampling ---
       self.rpn_train_anchors_per_image = 4
       self.train_rois_per_image = 6  # per batch_instance
       self.roi_positive_ratio = 0.5
       self.anchor_matching_iou = 0.8
 
       # k negative example candidates are drawn from a pool of size k*shem_poolsize
       # (stochastic hard-example mining), where k<=#positive examples.
       self.shem_poolsize = 2
 
       # --- pooling / mask resolutions ---
       self.pool_size = (7, 7) if is_2d else (7, 7, 3)
       self.mask_pool_size = (14, 14) if is_2d else (14, 14, 5)
       self.mask_shape = (28, 28) if is_2d else (28, 28, 10)
 
       # --- box normalisation ---
       size_y, size_x = self.patch_size[0], self.patch_size[1]
       size_z = self.patch_size_3D[2]
       self.rpn_bbox_std_dev = np.array([0.1, 0.1, 0.1, 0.2, 0.2, 0.2])
       self.bbox_std_dev = np.array([0.1, 0.1, 0.1, 0.2, 0.2, 0.2])
       self.window = np.array([0, 0, size_y, size_x, 0, size_z])
       self.scale = np.array([size_y, size_x, size_y, size_x, size_z, size_z])  # y1,x1,y2,x2,z1,z2
 
       if is_2d:
         # keep only the first four entries (y1,x1,y2,x2 in self.scale).
         self.rpn_bbox_std_dev = self.rpn_bbox_std_dev[:4]
         self.bbox_std_dev = self.bbox_std_dev[:4]
         self.window = self.window[:4]
         self.scale = self.scale[:4]
 
       # --- plotting ---
       self.plot_y_max = 1.5
       self.n_plot_rpn_props = 5 if is_2d else 30  # per batch_instance (slice in 2D / patient in 3D)
 
       # --- proposal selection ---
       # pre-selection in proposal-layer (stage 1) for NMS-speedup. applied per batch element.
       self.pre_nms_limit = 2000 if is_2d else 4000
 
       # n_proposals to be selected after NMS per batch element. too high numbers blow up memory if
       # "detect_while_training" is True, since proposals of the entire batch are forwarded through
       # the second stage as one "batch".
       self.roi_chunk_size = 1300 if is_2d else 500
       n_fg_heads = self.head_classes - 1
       self.post_nms_rois_training = 200 * n_fg_heads if is_2d else 400
       self.post_nms_rois_inference = 200 * n_fg_heads
 
       # --- final selection of detections (refine_detections) ---
       self.model_max_instances_per_batch_element = 9 if is_2d else 18  # per batch element and class.
       # needs to be > 0, otherwise all predictions are one cluster.
       self.detection_nms_threshold = self.model_max_iou_resolution
       # NOTE(review): the original comment describes this as used in box refining directly after
       # the heads and says it should be > 0 because of a ">=" comparison in mrcnn.py -- confirm there.
       self.model_min_confidence = 0.2
 
       # --- feature-map shape per pyramid level: ceil(patch size / stride) ---
       strides_xy = self.backbone_strides['xy']
       if is_2d:
         level_shapes = [[int(np.ceil(size_y / stride)), int(np.ceil(size_x / stride))]
                         for stride in strides_xy]
       else:
         strides_z = self.backbone_strides['z']
         level_shapes = [[int(np.ceil(size_y / stride)),
                          int(np.ceil(size_x / stride)),
                          int(np.ceil(self.patch_size[2] / stride_z))]
                         for stride, stride_z in zip(strides_xy, strides_z)]
       self.backbone_shapes = np.array(level_shapes)
 
       # --- overrides for the retina models ---
       if self.model in ('retina_net', 'retina_unet'):
         # whether to use focal loss or SHEM for loss-sample selection
         self.focal_loss = False
         # extra anchor scales according to https://arxiv.org/abs/1708.02002: each level keeps its
         # base scale and adds the base scaled by 2^(1/3) and 2^(2/3).
         for axis in ('xy', 'z'):
           self.rpn_anchor_scales[axis] = [[lvl[0], lvl[0] * (2 ** (1 / 3)), lvl[0] * (2 ** (2 / 3))]
                                           for lvl in self.rpn_anchor_scales[axis]]
         self.n_anchors_per_pos = len(self.rpn_anchor_ratios) * 3
 
         # pre-selection of detections for NMS-speedup. per entire batch.
         self.pre_nms_limit = (500 if is_2d else 6250) * self.batch_size
 
         # anchor matching iou is lower than in Mask R-CNN according to https://arxiv.org/abs/1708.02002
         self.anchor_matching_iou = 0.7
 
         if self.model == 'retina_unet':
           self.operate_stride1 = True
diff --git a/models/detection_fpn.py b/models/detection_fpn.py
index b59e51d..4cf58ef 100644
--- a/models/detection_fpn.py
+++ b/models/detection_fpn.py
@@ -1,176 +1,176 @@
 #!/usr/bin/env python
 # Copyright 2019 Division of Medical Image Computing, German Cancer Research Center (DKFZ).
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 
 """
 Unet-like Backbone architecture, with non-parametric heuristics for box detection on semantic segmentation outputs.
 """
 import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.autograd import Variable
 from scipy.ndimage.measurements import label as lb
 
 import utils.exp_utils as utils
 import utils.model_utils as mutils
 
 
 class net(nn.Module):
 
     def __init__(self, cf, logger):
 
         # Build the network: an FPN backbone imported from cf.backbone_path plus one final
         # convolution that produces the segmentation logits.
         # cf: config object; this method reads backbone_path, dim, relu, end_filts,
         #     num_seg_classes, norm (old side of the diff only) and weight_init from it.
         # logger: logger used for info messages.
         super(net, self).__init__()
         self.cf = cf
         self.logger = logger
         backbone = utils.import_module('bbone', cf.backbone_path)
         self.logger.info("loaded backbone from {}".format(self.cf.backbone_path))
         conv_gen = backbone.ConvGenerator(cf.dim)
 
         # set operate_stride1=True to generate a unet-like FPN.
         self.fpn = backbone.FPN(cf, conv=conv_gen, relu_enc=cf.relu, operate_stride1=True)
         # final conv with kernel size 1 and no relu: maps cf.end_filts feature channels to
         # cf.num_seg_classes output channels.
-        self.conv_final = conv_gen(cf.end_filts, cf.num_seg_classes, ks=1, pad=0, norm=cf.norm, relu=None)
+        self.conv_final = conv_gen(cf.end_filts, cf.num_seg_classes, ks=1, pad=0, norm=None, relu=None)
 
         # initialize parameters: "custom" is not implemented here and, like a falsy value, leaves
         # the pytorch default in place; any other truthy value delegates to mutils.initialize_weights.
         if self.cf.weight_init=="custom":
             logger.info("Tried to use custom weight init which is not defined. Using pytorch default.")
         elif self.cf.weight_init:
             mutils.initialize_weights(self)
         else:
             logger.info("using default pytorch weight init")
 
 
     def forward(self, x):
         """
         forward pass of network: predicts a segmentation and derives boxes from its connected components.
         :param x: input image. shape (b, c, y, x, (z))
         :return: seg_logits: shape (b, n_classes, y, x, (z))
         :return: out_box_coords: list over foreground classes. elements are the box coordinates returned by
                  mutils.get_coords for that class (per batch element and roi: y1, x1, y2, x2, (z1), (z2)).
         :return: out_max_scores: list over foreground classes. elements are lists over batch elements holding
                  one score per roi.
         """
         # take only the pyramid output of stride 1
         out_features = self.fpn(x)[0]
         seg_logits = self.conv_final(out_features)
         smax = F.softmax(seg_logits.detach(), dim=1).cpu().data.numpy()
 
         # Both values are the same for every class and roi, so they are computed once up front
         # instead of copying and arg-maxing the whole softmax array inside the class loop.
         pred_labels = smax.argmax(1)
         score_det = np.max if self.cf.score_det == "max" else np.median  # score determination
 
         out_box_coords, out_max_scores = [], []
         for cl in range(1, len(self.cf.class_dict) + 1):
             # binary mask (same integer dtype as the argmax result) of pixels predicted as class cl.
             hard_mask = (pred_labels == cl).astype(pred_labels.dtype)
             # perform connected component analysis on argmaxed predictions,
             # draw boxes around components and return coordinates.
             box_coords, rois = mutils.get_coords(hard_mask, self.cf.n_roi_candidates, self.cf.dim)
 
             # for each object, reduce the softmax scores (in the respective class) of all pixels
             # in the component to one object score: the maximum, or the median if so configured.
             # smax is indexed per roi so that nothing is read for batch elements without rois.
             max_scores = [[] for _ in range(x.shape[0])]
             for bix, broi in enumerate(rois):
                 max_scores[bix].extend(score_det(smax[bix, cl][nroi > 0]) for nroi in broi)
             out_box_coords.append(box_coords)
             out_max_scores.append(max_scores)
         return seg_logits, out_box_coords, out_max_scores
 
     def train_forward(self, batch, **kwargs):
         """
         train method (also used for validation monitoring). wraps the forward pass: moves the batch to the
         GPU, collects detections and ground-truth boxes, and computes the segmentation loss.
         :param batch: dictionary containing 'data', 'seg', 'bb_target' and one entry per name in cf.roi_items.
         :param kwargs: unused.
         :return: results_dict: dictionary with keys:
                 'class_loss': np.nan placeholder for monitoring, since no classification loss is applied.
                 'boxes': list over batch elements. each batch element is a list of boxes. each box is a dictionary:
                         [[{box_0}, ... {box_n}], [{box_0}, ... {box_n}], ...]; detections first, then gt boxes.
                 'torch_loss': 1D torch tensor for backprop.
                 'seg_preds': pixel-wise class predictions (b, 1, y, x, (z)) with values [0, n_classes]
                 'batch_dices': only if 'dice' is in cf.metrics.
         """
         img = torch.from_numpy(batch['data']).cuda().float()
         seg = torch.from_numpy(batch['seg']).cuda().long()
         seg_ohe = torch.from_numpy(mutils.get_one_hot_encoding(batch['seg'], self.cf.num_seg_classes)).cuda()
         n_batch = img.shape[0]
 
         results_dict = {}
         seg_logits, box_coords, max_scores = self.forward(img)
 
         # no extra class loss applied in this model. pass dummy value for monitoring.
         results_dict['class_loss'] = np.nan
 
         # detections: keep every roi whose score exceeds the minimum confidence.
         boxes = [[] for _ in range(n_batch)]
         results_dict['boxes'] = boxes
         for cix in range(len(self.cf.class_dict)):
             for bix in range(n_batch):
                 for rix, score in enumerate(max_scores[cix][bix]):
                     if score > self.cf.detection_min_confidence:
                         boxes[bix].append({'box_coords': np.copy(box_coords[cix][bix][rix]),
                                            'box_score': score,
                                            'box_pred_class_id': cix + 1,  # class ids start at 1; 0 is background.
                                            'box_type': 'det'})
 
         # ground truth: one box per target, carrying all configured roi items.
         for bix in range(n_batch):
             for tix, target in enumerate(batch['bb_target'][bix]):
                 gt_box = {'box_coords': target, 'box_type': 'gt'}
                 for name in self.cf.roi_items:
                     gt_box[name] = batch[name][bix][tix]
                 boxes[bix].append(gt_box)
 
         # compute segmentation loss as either weighted cross entropy, dice loss, or the sum of both.
         loss_mode = self.cf.seg_loss_mode
         loss = torch.tensor([0.], dtype=torch.float, requires_grad=False).cuda()
         seg_pred = F.softmax(seg_logits, dim=1)
         if loss_mode in ('dice', 'dice_wce'):
             loss += 1 - mutils.batch_dice(seg_pred, seg_ohe.float(), false_positive_weight=float(self.cf.fp_dice_weight))
         if loss_mode in ('wce', 'dice_wce'):
             loss += F.cross_entropy(seg_logits, seg[:, 0], weight=torch.FloatTensor(self.cf.wce_weights).cuda())
         results_dict['torch_loss'] = loss
 
         # hard predictions: argmax over the class axis, keeping a singleton channel dimension.
         seg_pred = seg_pred.argmax(dim=1).unsqueeze(dim=1).cpu().data.numpy()
         results_dict['seg_preds'] = seg_pred
         if 'dice' in self.cf.metrics:
             results_dict['batch_dices'] = mutils.dice_per_batch_and_class(
                 seg_pred, batch["seg"], self.cf.num_seg_classes, convert_to_ohe=True)
         return results_dict
 
 
     def test_forward(self, batch, **kwargs):
         """
         test method. wraps the forward pass without using any ground truth information: moves the input to
         the GPU and stores the outputs in a dictionary.
         :param batch: dictionary containing 'data'
         :param kwargs: unused.
         :return: results_dict: dictionary with keys:
                'boxes': list over batch elements. each batch element is a list of boxes. each box is a dictionary:
                        [[{box_0}, ... {box_n}], [{box_0}, ... {box_n}], ...]
                'seg_preds': softmax over the class axis of the segmentation logits, as numpy array.
         """
         img = torch.FloatTensor(batch['data']).cuda()
         seg_logits, box_coords, max_scores = self.forward(img)
         n_batch = img.shape[0]
 
         # keep every roi whose score exceeds the minimum confidence; class ids start at 1.
         boxes = [[] for _ in range(n_batch)]
         for cix in range(len(box_coords)):
             for bix in range(n_batch):
                 for rix, score in enumerate(max_scores[cix][bix]):
                     if score > self.cf.detection_min_confidence:
                         boxes[bix].append({'box_coords': np.copy(box_coords[cix][bix][rix]),
                                            'box_score': score,
                                            'box_pred_class_id': cix + 1,
                                            'box_type': 'det'})
 
         return {'boxes': boxes,
                 'seg_preds': F.softmax(seg_logits, dim=1).cpu().data.numpy()}
 
diff --git a/models/detection_unet.py b/models/detection_unet.py
index 20394ba..dd7e293 100644
--- a/models/detection_unet.py
+++ b/models/detection_unet.py
@@ -1,545 +1,545 @@
 import warnings
 import os
 import shutil
 import time
 
 import math
 import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 
 
 
 import utils.exp_utils as utils
 import utils.model_utils as mutils
 
 '''
 Use nn.DataParallel to use more than one GPU
 '''
 
 def center_crop_2D_image_batched(img, crop_size):
     """Cut the central window out of dims 2 and 3 of a batched image.
 
     Adapted from the batch generator tools at https://github.com/MIC-DKFZ/batchgenerators
 
     :param img: array indexed as (batch, channel, spatial_0, spatial_1).
     :param crop_size: scalar (same extent for every spatial dim) or iterable with one extent per spatial dim.
     :return: img sliced around the center of dims 2 and 3; batch and channel dims are kept entirely.
     """
     n_spatial = len(img.shape) - 2
     midpoint = np.array(img.shape[2:]) / 2.
     if hasattr(crop_size, "__iter__"):
         extent = np.array(crop_size)
         assert len(extent) == n_spatial, "If you provide a list/tuple as center crop make sure it has the same len as your data has dims (2d)"
     else:
         extent = [int(crop_size)] * n_spatial
     # window bounds are truncated by int()
     lo_0, hi_0 = int(midpoint[0] - extent[0] / 2.), int(midpoint[0] + extent[0] / 2.)
     lo_1, hi_1 = int(midpoint[1] - extent[1] / 2.), int(midpoint[1] + extent[1] / 2.)
     return img[:, :, lo_0:hi_0, lo_1:hi_1]
 
 def center_crop_3D_image_batched(img, crop_size):
     """Cut the central window out of dims 2, 3 and 4 of a batched volume.
 
     :param img: array indexed as (batch, channel, spatial_0, spatial_1, spatial_2).
     :param crop_size: scalar (same extent for every spatial dim) or iterable with one extent per spatial dim.
     :return: img sliced around the center of dims 2, 3 and 4; batch and channel dims are kept entirely.
     """
     n_spatial = len(img.shape) - 2
     midpoint = np.array(img.shape[2:]) / 2.
     if hasattr(crop_size, "__iter__"):
         extent = np.array(crop_size)
         assert len(extent) == n_spatial, "If you provide a list/tuple as center crop make sure it has the same len as your data has dims (3d)"
     else:
         extent = np.array([int(crop_size)] * n_spatial)
     # one slice per spatial axis; bounds are truncated by int()
     window = tuple(slice(int(midpoint[ax] - extent[ax] / 2.), int(midpoint[ax] + extent[ax] / 2.))
                    for ax in range(3))
     return img[(slice(None), slice(None)) + window]
 
 
 def centercrop_vol(tensor, size):
     """Center-crop the spatial dimensions of a batched tensor to the requested size.
 
     The crop is delegated to center_crop_2D_image_batched / center_crop_3D_image_batched depending on the
     number of entries in size, and the cropped result is returned.
 
     :param tensor: array-like indexed as (batch, channel, spatial dims...).
     :param size: 2- or 3-int sequence with the target extent per spatial dimension.
     :return: the center-cropped tensor.
     :raises ValueError: if size has neither 2 nor 3 entries.
     """
     dim = len(size)
     if dim == 2:
         return center_crop_2D_image_batched(tensor, size)
     if dim == 3:
         return center_crop_3D_image_batched(tensor, size)
     raise ValueError("invalid size argument {} encountered in centercrop".format(size))
     
 def dimcalc_conv2D(dims,F=3,s=1,pad="same"):
     r"""Compute the two output extents of a strided sliding-window layer (conv or pooling).
 
     :param dims: input size; entries 0 and 1 are read as the two extents.
     :param F: quadratic kernel size
     :param s: stride
     :param pad: padding per side, or "same" to use (F-1)//2.
     :return: np.array with floor((extent + 2*pad - F)/s + 1) for both extents.
     """
     padding = (F - 1) // 2 if pad == "same" else pad
     out_extents = [(extent + 2 * padding - F) / s + 1 for extent in (dims[0], dims[1])]
     return np.floor(out_extents)
 
 def dimcalc_transconv2D(dims,F=2,s=2):
     r"""Compute the two output extents of a transposed convolution.
 
     :param dims: input size; entries 0 and 1 are read as the two extents.
     :param F: quadratic kernel size
     :param s: stride
     :return: np.array with (extent - 1) * s + F for both extents.
     """
     return np.array([(extent - 1) * s + F for extent in (dims[0], dims[1])])
 
 def dimcalc_Unet_std(init_dims, F=3, F_pool=2, F_up=2, s=1, s_pool=2, s_up=2, pad=0):
     r"""Calculate theoretic dimensions of feature maps for a U-net with 4 encoder and 4 decoder stages.
 
     Each encoder stage is two convs followed by pooling, each decoder stage is two convs followed by a
     transposed conv; two more convs are applied at the end. Sizes are printed after every stage.
 
     :param init_dims: input size (two extents).
     :param F: conv kernel size, :param s: conv stride, :param pad: conv padding.
     :param F_pool: pooling kernel size, :param s_pool: pooling stride.
     :param F_up: transposed-conv kernel size, :param s_up: transposed-conv stride.
     :return: final output size as int np.array.
     """
     def _double_conv(d):
         for _ in range(2):
             d = dimcalc_conv2D(d, F=F, s=s, pad=pad)
         return d
 
     def _encode(d):
         return dimcalc_conv2D(_double_conv(d), F=F_pool, s=s_pool).astype(int)
 
     def _decode(d):
         return dimcalc_transconv2D(_double_conv(d), F=F_up, s=s_up).astype(int)
 
     dims = np.array(init_dims)
     print("init dims: ", dims)
 
     # four stages down, then four stages up; stages are numbered consecutively from 1.
     for stage, step in enumerate([_encode] * 4 + [_decode] * 4, start=1):
         dims = step(dims)
         print("stage ", stage, ": ", dims)
 
     for _ in range(2):
         dims = dimcalc_conv2D(dims, F=F, s=s, pad=pad).astype(int)
     print("final output size: ", dims)
     return dims
 
 def dimcalc_Unet(init_dims, F=3, F_pool=2, F_up=2, s=1, s_pool=2, s_up=2, pad=0):
     r"""Calculate theoretic dimensions of feature maps for a U-net with 6 encoder and 6 decoder stages.
 
     Each encoder stage is three convs followed by pooling; three convs form the bottom; each decoder stage is
     a transposed conv followed by three convs. Sizes are printed after every encoder and decoder stage.
 
     :param init_dims: input size (two extents).
     :param F: conv kernel size, :param s: conv stride, :param pad: conv padding.
     :param F_pool: pooling kernel size, :param s_pool: pooling stride.
     :param F_up: transposed-conv kernel size, :param s_up: transposed-conv stride.
     :return: final output size as int np.array.
     """
     def _triple_conv(d):
         for _ in range(3):
             d = dimcalc_conv2D(d, F=F, s=s, pad=pad)
         return d
 
     def _encode(d):
         return dimcalc_conv2D(_triple_conv(d), F=F_pool, s=s_pool).astype(int)
 
     def _decode(d):
         return _triple_conv(dimcalc_transconv2D(d, F=F_up, s=s_up)).astype(int)
 
     dims = np.array(init_dims)
     print("init dims: ", dims)
 
     stage = 0
     for _ in range(6):
         stage += 1
         dims = _encode(dims)
         print("stage ", stage, ": ", dims)
     # bottom of the U: three more convs, not reported as a stage of its own.
     dims = _triple_conv(dims)
     for _ in range(6):
         stage += 1
         dims = _decode(dims)
         print("stage ", stage, ": ", dims)
     dims = dims.astype(int)
     print("final output size: ", dims)
     return dims
 
 
 
 class horiz_conv(nn.Module):
     """Two consecutive conv layers built by c_gen, optionally with a narrower intermediate channel count.
 
     :param in_chans: channels of the input.
     :param out_chans: channels of the output.
     :param kernel_size: kernel size handed to both layers.
     :param c_gen: callable building a layer as c_gen(in, out, kernel_size, pad=, norm=, relu=).
     :param norm: norm argument forwarded to c_gen.
     :param pad: pad argument forwarded to c_gen.
     :param relu: relu argument forwarded to c_gen.
     :param bottleneck: if truthy, the intermediate layer has round((in_chans+out_chans)*3/8) channels,
         otherwise out_chans.
     """
     def __init__(self, in_chans, out_chans, kernel_size, c_gen, norm, pad=0, relu="relu", bottleneck=True):
         super(horiz_conv, self).__init__()
         #TODO maybe make res-block?
         mid_chans = int(np.round((in_chans+out_chans)*3/8)) if bottleneck else out_chans
         #TODO maybe use norm only on last conv?
         first_layer = c_gen(in_chans, mid_chans, kernel_size, pad=pad, norm=norm, relu=relu)
         second_layer = c_gen(mid_chans, out_chans, kernel_size, pad=pad, norm=norm, relu=relu)
         self.conv = nn.Sequential(first_layer, second_layer)
 
     def forward(self, x):
         """Apply both conv layers in sequence."""
         return self.conv(x)
 
 class up(nn.Module):
     """Decoder block: upsample, reduce channels with a kernel-size-1 layer, concatenate the (center-cropped)
     skip input on the channel dim and apply a horiz_conv.
 
     :param in_chans: channels of the incoming feature map.
     :param out_chans: channels after reduction and of the block output.
     :param kernel_size: kernel size of the horiz_conv layers.
     :param interpol: callable building the upsampling layer as interpol(stride, mode).
     :param c_gen: conv generator; its .dim attribute selects bilinear (2) or trilinear upsampling.
     :param norm: norm argument forwarded to the conv layers.
     :param pad: pad argument forwarded to horiz_conv.
     :param relu: relu argument forwarded to horiz_conv.
     :param stride_ip: first argument handed to interpol.
     """
     def __init__(self, in_chans, out_chans, kernel_size, interpol, c_gen, norm, pad=0, relu="relu", stride_ip=2):
         super(up, self).__init__()
         self.dim = c_gen.dim
         #TODO check if fits with spatial dims order in data
         ip_mode = "bilinear" if self.dim==2 else "trilinear"
         self.upsample = interpol(stride_ip, ip_mode)
         self.reduce_chans = c_gen(in_chans, out_chans, ks=1, norm=norm, relu=None)
         # skip input is expected to contribute out_chans channels as well, hence the doubled input width.
         self.horiz = horiz_conv(out_chans*2, out_chans, kernel_size, c_gen, norm=norm, pad=pad, relu=relu)
 
     def forward(self, x, skip_inp):
         """
         :param x: feature map coming from the deeper stage.
         :param skip_inp: feature map from the encoder to be merged in.
         :return: output of the horiz_conv applied to the channel-wise concatenation.
         """
         #TODO maybe add highway weights in skips?
         x = self.reduce_chans(self.upsample(x))
         targ_size = x.size()[-self.dim:] #ft map x,y,z (spatial)
         skip_inp = centercrop_vol(skip_inp, targ_size)
         assert targ_size == skip_inp.size()[-self.dim:], "corresp. skip and forward dimensions don't match"
         merged = torch.cat((x,skip_inp),dim=1)
         return self.horiz(merged)
 
    
 class net(nn.Module):
     r"""U-Net with few more steps than standard.
     
     Dimensions: 
         feature maps have dims ...xhxwxd, d=feature map depth, h, w = orig 
         img height, width. h,w each are downsized by unpadded forward-convs and pooling,
         upsized by upsampling or upconvolution.
         If :math:`F\times F` is the single kernel_size and stride is :math:`s\geq 1`, 
         :math:`k` is the number of kernels in the conv, i.e. the resulting feature map depth,
         (all may differ between operations), then
     
     :Forward Conv: input  :math:`h \times w \times d` is converted to
     .. math:: \left[ (h-F)//s+1 \right] \times \left[ (w-F)//s+1 \right] \times k
     
     :Pooling: input  :math:`h \times w \times d` is converted to
     .. math:: \left[ (h-F)//s+1 \right] \times \left[ (w-F)//s+1 \right] \times d,
     pooling filters have no depths => orig depths preserved.
 
     :Up-Conv.: input  :math:`h \times w \times d` is converted to
     .. math:: \left[ (h-1)s + F \right] \times \left[ (w-1)s + F \right] \times k
     """
 
 
     def down(self, in_chans, out_chans, kernel_size, kernel_size_m, pad=0, relu="relu",maintain_z=False):
         """generate encoder block: max pooling followed by a horiz_conv.
         :param in_chans: channels of the incoming feature map.
         :param out_chans: channels of the block output.
         :param kernel_size: kernel size of the horiz_conv layers.
         :param kernel_size_m: kernel size of the max pooling.
         :param pad: pad argument forwarded to horiz_conv.
         :param relu: relu argument forwarded to horiz_conv.
         :param maintain_z: in 3D, pool with kernel size 1 and stride 1 on the last spatial dim.
         :return: nn.Sequential(pooling, horiz_conv)
         """
         if maintain_z and self.dim==3:
             if not hasattr(kernel_size_m, "__iter__"):
                 kernel_size_m = [kernel_size_m]*self.dim
             # leave the last spatial dim untouched by the pooling
             kernel_size_m = tuple(kernel_size_m[:-1]) + (1,)
             stride_pool = (2,2,1)
         else:
             stride_pool = 2
 
         pool_cls = nn.MaxPool2d if self.dim == 2 else nn.MaxPool3d
         #--> needs stride 2 in z in upsampling as well!
         pooling = pool_cls(kernel_size_m, stride=stride_pool)
         convs = horiz_conv(in_chans, out_chans, kernel_size, self.c_gen, self.norm, pad, relu=relu)
         return nn.Sequential(pooling, convs)
 
     def up(self, in_chans, out_chans, kernel_size, pad=0, relu="relu", maintain_z=False):
         """generate decoder block (module-level class `up`) wired with this net's interpolator, conv generator
         and norm.
         :param in_chans: channels of the incoming feature map.
         :param out_chans: channels of the block output.
         :param kernel_size: kernel size of the conv layers.
         :param pad: pad argument forwarded to the block.
         :param relu: relu argument forwarded to the block.
         :param maintain_z: in 3D, use stride (2,2,1) for the interpolation instead of 2.
         :return: the decoder block module.
         """
         stride_ip = (2,2,1) if (maintain_z and self.dim==3) else 2
         return up(in_chans, out_chans, kernel_size, self.Interpolator, self.c_gen, norm=self.norm, pad=pad,
                   relu=relu, stride_ip=stride_ip)
 
 
     def __init__(self, cf, logger):
         super(net, self).__init__()
 
         self.cf = cf
         self.dim = cf.dim
         self.norm = cf.norm
         self.logger = logger
         backbone = utils.import_module('bbone', cf.backbone_path)
         self.c_gen = backbone.ConvGenerator(cf.dim)
         self.Interpolator = backbone.Interpolate
 
         #down = DownBlockGen(cf.dim)
         #up = UpBlockGen(cf.dim, backbone.Interpolate)
         down = self.down
         up = self.up
 
         pad = cf.pad
         if pad=="same":
             pad = (cf.kernel_size-1)//2
 
         
         self.dims = "not yet recorded"
         self.is_cuda = False
               
         self.init = horiz_conv(len(cf.channels), cf.init_filts, cf.kernel_size, self.c_gen, self.norm, pad=pad,
                                relu=cf.relu)
         
         self.down1 = down(cf.init_filts,    cf.init_filts*2,  cf.kernel_size, cf.kernel_size_m, pad=pad, relu=cf.relu)
         self.down2 = down(cf.init_filts*2,  cf.init_filts*4,  cf.kernel_size, cf.kernel_size_m, pad=pad, relu=cf.relu)
         self.down3 = down(cf.init_filts*4,  cf.init_filts*6,  cf.kernel_size, cf.kernel_size_m, pad=pad, relu=cf.relu)
         self.down4 = down(cf.init_filts*6,  cf.init_filts*8,  cf.kernel_size, cf.kernel_size_m, pad=pad, relu=cf.relu,
                           maintain_z=True)
         self.down5 = down(cf.init_filts*8,  cf.init_filts*12, cf.kernel_size, cf.kernel_size_m, pad=pad, relu=cf.relu,
                           maintain_z=True)
         #self.down6 = down(cf.init_filts*10, cf.init_filts*14, cf.kernel_size, cf.kernel_size_m, pad=pad, relu=cf.relu)
         
         #self.up1 = up(cf.init_filts*14, cf.init_filts*10, cf.kernel_size, pad=pad, relu=cf.relu)
         self.up2 = up(cf.init_filts*12, cf.init_filts*8,  cf.kernel_size, pad=pad, relu=cf.relu, maintain_z=True)
         self.up3 = up(cf.init_filts*8,  cf.init_filts*6,  cf.kernel_size, pad=pad, relu=cf.relu, maintain_z=True)
         self.up4 = up(cf.init_filts*6,  cf.init_filts*4,  cf.kernel_size, pad=pad, relu=cf.relu)
         self.up5 = up(cf.init_filts*4,  cf.init_filts*2,  cf.kernel_size, pad=pad, relu=cf.relu)
         self.up6 = up(cf.init_filts*2,  cf.init_filts,    cf.kernel_size, pad=pad, relu=cf.relu)
         
-        self.seg = self.c_gen(cf.init_filts, cf.num_seg_classes, 1, norm=None, relu=None) #TODO maybe apply norm too?
+        self.seg = self.c_gen(cf.init_filts, cf.num_seg_classes, 1, norm=None, relu=None)
 
 
         # initialize parameters
         if self.cf.weight_init == "custom":
             logger.info("Tried to use custom weight init which is not defined. Using pytorch default.")
         elif self.cf.weight_init:
             mutils.initialize_weights(self)
         else:
             logger.info("using default pytorch weight init")
         
     
     def forward(self, x):
         r'''Forward application of network-function.
 
         Runs encoder and decoder, then derives box candidates per foreground class from the argmaxed softmax
         output. NOTE: recording of per-stage feature-map shapes into self.dims is currently not performed.
 
         :param x: input to the network, expected as torch.tensor of dims
         .. math:: batch\_size \times channels \times height \times width
         requires_grad should be True for training
         :return: seg_logits: output of the final segmentation conv, class scores on dim 1.
         :return: out_box_coords: list over foreground classes (1 .. num_seg_classes-1) holding the box coordinates
             returned by mutils.get_coords for that class.
         :return: out_scores: list over foreground classes; each entry is a list over batch elements holding one
             score per roi of that class.
         '''
         x1 = self.init(x)
 
         #---downwards---
         x2 = self.down1(x1)
         x3 = self.down2(x2)
         x4 = self.down3(x3)
         x5 = self.down4(x4)
 
         #---bottom---
         x = self.down5(x5)
 
         #---upwards---
         x = self.up2(x, x5)
         x = self.up3(x, x4)
         x = self.up4(x, x3)
         x = self.up5(x, x2)
         x = self.up6(x, x1)
 
         # ---final---
         seg_logits = self.seg(x)
         assert seg_logits.shape[1]==self.cf.num_seg_classes
 
         # box extraction works on detached numpy probabilities, so no gradient flows through it.
         seg_probs = F.softmax(seg_logits.detach(), dim=1).cpu().data.numpy()
         # the per-pixel winning class and the score reducer are the same for every class: compute them once
         # instead of copying and argmaxing the whole probability volume per class.
         class_map = seg_probs.argmax(1)
         score_det = np.max if self.cf.score_det == "max" else np.median  # score determination
 
         out_box_coords, out_scores = [], []
         for cl in range(1, seg_logits.shape[1]):
             # fresh binary mask of the pixels predicted as class cl.
             hard_mask = (class_map == cl).astype(class_map.dtype)
             # perform connected component analysis on argmaxed predictions,
             # draw boxes around components and return coordinates.
             box_coords, rois = mutils.get_coords(hard_mask, self.cf.n_roi_candidates, self.cf.dim)
 
             # for each object, reduce the softmax scores (in the respective class)
             # of all pixels in the component to one object score.
             scores = [[] for _ in range(seg_logits.shape[0])]
             for b_inst, brois in enumerate(rois):
                 for nroi in brois:
                     scores[b_inst].append(score_det(seg_probs[b_inst, cl][nroi > 0]))
             out_box_coords.append(box_coords)
             out_scores.append(scores)
 
         return seg_logits, out_box_coords, out_scores
 
     # noinspection PyCallingNonCallable
     def train_forward(self, batch, **kwargs):
         """
         train method (also used for validation monitoring). wrapper around forward pass of network. prepares input data
         for processing, computes losses, and stores outputs in a dictionary.
         :param batch: dictionary containing 'data', 'seg', 'bb_target' and one entry per name in cf.roi_items.
         :param kwargs: accepted but not used here.
         :return: results_dict: dictionary with keys:
                 'boxes': list over batch elements. each batch element is a list of boxes. each box is a dictionary:
                         [[{box_0}, ... {box_n}], [{box_0}, ... {box_n}], ...]
                         holds thresholded detections ('box_type': 'det') followed by ground truth boxes
                         ('box_type': 'gt').
                 'seg_preds': argmax over the class dim of the softmax output, with a singleton dim re-inserted at
                         position 1, as numpy array.
                 'torch_loss': 1D torch tensor for backprop.
                 'class_loss': classification loss for monitoring. here: np.nan, since no classification conducted.
                 'batch_dices': only present if 'dice' is in cf.metrics.
         """
 
         # move inputs to the GPU; the one-hot target is only consumed by the dice loss.
         img = torch.from_numpy(batch["data"]).float().cuda()
         seg = torch.from_numpy(batch["seg"]).long().cuda()
         seg_ohe = torch.from_numpy(mutils.get_one_hot_encoding(batch['seg'], self.cf.num_seg_classes)).float().cuda()
 
         results_dict = {}
         seg_logits, box_coords, scores = self.forward(img)
 
         # no extra class loss applied in this model. pass dummy value for monitoring.
         results_dict['class_loss'] = np.nan
 
         # collect detections above the confidence threshold.
         # NOTE(review): iterates over len(cf.class_dict) classes while test_forward iterates over len(box_coords);
         # presumably both agree with num_seg_classes - 1 -- confirm against the configs.
         results_dict['boxes'] = [[] for _ in range(img.shape[0])]
         for cix in range(len(self.cf.class_dict.keys())):
             for bix in range(img.shape[0]):
                 for rix in range(len(scores[cix][bix])):
                     if scores[cix][bix][rix] > self.cf.detection_min_confidence:
                         results_dict['boxes'][bix].append({'box_coords': np.copy(box_coords[cix][bix][rix]),
                                                            'box_score': scores[cix][bix][rix],
                                                            'box_pred_class_id': cix + 1,  # add 0 for background.
                                                            'box_type': 'det',
                                                            })
 
         # append the ground truth boxes together with their roi-wise items.
         for bix in range(img.shape[0]): #bix = batch-element index
             for tix in range(len(batch['bb_target'][bix])): #target index
                 gt_box = {'box_coords': batch['bb_target'][bix][tix], 'box_type': 'gt'}
                 for name in self.cf.roi_items:
                     gt_box.update({name: batch[name][bix][tix]})
                 results_dict['boxes'][bix].append(gt_box)
 
         # compute segmentation loss as either weighted cross entropy, dice loss, or the sum of both.
         seg_pred = F.softmax(seg_logits, 1)
         # accumulator starts at zero; the selected loss terms are added in place below.
         loss = torch.tensor([0.], dtype=torch.float, requires_grad=False).cuda()
         if self.cf.seg_loss_mode == 'dice' or self.cf.seg_loss_mode == 'dice_wce':
             loss += 1 - mutils.batch_dice(seg_pred, seg_ohe.float(),
                                           false_positive_weight=float(self.cf.fp_dice_weight))
 
         if self.cf.seg_loss_mode == 'wce' or self.cf.seg_loss_mode == 'dice_wce':
             # cross entropy takes the raw logits and the integer target with its dim 1 indexed away.
             loss += F.cross_entropy(seg_logits, seg[:, 0], weight=torch.FloatTensor(self.cf.wce_weights).cuda(),
                                     reduction='mean')
 
         results_dict['torch_loss'] = loss
         # from here on seg_pred holds hard class predictions as numpy array.
         seg_pred = seg_pred.argmax(dim=1).unsqueeze(dim=1).cpu().data.numpy()
         results_dict['seg_preds'] = seg_pred
         if 'dice' in self.cf.metrics:
             results_dict['batch_dices'] = mutils.dice_per_batch_and_class(seg_pred, batch["seg"],
                                                                            self.cf.num_seg_classes, convert_to_ohe=True)
             #print("batch dice scores ", results_dict['batch_dices'] )
         # self.logger.info("loss: {0:.2f}".format(loss.item()))
         return results_dict
 
     def test_forward(self, batch, **kwargs):
         """
         inference method. wrapper around the forward pass of the network that uses no ground truth information.
         moves the input to the GPU, thresholds the predicted boxes and stores the outputs in a dictionary.
         :param batch: dictionary containing 'data'
         :param kwargs: accepted but not used here.
         :return: results_dict: dictionary with keys:
                'boxes': list over batch elements. each batch element is a list of boxes. each box is a dictionary:
                        [[{box_0}, ... {box_n}], [{box_0}, ... {box_n}], ...]
                'seg_preds': softmax of the segmentation logits over dim 1, converted to a numpy array.
         """
         img = torch.FloatTensor(batch['data']).cuda()
         seg_logits, box_coords, scores = self.forward(img)
         n_batch = img.shape[0]
 
         boxes = [[] for _ in range(n_batch)]
         for cix, class_coords in enumerate(box_coords): #class index
             for bix in range(n_batch): #batch instance
                 for rix, score in enumerate(scores[cix][bix]):
                     # drop candidates that do not exceed the configured confidence threshold.
                     if score > self.cf.detection_min_confidence:
                         boxes[bix].append({'box_coords': np.copy(class_coords[bix][rix]),
                                            'box_score': score,
                                            'box_pred_class_id': cix + 1,
                                            'box_type': 'det'})
 
         # carry probs instead of preds to use for multi-model voting in predictor
         seg_probs = F.softmax(seg_logits, dim=1).cpu().data.numpy()
         return {'boxes': boxes, 'seg_preds': seg_probs}
 
 
     def actual_dims(self, print_=True):
         r"""Return dimensions of actually calculated layers at beginning of each block.
         """
         if print_:
             print("dimensions as recorded in forward pass: ")
             for stage in range(len(self.dims)):
                 print("Stage ", stage, ": ", self.dims[stage])
         return self.dims
         
     def cuda(self, device=None):
         r"""Moves all model parameters and buffers to the GPU.
 
         This also makes associated parameters and buffers different objects. So
         it should be called before constructing optimizer if the module will
         live on GPU while being optimized.
 
         Arguments:
             device (int, optional): if specified, all parameters will be
                 copied to that device
 
         Returns:
             Module: self
         """
         try:
             self.loss_f = self.loss_f.cuda()
         except:
             pass
         self.is_cuda = True
         return self._apply(lambda t: t.cuda(device))
     
     def cpu(self):
         r"""Moves all model parameters and buffers to the CPU.
 
         Returns:
             Module: self
         """
         self.is_cuda = False
         return self._apply(lambda t: t.cpu()) 
 
 
 
 
         
\ No newline at end of file
diff --git a/models/mrcnn.py b/models/mrcnn.py
index e0b7982..c7dcdff 100644
--- a/models/mrcnn.py
+++ b/models/mrcnn.py
@@ -1,752 +1,752 @@
 #!/usr/bin/env python
 # Copyright 2019 Division of Medical Image Computing, German Cancer Research Center (DKFZ).
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 
 """
 Parts are based on https://github.com/multimodallearning/pytorch-mask-rcnn
 published under MIT license.
 """
 import os
 from multiprocessing import  Pool
 import time
 
 import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.utils
 
 import utils.model_utils as mutils
 import utils.exp_utils as utils
 
 
 
 class RPN(nn.Module):
     """
     Region Proposal Network: a shared conv followed by two kernel-size-1 heads, one producing 2 logits per anchor
     and one producing 2 * dim box values per anchor.
     """
 
     def __init__(self, cf, conv):
         """
         :param cf: config object; reads end_filts, n_rpn_features, rpn_anchor_stride, rpn_anchor_ratios, relu.
         :param conv: conv generator; conv.dim gives the number of spatial dimensions.
         """
         super(RPN, self).__init__()
         self.dim = conv.dim
         n_ratios = len(cf.rpn_anchor_ratios)
 
         self.conv_shared = conv(cf.end_filts, cf.n_rpn_features, ks=3, stride=cf.rpn_anchor_stride, pad=1, relu=cf.relu)
         self.conv_class = conv(cf.n_rpn_features, 2 * n_ratios, ks=1, stride=1, relu=None)
         self.conv_bbox = conv(cf.n_rpn_features, 2 * self.dim * n_ratios, ks=1, stride=1, relu=None)
 
 
     def forward(self, x):
         """
         :param x: input feature maps (b, in_channels, y, x, (z))
         :return: rpn_class_logits (b, n_anchors, 2)
         :return: rpn_probs (b, n_anchors, 2), softmax of the logits over the last dim.
         :return: rpn_bbox (b, n_anchors, 2 * dim)
         """
         # Shared convolutional base of the RPN.
         feats = self.conv_shared(x)
         n_batch = feats.size()[0]
         channels_last = (0, 2, 3, 1) if self.dim == 2 else (0, 2, 3, 4, 1)
 
         def _per_anchor(head_out, n_values):
             # move channels to the back, then fold all positions and anchors into one axis.
             return head_out.permute(*channels_last).contiguous().view(n_batch, -1, n_values)
 
         # Anchor scores and their softmax over the two classes (last dim).
         rpn_class_logits = _per_anchor(self.conv_class(feats), 2)
         rpn_probs = F.softmax(rpn_class_logits, dim=2)
 
         # Bounding box refinement values, 2 * dim per anchor.
         rpn_bbox = _per_anchor(self.conv_bbox(feats), self.dim * 2)
 
         return [rpn_class_logits, rpn_probs, rpn_bbox]
 
 
 
 class Classifier(nn.Module):
     """
     Head network for classification and bounding box refinement. Performs RoiAlign, processes resulting features through a
     shared convolutional base and finally branches off the bbox-, classifier- and regression head.
     Tasks not listed in cf.prediction_tasks are served by parameter-free dummy heads that create their output on the
     device of the incoming features.
     """
     def __init__(self, cf, conv):
         """
         :param cf: config object; reads end_filts, pool_size, pyramid_levels, norm, relu, head_classes,
             prediction_tasks and, depending on the tasks, regression_n_features or bin_labels.
         :param conv: conv generator; conv.dim gives the number of spatial dimensions.
         """
         super(Classifier, self).__init__()
 
         self.cf = cf
         self.dim = conv.dim
         self.in_channels = cf.end_filts
         self.pool_size = cf.pool_size
         self.pyramid_levels = cf.pyramid_levels
         # instance_norm does not work with spatial dims (1, 1, (1))
         norm = cf.norm if cf.norm != 'instance_norm' else None
 
         self.conv1 = conv(cf.end_filts, cf.end_filts * 4, ks=self.pool_size, stride=1, norm=norm, relu=cf.relu)
         self.conv2 = conv(cf.end_filts * 4, cf.end_filts * 4, ks=1, stride=1, norm=norm, relu=cf.relu)
         self.linear_bbox = nn.Linear(cf.end_filts * 4, cf.head_classes * 2 * self.dim)
 
 
         if 'regression' in self.cf.prediction_tasks:
             self.linear_regressor = nn.Linear(cf.end_filts * 4, cf.head_classes * cf.regression_n_features)
             self.rg_n_feats = cf.regression_n_features
         #classify into bins of regression values
         elif 'regression_bin' in self.cf.prediction_tasks:
             self.linear_regressor = nn.Linear(cf.end_filts * 4, cf.head_classes * len(cf.bin_labels))
             self.rg_n_feats = len(cf.bin_labels)
         else:
             # dummy head: all-NaN output, allocated where the input features live instead of on a fixed GPU.
             self.linear_regressor = lambda x: torch.full((x.shape[0], cf.head_classes * 1), float('NaN'),
                                                          dtype=torch.float32, device=x.device)
             self.rg_n_feats = 1 #cf.regression_n_features
         if 'class' in self.cf.prediction_tasks:
             self.linear_class = nn.Linear(cf.end_filts * 4, cf.head_classes)
         else:
             assert cf.head_classes == 2, "#head classes {} needs to be 2 (bg/fg) when not predicting classes".format(cf.head_classes)
             # dummy head: all-zero logits, allocated where the input features live instead of on a fixed GPU.
             self.linear_class = lambda x: torch.zeros((x.shape[0], cf.head_classes), dtype=torch.float64,
                                                       device=x.device)
 
 
     def forward(self, x, rois):
         """
         :param x: input feature maps (b, in_channels, y, x, (z))
         :param rois: normalized box coordinates as proposed by the RPN to be forwarded through
         the second stage (n_proposals, (y1, x1, y2, x2, (z1), (z2), batch_ix). Proposals of all batch elements
         have been merged to one vector, while the origin info has been stored for re-allocation.
         :return: mrcnn_bbox (n_proposals, n_head_classes, 2 * dim) predicted corrections to be applied to proposals for refinement.
         :return: mrcnn_class_logits (n_proposals, n_head_classes)
         :return: mrcnn_regress (n_proposals, n_head_classes, rg_n_feats) output of the regression head.
         """
         x = mutils.pyramid_roi_align(x, rois, self.pool_size, self.pyramid_levels, self.dim)
         x = self.conv1(x)
         x = self.conv2(x)
         # flatten to one feature vector per proposal
         x = x.view(-1, self.in_channels * 4)
 
         mrcnn_bbox = self.linear_bbox(x)
         mrcnn_bbox = mrcnn_bbox.view(mrcnn_bbox.size()[0], -1, self.dim * 2)
         mrcnn_class_logits = self.linear_class(x)
         mrcnn_regress = self.linear_regressor(x)
         mrcnn_regress = mrcnn_regress.view(mrcnn_regress.size()[0], -1, self.rg_n_feats)
 
         return [mrcnn_bbox, mrcnn_class_logits, mrcnn_regress]
 
 
 class Mask(nn.Module):
     """
     Head network for proposal-based mask segmentation. Performs RoiAlign, some convolutions and applies sigmoid on the
     output logits to allow for overlapping classes.
     """
     def __init__(self, cf, conv):
         super(Mask, self).__init__()
         self.pool_size = cf.mask_pool_size
         self.pyramid_levels = cf.pyramid_levels
         self.dim = conv.dim
         self.conv1 = conv(cf.end_filts, cf.end_filts, ks=3, stride=1, pad=1, norm=cf.norm, relu=cf.relu)
         self.conv2 = conv(cf.end_filts, cf.end_filts, ks=3, stride=1, pad=1, norm=cf.norm, relu=cf.relu)
         self.conv3 = conv(cf.end_filts, cf.end_filts, ks=3, stride=1, pad=1, norm=cf.norm, relu=cf.relu)
         self.conv4 = conv(cf.end_filts, cf.end_filts, ks=3, stride=1, pad=1, norm=cf.norm, relu=cf.relu)
         if conv.dim == 2:
-            self.deconv = nn.ConvTranspose2d(cf.end_filts, cf.end_filts, kernel_size=2, stride=2)
+            self.deconv = nn.ConvTranspose2d(cf.end_filts, cf.end_filts, kernel_size=2, stride=2) # todo why no norm here?
         else:
             self.deconv = nn.ConvTranspose3d(cf.end_filts, cf.end_filts, kernel_size=2, stride=2)
 
         self.relu = nn.ReLU(inplace=True) if cf.relu == 'relu' else nn.LeakyReLU(inplace=True)
         self.conv5 = conv(cf.end_filts, cf.head_classes, ks=1, stride=1, relu=None)
         self.sigmoid = nn.Sigmoid()
 
     def forward(self, x, rois):
         """
         RoiAlign, four convs, transposed conv with activation, a final conv and a sigmoid.
         :param x: input feature maps (b, in_channels, y, x, (z))
         :param rois: normalized box coordinates as proposed by the RPN to be forwarded through
         the second stage (n_proposals, (y1, x1, y2, x2, (z1), (z2), batch_ix). Proposals of all batch elements
         have been merged to one vector, while the origin info has been stored for re-allocation.
         :return: x: masks (n_sampled_proposals (n_detections in inference), n_classes, y, x, (z))
         """
         x = mutils.pyramid_roi_align(x, rois, self.pool_size, self.pyramid_levels, self.dim)
         for conv_layer in (self.conv1, self.conv2, self.conv3, self.conv4):
             x = conv_layer(x)
         upsampled = self.relu(self.deconv(x))
         mask_logits = self.conv5(upsampled)
         return self.sigmoid(mask_logits)
 
 
 ############################################################
 #  Loss Functions
 ############################################################
 
 def compute_rpn_class_loss(rpn_class_logits, rpn_match, shem_poolsize):
     """
     Objectness (fg/bg) classification loss of the RPN for a single batch element.
 
     All helper tensors are created on the device of rpn_class_logits, so the function works for inputs on any
     device instead of assuming the default cuda device.
 
     :param rpn_class_logits: (n_anchors, 2). logits from RPN classifier.
     :param rpn_match: (n_anchors). [-1, 0, 1] for negative, neutral, and positive matched anchors.
     :param shem_poolsize: int. factor of top-k candidates to draw from per negative sample
     (stochastic-hard-example-mining).
     :return: loss: torch tensor. mean of the positive and the negative cross-entropy term. a term is a zero
     tensor of shape (1,) if no anchors of that kind exist.
     :return: np_neg_ix: 1D array containing indices of the neg_roi_logits, which have been sampled for training.
     """
     device = rpn_class_logits.device
 
     # Filter out neutral anchors (rpn_match == 0): they contribute to neither term.
     pos_indices = torch.nonzero(rpn_match == 1)
     neg_indices = torch.nonzero(rpn_match == -1)
 
     # loss for positive samples: target class is 1 (foreground).
     if not 0 in pos_indices.size():
         pos_indices = pos_indices.squeeze(1)
         roi_logits_pos = rpn_class_logits[pos_indices]
         pos_targets = torch.ones(pos_indices.shape[0], dtype=torch.long, device=device)
         pos_loss = F.cross_entropy(roi_logits_pos, pos_targets)
     else:
         pos_loss = torch.zeros(1, dtype=torch.float32, device=device)
 
     # loss for negative samples: draw hard negative examples (SHEM)
     # that match the number of positive samples, but at least 1.
     if not 0 in neg_indices.size():
         neg_indices = neg_indices.squeeze(1)
         roi_logits_neg = rpn_class_logits[neg_indices]
         # numel() is 0 when there are no positives, so the floor of 1 applies.
         negative_count = max(1, pos_indices.numel())
         roi_probs_neg = F.softmax(roi_logits_neg, dim=1)
         neg_ix = mutils.shem(roi_probs_neg, negative_count, shem_poolsize)
         neg_targets = torch.zeros(neg_ix.shape[0], dtype=torch.long, device=device)
         neg_loss = F.cross_entropy(roi_logits_neg[neg_ix], neg_targets)
         np_neg_ix = neg_ix.cpu().data.numpy()
     else:
         neg_loss = torch.zeros(1, dtype=torch.float32, device=device)
         np_neg_ix = np.array([]).astype('int32')
 
     loss = (pos_loss + neg_loss) / 2
     return loss, np_neg_ix
 
 
 def compute_rpn_bbox_loss(rpn_pred_deltas, rpn_target_deltas, rpn_match):
     """
     Smooth-L1 box regression loss of the RPN for a single batch element (no batch axis is indexed here).
 
     :param rpn_pred_deltas: predicted deltas from RPN. (n_anchors, (dy, dx, (dz), log(dh), log(dw), (log(dd))))
     :param rpn_target_deltas: (n_positive_anchors, (dy, dx, (dz), log(dh), log(dw), (log(dd)))).
     Uses 0 padding to fill in unused bbox deltas; only the first n_positive rows are read.
     :param rpn_match: (n_anchors). [-1, 0, 1] for negative, neutral, and positive matched anchors.
     :return: loss: torch tensor. zero tensor of shape (1,) on the device of rpn_pred_deltas if there are no
     positive anchors.
     """
     pos_indices = torch.nonzero(rpn_match == 1)
     if 0 in pos_indices.size():
         return torch.zeros(1, dtype=torch.float32, device=rpn_pred_deltas.device)
 
     pos_indices = pos_indices.squeeze(1)
     # Pick bbox deltas that contribute to the loss
     pred_deltas = rpn_pred_deltas[pos_indices]
     # Trim target bounding box deltas to the same length as the selected predictions.
     target_deltas = rpn_target_deltas[:pred_deltas.size()[0], :]
     return F.smooth_l1_loss(pred_deltas, target_deltas)
 
 def compute_mrcnn_bbox_loss(mrcnn_pred_deltas, mrcnn_target_deltas, target_class_ids):
     """
     Smooth-L1 loss between the class-specific predicted box deltas and the target deltas of positive rois.
 
     :param mrcnn_pred_deltas: (n_sampled_rois, n_classes, (dy, dx, (dz), log(dh), log(dw), (log(dd)))
     :param mrcnn_target_deltas: (n_sampled_rois, (dy, dx, (dz), log(dh), log(dw), (log(dd)))
     :param target_class_ids: (n_sampled_rois). rois with class id > 0 count as positive.
     :return: loss: torch tensor. zero tensor of shape (1,) on the device of mrcnn_pred_deltas if there are no
     positive rois.
     """
     positive_roi_ix = torch.nonzero(target_class_ids > 0)
     if 0 in positive_roi_ix.size():
         return torch.zeros(1, dtype=torch.float32, device=mrcnn_pred_deltas.device)
 
     positive_roi_ix = positive_roi_ix[:, 0]
     positive_roi_class_ids = target_class_ids[positive_roi_ix].long()
     # targets are constants w.r.t. the graph; only the deltas of each roi's target class are penalized.
     target_bbox = mrcnn_target_deltas[positive_roi_ix, :].detach()
     pred_bbox = mrcnn_pred_deltas[positive_roi_ix, positive_roi_class_ids, :]
     return F.smooth_l1_loss(pred_bbox, target_bbox)
 
 def compute_mrcnn_mask_loss(pred_masks, target_masks, target_class_ids):
     """
     Binary cross-entropy between predicted and target masks of positive rois.
 
     :param pred_masks: (n_sampled_rois, n_classes, y, x, (z)) float32 tensor with values between [0, 1].
     :param target_masks: (n_sampled_rois, y, x, (z)) A float32 tensor of values 0 or 1. Uses zero padding to fill array.
     :param target_class_ids: (n_sampled_rois). rois with class id > 0 count as positive.
     :return: loss: torch tensor. zero tensor of shape (1,) on the device of pred_masks if there are no
     positive rois.
     """
     positive_ix = torch.nonzero(target_class_ids > 0)
     if 0 in positive_ix.size():
         return torch.zeros(1, dtype=torch.float32, device=pred_masks.device)
 
     # Only positive ROIs contribute to the loss. And only
     # the class-specific mask of each ROI.
     positive_ix = positive_ix[:, 0]
     positive_class_ids = target_class_ids[positive_ix].long()
     y_true = target_masks[positive_ix].detach()
     y_pred = pred_masks[positive_ix, positive_class_ids]
     return F.binary_cross_entropy(y_pred, y_true)
 
 def compute_mrcnn_class_loss(tasks, pred_class_logits, target_class_ids):
     """
     Cross-entropy loss of the roi classifier head.
 
     :param tasks: container of prediction task names. the loss is only computed if it contains 'class'.
     :param pred_class_logits: (n_sampled_rois, n_classes)
     :param target_class_ids: (n_sampled_rois) batch dimension was merged into roi dimension.
     :return: loss: torch tensor. zero tensor of shape (1,) on the device of pred_class_logits if 'class' is not
     among the tasks or there are no sampled rois.
     """
     if 'class' in tasks and not 0 in target_class_ids.size():
         return F.cross_entropy(pred_class_logits, target_class_ids.long())
     return torch.zeros(1, dtype=torch.float32, device=pred_class_logits.device)
 
 def compute_mrcnn_regression_loss(tasks, pred, target, target_class_ids):
     """regression loss is a distance metric between target vector and predicted regression vector.
 
     Only positive rois (class id > 0) contribute, and only the prediction belonging to each roi's target class.
     If "regression_bin" is among the tasks, the selected predictions are treated as logits and compared to the
     target bin ids via cross-entropy; otherwise smooth-L1 is applied.
 
     :param tasks: container of prediction task names.
     :param pred: (n_sampled_rois, n_classes, n_rg_feats) for real regression; in the bin task the last axis is
     passed to cross_entropy as logits (presumably one logit per bin -- confirm against the classifier head).
     :param target: (n_sampled_rois, n_rg_feats) regression targets, or bin ids in the bin task.
     :param target_class_ids: (n_sampled_rois)
     :return: differentiable loss, torch tensor. zero tensor of shape (1,) on the device of pred if target is
     empty or there are no positive rois.
     """
     if 0 in target.shape:
         return torch.zeros(1, dtype=torch.float32, device=pred.device)
     positive_roi_ix = torch.nonzero(target_class_ids > 0)
     if 0 in positive_roi_ix.shape:
         return torch.zeros(1, dtype=torch.float32, device=pred.device)
 
     positive_roi_ix = positive_roi_ix[:, 0]
     positive_roi_class_ids = target_class_ids[positive_roi_ix].long()
     target = target[positive_roi_ix].detach()
     pred = pred[positive_roi_ix, positive_roi_class_ids]
     if "regression_bin" in tasks:
         return F.cross_entropy(pred, target.long())
     return F.smooth_l1_loss(pred, target)
 
 ############################################################
 #  Detection Layer
 ############################################################
 
 def compute_roi_scores(tasks, batch_rpn_proposals, mrcnn_cl_logits):
     """ Pick the per-roi class scores depending on the prediction tasks.
 
     With a class head ('class' in tasks) the scores are the softmax over the head's logits. Without one, the last
     entry of each RPN proposal (its objectness score) is used as foreground score s and (1 - s) as background.
     :param tasks: container of prediction task names.
     :param batch_rpn_proposals: (b, n_proposals, n_coords + 1). last entry along axis 2 is the RPN score.
     :param mrcnn_cl_logits: (n_rois, n_classes) class head logits. only read if 'class' is in tasks.
     :return: scores: (n_rois, n_classes) with a class head, otherwise (b * n_proposals, 2) as (bg, fg).
     """
     if 'class' in tasks:
         return F.softmax(mrcnn_cl_logits, dim=1)
 
     fg_scores = batch_rpn_proposals[:, :, -1].view(-1, 1)
     bg_scores = 1 - fg_scores
     return torch.cat((bg_scores, fg_scores), dim=1)
 
 ############################################################
 #  MaskRCNN Class
 ############################################################
 
 class net(nn.Module):
 
 
     def __init__(self, cf, logger):
         """
         Build the network and set up loss weights and weight initialization.
         :param cf: config object. read here: weight_init and, if present, mrcnn_loss_weights (mapping from loss
         name to weight). further attributes are read in build().
         :param logger: logger used for info messages.
         """
         super().__init__()
         self.cf, self.logger = cf, logger
         self.build()
 
         # fixed order in which the per-loss weights are stored in self.loss_weights.
         loss_names = ['rpn_class', 'rpn_bbox', 'mrcnn_bbox', 'mrcnn_mask', 'mrcnn_class', 'mrcnn_rg']
         if hasattr(cf, "mrcnn_loss_weights"):
             weights = [cf.mrcnn_loss_weights[name] for name in loss_names]
         else:
             # no weights configured: every loss gets weight 1.
             weights = [1.] * len(loss_names)
         self.loss_weights = np.array(weights)
 
         init_mode = self.cf.weight_init
         if not init_mode:
             logger.info("using default pytorch weight init")
         elif init_mode == "custom":
             logger.info("Tried to use custom weight init which is not defined. Using pytorch default.")
         else:
             mutils.initialize_weights(self)
 
     def build(self):
         """
         Assemble the Mask R-CNN architecture: anchors, FPN backbone, RPN, classifier/bbox-regressor head and
         mask head. Raises an Exception if one of the first two patch dimensions is not a multiple of 2**5.
         """
         cf = self.cf
 
         # each in-plane patch dimension has to survive five halvings without producing fractions.
         h, w = cf.patch_size[:2]
         if any(side / 2**5 != int(side / 2**5) for side in (h, w)):
             raise Exception("Image size must be divisible by 2 at least 5 times "
                             "to avoid fractions when downscaling and upscaling."
                             "For example, use 256, 288, 320, 384, 448, 512, ... etc.,i.e.,"
                             "any number x*32 will do!")
 
         # load the backbone module and get its dimension-aware conv-layer generator.
         backbone = utils.import_module('bbone', cf.backbone_path)
         self.logger.info("loaded backbone from {}".format(cf.backbone_path))
         conv = backbone.ConvGenerator(cf.dim)
 
         # anchors are kept twice: as numpy array and as float tensor on the gpu.
         self.np_anchors = mutils.generate_pyramid_anchors(self.logger, cf)
         self.anchors = torch.from_numpy(self.np_anchors).float().cuda()
 
         # sub-networks, registered in the order FPN, RPN, classifier head, mask head.
         self.fpn = backbone.FPN(cf, conv, relu_enc=cf.relu, operate_stride1=False).cuda()
         self.rpn = RPN(cf, conv)
         self.classifier = Classifier(cf, conv)
         self.mask = Mask(cf, conv)
 
     def forward(self, img, is_training=True):
         """
         First-stage pass plus a gradient-free second-stage pass over all proposals.
         Side effects: stores self.mrcnn_feature_maps, self.rpn_rois_batch_info and self.mrcnn_roi_scores, which
         are re-used by loss_samples_forward.
         :param img: input images (b, c, y, x, (z)).
         :param is_training: selects the number of proposals kept: cf.post_nms_rois_training if True,
         cf.post_nms_rois_inference otherwise.
         :return: rpn_pred_logits: (b, n_anchors, 2)
         :return: rpn_pred_deltas: (b, n_anchors, (y, x, (z), log(h), log(w), (log(d))))
         :return: batch_proposal_boxes: (b, n_proposals, (y1, x1, y2, x2, (z1), (z2), batch_ix)) only for monitoring/plotting.
         :return: detections: (n_final_detections, (y1, x1, y2, x2, (z1), (z2), batch_ix, pred_class_id, pred_score)
         :return: detection_masks: (n_final_detections, n_classes, y, x, (z)) raw molded masks as returned by mask-head.
         """
         # extract features.
         fpn_outs = self.fpn(img)
         rpn_feature_maps = [fpn_outs[i] for i in self.cf.pyramid_levels]
         # RPN and second-stage heads operate on the same selected pyramid levels.
         self.mrcnn_feature_maps = rpn_feature_maps
 
         # loop through pyramid layers and apply RPN.
         layer_outputs = [ self.rpn(p_feats) for p_feats in rpn_feature_maps ]
 
         # concatenate layer outputs.
         # convert from list of lists of level outputs to list of lists of outputs across levels.
         # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]]
         outputs = list(zip(*layer_outputs))
         outputs = [torch.cat(list(o), dim=1) for o in outputs]
         rpn_pred_logits, rpn_pred_probs, rpn_pred_deltas = outputs
         #
         # # generate proposals: apply predicted deltas to anchors and filter by foreground scores from RPN classifier.
         proposal_count = self.cf.post_nms_rois_training if is_training else self.cf.post_nms_rois_inference
         batch_normed_props, batch_unnormed_props = mutils.refine_proposals(rpn_pred_probs, rpn_pred_deltas,
                                                                             proposal_count, self.anchors, self.cf)
 
         # merge batch dimension of proposals while storing allocation info in coordinate dimension.
         # batch_ixs: one entry per proposal holding the index of the batch element it came from.
         batch_ixs = torch.arange(
             batch_normed_props.shape[0]).cuda().unsqueeze(1).repeat(1,batch_normed_props.shape[1]).view(-1).float()
         # drop the last entry of each proposal (read as RPN score in compute_roi_scores) and flatten to (b * n_props, n_coords).
         rpn_rois = batch_normed_props[:, :, :-1].view(-1, batch_normed_props[:, :, :-1].shape[2])
         self.rpn_rois_batch_info = torch.cat((rpn_rois, batch_ixs.unsqueeze(1)), dim=1)
 
         # this is the first of two forward passes in the second stage, where no activations are stored for backprop.
         # here, all proposals are forwarded (with virtual_batch_size = batch_size * post_nms_rois.)
         # for inference/monitoring as well as sampling of rois for the loss functions.
         # processed in chunks of roi_chunk_size to re-adjust to gpu-memory.
         chunked_rpn_rois = self.rpn_rois_batch_info.split(self.cf.roi_chunk_size)
         bboxes_list, class_logits_list, regressions_list = [], [], []
         with torch.no_grad():
             for chunk in chunked_rpn_rois:
                 chunk_bboxes, chunk_class_logits, chunk_regressions = self.classifier(self.mrcnn_feature_maps, chunk)
                 bboxes_list.append(chunk_bboxes)
                 class_logits_list.append(chunk_class_logits)
                 regressions_list.append(chunk_regressions)
         mrcnn_bbox = torch.cat(bboxes_list, 0)
         mrcnn_class_logits = torch.cat(class_logits_list, 0)
         mrcnn_regressions = torch.cat(regressions_list, 0)
         # class-head softmax scores if 'class' is a prediction task, otherwise RPN scores (see compute_roi_scores).
         self.mrcnn_roi_scores = compute_roi_scores(self.cf.prediction_tasks, batch_normed_props, mrcnn_class_logits)
 
         # refine classified proposals, filter and return final detections.
         # returns (cf.max_inst_per_batch_element, n_coords+1+...)
         detections = mutils.refine_detections(self.cf, batch_ixs, rpn_rois, mrcnn_bbox, self.mrcnn_roi_scores,
                                        mrcnn_regressions)
 
         # forward remaining detections through mask-head to generate corresponding masks.
         # scale divides box coords back to normalized units; the trailing 1 leaves batch_ix untouched.
         # NOTE(review): y and x coords are both divided by img.shape[2], which is only exact for patches with
         # equal y and x extent -- confirm that non-square patches are not supported.
         scale = [img.shape[2]] * 4 + [img.shape[-1]] * 2
         scale = torch.from_numpy(np.array(scale[:self.cf.dim * 2] + [1])[None]).float().cuda()
 
         # first self.cf.dim * 2 entries on axis 1 are always the box coords, +1 is batch_ix
         detection_boxes = detections[:, :self.cf.dim * 2 + 1] / scale
         # masks of final detections are for output only, hence no gradients are tracked.
         with torch.no_grad():
             detection_masks = self.mask(self.mrcnn_feature_maps, detection_boxes)
 
         return [rpn_pred_logits, rpn_pred_deltas, batch_unnormed_props, detections, detection_masks]
 
 
     def loss_samples_forward(self, batch_gt_boxes, batch_gt_masks, batch_gt_class_ids, batch_gt_regressions=None):
         """
         second forward pass through the second stage, this time with activations kept for backprop. relies on
         the state stored by forward() (self.rpn_rois_batch_info, self.mrcnn_roi_scores, self.mrcnn_feature_maps).
         a few rois are sampled in loss_example_mining and only those are forwarded through the heads.
         :param batch_gt_boxes: list over batch elements. Each element is a list over the corresponding roi target coordinates.
         :param batch_gt_masks: (b, n(b), c, y, x (,z)) list over batch elements. Each element holds n_gt_rois(b)
                 (i.e., dependent on the batch element) binary masks of shape (c, y, x, (z)).
         :param batch_gt_class_ids: list over batch elements. Each element is a list over the corresponding roi target labels.
         :param batch_gt_regressions: regression targets, passed on to loss_example_mining unchanged. may be None.
         :return: list, in this order:
             sample_deltas: (n_sampled_rois, n_classes, 2 * dim) predicted corrections to be applied to proposals for refinement.
             sample_mask: (n_sampled_rois, n_classes, y, x, (z)) predicted masks per class and proposal.
             sample_logits: (n_sampled_rois, n_classes) predicted class scores.
             sample_regressions: regression output of the classifier head for the sampled proposals.
             sample_proposals: sampled rows of self.rpn_rois_batch_info (box coords + batch_ix). only for monitoring/plotting.
             sample_target_deltas: (n_sampled_rois, 2 * dim) target deltas of sampled proposals for box refinement.
             sample_target_mask: (n_sampled_rois, y, x, (z)) target masks of sampled proposals.
             sample_target_class_ids: (n_sampled_rois) target class labels of sampled proposals.
             sample_target_regressions: regression targets of sampled proposals.
             if no roi was sampled, the four predictions are empty cuda float tensors.
         """
         # sample rois for loss and get corresponding targets for all Mask R-CNN head network losses.
         mined = mutils.loss_example_mining(self.cf, self.rpn_rois_batch_info, batch_gt_boxes, batch_gt_masks,
                                            self.mrcnn_roi_scores, batch_gt_class_ids, batch_gt_regressions)
         roi_ixs, target_deltas, target_mask, target_class_ids, target_regressions = mined
 
         # re-use feature maps and RPN output from first forward pass.
         rois = self.rpn_rois_batch_info[roi_ixs]
         if 0 in rois.size():
             # nothing sampled: skip the heads entirely.
             pred_logits, pred_deltas, pred_regressions, pred_mask = (torch.FloatTensor().cuda() for _ in range(4))
         else:
             pred_deltas, pred_logits, pred_regressions = self.classifier(self.mrcnn_feature_maps, rois)
             pred_mask = self.mask(self.mrcnn_feature_maps, rois)
 
         predictions = [pred_deltas, pred_mask, pred_logits, pred_regressions, rois]
         targets = [target_deltas, target_mask, target_class_ids, target_regressions]
         return predictions + targets
 
     def get_results(self, img_shape, detections, detection_masks, box_results_list=None, return_masks=True):
         """
         Restores batch dimension of merged detections, unmolds detections, creates and fills results dict.
         :param img_shape:
         :param detections: shape (n_final_detections, len(info)), where
             info=( y1, x1, y2, x2, (z1,z2), batch_ix, pred_class_id, pred_score )
         :param detection_masks: (n_final_detections, n_classes, y, x, (z)) raw molded masks as returned by mask-head.
         :param box_results_list: None or list of output boxes for monitoring/plotting.
         each element is a list of boxes per batch element.
         :param return_masks: boolean. If True, full resolution masks are returned for all proposals (speed trade-off).
         :return: results_dict: dictionary with keys:
                  'boxes': list over batch elements. each batch element is a list of boxes. each box is a dictionary:
                           [[{box_0}, ... {box_n}], [{box_0}, ... {box_n}], ...]
                  'seg_preds': pixel-wise class predictions (b, 1, y, x, (z)) with values [0, 1] only fg. vs. bg for now.
                  class-specific return of masks will come with implementation of instance segmentation evaluation.
         """
 
         detections = detections.cpu().data.numpy()
         if self.cf.dim == 2:
             detection_masks = detection_masks.permute(0, 2, 3, 1).cpu().data.numpy()
         else:
             detection_masks = detection_masks.permute(0, 2, 3, 4, 1).cpu().data.numpy()
         # det masks shape now (n_dets, y,x(,z), n_classes)
         # restore batch dimension of merged detections using the batch_ix info.
         batch_ixs = detections[:, self.cf.dim*2]
         detections = [detections[batch_ixs == ix] for ix in range(img_shape[0])]
         mrcnn_mask = [detection_masks[batch_ixs == ix] for ix in range(img_shape[0])]
         # mrcnn_mask: shape (b_size, variable, variable, n_classes), variable bc depends on single instance mask size
 
         if box_results_list == None: # for test_forward, where no previous list exists.
             box_results_list =  [[] for _ in range(img_shape[0])]
         # seg_logits == seg_probs in mrcnn since mask head finishes with sigmoid (--> image space = [0,1])
         seg_probs = []
         # loop over batch and unmold detections.
         for ix in range(img_shape[0]):
 
             # final masks are one-hot encoded (b, n_classes, y, x, (z))
             final_masks = np.zeros((self.cf.num_classes + 1, *img_shape[2:]))
             #+1 for bg, 0.5 bc mask head classifies only bg/fg with logits between 0,1--> bg is <0.5
             if self.cf.num_classes + 1 != self.cf.num_seg_classes:
                 self.logger.warning("n of roi-classifier head classes {} doesnt match cf.num_seg_classes {}".format(
                     self.cf.num_classes + 1, self.cf.num_seg_classes))
 
             if not 0 in detections[ix].shape:
                 boxes = detections[ix][:, :self.cf.dim*2].astype(np.int32)
                 class_ids = detections[ix][:, self.cf.dim*2 + 1].astype(np.int32)
                 scores = detections[ix][:, self.cf.dim*2 + 2]
                 masks = mrcnn_mask[ix][np.arange(boxes.shape[0]), ..., class_ids]
                 regressions = detections[ix][:,self.cf.dim*2+3:]
 
                 # Filter out detections with zero area. Often only happens in early
                 # stages of training when the network weights are still a bit random.
                 if self.cf.dim == 2:
                     exclude_ix = np.where((boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) <= 0)[0]
                 else:
                     exclude_ix = np.where(
                         (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 5] - boxes[:, 4]) <= 0)[0]
 
                 if exclude_ix.shape[0] > 0:
                     boxes = np.delete(boxes, exclude_ix, axis=0)
                     masks = np.delete(masks, exclude_ix, axis=0)
                     class_ids = np.delete(class_ids, exclude_ix, axis=0)
                     scores = np.delete(scores, exclude_ix, axis=0)
                     regressions = np.delete(regressions, exclude_ix, axis=0)
 
                 # Resize masks to original image size and set boundary threshold.
                 if return_masks:
                     for i in range(masks.shape[0]): #masks per this batch instance/element/image
                         # Convert neural network mask to full size mask
                         if self.cf.dim == 2:
                             full_mask = mutils.unmold_mask_2D(masks[i], boxes[i], img_shape[2:])
                         else:
                             full_mask = mutils.unmold_mask_3D(masks[i], boxes[i], img_shape[2:])
                         # take the maximum seg_logits per class of instances in that class, i.e., a pixel in a class
                         # has the max seg_logit value over all instances of that class in one sample
                         final_masks[class_ids[i]] = np.max((final_masks[class_ids[i]], full_mask), axis=0)
                     final_masks[0] = np.full(final_masks[0].shape, 0.49999999) #effectively min_det_thres at 0.5 per pixel
 
                 # add final predictions to results.
                 if not 0 in boxes.shape:
                     for ix2, coords in enumerate(boxes):
                         box = {'box_coords': coords, 'box_type': 'det', 'box_score': scores[ix2],
                                'box_pred_class_id': class_ids[ix2]}
                         #if (hasattr(self.cf, "convert_cl_to_rg") and self.cf.convert_cl_to_rg):
                         if "regression_bin" in self.cf.prediction_tasks:
                             # in this case, regression preds are actually the rg_bin_ids --> map to rg value the bin represents
                             box['rg_bin'] = regressions[ix2].argmax()
                             box['regression'] = self.cf.bin_id2rg_val[box['rg_bin']]
                         else:
                             box['regression'] = regressions[ix2]
                             if hasattr(self.cf, "rg_val_to_bin_id") and \
                                     any(['regression' in task for task in self.cf.prediction_tasks]):
                                 box.update({'rg_bin': self.cf.rg_val_to_bin_id(regressions[ix2])})
 
                         box_results_list[ix].append(box)
 
             # if no detections were made--> keep full bg mask (zeros).
             seg_probs.append(final_masks)
 
         # create and fill results dictionary.
         results_dict = {}
         results_dict['boxes'] = box_results_list
         results_dict['seg_preds'] = np.array(seg_probs)
 
         return results_dict
 
 
     def train_forward(self, batch, is_validation=False):
         """
         train method (also used for validation monitoring). wrapper around forward pass of network. prepares input data
         for processing, computes losses, and stores outputs in a dictionary.
         :param batch: dictionary containing 'data', 'bb_target', 'roi_masks', 'class_targets', every name listed in
             cf.roi_items, 'regression_targets' / 'rg_bin_targets' if the respective task is active, and 'seg' if
             'dice' is among cf.metrics.
             batch['roi_masks']: (b, n(b), c, h(n), w(n) (z(n))) list like roi_labels but with arrays (masks) inplace of
         integers. c==channels of the raw segmentation.
         :param is_validation: selects cf.return_masks_in_val instead of cf.return_masks_in_train for unmolding.
         :return: results_dict: dictionary with keys:
                 'boxes': list over batch elements. each batch element is a list of boxes. each box is a dictionary:
                         [[{box_0}, ... {box_n}], [{box_0}, ... {box_n}], ...]
                 'seg_preds': per-class mask probabilities as returned by get_results.
                 'batch_dices': only if 'dice' is among cf.metrics.
                 'torch_loss': torch tensor for backprop (unweighted sum of all six losses).
                 'class_loss', 'bbox_loss', 'mask_loss', 'rg_loss': second-stage losses for monitoring.
                 'rpn_class_loss', 'rpn_bbox_loss': RPN losses averaged over the batch, for monitoring.
         """
         img = batch['data']
         gt_boxes = batch['bb_target']
         gt_masks = batch['roi_masks']
         gt_class_ids = batch['class_targets']
         if 'regression' in self.cf.prediction_tasks:
             gt_regressions = batch["regression_targets"]
         elif 'regression_bin' in self.cf.prediction_tasks:
             gt_regressions = batch["rg_bin_targets"]
         else:
             gt_regressions = None
 
         img = torch.from_numpy(img).cuda().float()
         batch_rpn_class_loss = torch.FloatTensor([0]).cuda()
         batch_rpn_bbox_loss = torch.FloatTensor([0]).cuda()
 
         # list of output boxes for monitoring/plotting. each element is a list of boxes per batch element.
         box_results_list = [[] for _ in range(img.shape[0])]
 
         #forward passes. 1. general forward pass, where no activations are saved in second stage (for performance
         # monitoring and loss sampling). 2. second stage forward pass of sampled rois with stored activations for backprop.
         rpn_class_logits, rpn_pred_deltas, proposal_boxes, detections, detection_masks = self.forward(img)
 
         mrcnn_pred_deltas, mrcnn_pred_mask, mrcnn_class_logits, mrcnn_regressions, sample_proposals, \
         mrcnn_target_deltas, target_mask, target_class_ids, target_regressions = \
             self.loss_samples_forward(gt_boxes, gt_masks, gt_class_ids, gt_regressions)
         # loop over batch
         for b in range(img.shape[0]):
             if len(gt_boxes[b]) > 0:
                 # add gt boxes to output list
                 for tix, gt_coords in enumerate(gt_boxes[b]):
                     gt_box = {'box_type': 'gt', 'box_coords': gt_coords}
                     for name in self.cf.roi_items:
                         gt_box[name] = batch[name][b][tix]
                     box_results_list[b].append(gt_box)
 
                 # match gt boxes with anchors to generate targets for RPN losses.
                 rpn_match, rpn_target_deltas = mutils.gt_anchor_matching(self.cf, self.np_anchors, gt_boxes[b])
 
                 # add positive anchors used for loss to output list for monitoring.
                 pos_anchors = mutils.clip_boxes_numpy(self.np_anchors[np.argwhere(rpn_match == 1)][:, 0], img.shape[2:])
                 for p in pos_anchors:
                     box_results_list[b].append({'box_coords': p, 'box_type': 'pos_anchor'})
 
             else:
                 # no gt in this element: every anchor is a negative, the delta target is a dummy.
                 rpn_match = np.array([-1]*self.np_anchors.shape[0])
                 rpn_target_deltas = np.array([0])
 
             rpn_match_gpu = torch.from_numpy(rpn_match).cuda()
             rpn_target_deltas = torch.from_numpy(rpn_target_deltas).float().cuda()
 
             # compute RPN losses and accumulate their mean over the batch.
             rpn_class_loss, neg_anchor_ix = compute_rpn_class_loss(rpn_class_logits[b], rpn_match_gpu, self.cf.shem_poolsize)
             rpn_bbox_loss = compute_rpn_bbox_loss(rpn_pred_deltas[b], rpn_target_deltas, rpn_match_gpu)
             batch_rpn_class_loss += rpn_class_loss /img.shape[0]
             batch_rpn_bbox_loss += rpn_bbox_loss /img.shape[0]
 
             # add negative anchors used for loss to output list for monitoring.
             # neg_anchor_ix = neg_ix come from shem and mark positions in roi_probs_neg = rpn_class_logits[neg_indices]
             # with neg_indices = rpn_match == -1
             neg_anchors = mutils.clip_boxes_numpy(self.np_anchors[rpn_match == -1][neg_anchor_ix], img.shape[2:])
             for n in neg_anchors:
                 box_results_list[b].append({'box_coords': n, 'box_type': 'neg_anchor'})
 
             # add highest scoring proposals to output list for monitoring.
             rpn_proposals = proposal_boxes[b][proposal_boxes[b, :, -1].argsort()][::-1]
             for r in rpn_proposals[:self.cf.n_plot_rpn_props, :-1]:
                 box_results_list[b].append({'box_coords': r, 'box_type': 'prop'})
 
         # add positive and negative roi samples used for mrcnn losses to output list for monitoring.
         if not 0 in sample_proposals.shape:
             rois = mutils.clip_to_window(self.cf.window, sample_proposals).cpu().data.numpy()
             for ix, r in enumerate(rois):
                 # last entry of a roi is its batch index, the rest are normalized coords.
                 box_results_list[int(r[-1])].append({'box_coords': r[:-1] * self.cf.scale,
                                             'box_type': 'pos_class' if target_class_ids[ix] > 0 else 'neg_class'})
 
         # compute mrcnn losses.
         mrcnn_class_loss = compute_mrcnn_class_loss(self.cf.prediction_tasks, mrcnn_class_logits, target_class_ids)
         mrcnn_bbox_loss = compute_mrcnn_bbox_loss(mrcnn_pred_deltas, mrcnn_target_deltas, target_class_ids)
         mrcnn_regressions_loss = compute_mrcnn_regression_loss(self.cf.prediction_tasks, mrcnn_regressions, target_regressions, target_class_ids)
         # mrcnn can be run without pixelwise annotations available (Faster R-CNN mode).
         # In this case, the mask_loss is taken out of training.
         if self.cf.frcnn_mode:
             mrcnn_mask_loss = torch.FloatTensor([0]).cuda()
         else:
             mrcnn_mask_loss = compute_mrcnn_mask_loss(mrcnn_pred_mask, target_mask, target_class_ids)
 
         # NOTE(review): self.loss_weights is not applied here, the losses are summed unweighted -- confirm intended.
         loss = batch_rpn_class_loss + batch_rpn_bbox_loss +\
                mrcnn_bbox_loss + mrcnn_mask_loss +  mrcnn_class_loss + mrcnn_regressions_loss
 
         # run unmolding of predictions for monitoring and merge all results to one dictionary.
         return_masks = self.cf.return_masks_in_val if is_validation else self.cf.return_masks_in_train
         results_dict = self.get_results(img.shape, detections, detection_masks, box_results_list,
                                         return_masks=return_masks)
 
         if 'dice' in self.cf.metrics:
             results_dict['batch_dices'] = mutils.dice_per_batch_and_class(
                 results_dict['seg_preds'], batch["seg"], self.cf.num_seg_classes, convert_to_ohe=True)
 
         results_dict['torch_loss'] = loss
         results_dict['class_loss'] = mrcnn_class_loss.item()
         results_dict['bbox_loss'] = mrcnn_bbox_loss.item()
         results_dict['mask_loss'] = mrcnn_mask_loss.item()
         results_dict['rg_loss'] = mrcnn_regressions_loss.item()
         # report the batch means that actually enter the loss, not the value of the last batch element only.
         results_dict['rpn_class_loss'] = batch_rpn_class_loss.item()
         results_dict['rpn_bbox_loss'] = batch_rpn_bbox_loss.item()
         return results_dict
 
 
     def test_forward(self, batch, return_masks=True):
         """
         test method. runs the network on a batch without touching any ground truth information and packs the
         unmolded predictions into a results dictionary.
         :param batch: dictionary containing 'data' (numpy array, moved to gpu as float tensor here).
         :param return_masks: boolean. If True, full resolution masks are returned for all proposals (speed trade-off).
         :return: results_dict: dictionary with keys:
                'boxes': list over batch elements. each batch element is a list of boxes. each box is a dictionary:
                        [[{box_0}, ... {box_n}], [{box_0}, ... {box_n}], ...]
                'seg_preds': mask predictions as returned by get_results.
         """
         img = torch.from_numpy(batch['data']).float().cuda()
         # NOTE(review): forward() is called with its default is_training=True, i.e. cf.post_nms_rois_training
         # proposals are kept also at test time -- confirm this is intended.
         net_outputs = self.forward(img)
         detections, detection_masks = net_outputs[3], net_outputs[4]
         return self.get_results(img.shape, detections, detection_masks, return_masks=return_masks)
\ No newline at end of file