diff --git a/experiments/toy_exp/configs.py b/experiments/toy_exp/configs.py
index 6278a11..e234ac1 100644
--- a/experiments/toy_exp/configs.py
+++ b/experiments/toy_exp/configs.py
@@ -1,350 +1,350 @@
#!/usr/bin/env python
# Copyright 2018 Division of Medical Image Computing, German Cancer Research Center (DKFZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import sys
import os
sys.path.append(os.path.dirname(os.path.realpath(__file__)))
import numpy as np
from default_configs import DefaultConfigs

class configs(DefaultConfigs):

    def __init__(self, server_env=None):

        #########################
        #    Preprocessing      #
        #########################

        self.root_dir = '/home/gregor/datasets/toy_mdt'

        #########################
        #         I/O           #
        #########################

        # one out of [2, 3]. dimension the model operates in.
        self.dim = 2

        # one out of ['mrcnn', 'retina_net', 'retina_unet', 'detection_unet', 'ufrcnn'].
-        self.model = 'detection_unet'
+        self.model = 'retina_unet'

        DefaultConfigs.__init__(self, self.model, server_env, self.dim)

        # int [0 < dataset_size]. select n patients from dataset for prototyping.
        self.select_prototype_subset = None
        self.hold_out_test_set = True
        self.n_train_data = 2500

        # choose one of the 3 toy experiments described in https://arxiv.org/pdf/1811.08661.pdf
        # one of ['donuts_shape', 'donuts_pattern', 'circles_scale'].
        toy_mode = 'donuts_shape_noise'

        # path to preprocessed data.
        self.input_df_name = 'info_df.pickle'
        self.pp_name = os.path.join(toy_mode, 'train')
        self.pp_data_path = os.path.join(self.root_dir, self.pp_name)
        self.pp_test_name = os.path.join(toy_mode, 'test')
        self.pp_test_data_path = os.path.join(self.root_dir, self.pp_test_name)

        # settings for deployment in cloud.
        if server_env:
            # path to preprocessed data.
-            pp_root_dir = '/datasets/datasets_ramien/toy_mdt'
+            pp_root_dir = '/datasets/datasets_ramien/toy_exp/data'
            self.pp_name = os.path.join(toy_mode, 'train')
            self.pp_data_path = os.path.join(pp_root_dir, self.pp_name)
            self.pp_test_name = os.path.join(toy_mode, 'test')
            self.pp_test_data_path = os.path.join(pp_root_dir, self.pp_test_name)
            self.select_prototype_subset = None

        #########################
        #      Data Loader      #
        #########################

        # select modalities from preprocessed data
        self.channels = [0]
        self.n_channels = len(self.channels)

        # patch_size to be used for training. pre_crop_size is the patch_size before data augmentation.
        self.pre_crop_size_2D = [320, 320]
        self.patch_size_2D = [320, 320]

        self.patch_size = self.patch_size_2D if self.dim == 2 else self.patch_size_3D
        self.pre_crop_size = self.pre_crop_size_2D if self.dim == 2 else self.pre_crop_size_3D

        # ratio of free sampled batch elements before class balancing is triggered
        # (>0 to include "empty"/background patches.)
        self.batch_sample_slack = 0.2

        # set 2D network to operate in 3D images.
        self.merge_2D_to_3D_preds = False

        # feed +/- n neighbouring slices into channel dimension. set to None for no context.
        self.n_3D_context = None
        if self.n_3D_context is not None and self.dim == 2:
            self.n_channels *= (self.n_3D_context * 2 + 1)

        #########################
        #      Architecture     #
        #########################

        self.start_filts = 48 if self.dim == 2 else 18
        self.end_filts = self.start_filts * 4 if self.dim == 2 else self.start_filts * 2
        self.res_architecture = 'resnet50'  # 'resnet101' , 'resnet50'
        self.norm = None  # one of None, 'instance_norm', 'batch_norm'
        self.weight_decay = 1e-4

        # one of 'xavier_uniform', 'xavier_normal', or 'kaiming_normal', None (=default = 'kaiming_uniform')
        self.weight_init = None

        #########################
        #  Schedule / Selection #
        #########################

        self.num_epochs = 11
        self.num_train_batches = 100 if self.dim == 2 else 200
        self.batch_size = 20 if self.dim == 2 else 8

        self.do_validation = True
        # decide whether to validate on entire patient volumes (like testing) or sampled patches (like training)
        # the former is more accurate, while the latter is faster (depending on volume size)
        self.val_mode = 'val_patient'  # one of 'val_sampling' , 'val_patient'
        if self.val_mode == 'val_patient':
            self.max_val_patients = None  # if 'None' iterates over entire val_set once.
        if self.val_mode == 'val_sampling':
            self.num_val_batches = 50

        # set dynamic_lr_scheduling to True to apply LR scheduling with below settings.
        self.dynamic_lr_scheduling = True
        self.lr_decay_factor = 0.5
        self.scheduling_patience = int(self.num_train_batches * self.batch_size / 2400)
        self.scheduling_criterion = 'malignant_ap'
        self.scheduling_mode = 'min' if "loss" in self.scheduling_criterion else 'max'

        #########################
        #   Testing / Plotting  #
        #########################

        # set the top-n-epochs to be saved for temporal averaging in testing.
-        self.save_n_models = 2
-        self.test_n_epochs = 2
+        self.save_n_models = 5
+        self.test_n_epochs = 5
        # set a minimum epoch number for saving in case of instabilities in the first phase of training.
        self.min_save_thresh = 0 if self.dim == 2 else 0

        self.report_score_level = ['patient', 'rois']  # choose list from 'patient', 'rois'
        self.class_dict = {1: 'benign', 2: 'malignant'}  # 0 is background.
        self.patient_class_of_interest = 2  # patient metrics are only plotted for one class.
        self.ap_match_ious = [0.1]  # list of ious to be evaluated for ap-scoring.

        self.model_selection_criteria = ['benign_ap', 'malignant_ap']  # criteria to average over for saving epochs.
        self.min_det_thresh = 0.1  # minimum confidence value to select predictions for evaluation.

        # threshold for clustering predictions together (wcs = weighted cluster scoring).
        # needs to be >= the expected overlap of predictions coming from one model (typically NMS threshold).
        # if too high, preds of the same object are separate clusters.
        self.wcs_iou = 1e-5

        self.plot_prediction_histograms = True
        self.plot_stat_curves = False

        #########################
        #   Data Augmentation   #
        #########################
        self.da_kwargs = {
            'do_elastic_deform': True,
            'alpha': (0., 1500.),
            'sigma': (30., 50.),
            'do_rotation': True,
            'angle_x': (0., 2 * np.pi),
            'angle_y': (0., 0),
            'angle_z': (0., 0),
            'do_scale': True,
            'scale': (0.8, 1.1),
            'random_crop': False,
            'rand_crop_dist': (self.patch_size[0] / 2. - 3, self.patch_size[1] / 2. - 3),
            'border_mode_data': 'constant',
            'border_cval_data': 0,
            'order_data': 1
        }

        if self.dim == 3:
            self.da_kwargs['do_elastic_deform'] = False
            self.da_kwargs['angle_x'] = (0, 0.0)
            self.da_kwargs['angle_y'] = (0, 0.0)  # must be 0!!
            self.da_kwargs['angle_z'] = (0., 2 * np.pi)

        #########################
        #   Add model specifics #
        #########################

        {'detection_unet': self.add_det_unet_configs,
         'mrcnn': self.add_mrcnn_configs,
         'ufrcnn': self.add_mrcnn_configs,
         'ufrcnn_surrounding': self.add_mrcnn_configs,
         'retina_net': self.add_mrcnn_configs,
         'retina_unet': self.add_mrcnn_configs,
         'prob_detector': self.add_mrcnn_configs,
         }[self.model]()

    def add_det_unet_configs(self):

        self.learning_rate = [1e-4] * self.num_epochs

        # aggregation from pixel prediction to object scores (connected component). One of ['max', 'median']
        self.aggregation_operation = 'max'

        # max number of roi candidates to identify per image (slice in 2D, volume in 3D)
        self.n_roi_candidates = 3 if self.dim == 2 else 8

        # loss mode: either weighted cross entropy ('wce'), batch-wise dice loss ('dice'), or the sum of both ('dice_wce')
        self.seg_loss_mode = 'dice_wce'

        # if <1, false positive predictions in foreground are penalized less.
        self.fp_dice_weight = 1 if self.dim == 2 else 1
        self.wce_weights = [1, 1, 1]
        self.detection_min_confidence = self.min_det_thresh

        # if 'True', loss distinguishes all classes, else only foreground vs. background (class agnostic).
        self.class_specific_seg_flag = True
        self.num_seg_classes = 3 if self.class_specific_seg_flag else 2
        self.head_classes = self.num_seg_classes

    def add_mrcnn_configs(self):

        # learning rate is a list with one entry per epoch.
        self.learning_rate = [1e-4] * self.num_epochs

        # disable mask head loss. (e.g. if no pixelwise annotations available)
        self.frcnn_mode = False

        # disable the re-sampling of mask proposals to original size for speed-up.
        # since evaluation is detection-driven (box-matching) and not instance segmentation-driven (iou-matching),
        # mask-outputs are optional.
        self.return_masks_in_val = True
        self.return_masks_in_test = False

        # set number of proposal boxes to plot after each epoch.
        self.n_plot_rpn_props = 0 if self.dim == 2 else 0

        # number of classes for head networks: n_foreground_classes + 1 (background)
        self.head_classes = 3

        # seg_classes here refers to the first stage classifier (RPN)
        self.num_seg_classes = 2  # foreground vs. background

        # feature map strides per pyramid level are inferred from architecture.
        self.backbone_strides = {'xy': [4, 8, 16, 32], 'z': [1, 2, 4, 8]}

        # anchor scales are chosen according to expected object sizes in data set. Default uses only one anchor scale
        # per pyramid level. (outer list are pyramid levels (corresponding to BACKBONE_STRIDES), inner list are scales per level.)
        self.rpn_anchor_scales = {'xy': [[8], [16], [32], [64]], 'z': [[2], [4], [8], [16]]}

        # choose which pyramid levels to extract features from: P2: 0, P3: 1, P4: 2, P5: 3.
        self.pyramid_levels = [0, 1, 2, 3]

        # number of feature maps in rpn. typically lowered in 3D to save gpu-memory.
        self.n_rpn_features = 512 if self.dim == 2 else 128

        # anchor ratios and strides per position in feature maps.
        self.rpn_anchor_ratios = [0.5, 1., 2.]
        self.rpn_anchor_stride = 1

        # Threshold for first stage (RPN) non-maximum suppression (NMS): LOWER == HARDER SELECTION
        self.rpn_nms_threshold = 0.7 if self.dim == 2 else 0.7

        # loss sampling settings.
        self.rpn_train_anchors_per_image = 2  # per batch element
        self.train_rois_per_image = 2  # per batch element
        self.roi_positive_ratio = 0.5
        self.anchor_matching_iou = 0.7

        # factor of top-k candidates to draw from per negative sample (stochastic-hard-example-mining).
        # poolsize to draw top-k candidates from will be shem_poolsize * n_negative_samples.
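The two comments above describe stochastic hard example mining (SHEM); the toolkit's actual sampling code is not part of this diff. A minimal sketch of the idea under those stated semantics (function name and signature are hypothetical):

```python
import torch

def shem_sample(neg_losses: torch.Tensor, n_neg_samples: int, shem_poolsize: int = 10) -> torch.Tensor:
    """Stochastic hard example mining (sketch): instead of always taking the
    n hardest negatives (plain OHEM), draw them at random from a pool of the
    shem_poolsize * n_neg_samples highest-loss candidates."""
    pool = min(shem_poolsize * n_neg_samples, neg_losses.numel())
    _, hard_ix = torch.topk(neg_losses, pool)                       # indices of the hardest candidates
    pick = torch.randperm(pool, device=neg_losses.device)[:n_neg_samples]
    return hard_ix[pick]                                            # indices into neg_losses
```

The random draw from the hard pool keeps gradient signal focused on difficult negatives while avoiding the label-noise overfitting that strict top-k selection can cause.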
        self.shem_poolsize = 10

        self.pool_size = (7, 7) if self.dim == 2 else (7, 7, 3)
        self.mask_pool_size = (14, 14) if self.dim == 2 else (14, 14, 5)
        self.mask_shape = (28, 28) if self.dim == 2 else (28, 28, 10)

        self.rpn_bbox_std_dev = np.array([0.1, 0.1, 0.1, 0.2, 0.2, 0.2])
        self.bbox_std_dev = np.array([0.1, 0.1, 0.1, 0.2, 0.2, 0.2])
        self.window = np.array([0, 0, self.patch_size[0], self.patch_size[1]])
        self.scale = np.array([self.patch_size[0], self.patch_size[1], self.patch_size[0], self.patch_size[1]])
        if self.dim == 2:
            self.rpn_bbox_std_dev = self.rpn_bbox_std_dev[:4]
            self.bbox_std_dev = self.bbox_std_dev[:4]
            self.window = self.window[:4]
            self.scale = self.scale[:4]

        # pre-selection in proposal-layer (stage 1) for NMS-speedup. applied per batch element.
        self.pre_nms_limit = 3000 if self.dim == 2 else 6000

        # n_proposals to be selected after NMS per batch element. too high numbers blow up memory if "detect_while_training" is True,
        # since proposals of the entire batch are forwarded through the second stage as one "batch".
        self.roi_chunk_size = 800 if self.dim == 2 else 600
        self.post_nms_rois_training = 500 if self.dim == 2 else 75
        self.post_nms_rois_inference = 500

        # Final selection of detections (refine_detections)
        self.model_max_instances_per_batch_element = 10 if self.dim == 2 else 30  # per batch element and class.
        self.detection_nms_threshold = 1e-5  # needs to be > 0, otherwise all predictions are one cluster.
        self.model_min_confidence = 0.1

        if self.dim == 2:
            self.backbone_shapes = np.array(
                [[int(np.ceil(self.patch_size[0] / stride)),
                  int(np.ceil(self.patch_size[1] / stride))]
                 for stride in self.backbone_strides['xy']])
        else:
            self.backbone_shapes = np.array(
                [[int(np.ceil(self.patch_size[0] / stride)),
                  int(np.ceil(self.patch_size[1] / stride)),
                  int(np.ceil(self.patch_size[2] / stride_z))]
                 for stride, stride_z in zip(self.backbone_strides['xy'], self.backbone_strides['z'])])

        if self.model == 'ufrcnn':
            self.operate_stride1 = True
            self.class_specific_seg_flag = True
            self.num_seg_classes = 3 if self.class_specific_seg_flag else 2
            self.frcnn_mode = True

        if self.model == 'retina_net' or self.model == 'retina_unet' or self.model == 'prob_detector':
            # implement extra anchor-scales according to retina-net publication.
            self.rpn_anchor_scales['xy'] = [[ii[0], ii[0] * (2 ** (1 / 3)), ii[0] * (2 ** (2 / 3))]
                                            for ii in self.rpn_anchor_scales['xy']]
            self.rpn_anchor_scales['z'] = [[ii[0], ii[0] * (2 ** (1 / 3)), ii[0] * (2 ** (2 / 3))]
                                           for ii in self.rpn_anchor_scales['z']]
            self.n_anchors_per_pos = len(self.rpn_anchor_ratios) * 3

            self.n_rpn_features = 256 if self.dim == 2 else 64

            # pre-selection of detections for NMS-speedup. per entire batch.
            self.pre_nms_limit = 10000 if self.dim == 2 else 50000

            # anchor matching iou is lower than in Mask R-CNN according to https://arxiv.org/abs/1708.02002
            self.anchor_matching_iou = 0.5
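For reference, the scale expansion above turns each single per-level anchor scale into an octave-spaced triplet, as in the RetinaNet paper. A small worked example with the 'xy' base scales from this config (values rounded):

```python
import numpy as np

# octave-spaced anchor scales per pyramid level (factor 2**(1/3) between neighbours):
base_scales = [8, 16, 32, 64]  # one scale per level, from rpn_anchor_scales['xy']
expanded = [[s, s * 2 ** (1 / 3), s * 2 ** (2 / 3)] for s in base_scales]
print(np.round(expanded, 1))
# [[  8.   10.1  12.7]
#  [ 16.   20.2  25.4]
#  [ 32.   40.3  50.8]
#  [ 64.   80.6 101.6]]
```

With three ratios and three scales per position this yields the n_anchors_per_pos = 9 used below.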
            # if 'True', seg loss distinguishes all classes, else only foreground vs. background (class agnostic).
            self.num_seg_classes = 3 if self.class_specific_seg_flag else 2

            if self.model == 'retina_unet':
                self.operate_stride1 = True
diff --git a/requirements.txt b/requirements.txt
index 2d7d947..0f6e3a7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,62 +1,65 @@
absl-py==0.9.0
backcall==0.1.0
batchgenerators==0.19.3
cachetools==4.0.0
certifi==2019.11.28
cffi==1.11.5
chardet==3.0.4
cycler==0.10.0
Cython==0.29.14
decorator==4.4.1
future==0.18.2
google-auth==1.10.0
google-auth-oauthlib==0.4.1
grpcio==1.26.0
idna==2.8
imageio==2.6.1
ipython-genutils==0.2.0
jedi==0.15.1
joblib==0.14.1
kiwisolver==1.1.0
linecache2==1.0.0
Markdown==3.1.1
matplotlib==3.1.2
medicaldetectiontoolkit==0.0.1
networkx==2.4
+nms-extension==0.0.0
numpy==1.17.4
oauthlib==3.1.0
pandas==0.25.3
parso==0.5.2
pexpect==4.7.0
pickleshare==0.7.5
Pillow==6.2.1
prompt-toolkit==3.0.2
protobuf==3.11.2
ptyprocess==0.6.0
pyasn1==0.4.8
pyasn1-modules==0.2.7
pycparser==2.19
Pygments==2.5.2
pyparsing==2.4.5
python-dateutil==2.8.1
pytz==2019.3
PyWavelets==1.1.1
requests==2.22.0
requests-oauthlib==1.3.0
+RoIAlign-extension-2D==0.0.0
+RoIAlign-extension-3D==0.0.0
rsa==4.0
scikit-image==0.16.2
scikit-learn==0.22.1
scipy==1.3.3
six==1.13.0
sklearn==0.0
tensorboard==2.1.0
threadpoolctl==1.1.0
torch==1.4.0
torchvision==0.5.0
tqdm==4.40.2
traceback2==1.4.0
traitlets==4.3.3
unittest2==1.1.0
urllib3==1.25.7
wcwidth==0.1.7
Werkzeug==0.16.0
diff --git a/shell_scripts/cluster_runner_meddec.sh b/shell_scripts/cluster_runner_meddec.sh
new file mode 100644
index 0000000..ef036be
--- /dev/null
+++ b/shell_scripts/cluster_runner_meddec.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+#Usage:
+# note: this script does not need to be started from the directory the python files called below lie in (e.g. exec.py in the source dir).
+# part of the job name you pass to the scheduler will be the experiment folder's name.
+# you need to pass 5 positional arguments to this script, plus an optional 6th (cluster_runner_..sh #1 #2 #3 #4 #5 [#6]):
+# -#1 source directory in which main source code (framework) is located (e.g. medicaldetectiontoolkit/)
+# -#2 the exp_dir where job-specific code was copied before by create_exp and exp results are saved by exec.py
+# -#3 absolute path to dataset-specific code in source dir
+# -#4 mode to run
+# -#5 folds to run on
+# -#6 (optional) resume flag
+
+source_dir=${1}
+exp_dir=${2}
+dataset_abs_path=${3}
+mode=${4}
+folds=${5}
+resume=$6
+
+#known problem: trap somehow does not execute the rm -r tmp_dir command when the job is cancelled externally
+#trap clean_up EXIT KILL TERM ABRT QUIT
+
+job_dir=/ssd/ramien/${LSB_JOBID}
+
+tmp_dir_data=${job_dir}/data
+mkdir $tmp_dir_data
+
+tmp_dir_cache=${job_dir}/cache
+mkdir $tmp_dir_cache
+CUDA_CACHE_PATH=$tmp_dir_cache
+export CUDA_CACHE_PATH
+
+#data must not lie permanently on nodes' ssd, only during training time
+#needs to be named with the LSB_JOBID to not be automatically removed
+#can permanently lie on /datasets drive --> copy from there before every experiment
+#files on /datasets are saved as npz (compressed) --> use data_manager.py to copy and unpack into .npy; is done implicitly in exec.py
+
+#(tensorboard --logdir ${exp_dir}/.. --port 1337 || echo "tboard startup failed")& # || tensorboard --logdir ${exp_dir}/.. --port 1338)&
+#tboard_pid=$!
+
+#clean_up() {
+#    rm -rf ${job_dir};
+#}
+
+export OMP_NUM_THREADS=1  # work-around fix for batchgenerators to deal with numpy-inherent multi-threading.
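The comments above delegate the npz-to-npy staging to data_manager.py, which is not part of this diff. A minimal Python sketch of that staging idea (stage_dataset is a hypothetical helper, not the toolkit's actual code):

```python
import os
import shutil
import numpy as np

def stage_dataset(src_dir, dest_dir):
    """Copy a preprocessed dataset to fast node-local storage,
    unpacking compressed .npz archives into plain .npy files."""
    os.makedirs(dest_dir, exist_ok=True)
    for fname in os.listdir(src_dir):
        src = os.path.join(src_dir, fname)
        if fname.endswith(".npz"):
            with np.load(src) as archive:  # NpzFile, lazily decompressed
                for key in archive.files:
                    out = os.path.join(dest_dir, "{}_{}.npy".format(fname[:-4], key))
                    np.save(out, archive[key])
        else:
            shutil.copy(src, dest_dir)  # e.g. info_df.pickle
```

Keeping only compressed archives on the shared drive and unpacking per job trades a one-off decompression cost for much faster memory-mapped reads during training.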
+
+launch_opts="${source_dir}/exec.py --use_stored_settings --server_env --exp_source ${dataset_abs_path} --data_dest ${tmp_dir_data} --exp_dir ${exp_dir} --mode ${mode}"
+
+if [ ! -z "${resume}" ]; then
+    launch_opts="${launch_opts} --resume"  # quote the append, else bash treats the remainder as a command
+    echo "Resuming from checkpoint(s)."
+fi
+
+if [ ! -z "${folds}" ]; then
+    launch_opts="${launch_opts} --folds ${folds}"
+fi
+
+echo "submitting with ${launch_opts}"
+python ${launch_opts}
+
diff --git a/shell_scripts/job_starter.sh b/shell_scripts/job_starter.sh
new file mode 100644
index 0000000..3792f8d
--- /dev/null
+++ b/shell_scripts/job_starter.sh
@@ -0,0 +1,193 @@
+#!/bin/bash
+#wrapper for cluster_runner_....sh which copies job-specific, frequently changing files (e.g. configs.py) before the actual batch job
+#is submitted, since the job might pend in queue before execution --> hazard of job-specific files being unintentionally changed during queue wait time.
+#positional args:
+# -arg #1 identifies the folder name of the dataset-related code (e.g. >toy_exp< or >lidc_exp<) within the code source directory
+# -arg #2 is the experiment name and first part of the job name
+# optional args and flags:
+# -c / --create: (flag) whether to create the exp, i.e., if this is a new start of the exp with configs etc. from source dir.
+# -f / --folds FOLDS: (option) fold(s) to run on (FOLDS needs to be only one int or a string of multiple ints separated by space), default None (-->set to all in config)
+# -m / --mode MODE: (option) string, one of "train", "train_test", "test", defaults to "train_test"
+# -p / --exp_parent_dir: (option) name of parent_dir rel to dataset folder on cluster. exp_dir is exp_parent_dir/exp_name, if not given defaults to "experiments"
+# -q / --queue: (option) which queue (-q parameter for bsub) to send job to. default: gputest. others: gputest-short (max 5h jobs).
+# -w / --which: (option) same as argument -m to bsub; host or host list (string separated by space) to send the job to.
+#    use nodenameXX where XX==nr of node or nodenameXX,nodenameYY,... or nodename[XX-YY]. nodename is e.g. e132-comp.
+# --gmem: (option) how much gpu memory to request for job (in gigabytes), defaults to 11. Currently, the smaller nodes have 11.9G, the larger ones 31.7G.
+# --resume: (flag) only with explicit fold argument; if set, resumes from checkpoint in exp_dir/fold_x/last_state.pth.
+# --no_parallel: (flag) if set, folds won't start as parallel jobs on cluster, but run sequentially in one job.
+
+dataset_name="${1}"
+exp_name="${2}"
+
+#arguments not passed, e.g. $7 if no seventh argument, are null.
+if [ ! -z "${18}" ]; then  #-z checks if is null string
+    echo "Error: Received too many arguments."
+    exit
+fi
+
+#make args optional: move up if some args are missing in between
+while [ ${#} -gt 2 ]; do
+    case "${3}" in
+    -c|--create)
+        create_exp="c"
+        shift
+        ;;
+    -f|--folds)
+        folds="${4}"
+        shift; shift
+        ;;
+    -m|--mode)
+        mode="${4}"
+        shift; shift
+        ;;
+    -p|--exp_parent_dir)
+        exp_parent_dir="${4}"
+        shift; shift
+        ;;
+    -q|--queue)
+        queue="${4}"
+        shift; shift
+        ;;
+    -w|--which)
+        which="${4}"
+        shift; shift
+        ;;
+    -R|--resource)
+        resource="${4}"
+        shift; shift
+        ;;
+    --gmem)
+        gmem="${4}"
+        shift; shift
+        ;;
+    --resume)
+        resume=true
+        shift
+        ;;
+    --no_parallel)
+        no_parallel=true
+        shift
+        ;;
+    *)
+        echo "Invalid argument/option passed: ${3}"
+        exit 1
+        ;;
+    esac
+done
+
+# default values
+if [ -z ${exp_parent_dir} ]; then
+    exp_parent_dir="experiments"
+fi
+
+if [ -z ${mode} ]; then
+    mode="train_test"
+fi
+
+if [ -z ${queue} ]; then
+    queue="gputest"
+fi
+
+if [ -z ${gmem} ]; then
+    gmem="11"
+fi
+
+root_dir=/home/ramien  #assumes /home/ramien exists
+prep_node=ramien@e132-comp07  #node used for prep tasks like create_exp
+#medicaldetectiontoolkit
+source_dir=${root_dir}/mdt-public
+
+dataset_abs_path=${source_dir}/experiments/${dataset_name}  #derived from arg #1; passed on to the runner script
+exp_parent_dir=/datasets/datasets_ramien/${dataset_name}/${exp_parent_dir}
+#exp_parent_dir=/home/gregor/Documents/medicaldetectiontoolkit/datasets/${dataset_name}/experiments #for testing this script
+# /datasets is not mounted on log-in/job submission nodes (would maybe make sense, I feel), only on queue gputest's nodes e132-compXX.
+#ssh ${prep_node} "mkdir -p ${exp_parent_dir}"
+exp_dir=${exp_parent_dir}/${exp_name}
+
+#activate virtualenv that has all the packages:
+source_dl="module load python/3.7.0; module load gcc/7.2.0; source ${root_dir}/.virtualenvs/deeplearning37/bin/activate;"
+
+# TODO as long as no fix is available: this script needs to be started directly from the prep node. :/ it would be nice if these commands
+# (most importantly 'module ...') would also work over ssh, but somehow some commands are not available in the ssh-induced shell (even when using it as interactive).
+eval ${source_dl}
+
+# ssh: (not working)
+#create_cmd="ssh ${prep_node} '${source_dl} python ${source_dir}/exec.py --server_env --mode create_exp --exp_dir ${exp_dir} --exp_source ${dataset_abs_path};'"
+# directly from prep node:
+create_cmd="python ${source_dir}/exec.py --server_env --mode create_exp --exp_dir ${exp_dir} --exp_source ${dataset_abs_path};"
+
+#if create_exp, check if it would overwrite an existing exp_dir
+if [ ! -z ${create_exp} ] && [ ${create_exp} = "c" ]; then  #-n doesn't work as a replacement for ! -z
+    if [ -d ${exp_dir} ]; then
+        echo "Please confirm to overwrite exp ${exp_name} settings, (Y/n): "; read confirmation
+        if ([ "${confirmation}" = "y" ] || [ "${confirmation}" = "yes" ] || [ "${confirmation}" = "Y" ] || [ -z "${confirmation}" ]); then
+            echo "Overwriting ${exp_name}"
+        else
+            echo "Exiting due to overwrite denial. Adjust options."
+            exit
+        fi
+    fi
+    #echo "opts: name ${exp_name}, ${source_dir}/exec.py --server_env --mode create_exp --exp_dir ${exp_dir} --exp_source ${dataset_abs_path}"
+    echo "Creating ${exp_name}"
+    eval ${create_cmd}
+else
+    if [ ! -d ${exp_dir} ]; then
+        echo "Experiment directory ${exp_dir} does not exist."
+        echo "Run create_exp? (Y/n): "; read confirmation
(Y/n): "; read confirmation + if ([ "${confirmation}" = "y" ] || [ "${confirmation}" = "yes" ] || [ "${confirmation}" = "Y" ] || [ -z "${confirmation}" ]); then + echo "Creating ${exp_name}" + eval ${create_cmd} + fi + fi +fi + +#if not create_exp, check if would overwrite existing folds (possibly valuable trained params!) +if [ -z ${create_exp} ] && ([ ${mode} = "train" ] || [ ${mode} = "train_test" ]) && [ -z "${resume}" ]; then + for f in ${folds}; do #if folds is null this check won't apply and folds will be quietly overwritten. + if [ -d ${exp_dir}/fold_${f} ]; then #-d checks if is dir + echo "please confirm to overwrite fold_${f}, (Y/n):"; read confirmation + if ([ "${confirmation}" = "y" ] || [ "${confirmation}" = "yes" ] || [ "${confirmation}" = "Y" ] || [ -z "${confirmation}" ]); then + echo "Overwriting "${exp_name}/fold_${f} + else + echo "Exiting due to overwrite denial. Adjust options." + exit + fi + fi + done +fi + + + +bsub_opts="bsub -N -q ${queue} -gpu num=1:j_exclusive=yes:mode=exclusive_process:gmem=${gmem}G" +if [ ! -z "$resource" ]; then + bsub_opts=$bsub_opts $resource +fi +if [ ! -z ${which} ]; then + bsub_opts="${bsub_opts} -m ${which}" +fi + +#----- parallel/separate fold jobs (each fold in a single job) ----------- +if [ ! -z "${folds}" ] && [ -z ${no_parallel} ]; then #WHY do i need to convert to string again? + for f in ${folds}; do + out_file=${exp_dir}/logs/fold_${f}_lsf_output.out + bsub_opts="$bsub_opts -J '${dataset_name} ${exp_name} fold ${f} ${mode}' -oo '${out_file}'" + eval "${bsub_opts} sh cluster_runner_meddec.sh ${source_dir} ${exp_dir} ${dataset_abs_path} ${mode} ${f} ${resume}" + done + +#----- consecutive folds job (all folds in one single job) ----------- +else + if [ ! -z ${resume} ]; then + echo "You need to explicitly specify folds if you would like to resume from a checkpoint. Exiting." 
+ exit + fi + out_file=${exp_dir}/lsf_output.out + bsub_opts="$bsub_opts -J '${dataset_name} ${exp_name} folds ${folds} ${mode}' -oo '${out_file}'" + eval "${bsub_opts} sh cluster_runner_meddec.sh ${source_dir} ${exp_dir} ${dataset_abs_path} ${mode} ${folds} ${resume}" + echo "Started in no parallel, folds:" ${folds} +fi + + + diff --git a/shell_scripts/update_scripts_on_cluster.sh b/shell_scripts/update_scripts_on_cluster.sh index 4b9b1b2..aa6c934 100644 --- a/shell_scripts/update_scripts_on_cluster.sh +++ b/shell_scripts/update_scripts_on_cluster.sh @@ -1,18 +1,18 @@ #cluster sync rootp=/home/gregor/Documents server=ramien@odcf-lsf01.dkfz.de #server=ramien@e132-comp04 serverroot=${server}:/home/ramien #---- medicaldetectiontoolkit ----- codep=${rootp}/mdt-public server_codep=${serverroot}/mdt-public -rsync -avhe "ssh -i /home/gregor/.ssh/id_rsa" ${rootp}/mdt-public/shell_scripts/cluster_runner_meddec.sh ${rootp}/mdt-public/shell_scripts/job_starter.sh ${server_codep} +rsync -avhe "ssh -i /home/gregor/.ssh/id_rsa" ${rootp}/mdt-public/shell_scripts/dummy_runner.sh ${rootp}/mdt-public/shell_scripts/cluster_runner_meddec.sh ${rootp}/mdt-public/shell_scripts/job_starter.sh ${server_codep} rsync -avhe "ssh -i /home/gregor/.ssh/id_rsa" ${rootp}/environmental/job_scheduler,cluster/bpeek_wrapper.sh ${serverroot} # add/remove --include 'custom_extension/**/*.whl' for compiled c++/CUDA exts rsync -avhe "ssh -i /home/gregor/.ssh/id_rsa" --include '*/' --include '*.py' --include 'requirements.txt' --include 'custom_extensions/**/*.whl' --exclude '*' --prune-empty-dirs ${codep} ${serverroot} diff --git a/utils/exp_utils.py b/utils/exp_utils.py index 3aab283..58c8c3e 100644 --- a/utils/exp_utils.py +++ b/utils/exp_utils.py @@ -1,419 +1,418 @@ #!/usr/bin/env python # Copyright 2018 Division of Medical Image Computing, German Cancer Research Center (DKFZ). # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== import sys import subprocess import os import plotting import importlib.util import pickle import logging from torch.utils.tensorboard import SummaryWriter from collections import OrderedDict import numpy as np import torch import pandas as pd class CombinedLogger(object): """Combine console and tensorboard logger and record system metrics. 
""" def __init__(self, name, log_dir, server_env=True, fold="all"): self.pylogger = logging.getLogger(name) self.tboard = SummaryWriter(log_dir=os.path.join(log_dir, "tboard")) self.log_dir = log_dir self.fold = str(fold) self.server_env = server_env self.pylogger.setLevel(logging.DEBUG) self.log_file = os.path.join(log_dir, "fold_"+self.fold, 'exec.log') os.makedirs(os.path.dirname(self.log_file), exist_ok=True) self.pylogger.addHandler(logging.FileHandler(self.log_file)) if not server_env: self.pylogger.addHandler(ColorHandler()) else: self.pylogger.addHandler(logging.StreamHandler()) self.pylogger.propagate = False def __getattr__(self, attr): """delegate all undefined method requests to objects of this class in order pylogger, tboard (first find first serve). E.g., combinedlogger.add_scalars(...) should trigger self.tboard.add_scalars(...) """ for obj in [self.pylogger, self.tboard]: if attr in dir(obj): return getattr(obj, attr) print("logger attr not found") def set_logfile(self, fold=None, log_file=None): if fold is not None: self.fold = str(fold) if log_file is None: self.log_file = os.path.join(self.log_dir, "fold_"+self.fold, 'exec.log') else: self.log_file = log_file os.makedirs(os.path.dirname(self.log_file), exist_ok=True) for hdlr in self.pylogger.handlers: hdlr.close() self.pylogger.handlers = [] self.pylogger.addHandler(logging.FileHandler(self.log_file)) if not self.server_env: self.pylogger.addHandler(ColorHandler()) else: self.pylogger.addHandler(logging.StreamHandler()) def metrics2tboard(self, metrics, global_step=None, suptitle=None): """ :param metrics: {'train': dataframe, 'val':df}, df as produced in evaluator.py.evaluate_predictions """ # print("metrics", metrics) if global_step is None: global_step = len(metrics['train'][list(metrics['train'].keys())[0]]) - 1 if suptitle is not None: suptitle = str(suptitle) else: suptitle = "Fold_" + str(self.fold) for key in ['train', 'val']: # series = {k:np.array(v[-1]) for (k,v) in metrics[key].items() if not np.isnan(v[-1]) and not 'Bin_Stats' in k} loss_series = {} unc_series = {} bin_stat_series = {} mon_met_series = {} for tag, val in metrics[key].items(): val = val[-1] # maybe remove list wrapping, recording in evaluator? if 'loss' in tag.lower() and not np.isnan(val): loss_series["{}".format(tag)] = val elif not np.isnan(val): mon_met_series["{}".format(tag)] = val self.tboard.add_scalars(suptitle + "/Losses/{}".format(key), loss_series, global_step) self.tboard.add_scalars(suptitle + "/Monitor_Metrics/{}".format(key), mon_met_series, global_step) self.tboard.add_scalars(suptitle + "/Learning_Rate", metrics["lr"], global_step) return def __del__(self): # otherwise might produce multiple prints e.g. in ipython console for hdlr in self.pylogger.handlers: hdlr.close() self.pylogger.handlers = [] del self.pylogger self.tboard.flush() # close somehow prevents main script from exiting # maybe revise this issue in a later pytorch version #self.tboard.close() def get_logger(exp_dir, server_env=False): """ creates logger instance. writing out info to file, to terminal and to tensorboard. :param exp_dir: experiment directory, where exec.log file is stored. :param server_env: True if operating in server environment (e.g., gpu cluster) :return: custom CombinedLogger instance. 
""" log_dir = os.path.join(exp_dir, "logs") logger = CombinedLogger('medicaldetectiontoolkit', log_dir, server_env=server_env) print("Logging to {}".format(logger.log_file)) return logger def prep_exp(dataset_path, exp_path, server_env, use_stored_settings=True, is_training=True): """ I/O handling, creating of experiment folder structure. Also creates a snapshot of configs/model scripts and copies them to the exp_dir. This way the exp_dir contains all info needed to conduct an experiment, independent to changes in actual source code. Thus, training/inference of this experiment can be started at anytime. Therefore, the model script is copied back to the source code dir as tmp_model (tmp_backbone). Provides robust structure for cloud deployment. :param dataset_path: path to source code for specific data set. (e.g. medicaldetectiontoolkit/lidc_exp) :param exp_path: path to experiment directory. :param server_env: boolean flag. pass to configs script for cloud deployment. :param use_stored_settings: boolean flag. When starting training: If True, starts training from snapshot in existing experiment directory, else creates experiment directory on the fly using configs/model scripts from source code. :param is_training: boolean flag. distinguishes train vs. inference mode. :return: """ if is_training: if use_stored_settings: cf_file = import_module('cf_file', os.path.join(exp_path, 'configs.py')) cf = cf_file.configs(server_env) # in this mode, previously saved model and backbone need to be found in exp dir. if not os.path.isfile(os.path.join(exp_path, 'model.py')) or \ not os.path.isfile(os.path.join(exp_path, 'backbone.py')): raise Exception( "Selected use_stored_settings option but no model and/or backbone source files exist in exp dir.") cf.model_path = os.path.join(exp_path, 'model.py') cf.backbone_path = os.path.join(exp_path, 'backbone.py') else: # this case overwrites settings files in exp dir, i.e., default_configs, configs, backbone, model - print("exp path", exp_path) os.makedirs(exp_path, exist_ok=True) # run training with source code info and copy snapshot of model to exp_dir for later testing (overwrite scripts if exp_dir already exists.) subprocess.call('cp {} {}'.format('default_configs.py', os.path.join(exp_path, 'default_configs.py')), shell=True) subprocess.call( 'cp {} {}'.format(os.path.join(dataset_path, 'configs.py'), os.path.join(exp_path, 'configs.py')), shell=True) cf_file = import_module('cf_file', os.path.join(dataset_path, 'configs.py')) cf = cf_file.configs(server_env) subprocess.call('cp {} {}'.format(cf.model_path, os.path.join(exp_path, 'model.py')), shell=True) subprocess.call('cp {} {}'.format(cf.backbone_path, os.path.join(exp_path, 'backbone.py')), shell=True) if os.path.isfile(os.path.join(exp_path, "fold_ids.pickle")): subprocess.call('rm {}'.format(os.path.join(exp_path, "fold_ids.pickle")), shell=True) else: # testing, use model and backbone stored in exp dir. 
        cf_file = import_module('cf_file', os.path.join(exp_path, 'configs.py'))
        cf = cf_file.configs(server_env)
        cf.model_path = os.path.join(exp_path, 'model.py')
        cf.backbone_path = os.path.join(exp_path, 'backbone.py')

    cf.exp_dir = exp_path
    cf.test_dir = os.path.join(cf.exp_dir, 'test')
    cf.plot_dir = os.path.join(cf.exp_dir, 'plots')
    if not os.path.exists(cf.test_dir):
        os.mkdir(cf.test_dir)
    if not os.path.exists(cf.plot_dir):
        os.mkdir(cf.plot_dir)
    cf.experiment_name = exp_path.split("/")[-1]
    cf.server_env = server_env
    cf.created_fold_id_pickle = False

    return cf


def import_module(name, path):
    """
    correct way of importing a module dynamically in python 3.
    :param name: name given to module instance.
    :param path: path to module.
    :return: module: returned module instance.
    """
    spec = importlib.util.spec_from_file_location(name, path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


class ModelSelector:
    '''
    saves a checkpoint after each epoch as 'last_state' (can be loaded to continue interrupted training).
    saves the top-k (k=cf.save_n_models) ranked epochs. In inference, predictions of multiple epochs can be ensembled
    to improve performance.
    '''

    def __init__(self, cf, logger):

        self.cf = cf
        self.saved_epochs = [-1] * cf.save_n_models
        self.logger = logger

    def run_model_selection(self, net, optimizer, monitor_metrics, epoch):

        # take the mean over all selection criteria in each epoch
        non_nan_scores = np.mean(np.array([[0 if (ii is None or np.isnan(ii)) else ii for ii in monitor_metrics['val'][sc]]
                                           for sc in self.cf.model_selection_criteria]), 0)
        epochs_scores = [ii for ii in non_nan_scores[1:]]

        # ranking of epochs according to model_selection_criterion
        epoch_ranking = np.argsort(epochs_scores, kind="stable")[::-1] + 1  # epochs start at 1

        # if set in configs, epochs < min_save_thresh are discarded from saving process.
        epoch_ranking = epoch_ranking[epoch_ranking >= self.cf.min_save_thresh]

        # check if current epoch is among the top-k epochs.
        if epoch in epoch_ranking[:self.cf.save_n_models]:

            save_dir = os.path.join(self.cf.fold_dir, '{}_best_checkpoint'.format(epoch))
            if not os.path.exists(save_dir):
                os.mkdir(save_dir)
            torch.save(net.state_dict(), os.path.join(save_dir, 'params.pth'))
            with open(os.path.join(save_dir, 'monitor_metrics.pickle'), 'wb') as handle:
                pickle.dump(monitor_metrics, handle)
            # save epoch_ranking to keep info for inference.
            np.save(os.path.join(self.cf.fold_dir, 'epoch_ranking'), epoch_ranking[:self.cf.save_n_models])
            np.save(os.path.join(save_dir, 'epoch_ranking'), epoch_ranking[:self.cf.save_n_models])

            self.logger.info(
                "saving current epoch {} at rank {}".format(epoch, np.argwhere(epoch_ranking == epoch)))
            # delete params of the epoch that just fell out of the top-k epochs.
            for se in [int(ii.split('_')[0]) for ii in os.listdir(self.cf.fold_dir) if 'best_checkpoint' in ii]:
                if se in epoch_ranking[self.cf.save_n_models:]:
                    subprocess.call('rm -rf {}'.format(os.path.join(self.cf.fold_dir, '{}_best_checkpoint'.format(se))), shell=True)
                    self.logger.info('deleting epoch {} at rank {}'.format(se, np.argwhere(epoch_ranking == se)))

        state = {
            'epoch': epoch,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict(),
        }

        # save checkpoint of current epoch.
        save_dir = os.path.join(self.cf.fold_dir, 'last_checkpoint')
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        torch.save(state, os.path.join(save_dir, 'params.pth'))
        np.save(os.path.join(save_dir, 'epoch_ranking'), epoch_ranking[:self.cf.save_n_models])
        with open(os.path.join(save_dir, 'monitor_metrics.pickle'), 'wb') as handle:
            pickle.dump(monitor_metrics, handle)


def load_checkpoint(checkpoint_path, net, optimizer):

    checkpoint_params = torch.load(os.path.join(checkpoint_path, 'params.pth'))
    net.load_state_dict(checkpoint_params['state_dict'])
    optimizer.load_state_dict(checkpoint_params['optimizer'])
    with open(os.path.join(checkpoint_path, 'monitor_metrics.pickle'), 'rb') as handle:
        monitor_metrics = pickle.load(handle)
    starting_epoch = checkpoint_params['epoch'] + 1
    return starting_epoch, monitor_metrics


def prepare_monitoring(cf):
    """
    creates dictionaries, where train/val metrics are stored.
    """
    metrics = {}
    # first entry for loss dict accounts for epoch starting at 1.
    metrics['train'] = OrderedDict()
    metrics['val'] = OrderedDict()
    metric_classes = []
    if 'rois' in cf.report_score_level:
        metric_classes.extend([v for k, v in cf.class_dict.items()])
    if 'patient' in cf.report_score_level:
        metric_classes.extend(['patient'])
    for cl in metric_classes:
        metrics['train'][cl + '_ap'] = [np.nan]
        metrics['val'][cl + '_ap'] = [np.nan]
        if cl == 'patient':
            metrics['train'][cl + '_auc'] = [np.nan]
            metrics['val'][cl + '_auc'] = [np.nan]

    return metrics


def create_csv_output(results_list, cf, logger):
    """
    Write out test set predictions to .csv file. output format is one line per prediction:
    PatientID | PredictionID | [y1 x1 y2 x2 (z1) (z2)] | score | pred_classID
    Note that prediction coordinates correspond to images as loaded for training/testing and need to be adapted when
    plotted over raw data (before preprocessing/resampling).
    :param results_list: [[patient_results, patient_id], [patient_results, patient_id], ...]
    """

    logger.info('creating csv output file at {}'.format(os.path.join(cf.exp_dir, 'results.csv')))
    predictions_df = pd.DataFrame(columns=['patientID', 'predictionID', 'coords', 'score', 'pred_classID'])
    for r in results_list:

        pid = r[1]

        #optionally load resampling info from preprocessing to match output predictions with raw data.
        #with open(os.path.join(cf.exp_dir, 'test_resampling_info', pid), 'rb') as handle:
        #    resampling_info = pickle.load(handle)

        for bix, box in enumerate(r[0][0]):
            assert box['box_type'] == 'det', box['box_type']
            coords = box['box_coords']
            score = box['box_score']
            pred_class_id = box['box_pred_class_id']
            out_coords = []
            if score >= cf.min_det_thresh:
                out_coords.append(coords[0])  #* resampling_info['scale'][0])
                out_coords.append(coords[1])  #* resampling_info['scale'][1])
                out_coords.append(coords[2])  #* resampling_info['scale'][0])
                out_coords.append(coords[3])  #* resampling_info['scale'][1])
                if len(coords) > 4:
                    out_coords.append(coords[4])  #* resampling_info['scale'][2] + resampling_info['z_crop'])
                    out_coords.append(coords[5])  #* resampling_info['scale'][2] + resampling_info['z_crop'])

                predictions_df.loc[len(predictions_df)] = [pid, bix, out_coords, score, pred_class_id]
    try:
        fold = cf.fold
    except:
        fold = 'hold_out'
    predictions_df.to_csv(os.path.join(cf.exp_dir, 'results_{}.csv'.format(fold)), index=False)
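Since the coords column is written as a Python list inside a CSV cell, reading the file back needs a literal_eval step. A small sketch (the file name follows the results_{fold}.csv pattern above, with 'hold_out' as fallback when no fold is set; column names as in the code):

```python
import ast
import pandas as pd

df = pd.read_csv("results_hold_out.csv")
df["coords"] = df["coords"].apply(ast.literal_eval)  # "[y1, x1, y2, x2]" -> list of floats
is_3d = df["coords"].apply(len) == 6                 # 2D boxes have 4 coords, 3D boxes 6
print(df.loc[~is_3d, ["patientID", "score", "pred_classID"]].head())
```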
""" _colors = dict(black=30, red=31, green=32, yellow=33, blue=34, magenta=35, cyan=36, white=37, default=39) def __init__(self, stream): self.stream = stream @classmethod def supported(cls, stream=sys.stdout): """ A class method that returns True if the current platform supports coloring terminal output using this method. Returns False otherwise. """ if not stream.isatty(): return False # auto color only on TTYs try: import curses except ImportError: return False else: try: try: return curses.tigetnum("colors") > 2 except curses.error: curses.setupterm() return curses.tigetnum("colors") > 2 except: raise # guess false in case of error return False def write(self, text, color): """ Write the given text to the stream in the given color. @param text: Text to be written to the stream. @param color: A string label for a color. e.g. 'red', 'white'. """ color = self._colors[color] self.stream.write('\x1b[%sm%s\x1b[0m' % (color, text)) class ColorHandler(logging.StreamHandler): def __init__(self, stream=sys.stdout): super(ColorHandler, self).__init__(_AnsiColorizer(stream)) def emit(self, record): msg_colors = { logging.DEBUG: "green", logging.INFO: "default", logging.WARNING: "red", logging.ERROR: "red" } color = msg_colors.get(record.levelno, "blue") self.stream.write(record.msg + "\n", color)