diff --git a/exec.py b/exec.py index a448a17..ca5e249 100644 --- a/exec.py +++ b/exec.py @@ -1,247 +1,254 @@ #!/usr/bin/env python # Copyright 2018 Division of Medical Image Computing, German Cancer Research Center (DKFZ). # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """execution script.""" import argparse import os import time import torch import utils.exp_utils as utils from evaluator import Evaluator from predictor import Predictor from plotting import plot_batch_prediction def train(logger): """ perform the training routine for a given fold. saves plots and selected parameters to the experiment dir specified in the configs. 
""" logger.info('performing training in {}D over fold {} on experiment {} with model {}'.format( cf.dim, cf.fold, cf.exp_dir, cf.model)) net = model.net(cf, logger).cuda() optimizer = torch.optim.Adam(net.parameters(), lr=cf.learning_rate[0], weight_decay=cf.weight_decay) + if cf.dynamic_lr_scheduling: + scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode=cf.scheduling_mode, factor=cf.lr_decay_factor, + patience=cf.scheduling_patience) + model_selector = utils.ModelSelector(cf, logger) train_evaluator = Evaluator(cf, logger, mode='train') val_evaluator = Evaluator(cf, logger, mode=cf.val_mode) starting_epoch = 1 # prepare monitoring monitor_metrics = utils.prepare_monitoring(cf) if cf.resume_to_checkpoint: starting_epoch, monitor_metrics = utils.load_checkpoint(cf.resume_to_checkpoint, net, optimizer) logger.info('resumed to checkpoint {} at epoch {}'.format(cf.resume_to_checkpoint, starting_epoch)) logger.info('loading dataset and initializing batch generators...') batch_gen = data_loader.get_train_generators(cf, logger) for epoch in range(starting_epoch, cf.num_epochs + 1): logger.info('starting training epoch {}'.format(epoch)) - for param_group in optimizer.param_groups: - param_group['lr'] = cf.learning_rate[epoch - 1] - start_time = time.time() net.train() train_results_list = [] for bix in range(cf.num_train_batches): batch = next(batch_gen['train']) tic_fw = time.time() results_dict = net.train_forward(batch) tic_bw = time.time() optimizer.zero_grad() results_dict['torch_loss'].backward() optimizer.step() logger.info('tr. batch {0}/{1} (ep. 
{2}) fw {3:.2f}s / bw {4:.2f} s / total {5:.2f} s || ' .format(bix + 1, cf.num_train_batches, epoch, tic_bw - tic_fw, time.time() - tic_bw, time.time() - tic_fw) + results_dict['logger_string']) train_results_list.append([results_dict['boxes'], batch['pid']]) _, monitor_metrics['train'] = train_evaluator.evaluate_predictions(train_results_list, monitor_metrics['train']) #import IPython; IPython.embed() train_time = time.time() - start_time logger.info('starting validation in mode {}.'.format(cf.val_mode)) with torch.no_grad(): net.eval() if cf.do_validation: val_results_list = [] val_predictor = Predictor(cf, net, logger, mode='val') for _ in range(batch_gen['n_val']): batch = next(batch_gen[cf.val_mode]) if cf.val_mode == 'val_patient': results_dict = val_predictor.predict_patient(batch) elif cf.val_mode == 'val_sampling': results_dict = net.train_forward(batch, is_validation=True) val_results_list.append([results_dict['boxes'], batch['pid']]) _, monitor_metrics['val'] = val_evaluator.evaluate_predictions(val_results_list, monitor_metrics['val']) model_selector.run_model_selection(net, optimizer, monitor_metrics, epoch) # update monitoring and prediction plots monitor_metrics.update({"lr": {str(g): group['lr'] for (g, group) in enumerate(optimizer.param_groups)}}) logger.metrics2tboard(monitor_metrics, global_step=epoch) epoch_time = time.time() - start_time logger.info('trained epoch {}: took {:.2f} s ({:.2f} s train / {:.2f} s val)'.format( epoch, epoch_time, train_time, epoch_time-train_time)) batch = next(batch_gen['val_sampling']) results_dict = net.train_forward(batch, is_validation=True) logger.info('plotting predictions from validation sampling.') plot_batch_prediction(batch, results_dict, cf) + # ---- scheduling: runs at the end of the (1-based) epoch, so the static branch applies the NEXT epoch's rate (index = epoch, clamped on the final epoch) ---- + if cf.dynamic_lr_scheduling: + scheduler.step(monitor_metrics["val"][cf.scheduling_criterion][-1]) + else: + for param_group in optimizer.param_groups: + param_group['lr'] = cf.learning_rate[min(epoch, len(cf.learning_rate) - 1)] def test(logger): 
""" perform testing for a given fold (or hold out set). save stats in evaluator. """ logger.info('starting testing model of fold {} in exp {}'.format(cf.fold, cf.exp_dir)) net = model.net(cf, logger).cuda() test_predictor = Predictor(cf, net, logger, mode='test') test_evaluator = Evaluator(cf, logger, mode='test') batch_gen = data_loader.get_test_generator(cf, logger) test_results_list = test_predictor.predict_test_set(batch_gen, return_results=True) test_evaluator.evaluate_predictions(test_results_list) test_evaluator.score_test_df() if __name__ == '__main__': stime = time.time() parser = argparse.ArgumentParser() parser.add_argument('-m', '--mode', type=str, default='train_test', help='one out of: train / test / train_test / analysis / create_exp') parser.add_argument('-f','--folds', nargs='+', type=int, default=None, help='None runs over all folds in CV. otherwise specify list of folds.') parser.add_argument('--exp_dir', type=str, default='/path/to/experiment/directory', help='path to experiment dir. will be created if non existent.') parser.add_argument('--server_env', default=False, action='store_true', help='change IO settings to deploy models on a cluster.') parser.add_argument('--slurm_job_id', type=str, default=None, help='job scheduler info') parser.add_argument('--use_stored_settings', default=False, action='store_true', help='load configs from existing exp_dir instead of source dir. always done for testing, ' 'but can be set to true to do the same for training. 
useful in job scheduler environment, ' 'where source code might change before the job actually runs.') parser.add_argument('--resume_to_checkpoint', type=str, default=None, help='if resuming to checkpoint, the desired fold still needs to be parsed via --folds.') parser.add_argument('--exp_source', type=str, default='experiments/toy_exp', help='specifies, from which source experiment to load configs and data_loader.') parser.add_argument('-d', '--dev', default=False, action='store_true', help="development mode: shorten everything") args = parser.parse_args() folds = args.folds resume_to_checkpoint = args.resume_to_checkpoint if args.mode == 'train' or args.mode == 'train_test': cf = utils.prep_exp(args.exp_source, args.exp_dir, args.server_env, args.use_stored_settings) if args.dev: folds = [0,1] cf.batch_size, cf.num_epochs, cf.min_save_thresh, cf.save_n_models = 3 if cf.dim==2 else 1, 1, 0, 1 cf.num_train_batches, cf.num_val_batches, cf.max_val_patients = 5, 1, 1 cf.test_n_epochs = cf.save_n_models cf.max_test_patients = 1 cf.slurm_job_id = args.slurm_job_id logger = utils.get_logger(cf.exp_dir, cf.server_env) data_loader = utils.import_module('dl', os.path.join(args.exp_source, 'data_loader.py')) model = utils.import_module('model', cf.model_path) logger.info("loaded model from {}".format(cf.model_path)) if folds is None: folds = range(cf.n_cv_splits) for fold in folds: cf.fold_dir = os.path.join(cf.exp_dir, 'fold_{}'.format(fold)) cf.fold = fold cf.resume_to_checkpoint = resume_to_checkpoint if not os.path.exists(cf.fold_dir): os.mkdir(cf.fold_dir) logger.set_logfile(fold=fold) train(logger) cf.resume_to_checkpoint = None if args.mode == 'train_test': test(logger) elif args.mode == 'test': cf = utils.prep_exp(args.exp_source, args.exp_dir, args.server_env, is_training=False, use_stored_settings=True) if args.dev: folds = [0,1] cf.test_n_epochs = 1; cf.max_test_patients = 1 cf.slurm_job_id = args.slurm_job_id logger = utils.get_logger(cf.exp_dir, cf.server_env) 
data_loader = utils.import_module('dl', os.path.join(args.exp_source, 'data_loader.py')) model = utils.import_module('model', cf.model_path) logger.info("loaded model from {}".format(cf.model_path)) if folds is None: folds = range(cf.n_cv_splits) for fold in folds: cf.fold_dir = os.path.join(cf.exp_dir, 'fold_{}'.format(fold)) cf.fold = fold logger.set_logfile(fold=fold) test(logger) # load raw predictions saved by predictor during testing, run aggregation algorithms and evaluation. elif args.mode == 'analysis': cf = utils.prep_exp(args.exp_source, args.exp_dir, args.server_env, is_training=False, use_stored_settings=True) logger = utils.get_logger(cf.exp_dir, cf.server_env) if cf.hold_out_test_set: cf.folds = args.folds predictor = Predictor(cf, net=None, logger=logger, mode='analysis') results_list = predictor.load_saved_predictions(apply_wbc=True) utils.create_csv_output(results_list, cf, logger) else: if folds is None: folds = range(cf.n_cv_splits) for fold in folds: cf.fold_dir = os.path.join(cf.exp_dir, 'fold_{}'.format(fold)) cf.fold = fold logger.set_logfile(fold=fold) predictor = Predictor(cf, net=None, logger=logger, mode='analysis') results_list = predictor.load_saved_predictions(apply_wbc=True) logger.info('starting evaluation...') evaluator = Evaluator(cf, logger, mode='test') evaluator.evaluate_predictions(results_list) evaluator.score_test_df() # create experiment folder and copy scripts without starting job. # useful for cloud deployment where configs might change before job actually runs. 
elif args.mode == 'create_exp': cf = utils.prep_exp(args.exp_source, args.exp_dir, args.server_env, use_stored_settings=True) logger = utils.get_logger(cf.exp_dir) logger.info('created experiment directory at {}'.format(args.exp_dir)) else: raise RuntimeError('mode specified in args is not implemented...') mins, secs = divmod((time.time() - stime), 60) h, mins = divmod(mins, 60) t = "{:d}h:{:02d}m:{:02d}s".format(int(h), int(mins), int(secs)) logger.info("{} total runtime: {}".format(os.path.split(__file__)[1], t)) del logger \ No newline at end of file diff --git a/experiments/toy_exp/configs.py b/experiments/toy_exp/configs.py index c01c4ca..00e9b95 100644 --- a/experiments/toy_exp/configs.py +++ b/experiments/toy_exp/configs.py @@ -1,344 +1,350 @@ #!/usr/bin/env python # Copyright 2018 Division of Medical Image Computing, German Cancer Research Center (DKFZ). # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== import sys import os sys.path.append(os.path.dirname(os.path.realpath(__file__))) import numpy as np from default_configs import DefaultConfigs class configs(DefaultConfigs): def __init__(self, server_env=None): ######################### # Preprocessing # ######################### self.root_dir = '/home/gregor/datasets/toy_mdt' - self.pp_noisy_bg = False ######################### # I/O # ######################### # one out of [2, 3]. dimension the model operates in. 
self.dim = 2 # one out of ['mrcnn', 'retina_net', 'retina_unet', 'detection_unet', 'ufrcnn']. - self.model = 'retina_unet' + self.model = 'detection_unet' DefaultConfigs.__init__(self, self.model, server_env, self.dim) # int [0 < dataset_size]. select n patients from dataset for prototyping. self.select_prototype_subset = None self.hold_out_test_set = True - self.n_train_data = 1000 + self.n_train_data = 2500 # choose one of the 3 toy experiments described in https://arxiv.org/pdf/1811.08661.pdf # one of ['donuts_shape', 'donuts_pattern', 'circles_scale']. - toy_mode = 'donuts_shape' + toy_mode = 'donuts_shape_noise' # path to preprocessed data. self.input_df_name = 'info_df.pickle' self.pp_name = os.path.join(toy_mode, 'train') self.pp_data_path = os.path.join(self.root_dir, self.pp_name) self.pp_test_name = os.path.join(toy_mode, 'test') self.pp_test_data_path = os.path.join(self.root_dir, self.pp_test_name) # settings for deployment in cloud. if server_env: # path to preprocessed data. pp_root_dir = '/path/to/data' self.pp_name = os.path.join(toy_mode, 'train') self.pp_data_path = os.path.join(pp_root_dir, self.pp_name) self.pp_test_name = os.path.join(toy_mode, 'test') self.pp_test_data_path = os.path.join(pp_root_dir, self.pp_test_name) self.select_prototype_subset = None ######################### # Data Loader # ######################### # select modalities from preprocessed data self.channels = [0] self.n_channels = len(self.channels) # patch_size to be used for training. pre_crop_size is the patch_size before data augmentation. self.pre_crop_size_2D = [320, 320] self.patch_size_2D = [320, 320] self.patch_size = self.patch_size_2D if self.dim == 2 else self.patch_size_3D self.pre_crop_size = self.pre_crop_size_2D if self.dim == 2 else self.pre_crop_size_3D # ratio of free sampled batch elements before class balancing is triggered # (>0 to include "empty"/background patches.) self.batch_sample_slack = 0.2 # set 2D network to operate in 3D images. 
self.merge_2D_to_3D_preds = False # feed +/- n neighbouring slices into channel dimension. set to None for no context. self.n_3D_context = None if self.n_3D_context is not None and self.dim == 2: self.n_channels *= (self.n_3D_context * 2 + 1) ######################### # Architecture # ######################### self.start_filts = 48 if self.dim == 2 else 18 self.end_filts = self.start_filts * 4 if self.dim == 2 else self.start_filts * 2 self.res_architecture = 'resnet50' # 'resnet101' , 'resnet50' self.norm = None # one of None, 'instance_norm', 'batch_norm' - self.weight_decay = 0 + self.weight_decay = 1e-4 # one of 'xavier_uniform', 'xavier_normal', or 'kaiming_normal', None (=default = 'kaiming_uniform') self.weight_init = None ######################### # Schedule / Selection # ######################### - self.num_epochs = 16 + self.num_epochs = 11 self.num_train_batches = 100 if self.dim == 2 else 200 self.batch_size = 20 if self.dim == 2 else 8 self.do_validation = True # decide whether to validate on entire patient volumes (like testing) or sampled patches (like training) # the former is morge accurate, while the latter is faster (depending on volume size) self.val_mode = 'val_patient' # one of 'val_sampling' , 'val_patient' if self.val_mode == 'val_patient': self.max_val_patients = None # if 'None' iterates over entire val_set once. if self.val_mode == 'val_sampling': self.num_val_batches = 50 + # set dynamic_lr_scheduling to True to apply LR scheduling with below settings. + self.dynamic_lr_scheduling = True + self.lr_decay_factor = 0.5 + self.scheduling_patience = int(self.num_train_batches * self.batch_size / 2400) + self.scheduling_criterion = 'malignant_ap' + self.scheduling_mode = 'min' if "loss" in self.scheduling_criterion else 'max' + ######################### # Testing / Plotting # ######################### # set the top-n-epochs to be saved for temporal averaging in testing. 
self.save_n_models = 2 self.test_n_epochs = 2 # set a minimum epoch number for saving in case of instabilities in the first phase of training. self.min_save_thresh = 0 if self.dim == 2 else 0 self.report_score_level = ['patient', 'rois'] # choose list from 'patient', 'rois' self.class_dict = {1: 'benign', 2: 'malignant'} # 0 is background. self.patient_class_of_interest = 2 # patient metrics are only plotted for one class. self.ap_match_ious = [0.1] # list of ious to be evaluated for ap-scoring. self.model_selection_criteria = ['benign_ap', 'malignant_ap'] # criteria to average over for saving epochs. self.min_det_thresh = 0.1 # minimum confidence value to select predictions for evaluation. # threshold for clustering predictions together (wcs = weighted cluster scoring). # needs to be >= the expected overlap of predictions coming from one model (typically NMS threshold). # if too high, preds of the same object are separate clusters. self.wcs_iou = 1e-5 self.plot_prediction_histograms = True self.plot_stat_curves = False ######################### # Data Augmentation # ######################### self.da_kwargs={ 'do_elastic_deform': True, 'alpha':(0., 1500.), 'sigma':(30., 50.), 'do_rotation':True, 'angle_x': (0., 2 * np.pi), 'angle_y': (0., 0), 'angle_z': (0., 0), 'do_scale': True, 'scale':(0.8, 1.1), 'random_crop':False, 'rand_crop_dist': (self.patch_size[0] / 2. - 3, self.patch_size[1] / 2. - 3), 'border_mode_data': 'constant', 'border_cval_data': 0, 'order_data': 1 } if self.dim == 3: self.da_kwargs['do_elastic_deform'] = False self.da_kwargs['angle_x'] = (0, 0.0) self.da_kwargs['angle_y'] = (0, 0.0) #must be 0!! 
self.da_kwargs['angle_z'] = (0., 2 * np.pi) ######################### # Add model specifics # ######################### {'detection_unet': self.add_det_unet_configs, 'mrcnn': self.add_mrcnn_configs, 'ufrcnn': self.add_mrcnn_configs, 'ufrcnn_surrounding': self.add_mrcnn_configs, 'retina_net': self.add_mrcnn_configs, 'retina_unet': self.add_mrcnn_configs, 'prob_detector': self.add_mrcnn_configs, }[self.model]() def add_det_unet_configs(self): self.learning_rate = [1e-4] * self.num_epochs # aggregation from pixel perdiction to object scores (connected component). One of ['max', 'median'] self.aggregation_operation = 'max' # max number of roi candidates to identify per image (slice in 2D, volume in 3D) self.n_roi_candidates = 3 if self.dim == 2 else 8 # loss mode: either weighted cross entropy ('wce'), batch-wise dice loss ('dice), or the sum of both ('dice_wce') self.seg_loss_mode = 'dice_wce' # if <1, false positive predictions in foreground are penalized less. self.fp_dice_weight = 1 if self.dim == 2 else 1 self.wce_weights = [1, 1, 1] self.detection_min_confidence = self.min_det_thresh # if 'True', loss distinguishes all classes, else only foreground vs. background (class agnostic). self.class_specific_seg_flag = True self.num_seg_classes = 3 if self.class_specific_seg_flag else 2 self.head_classes = self.num_seg_classes def add_mrcnn_configs(self): # learning rate is a list with one entry per epoch. self.learning_rate = [1e-4] * self.num_epochs # disable mask head loss. (e.g. if no pixelwise annotations available) self.frcnn_mode = False # disable the re-sampling of mask proposals to original size for speed-up. # since evaluation is detection-driven (box-matching) and not instance segmentation-driven (iou-matching), # mask-outputs are optional. self.return_masks_in_val = True self.return_masks_in_test = False # set number of proposal boxes to plot after each epoch. 
self.n_plot_rpn_props = 0 if self.dim == 2 else 0 # number of classes for head networks: n_foreground_classes + 1 (background) self.head_classes = 3 # seg_classes hier refers to the first stage classifier (RPN) self.num_seg_classes = 2 # foreground vs. background # feature map strides per pyramid level are inferred from architecture. self.backbone_strides = {'xy': [4, 8, 16, 32], 'z': [1, 2, 4, 8]} # anchor scales are chosen according to expected object sizes in data set. Default uses only one anchor scale # per pyramid level. (outer list are pyramid levels (corresponding to BACKBONE_STRIDES), inner list are scales per level.) self.rpn_anchor_scales = {'xy': [[8], [16], [32], [64]], 'z': [[2], [4], [8], [16]]} # choose which pyramid levels to extract features from: P2: 0, P3: 1, P4: 2, P5: 3. self.pyramid_levels = [0, 1, 2, 3] # number of feature maps in rpn. typically lowered in 3D to save gpu-memory. self.n_rpn_features = 512 if self.dim == 2 else 128 # anchor ratios and strides per position in feature maps. self.rpn_anchor_ratios = [0.5, 1., 2.] self.rpn_anchor_stride = 1 # Threshold for first stage (RPN) non-maximum suppression (NMS): LOWER == HARDER SELECTION self.rpn_nms_threshold = 0.7 if self.dim == 2 else 0.7 # loss sampling settings. self.rpn_train_anchors_per_image = 2 #per batch element self.train_rois_per_image = 2 #per batch element self.roi_positive_ratio = 0.5 self.anchor_matching_iou = 0.7 # factor of top-k candidates to draw from per negative sample (stochastic-hard-example-mining). # poolsize to draw top-k candidates from will be shem_poolsize * n_negative_samples. 
self.shem_poolsize = 10 self.pool_size = (7, 7) if self.dim == 2 else (7, 7, 3) self.mask_pool_size = (14, 14) if self.dim == 2 else (14, 14, 5) self.mask_shape = (28, 28) if self.dim == 2 else (28, 28, 10) self.rpn_bbox_std_dev = np.array([0.1, 0.1, 0.1, 0.2, 0.2, 0.2]) self.bbox_std_dev = np.array([0.1, 0.1, 0.1, 0.2, 0.2, 0.2]) self.window = np.array([0, 0, self.patch_size[0], self.patch_size[1]]) self.scale = np.array([self.patch_size[0], self.patch_size[1], self.patch_size[0], self.patch_size[1]]) if self.dim == 2: self.rpn_bbox_std_dev = self.rpn_bbox_std_dev[:4] self.bbox_std_dev = self.bbox_std_dev[:4] self.window = self.window[:4] self.scale = self.scale[:4] # pre-selection in proposal-layer (stage 1) for NMS-speedup. applied per batch element. self.pre_nms_limit = 3000 if self.dim == 2 else 6000 # n_proposals to be selected after NMS per batch element. too high numbers blow up memory if "detect_while_training" is True, # since proposals of the entire batch are forwarded through second stage in as one "batch". self.roi_chunk_size = 800 if self.dim == 2 else 600 self.post_nms_rois_training = 500 if self.dim == 2 else 75 self.post_nms_rois_inference = 500 # Final selection of detections (refine_detections) self.model_max_instances_per_batch_element = 10 if self.dim == 2 else 30 # per batch element and class. self.detection_nms_threshold = 1e-5 # needs to be > 0, otherwise all predictions are one cluster. 
self.model_min_confidence = 0.1 if self.dim == 2: self.backbone_shapes = np.array( [[int(np.ceil(self.patch_size[0] / stride)), int(np.ceil(self.patch_size[1] / stride))] for stride in self.backbone_strides['xy']]) else: self.backbone_shapes = np.array( [[int(np.ceil(self.patch_size[0] / stride)), int(np.ceil(self.patch_size[1] / stride)), int(np.ceil(self.patch_size[2] / stride_z))] for stride, stride_z in zip(self.backbone_strides['xy'], self.backbone_strides['z'] )]) if self.model == 'ufrcnn': self.operate_stride1 = True self.class_specific_seg_flag = True self.num_seg_classes = 3 if self.class_specific_seg_flag else 2 self.frcnn_mode = True if self.model == 'retina_net' or self.model == 'retina_unet' or self.model == 'prob_detector': # implement extra anchor-scales according to retina-net publication. self.rpn_anchor_scales['xy'] = [[ii[0], ii[0] * (2 ** (1 / 3)), ii[0] * (2 ** (2 / 3))] for ii in self.rpn_anchor_scales['xy']] self.rpn_anchor_scales['z'] = [[ii[0], ii[0] * (2 ** (1 / 3)), ii[0] * (2 ** (2 / 3))] for ii in self.rpn_anchor_scales['z']] self.n_anchors_per_pos = len(self.rpn_anchor_ratios) * 3 self.n_rpn_features = 256 if self.dim == 2 else 64 # pre-selection of detections for NMS-speedup. per entire batch. self.pre_nms_limit = 10000 if self.dim == 2 else 50000 # anchor matching iou is lower than in Mask R-CNN according to https://arxiv.org/abs/1708.02002 self.anchor_matching_iou = 0.5 # if 'True', seg loss distinguishes all classes, else only foreground vs. background (class agnostic). self.num_seg_classes = 3 if self.class_specific_seg_flag else 2 if self.model == 'retina_unet': self.operate_stride1 = True diff --git a/experiments/toy_exp/generate_toys.py b/experiments/toy_exp/generate_toys.py index 4d8acae..7dd4258 100644 --- a/experiments/toy_exp/generate_toys.py +++ b/experiments/toy_exp/generate_toys.py @@ -1,130 +1,130 @@ #!/usr/bin/env python # Copyright 2018 Division of Medical Image Computing, German Cancer Research Center (DKFZ). 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== import os, time import numpy as np import pandas as pd import pickle import argparse from multiprocessing import Pool def multi_processing_create_image(inputs): out_dir, six, foreground_margin, class_diameters, mode, noisy_bg = inputs print('processing {} {}'.format(out_dir, six)) img = np.random.rand(320, 320) if noisy_bg else np.zeros((320, 320)) seg = np.zeros((320, 320)).astype('uint8') center_x = np.random.randint(foreground_margin, img.shape[0] - foreground_margin) center_y = np.random.randint(foreground_margin, img.shape[1] - foreground_margin) class_id = np.random.randint(0, 2) for y in range(img.shape[0]): for x in range(img.shape[0]): if ((x - center_x) ** 2 + (y - center_y) ** 2 - class_diameters[class_id] ** 2) < 0: img[y][x] += 0.2 seg[y][x] = 1 if 'donuts' in mode: hole_diameter = 12 if class_id == 1: for y in range(img.shape[0]): for x in range(img.shape[0]): if ((x - center_x) ** 2 + (y - center_y) ** 2 - hole_diameter ** 2) < 0: img[y][x] -= 0.2 if mode == 'donuts_shape': seg[y][x] = 0 out = np.concatenate((img[None], seg[None])) out_path = os.path.join(out_dir, '{}.npy'.format(six)) np.save(out_path, out) with open(os.path.join(out_dir, 'meta_info_{}.pickle'.format(six)), 'wb') as handle: pickle.dump([out_path, class_id, str(six)], handle) -def generate_experiment(cf, exp_name, n_train_images, n_test_images, mode, class_diameters=(20, 
20)): +def generate_experiment(cf, exp_name, n_train_images, n_test_images, mode, class_diameters=(20, 20), noisy_bg=False): train_dir = os.path.join(cf.root_dir, exp_name, 'train') test_dir = os.path.join(cf.root_dir, exp_name, 'test') os.makedirs(train_dir, exist_ok=True) os.makedirs(test_dir, exist_ok=True) # enforced distance between object center and image edge. foreground_margin = int(np.ceil(np.max(class_diameters) / 1.25)) - noisy_bg = cf.pp_noisy_bg if hasattr(cf, "pp_noisy_bg") else True info = [] info += [[train_dir, six, foreground_margin, class_diameters, mode, noisy_bg] for six in range(n_train_images)] info += [[test_dir, six, foreground_margin, class_diameters, mode, noisy_bg] for six in range(n_test_images)] print('starting creation of {} images'.format(len(info))) - pool = Pool(processes=os.cpu_count()-1) + pool = Pool(processes=max(1, (os.cpu_count() or 1) - 1)) pool.map(multi_processing_create_image, info) pool.close() pool.join() aggregate_meta_info(train_dir) aggregate_meta_info(test_dir) def aggregate_meta_info(exp_dir): files = [os.path.join(exp_dir, f) for f in os.listdir(exp_dir) if 'meta_info' in f] df = pd.DataFrame(columns=['path', 'class_id', 'pid']) for f in files: with open(f, 'rb') as handle: df.loc[len(df)] = pickle.load(handle) df.to_pickle(os.path.join(exp_dir, 'info_df.pickle')) print ("aggregated meta info to df with length", len(df)) if __name__ == '__main__': stime = time.time() import sys sys.path.append("../..") import utils.exp_utils as utils parser = argparse.ArgumentParser() mode_choices = ['donuts_shape', 'donuts_pattern', 'circles_scale'] parser.add_argument('-m', '--modes', nargs='+', type=str, default=mode_choices, choices=mode_choices) + parser.add_argument('--noise', action='store_true', help="if given, add noise to the sample bg.") parser.add_argument('--n_train', type=int, default=1500, help="Nr. of train images to generate.") parser.add_argument('--n_test', type=int, default=1000, help="Nr. 
of test images to generate.") args = parser.parse_args() cf_file = utils.import_module("cf", "configs.py") cf = cf_file.configs() class_diameters = { 'donuts_shape': (20, 20), 'donuts_pattern': (20, 20), 'circles_scale': (19, 20) } for mode in args.modes: - generate_experiment(cf, mode, n_train_images=args.n_train, n_test_images=args.n_test, mode=mode, - class_diameters=class_diameters[mode]) + generate_experiment(cf, mode + ("_noise" if args.noise else ""), n_train_images=args.n_train, n_test_images=args.n_test, mode=mode, + class_diameters=class_diameters[mode], noisy_bg=args.noise) mins, secs = divmod((time.time() - stime), 60) h, mins = divmod(mins, 60) t = "{:d}h:{:02d}m:{:02d}s".format(int(h), int(mins), int(secs)) print("{} total runtime: {}".format(os.path.split(__file__)[1], t)) diff --git a/requirements.txt b/requirements.txt index 4649d7a..2d7d947 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,60 +1,62 @@ absl-py==0.9.0 backcall==0.1.0 batchgenerators==0.19.3 cachetools==4.0.0 certifi==2019.11.28 cffi==1.11.5 chardet==3.0.4 cycler==0.10.0 Cython==0.29.14 decorator==4.4.1 future==0.18.2 google-auth==1.10.0 google-auth-oauthlib==0.4.1 grpcio==1.26.0 idna==2.8 imageio==2.6.1 +ipython-genutils==0.2.0 jedi==0.15.1 joblib==0.14.1 kiwisolver==1.1.0 linecache2==1.0.0 Markdown==3.1.1 matplotlib==3.1.2 +medicaldetectiontoolkit==0.0.1 networkx==2.4 numpy==1.17.4 oauthlib==3.1.0 pandas==0.25.3 parso==0.5.2 pexpect==4.7.0 pickleshare==0.7.5 Pillow==6.2.1 prompt-toolkit==3.0.2 protobuf==3.11.2 ptyprocess==0.6.0 pyasn1==0.4.8 pyasn1-modules==0.2.7 pycparser==2.19 Pygments==2.5.2 pyparsing==2.4.5 python-dateutil==2.8.1 pytz==2019.3 PyWavelets==1.1.1 requests==2.22.0 requests-oauthlib==1.3.0 rsa==4.0 scikit-image==0.16.2 scikit-learn==0.22.1 scipy==1.3.3 six==1.13.0 sklearn==0.0 tensorboard==2.1.0 threadpoolctl==1.1.0 -torch==1.3.1 -torchvision==0.4.2 +torch==1.4.0 +torchvision==0.5.0 tqdm==4.40.2 traceback2==1.4.0 traitlets==4.3.3 unittest2==1.1.0 
urllib3==1.25.7 wcwidth==0.1.7 Werkzeug==0.16.0