+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/exec.py b/exec.py
index 21c2242..271f40c 100644
--- a/exec.py
+++ b/exec.py
@@ -1,220 +1,220 @@
#!/usr/bin/env python
# Copyright 2018 Division of Medical Image Computing, German Cancer Research Center (DKFZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""execution script."""
import argparse
import os
import time
import torch
import utils.exp_utils as utils
from evaluator import Evaluator
from predictor import Predictor
from plotting import plot_batch_prediction
def train(logger):
"""
perform the training routine for a given fold. saves plots and selected parameters to the experiment dir
specified in the configs.
"""
# NOTE(review): indentation is not visible in this view, so the nesting of the loops and branches below has to be
# inferred from the statements themselves -- confirm against the real file before relying on these comments.
# cf, model and data_loader are not parameters: they are module-level names assigned in the __main__ section.
logger.info('performing training in {}D over fold {} on experiment {} with model {}'.format(
cf.dim, cf.fold, cf.exp_dir, cf.model))
net = model.net(cf, logger).cuda()
# the optimizer starts with the first entry of cf.learning_rate; the rate is overwritten at the start of every epoch.
optimizer = torch.optim.Adam(net.parameters(), lr=cf.learning_rate[0], weight_decay=cf.weight_decay)
model_selector = utils.ModelSelector(cf, logger)
train_evaluator = Evaluator(cf, logger, mode='train')
val_evaluator = Evaluator(cf, logger, mode=cf.val_mode)
starting_epoch = 1
# prepare monitoring
monitor_metrics, TrainingPlot = utils.prepare_monitoring(cf)
# when resuming, load_checkpoint receives net and optimizer and returns the epoch to continue from together with
# the monitoring metrics, which replace the freshly prepared ones.
if cf.resume_to_checkpoint:
starting_epoch, monitor_metrics = utils.load_checkpoint(cf.resume_to_checkpoint, net, optimizer)
logger.info('resumed to checkpoint {} at epoch {}'.format(cf.resume_to_checkpoint, starting_epoch))
logger.info('loading dataset and initializing batch generators...')
batch_gen = data_loader.get_train_generators(cf, logger)
# epochs are counted from 1, hence the "+ 1" on the upper bound and the "epoch - 1" index into cf.learning_rate.
for epoch in range(starting_epoch, cf.num_epochs + 1):
logger.info('starting training epoch {}'.format(epoch))
for param_group in optimizer.param_groups:
param_group['lr'] = cf.learning_rate[epoch - 1]
start_time = time.time()
net.train()
train_results_list = []
# one optimisation step per batch: forward pass, reset gradients, backward on 'torch_loss', optimizer step.
for bix in range(cf.num_train_batches):
batch = next(batch_gen['train'])
tic_fw = time.time()
results_dict = net.train_forward(batch)
tic_bw = time.time()
optimizer.zero_grad()
results_dict['torch_loss'].backward()
optimizer.step()
# logged timings: fw = forward pass, bw = everything after the forward pass (zero_grad, backward, step).
logger.info('tr. batch {0}/{1} (ep. {2}) fw {3:.3f}s / bw {4:.3f}s / total {5:.3f}s || '
.format(bix + 1, cf.num_train_batches, epoch, tic_bw - tic_fw,
time.time() - tic_bw, time.time() - tic_fw) + results_dict['logger_string'])
train_results_list.append([results_dict['boxes'], batch['pid']])
monitor_metrics['train']['monitor_values'][epoch].append(results_dict['monitor_values'])
_, monitor_metrics['train'] = train_evaluator.evaluate_predictions(train_results_list, monitor_metrics['train'])
train_time = time.time() - start_time
logger.info('starting validation in mode {}.'.format(cf.val_mode))
# validation is executed without gradient tracking and with the net switched to eval mode.
with torch.no_grad():
net.eval()
if cf.do_validation:
val_results_list = []
val_predictor = Predictor(cf, net, logger, mode='val')
for _ in range(batch_gen['n_val']):
batch = next(batch_gen[cf.val_mode])
# 'val_patient' predicts through the Predictor, 'val_sampling' reuses train_forward with is_validation=True.
# NOTE(review): any other cf.val_mode matches neither branch, leaving results_dict from an earlier
# iteration / the training loop -- confirm val_mode is restricted to these two values.
if cf.val_mode == 'val_patient':
results_dict = val_predictor.predict_patient(batch)
elif cf.val_mode == 'val_sampling':
results_dict = net.train_forward(batch, is_validation=True)
val_results_list.append([results_dict['boxes'], batch['pid']])
monitor_metrics['val']['monitor_values'][epoch].append(results_dict['monitor_values'])
_, monitor_metrics['val'] = val_evaluator.evaluate_predictions(val_results_list, monitor_metrics['val'])
model_selector.run_model_selection(net, optimizer, monitor_metrics, epoch)
# update monitoring and prediction plots
TrainingPlot.update_and_save(monitor_metrics, epoch)
epoch_time = time.time() - start_time
# the reported validation time is the remainder of the epoch time after subtracting the training time.
logger.info('trained epoch {}: took {} sec. ({} train / {} val)'.format(
epoch, epoch_time, train_time, epoch_time-train_time))
# one additional 'val_sampling' batch is drawn and forwarded only to plot example predictions.
batch = next(batch_gen['val_sampling'])
results_dict = net.train_forward(batch, is_validation=True)
logger.info('plotting predictions from validation sampling.')
plot_batch_prediction(batch, results_dict, cf)
def test(logger):
    """
    run inference on the test data of the current fold (or of the hold out set) and evaluate the predictions.
    the resulting statistics are computed and kept by the evaluator.
    reads the module-level names cf, model and data_loader, which are assigned in the __main__ section.
    :param logger: logger instance handed on to the net, the predictor and the evaluator.
    """
    logger.info('starting testing model of fold {} in exp {}'.format(cf.fold, cf.exp_dir))

    network = model.net(cf, logger).cuda()
    predictor = Predictor(cf, network, logger, mode='test')
    evaluator = Evaluator(cf, logger, mode='test')

    generators = data_loader.get_test_generator(cf, logger)
    predictions = predictor.predict_test_set(generators, return_results=True)

    evaluator.evaluate_predictions(predictions)
    evaluator.score_test_df()
# script entry point: parses the command line and dispatches on --mode.
# NOTE(review): indentation is not visible in this view, and lines starting with '- ' / '+ ' are diff markers of the
# surrounding patch (removed / added lines), not Python statements -- confirm the structure against the real file.
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--mode', type=str, default='train_test',
help='one out of: train / test / train_test / analysis / create_exp')
parser.add_argument('--folds', nargs='+', type=int, default=None,
help='None runs over all folds in CV. otherwise specify list of folds.')
parser.add_argument('--exp_dir', type=str, default='/path/to/experiment/directory',
help='path to experiment dir. will be created if non existent.')
parser.add_argument('--server_env', default=False, action='store_true',
help='change IO settings to deploy models on a cluster.')
parser.add_argument('--slurm_job_id', type=str, default=None, help='job scheduler info')
parser.add_argument('--use_stored_settings', default=False, action='store_true',
help='load configs from existing exp_dir instead of source dir. always done for testing, '
'but can be set to true to do the same for training. useful in job scheduler environment, '
'where source code might change before the job actually runs.')
parser.add_argument('--resume_to_checkpoint', type=str, default=None,
help='if resuming to checkpoint, the desired fold still needs to be parsed via --folds.')
parser.add_argument('--exp_source', type=str, default='experiments/toy_exp',
help='specifies, from which source experiment to load configs and data_loader.')
args = parser.parse_args()
folds = args.folds
resume_to_checkpoint = args.resume_to_checkpoint
# training (optionally followed by testing) per fold.
# cf, model and data_loader are bound at module level here; train() and test() read them as globals.
if args.mode == 'train' or args.mode == 'train_test':
cf = utils.prep_exp(args.exp_source, args.exp_dir, args.server_env, args.use_stored_settings)
cf.slurm_job_id = args.slurm_job_id
model = utils.import_module('model', cf.model_path)
data_loader = utils.import_module('dl', os.path.join(args.exp_source, 'data_loader.py'))
if folds is None:
folds = range(cf.n_cv_splits)
for fold in folds:
cf.fold_dir = os.path.join(cf.exp_dir, 'fold_{}'.format(fold))
cf.fold = fold
# NOTE(review): cf.resume_to_checkpoint is cleared after train() below, but it is assigned again here from the
# unchanged local resume_to_checkpoint on every iteration -- presumably only the first fold was meant to resume;
# confirm the intent.
cf.resume_to_checkpoint = resume_to_checkpoint
if not os.path.exists(cf.fold_dir):
os.mkdir(cf.fold_dir)
logger = utils.get_logger(cf.fold_dir)
train(logger)
cf.resume_to_checkpoint = None
if args.mode == 'train_test':
test(logger)
# testing only: configs are always taken from the stored settings in the experiment directory.
elif args.mode == 'test':
cf = utils.prep_exp(args.exp_source, args.exp_dir, args.server_env, is_training=False, use_stored_settings=True)
cf.slurm_job_id = args.slurm_job_id
model = utils.import_module('model', cf.model_path)
data_loader = utils.import_module('dl', os.path.join(args.exp_source, 'data_loader.py'))
if folds is None:
folds = range(cf.n_cv_splits)
for fold in folds:
cf.fold_dir = os.path.join(cf.exp_dir, 'fold_{}'.format(fold))
logger = utils.get_logger(cf.fold_dir)
cf.fold = fold
test(logger)
# load raw predictions saved by predictor during testing, run aggregation algorithms and evaluation.
elif args.mode == 'analysis':
cf = utils.prep_exp(args.exp_source, args.exp_dir, args.server_env, is_training=False, use_stored_settings=True)
logger = utils.get_logger(cf.exp_dir)
# with a hold out test set a single Predictor is run with cf.folds taken from --folds and the results go to
# create_csv_output; otherwise one Predictor / Evaluator pass is run per fold.
if cf.hold_out_test_set:
cf.folds = args.folds
predictor = Predictor(cf, net=None, logger=logger, mode='analysis')
- results_list = predictor.load_saved_predictions(apply_wbc=True, save_preds_to_csv=cf.save_preds_to_csv)
- utils.create_csv_output(cf, logger, results_list)
+ results_list = predictor.load_saved_predictions(apply_wbc=True)
+ utils.create_csv_output(results_list, cf, logger)
else:
if folds is None:
folds = range(cf.n_cv_splits)
for fold in folds:
cf.fold_dir = os.path.join(cf.exp_dir, 'fold_{}'.format(fold))
cf.fold = fold
predictor = Predictor(cf, net=None, logger=logger, mode='analysis')
- results_list = predictor.load_saved_predictions(apply_wbc=True, save_preds_to_csv=cf.save_preds_to_csv)
+ results_list = predictor.load_saved_predictions(apply_wbc=True)
logger.info('starting evaluation...')
evaluator = Evaluator(cf, logger, mode='test')
evaluator.evaluate_predictions(results_list)
evaluator.score_test_df()
# create experiment folder and copy scripts without starting job.
# useful for cloud deployment where configs might change before job actually runs.
elif args.mode == 'create_exp':
cf = utils.prep_exp(args.exp_source, args.exp_dir, args.server_env, use_stored_settings=True)
logger = utils.get_logger(cf.exp_dir)
logger.info('created experiment directory at {}'.format(args.exp_dir))
else:
raise RuntimeError('mode specified in args is not implemented...')
diff --git a/experiments/lidc_exp/data_loader.py b/experiments/lidc_exp/data_loader.py
index 2ff3fb6..0c05a20 100644
--- a/experiments/lidc_exp/data_loader.py
+++ b/experiments/lidc_exp/data_loader.py
@@ -1,455 +1,455 @@
#!/usr/bin/env python
# Copyright 2018 Division of Medical Image Computing, German Cancer Research Center (DKFZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
'''
Example Data Loader for the LIDC data set. This dataloader expects preprocessed data in .npy or .npz files per patient and
a pandas dataframe in the same directory containing the meta-info e.g. file paths, labels, foreground slice-ids.
'''
import numpy as np
import os
from collections import OrderedDict
import pandas as pd
import pickle
import time
import subprocess
import utils.dataloader_utils as dutils
# batch generator tools from https://github.com/MIC-DKFZ/batchgenerators
from batchgenerators.dataloading.data_loader import SlimDataLoaderBase
from batchgenerators.transforms.spatial_transforms import MirrorTransform as Mirror
from batchgenerators.transforms.abstract_transforms import Compose
from batchgenerators.dataloading.multi_threaded_augmenter import MultiThreadedAugmenter
from batchgenerators.dataloading import SingleThreadedAugmenter
from batchgenerators.transforms.spatial_transforms import SpatialTransform
from batchgenerators.transforms.crop_and_pad_transforms import CenterCropTransform
from batchgenerators.transforms.utility_transforms import ConvertSegToBoundingBoxCoordinates
def get_train_generators(cf, logger):
    """
    build the batch generators used during training and return them in one dictionary.
    patients are assigned according to the cv folds: the fold split is generated and pickled to
    cf.exp_dir/fold_ids.pickle while cf.created_fold_id_pickle is still falsy, and read back from there afterwards.
    each fold provides train / val / test indices; if cf.hold_out_test_set is True, the test split is added
    to the training data.
    :param cf: config object of the experiment. cf.created_fold_id_pickle is set to True once the split is written.
    :param logger: logger instance.
    :return: batch_gen: dictionary with the entries 'train' and 'val_sampling', plus 'val_patient' if
        cf.val_mode == 'val_patient', and 'n_val' (number of validation iterations per epoch).
    """
    all_data = load_dataset(cf, logger)
    all_pids_list = np.unique([entry['pid'] for entry in all_data.values()])

    fold_file = os.path.join(cf.exp_dir, 'fold_ids.pickle')
    if cf.created_fold_id_pickle:
        with open(fold_file, 'rb') as handle:
            fg = pickle.load(handle)
    else:
        fg = dutils.fold_generator(seed=cf.seed, n_splits=cf.n_cv_splits, len_data=len(all_pids_list)).get_fold_names()
        with open(fold_file, 'wb') as handle:
            pickle.dump(fg, handle)
        cf.created_fold_id_pickle = True

    train_ix, val_ix, test_ix, _ = fg[cf.fold]

    def pids_at(indices):
        # translate fold indices into patient ids.
        return [all_pids_list[ix] for ix in indices]

    def select_patients(pids):
        # keep only the data set entries whose pid is contained in pids.
        return {key: entry for key, entry in all_data.items() if any(p == entry['pid'] for p in pids)}

    train_pids = pids_at(train_ix)
    val_pids = pids_at(val_ix)
    if cf.hold_out_test_set:
        train_pids.extend(pids_at(test_ix))

    train_data = select_patients(train_pids)
    val_data = select_patients(val_pids)

    logger.info("data set loaded with: {} train / {} val / {} test patients".format(len(train_ix), len(val_ix), len(test_ix)))

    batch_gen = {
        'train': create_data_gen_pipeline(train_data, cf=cf, is_training=True),
        'val_sampling': create_data_gen_pipeline(val_data, cf=cf, is_training=False),
    }
    if cf.val_mode == 'val_patient':
        batch_gen['val_patient'] = PatientBatchIterator(val_data, cf=cf)
        batch_gen['n_val'] = cf.max_val_patients if cf.max_val_patients is not None else len(val_ix)
    else:
        batch_gen['n_val'] = cf.num_val_batches

    return batch_gen
def get_test_generator(cf, logger):
    """
    wrapper function for creating the test batch generator pipeline.
    selects patients according to cv folds (read from cf.exp_dir/fold_ids.pickle, generated by first run/fold of
    experiment). If cf.hold_out_test_set is True, gets the data from an external folder instead: cf.pp_data_path is
    overwritten with cf.pp_test_data_path and the complete data set found there is used.
    :param cf: config object of the experiment.
    :param logger: logger instance.
    :return: batch_gen: dictionary with the entries 'test' (PatientBatchIterator over the test patients) and
        'n_test' (number of test patients).
    """
    if cf.hold_out_test_set:
        cf.pp_data_path = cf.pp_test_data_path
        # no fold indices exist for the hold out set: load_dataset loads everything when given None.
        test_ix = None
    else:
        with open(os.path.join(cf.exp_dir, 'fold_ids.pickle'), 'rb') as handle:
            fold_list = pickle.load(handle)
        _, _, test_ix, _ = fold_list[cf.fold]

    test_data = load_dataset(cf, logger, test_ix)

    # test_ix is None for the hold out set, so len(test_ix) would raise a TypeError there.
    # load_dataset returns one entry per pid, hence its length is the number of patients in that case.
    n_test = len(test_data) if test_ix is None else len(test_ix)
    logger.info("data set loaded with: {} test patients".format(n_test))

    batch_gen = {}
    batch_gen['test'] = PatientBatchIterator(test_data, cf=cf)
    batch_gen['n_test'] = n_test
    return batch_gen
def load_dataset(cf, logger, subset_ixs=None):
    """
    loads the meta-info of the dataset. if deployed in cloud (cf.server_env) also copies and unpacks the data to the
    working directory /ssd/<slurm_job_id>/<pp_name>/<crop_name> and redirects cf.pp_data_path to it.
    :param cf: config object of the experiment. cf.pp_data_path (and cf.data_source_dir) may be modified.
    :param logger: logger instance.
    :param subset_ixs: subset indices to be loaded from the dataset. used e.g. for testing to only load the test folds.
        indices refer to the sorted unique pids of the info df. None loads all patients.
    :return: data: OrderedDict with one entry per pid. each entry is a dictionary with the keys 'data' / 'seg'
        (paths to the preprocessed numpy arrays to be loaded during batch-generation), 'pid', 'class_target'
        (binarized malignancy scores) and 'fg_slices'.
    """
    if cf.server_env:
        copy_data = True
        target_dir = os.path.join('/ssd', cf.slurm_job_id, cf.pp_name, cf.crop_name)
        if not os.path.exists(target_dir):
            cf.data_source_dir = cf.pp_data_path
            os.makedirs(target_dir)
            df_target = os.path.join(target_dir, cf.input_df_name)
            # argument list instead of a formatted shell string: paths containing spaces or shell
            # metacharacters are passed to rsync unchanged.
            subprocess.call(['rsync', '-av', os.path.join(cf.data_source_dir, cf.input_df_name), df_target])
            logger.info('created target dir and info df at {}'.format(df_target))
        elif subset_ixs is None:
            # target dir already exists and the full data set is requested: nothing to copy.
            copy_data = False
        # NOTE(review): if target_dir already exists and subset_ixs is given, copy_data stays True although
        # cf.data_source_dir is only assigned in the branch above -- confirm it is set elsewhere in that case.
        cf.pp_data_path = target_dir

    p_df = pd.read_pickle(os.path.join(cf.pp_data_path, cf.input_df_name))

    if cf.select_prototype_subset is not None:
        prototype_pids = p_df.pid.tolist()[:cf.select_prototype_subset]
        p_df = p_df[p_df.pid.isin(prototype_pids)]
        logger.warning('WARNING: using prototyping data subset!!!')

    if subset_ixs is not None:
        # compute the sorted unique pids once instead of once per requested index.
        unique_pids = np.unique(p_df.pid.tolist())
        subset_pids = [unique_pids[ix] for ix in subset_ixs]
        p_df = p_df[p_df.pid.isin(subset_pids)]
        logger.info('subset: selected {} instances from df'.format(len(p_df)))

    if cf.server_env and copy_data:
        copy_and_unpack_data(logger, p_df.pid.tolist(), cf.fold_dir, cf.data_source_dir, target_dir)

    # convert the needed columns to lists once, outside of the per-patient loop.
    pids = p_df.pid.tolist()
    class_targets = p_df['class_target'].tolist()
    fg_slices = p_df['fg_slices'].tolist()

    data = OrderedDict()
    for pid, patient_targets, patient_fg_slices in zip(pids, class_targets, fg_slices):
        data[pid] = {
            'data': os.path.join(cf.pp_data_path, '{}_img.npy'.format(pid)),
            'seg': os.path.join(cf.pp_data_path, '{}_rois.npy'.format(pid)),
            'pid': pid,
            # for the experiment conducted here, malignancy scores are binarized: (benign: 1-2, malignant: 3-5)
            'class_target': [1 if score >= 3 else 0 for score in patient_targets],
            'fg_slices': patient_fg_slices,
        }
    return data
def create_data_gen_pipeline(patient_data, cf, is_training=True):
    """
    create multi-threaded train/val/test batch generation and augmentation pipeline.
    :param patient_data: dictionary containing one dictionary per patient in the train/test subset.
    :param cf: config object of the experiment.
    :param is_training: (optional) whether to perform data augmentation (training) or not (validation/testing)
    :return: multithreaded_generator
    """
    # the batch generator is the first element of the pipeline.
    data_gen = BatchGenerator(patient_data, batch_size=cf.batch_size, cf=cf)

    final_patch_size = cf.patch_size[:cf.dim]
    if is_training:
        # training: mirroring followed by the spatial augmentations configured in cf.da_kwargs.
        aug = cf.da_kwargs
        my_transforms = [
            Mirror(axes=np.arange(cf.dim)),
            SpatialTransform(
                patch_size=final_patch_size,
                patch_center_dist_from_border=aug['rand_crop_dist'],
                do_elastic_deform=aug['do_elastic_deform'],
                alpha=aug['alpha'],
                sigma=aug['sigma'],
                do_rotation=aug['do_rotation'],
                angle_x=aug['angle_x'],
                angle_y=aug['angle_y'],
                angle_z=aug['angle_z'],
                do_scale=aug['do_scale'],
                scale=aug['scale'],
                random_crop=aug['random_crop'],
            ),
        ]
    else:
        # validation/testing: no augmentation, only a center crop.
        my_transforms = [CenterCropTransform(crop_size=final_patch_size)]

    # both modes end with the conversion of the segmentation into bounding box coordinates.
    my_transforms.append(ConvertSegToBoundingBoxCoordinates(
        cf.dim, get_rois_from_seg_flag=False, class_specific_seg_flag=cf.class_specific_seg_flag))

    # a SingleThreadedAugmenter(data_gen, Compose(my_transforms)) can be swapped in here for debugging.
    return MultiThreadedAugmenter(
        data_gen, Compose(my_transforms), num_processes=cf.n_workers, seeds=range(cf.n_workers))
# NOTE(review): indentation is not visible in this view; the nesting described in the comments of this class is
# inferred from the statements themselves -- confirm against the real file.
class BatchGenerator(SlimDataLoaderBase):
"""
creates the training/validation batch generator. Samples n_batch_size patients (draws a slice from each patient if 2D)
from the data set while maintaining foreground-class balance. Returned patches are cropped/padded to pre_crop_size.
Actual patch_size is obtained after data augmentation.
:param data: data dictionary as provided by 'load_dataset'.
:param batch_size: number of patients to sample for the batch
:return dictionary containing the batch data (b, c, x, y, (z)) / seg (b, 1, x, y, (z)) / pids / class_target
"""
def __init__(self, data, batch_size, cf):
super(BatchGenerator, self).__init__(data, batch_size)
self.cf = cf
self.crop_margin = np.array(self.cf.patch_size)/8. #min distance of ROI center to edge of cropped_patch.
# p_fg: probability used below both for oversampling foreground slices (2D) and for foreground-centred crops.
self.p_fg = 0.5
def generate_train_batch(self):
# batch_patient_labels is initialised here but not used anywhere in the visible code.
batch_data, batch_segs, batch_pids, batch_targets, batch_patient_labels = [], [], [], [], []
class_targets_list = [v['class_target'] for (k, v) in self._data.items()]
# more than two head classes: class-balanced patient sampling; otherwise patients are drawn with np.random.choice.
if self.cf.head_classes > 2:
# samples patients towards equilibrium of foreground classes on a roi-level (after randomly sampling the ratio "batch_sample_slack).
batch_ixs = dutils.get_class_balanced_patients(
class_targets_list, self.batch_size, self.cf.head_classes - 1, slack_factor=self.cf.batch_sample_slack)
else:
batch_ixs = np.random.choice(len(class_targets_list), self.batch_size)
patients = list(self._data.items())
for b in batch_ixs:
patient = patients[b][1]
# arrays are memory-mapped and transposed with axes (1, 2, 0); data additionally gets a leading channel axis.
data = np.transpose(np.load(patient['data'], mmap_mode='r'), axes=(1, 2, 0))[np.newaxis] # (c, y, x, z)
seg = np.transpose(np.load(patient['seg'], mmap_mode='r'), axes=(1, 2, 0))
batch_pids.append(patient['pid'])
batch_targets.append(patient['class_target'])
if self.cf.dim == 2:
# draw random slice from patient while oversampling slices containing foreground objects with p_fg.
if len(patient['fg_slices']) > 0:
fg_prob = self.p_fg / len(patient['fg_slices'])
# NOTE(review): if every slice of the patient is listed in fg_slices the denominator below is 0 -- confirm
# this cannot occur for the data at hand.
bg_prob = (1 - self.p_fg) / (data.shape[3] - len(patient['fg_slices']))
slices_prob = [fg_prob if ix in patient['fg_slices'] else bg_prob for ix in range(data.shape[3])]
slice_id = np.random.choice(data.shape[3], p=slices_prob)
else:
slice_id = np.random.choice(data.shape[3])
# if set to not None, add neighbouring slices to each selected slice in channel dimension.
if self.cf.n_3D_context is not None:
# the volume is padded by n_3D_context slices on the last axis, so the selected slice index shifts by
# the same amount; the 2 * n_3D_context + 1 slices around it are stacked along the first axis.
padded_data = dutils.pad_nd_image(data[0], [(data.shape[-1] + (self.cf.n_3D_context*2))], mode='constant')
padded_slice_id = slice_id + self.cf.n_3D_context
data = (np.concatenate([padded_data[..., ii][np.newaxis] for ii in range(
padded_slice_id - self.cf.n_3D_context, padded_slice_id + self.cf.n_3D_context + 1)], axis=0))
else:
data = data[..., slice_id]
seg = seg[..., slice_id]
# pad data if smaller than pre_crop_size.
if np.any([data.shape[dim + 1] < ps for dim, ps in enumerate(self.cf.pre_crop_size)]):
new_shape = [np.max([data.shape[dim + 1], ps]) for dim, ps in enumerate(self.cf.pre_crop_size)]
data = dutils.pad_nd_image(data, new_shape, mode='constant')
seg = dutils.pad_nd_image(seg, new_shape, mode='constant')
# crop patches of size pre_crop_size, while sampling patches containing foreground with p_fg.
# crop_dims: the spatial axes along which the image is larger than pre_crop_size (data axes are offset by the
# leading channel axis, hence "dim + 1").
crop_dims = [dim for dim, ps in enumerate(self.cf.pre_crop_size) if data.shape[dim + 1] > ps]
if len(crop_dims) > 0:
fg_prob_sample = np.random.rand(1)
# with p_fg: sample random pixel from random ROI and shift center by random value.
if fg_prob_sample < self.p_fg and np.sum(seg) > 0:
# np.unique(seg)[1:] skips the smallest label value -- presumably the background label 0; confirm.
seg_ixs = np.argwhere(seg == np.random.choice(np.unique(seg)[1:], 1))
roi_anchor_pixel = seg_ixs[np.random.choice(seg_ixs.shape[0], 1)][0]
assert seg[tuple(roi_anchor_pixel)] > 0
# sample the patch center coords. constrained by edges of images - pre_crop_size /2. And by
# distance to the desired ROI < patch_size /2.
# (here final patch size to account for center_crop after data augmentation).
sample_seg_center = {}
for ii in crop_dims:
low = np.max((self.cf.pre_crop_size[ii]//2, roi_anchor_pixel[ii] - (self.cf.patch_size[ii]//2 - self.crop_margin[ii])))
high = np.min((data.shape[ii + 1] - self.cf.pre_crop_size[ii]//2,
roi_anchor_pixel[ii] + (self.cf.patch_size[ii]//2 - self.crop_margin[ii])))
# happens if lesion on the edge of the image. dont care about roi anymore,
# just make sure pre-crop is inside image.
if low >= high:
low = data.shape[ii + 1] // 2 - (data.shape[ii + 1] // 2 - self.cf.pre_crop_size[ii] // 2)
high = data.shape[ii + 1] // 2 + (data.shape[ii + 1] // 2 - self.cf.pre_crop_size[ii] // 2)
sample_seg_center[ii] = np.random.randint(low=low, high=high)
else:
# not guaranteed to be empty. probability of emptiness depends on the data.
sample_seg_center = {ii: np.random.randint(low=self.cf.pre_crop_size[ii]//2,
high=data.shape[ii + 1] - self.cf.pre_crop_size[ii]//2) for ii in crop_dims}
# cut the window of pre_crop_size around the sampled centre; seg has no channel axis, hence axis=ii vs. ii + 1.
for ii in crop_dims:
min_crop = int(sample_seg_center[ii] - self.cf.pre_crop_size[ii] // 2)
max_crop = int(sample_seg_center[ii] + self.cf.pre_crop_size[ii] // 2)
data = np.take(data, indices=range(min_crop, max_crop), axis=ii + 1)
seg = np.take(seg, indices=range(min_crop, max_crop), axis=ii)
batch_data.append(data)
batch_segs.append(seg[np.newaxis])
# stack the per-patient samples along a new leading batch axis; seg is cast to uint8.
data = np.array(batch_data)
seg = np.array(batch_segs).astype(np.uint8)
class_target = np.array(batch_targets)
return {'data': data, 'seg': seg, 'pid': batch_pids, 'class_target': class_target}
# NOTE(review): indentation is not visible in this view, and the two lines starting with '- ' / '+ ' further down are
# diff markers of the surrounding patch (removed / added line), not Python statements -- confirm against the real file.
class PatientBatchIterator(SlimDataLoaderBase):
"""
creates a test generator that iterates over entire given dataset returning 1 patient per batch.
Can be used for monitoring if cf.val_mode = 'patient_val' for a monitoring closer to actualy evaluation (done in 3D),
if willing to accept speed-loss during training.
:return: out_batch: dictionary containing one patient with batch_size = n_3D_patches in 3D or
batch_size = n_2D_patches in 2D .
"""
def __init__(self, data, cf): #threads in augmenter
# the base class is initialised with a batch size of 0; the batch size is determined by the patient's patches.
super(PatientBatchIterator, self).__init__(data, 0)
self.cf = cf
self.patient_ix = 0
self.dataset_pids = [v['pid'] for (k, v) in data.items()]
self.patch_size = cf.patch_size
# a 2D patch size gets a trailing z-extent of 1, so the patching code below can treat 2D like 3D.
if len(self.patch_size) == 2:
self.patch_size = self.patch_size + [1]
def generate_train_batch(self):
# patients are served in the order of dataset_pids; patient_ix is advanced at the end of this method.
pid = self.dataset_pids[self.patient_ix]
patient = self._data[pid]
data = np.transpose(np.load(patient['data'], mmap_mode='r'), axes=(1, 2, 0))[np.newaxis] # (c, y, x, z)
seg = np.transpose(np.load(patient['seg'], mmap_mode='r'), axes=(1, 2, 0))
batch_class_targets = np.array([patient['class_target']])
# pad data if smaller than patch_size seen during training.
if np.any([data.shape[dim + 1] < ps for dim, ps in enumerate(self.patch_size)]):
# NOTE(review): new_shape starts with the channel count and is passed for seg as well, although seg has no
# channel axis here -- confirm that pad_nd_image handles this as intended.
new_shape = [data.shape[0]] + [np.max([data.shape[dim + 1], self.patch_size[dim]]) for dim, ps in enumerate(self.patch_size)]
data = dutils.pad_nd_image(data, new_shape) # use 'return_slicer' to crop image back to original shape.
seg = dutils.pad_nd_image(seg, new_shape)
# get 3D targets for evaluation, even if network operates in 2D. 2D predictions will be merged to 3D in predictor.
if self.cf.dim == 3 or self.cf.merge_2D_to_3D_preds:
out_data = data[np.newaxis]
out_seg = seg[np.newaxis, np.newaxis]
out_targets = batch_class_targets
batch_3D = {'data': out_data, 'seg': out_seg, 'class_target': out_targets, 'pid': pid}
converter = ConvertSegToBoundingBoxCoordinates(dim=3, get_rois_from_seg_flag=False, class_specific_seg_flag=self.cf.class_specific_seg_flag)
batch_3D = converter(**batch_3D)
batch_3D.update({'patient_bb_target': batch_3D['bb_target'],
'patient_roi_labels': batch_3D['roi_labels'],
'original_img_shape': out_data.shape})
# 2D: the z axis is moved to the front, so every slice becomes one batch element.
if self.cf.dim == 2:
out_data = np.transpose(data, axes=(3, 0, 1, 2)) # (z, c, x, y )
out_seg = np.transpose(seg, axes=(2, 0, 1))[:, np.newaxis]
out_targets = np.array(np.repeat(batch_class_targets, out_data.shape[0], axis=0))
# if set to not None, add neighbouring slices to each selected slice in channel dimension.
if self.cf.n_3D_context is not None:
# slices are zero-padded on both ends of the slice axis, so slice_range is shifted by n_3D_context.
slice_range = range(self.cf.n_3D_context, out_data.shape[0] + self.cf.n_3D_context)
out_data = np.pad(out_data, ((self.cf.n_3D_context, self.cf.n_3D_context), (0, 0), (0, 0), (0, 0)), 'constant', constant_values=0)
out_data = np.array(
[np.concatenate([out_data[ii] for ii in range(
slice_id - self.cf.n_3D_context, slice_id + self.cf.n_3D_context + 1)], axis=0) for slice_id in
slice_range])
batch_2D = {'data': out_data, 'seg': out_seg, 'class_target': out_targets, 'pid': pid}
converter = ConvertSegToBoundingBoxCoordinates(dim=2, get_rois_from_seg_flag=False, class_specific_seg_flag=self.cf.class_specific_seg_flag)
batch_2D = converter(**batch_2D)
# patient-level targets come from the 3D batch when 2D predictions are merged to 3D, else from the 2D batch.
if self.cf.merge_2D_to_3D_preds:
batch_2D.update({'patient_bb_target': batch_3D['patient_bb_target'],
'patient_roi_labels': batch_3D['patient_roi_labels'],
'original_img_shape': out_data.shape})
else:
batch_2D.update({'patient_bb_target': batch_2D['bb_target'],
'patient_roi_labels': batch_2D['roi_labels'],
'original_img_shape': out_data.shape})
out_batch = batch_3D if self.cf.dim == 3 else batch_2D
patient_batch = out_batch
# crop patient-volume to patches of patch_size used during training. stack patches up in batch dimension.
# in this case, 2D is treated as a special case of 3D with patch_size[z] = 1.
if np.any([data.shape[dim + 1] > self.patch_size[dim] for dim in range(3)]):
patch_crop_coords_list = dutils.get_patch_crop_coords(data[0], self.patch_size)
new_img_batch, new_seg_batch, new_class_targets_batch = [], [], []
# each entry c is indexed as (c[0]:c[1], c[2]:c[3], c[4]:c[5]), i.e. start/stop per spatial axis.
for cix, c in enumerate(patch_crop_coords_list):
seg_patch = seg[c[0]:c[1], c[2]: c[3], c[4]:c[5]]
new_seg_batch.append(seg_patch)
# if set to not None, add neighbouring slices to each selected slice in channel dimension.
# correct patch_crop coordinates by added slices of 3D context.
if self.cf.dim == 2 and self.cf.n_3D_context is not None:
tmp_c_5 = c[5] + (self.cf.n_3D_context * 2)
# the volume is padded along the last axis only once, when the first patch is processed.
if cix == 0:
data = np.pad(data, ((0, 0), (0, 0), (0, 0), (self.cf.n_3D_context, self.cf.n_3D_context)), 'constant', constant_values=0)
else:
tmp_c_5 = c[5]
- new_img_batch.append(data[c[0]:c[1], c[2]:c[3], c[4]:tmp_c_5])
+ new_img_batch.append(data[:, c[0]:c[1], c[2]:c[3], c[4]:tmp_c_5])
data = np.array(new_img_batch) # (n_patches, c, x, y, z)
seg = np.array(new_seg_batch)[:, np.newaxis] # (n_patches, 1, x, y, z)
batch_class_targets = np.repeat(batch_class_targets, len(patch_crop_coords_list), axis=0)
if self.cf.dim == 2:
if self.cf.n_3D_context is not None:
data = np.transpose(data[:, 0], axes=(0, 3, 1, 2))
else:
# all patches have z dimension 1 (slices). discard dimension
data = data[..., 0]
seg = seg[..., 0]
# the patch batch carries the patient-level targets and original shape along with the crop coordinates.
patch_batch = {'data': data, 'seg': seg, 'class_target': batch_class_targets, 'pid': pid}
patch_batch['patch_crop_coords'] = np.array(patch_crop_coords_list)
patch_batch['patient_bb_target'] = patient_batch['patient_bb_target']
patch_batch['patient_roi_labels'] = patient_batch['patient_roi_labels']
patch_batch['original_img_shape'] = patient_batch['original_img_shape']
converter = ConvertSegToBoundingBoxCoordinates(self.cf.dim, get_rois_from_seg_flag=False, class_specific_seg_flag=self.cf.class_specific_seg_flag)
patch_batch = converter(**patch_batch)
out_batch = patch_batch
# advance to the next patient and wrap around after the last one.
self.patient_ix += 1
if self.patient_ix == len(self.dataset_pids):
self.patient_ix = 0
return out_batch
def copy_and_unpack_data(logger, pids, fold_dir, source_dir, target_dir):
    """
    copies the packed arrays (<pid>_img.npz / <pid>_rois.npz) of the given patients from source_dir to target_dir
    via rsync and unpacks them there.
    :param logger: logger instance.
    :param pids: list of patient ids whose files are copied.
    :param fold_dir: directory in which the rsync file list 'file_list.txt' is written.
    :param source_dir: directory holding the packed data.
    :param target_dir: directory the data is copied to and unpacked in.
    """
    start_time = time.time()

    file_list_path = os.path.join(fold_dir, 'file_list.txt')
    with open(file_list_path, 'w') as handle:
        for pid in pids:
            handle.write('{}_img.npz\n'.format(pid))
            handle.write('{}_rois.npz\n'.format(pid))

    # argument list instead of a formatted shell string: paths containing spaces or shell metacharacters
    # are passed to rsync unchanged. the file list is closed (and therefore flushed) before rsync reads it.
    subprocess.call(['rsync', '-av', '--files-from', file_list_path, source_dir, target_dir])

    dutils.unpack_dataset(target_dir)
    copied_files = os.listdir(target_dir)
    logger.info("copying and unpacking data set finsihed : {} files in target dir: {}. took {} sec".format(
        len(copied_files), target_dir, np.round(time.time() - start_time, 0)))
diff --git a/utils/exp_utils.py b/utils/exp_utils.py
index 866d5a0..51bbc05 100644
--- a/utils/exp_utils.py
+++ b/utils/exp_utils.py
@@ -1,349 +1,349 @@
#!/usr/bin/env python
# Copyright 2018 Division of Medical Image Computing, German Cancer Research Center (DKFZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import logging
import subprocess
import os
import torch
from collections import OrderedDict
import plotting
import sys
import importlib.util
import pandas as pd
import pickle
def get_logger(exp_dir):
    """
    creates logger instance. writing out info to file and to terminal.
    handlers attached by earlier calls are removed first, so calling this once per fold neither duplicates
    messages nor keeps writing to the log file of a previous directory.
    :param exp_dir: experiment directory, where exec.log file is stored.
    :return: logger instance.
    """
    logger = logging.getLogger('medicaldetectiontoolkit')
    logger.setLevel(logging.DEBUG)

    # logging.getLogger returns the same object for the same name, so handlers from previous calls are still
    # attached. iterate over a copy, because removeHandler mutates logger.handlers.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    log_file = exp_dir + '/exec.log'
    hdlr = logging.FileHandler(log_file)
    print('Logging to {}'.format(log_file))
    logger.addHandler(hdlr)
    logger.addHandler(ColorHandler())
    # do not hand records on to the root logger.
    logger.propagate = False
    return logger
def _copy_file(src, dst):
    """
    copy a single file by calling cp directly with an argument list, i.e. without a shell. This way paths
    containing spaces or shell metacharacters are passed on unchanged.
    :param src: path of the file to copy.
    :param dst: path of the copy.
    :return: exit code of cp. A failed copy is reported by cp on stderr, but not raised as an exception.
    """
    return subprocess.call(['cp', src, dst])


def _use_snapshot_scripts(cf, exp_path):
    """
    copy the snapshot model scripts from exp_path back to the source dir as tmp_model / tmp_backbone and
    point cf.model_path / cf.backbone_path to these copies.
    :param cf: configs instance. cf.source_dir is read, cf.model_path and cf.backbone_path are overwritten.
    :param exp_path: path to experiment directory holding model.py and backbone.py.
    """
    tmp_model_path = os.path.join(cf.source_dir, 'models', 'tmp_model.py')
    tmp_backbone_path = os.path.join(cf.source_dir, 'models', 'tmp_backbone.py')
    _copy_file(os.path.join(exp_path, 'model.py'), tmp_model_path)
    _copy_file(os.path.join(exp_path, 'backbone.py'), tmp_backbone_path)
    cf.model_path = tmp_model_path
    cf.backbone_path = tmp_backbone_path


def prep_exp(dataset_path, exp_path, server_env, use_stored_settings=True, is_training=True):
    """
    I/O handling, creating of experiment folder structure. Also creates a snapshot of configs/model scripts and copies
    them to the exp_dir. This way the exp_dir contains all info needed to conduct an experiment, independent of changes
    in the actual source code. Thus, training/inference of this experiment can be started at any time. For this purpose,
    the model script is copied back to the source code dir as tmp_model (tmp_backbone).
    Note: default_configs.py is copied from the current working directory.
    :param dataset_path: path to source code for specific data set. (e.g. medicaldetectiontoolkit/lidc_exp)
    :param exp_path: path to experiment directory.
    :param server_env: boolean flag. passed to the configs script for cloud deployment.
    :param use_stored_settings: boolean flag. When starting training: If True, starts training from the snapshot in the
    experiment directory (which is created first, if it does not exist yet), else runs with the configs/model scripts
    from source code and overwrites the snapshot in the experiment directory.
    :param is_training: boolean flag. distinguishes train vs. inference mode.
    :return: cf: configs instance, extended by the experiment specific paths and flags set below.
    """
    source_configs = os.path.join(dataset_path, 'configs.py')
    exp_configs = os.path.join(exp_path, 'configs.py')
    exp_default_configs = os.path.join(exp_path, 'default_configs.py')
    exp_model = os.path.join(exp_path, 'model.py')
    exp_backbone = os.path.join(exp_path, 'backbone.py')

    if is_training:
        # the first process of an experiment creates the directories and copies the config to exp_path.
        if not os.path.exists(exp_path):
            os.mkdir(exp_path)
            os.mkdir(os.path.join(exp_path, 'plots'))
            _copy_file(source_configs, exp_configs)
            _copy_file('default_configs.py', exp_default_configs)

        if use_stored_settings:
            _copy_file('default_configs.py', exp_default_configs)
            cf_file = import_module('cf', exp_configs)
            cf = cf_file.configs(server_env)
            # only the first process copies the model selected in configs to exp_path.
            if not os.path.isfile(exp_model):
                _copy_file(cf.model_path, exp_model)
                _copy_file(cf.backbone_path, exp_backbone)
            _use_snapshot_scripts(cf, exp_path)
        else:
            # run training with source code info and copy snapshot of model to exp_dir for later testing
            # (overwrite scripts if exp_dir already exists.)
            cf_file = import_module('cf', source_configs)
            cf = cf_file.configs(server_env)
            _copy_file(cf.model_path, exp_model)
            _copy_file(cf.backbone_path, exp_backbone)
            _copy_file('default_configs.py', exp_default_configs)
            _copy_file(source_configs, exp_configs)
    else:
        # for testing, the configs snapshot in exp_dir is used.
        cf_file = import_module('cf', exp_configs)
        cf = cf_file.configs(server_env)
        if cf.hold_out_test_set:
            cf.pp_data_path = cf.pp_test_data_path
            cf.pp_name = cf.pp_test_name
        _use_snapshot_scripts(cf, exp_path)

    cf.exp_dir = exp_path
    cf.test_dir = os.path.join(cf.exp_dir, 'test')
    cf.plot_dir = os.path.join(cf.exp_dir, 'plots')
    # normpath drops a trailing separator, which would otherwise result in an empty experiment name.
    cf.experiment_name = os.path.basename(os.path.normpath(exp_path))
    cf.server_env = server_env
    cf.created_fold_id_pickle = False
    return cf
def import_module(name, path):
    """
    import a python source file dynamically as a module (python 3). The module code is executed,
    the module is not registered in sys.modules.
    :param name: name given to module instance.
    :param path: path to module.
    :return: module: returned module instance.
    :raises ImportError: if no loader can be determined for path (e.g. path has no python source suffix).
    """
    spec = importlib.util.spec_from_file_location(name, path)
    # spec_from_file_location returns None instead of raising, if it finds no loader for the file.
    if spec is None or spec.loader is None:
        raise ImportError('cannot import module {!r} from {!r}: no loader found for this file.'.format(name, path))
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
class ModelSelector:
    """
    saves a checkpoint after each epoch as 'last_checkpoint' (can be loaded to continue interrupted training).
    saves the top-k (k=cf.save_n_models) ranked epochs. In inference, predictions of multiple epochs can be ensembled
    to improve performance.
    """

    def __init__(self, cf, logger):
        """
        :param cf: configs. read here: save_n_models, model_selection_criteria, min_save_thresh, fold_dir.
        :param logger: logger instance used to report saved / deleted checkpoints.
        """
        self.cf = cf
        self.saved_epochs = [-1] * cf.save_n_models
        self.logger = logger

    @staticmethod
    def _ensure_dir(path):
        """create directory path, if it does not exist yet."""
        if not os.path.exists(path):
            os.mkdir(path)

    @staticmethod
    def _dump_monitor_metrics(save_dir, monitor_metrics):
        """pickle monitor_metrics to save_dir/monitor_metrics.pickle."""
        with open(os.path.join(save_dir, 'monitor_metrics.pickle'), 'wb') as handle:
            pickle.dump(monitor_metrics, handle)

    def run_model_selection(self, net, optimizer, monitor_metrics, epoch):
        """
        rank all epochs so far by their validation scores. If the current epoch is among the top-k epochs, save its
        parameters to '<epoch>_best_checkpoint' and delete the checkpoints that fell out of the top-k. In any case,
        save the current training state to 'last_checkpoint'. All directories are located in cf.fold_dir.
        :param net: model. its state_dict() is saved.
        :param optimizer: optimizer. its state_dict() is saved to 'last_checkpoint' only.
        :param monitor_metrics: dict. monitor_metrics['val'][criterion] holds one score per epoch for each criterion in
        cf.model_selection_criteria. The entry at index 0 is skipped, since epochs start at 1.
        :param epoch: current epoch.
        """
        # take the mean over all selection criteria in each epoch. missing scores (None) count as 0.
        criteria_scores = np.array([[0 if score is None else score for score in monitor_metrics['val'][criterion]]
                                    for criterion in self.cf.model_selection_criteria])
        epochs_scores = np.mean(criteria_scores, 0)[1:]

        # ranking of epochs according to model_selection_criteria, best epoch first. epochs start at 1.
        epoch_ranking = np.argsort(epochs_scores)[::-1] + 1
        # if set in configs, epochs < min_save_thresh are discarded from saving process.
        epoch_ranking = epoch_ranking[epoch_ranking >= self.cf.min_save_thresh]
        top_k_epochs = epoch_ranking[:self.cf.save_n_models]
        dropped_epochs = epoch_ranking[self.cf.save_n_models:]

        # check if current epoch is among the top-k epochs.
        if epoch in top_k_epochs:
            save_dir = os.path.join(self.cf.fold_dir, '{}_best_checkpoint'.format(epoch))
            self._ensure_dir(save_dir)
            torch.save(net.state_dict(), os.path.join(save_dir, 'params.pth'))
            self._dump_monitor_metrics(save_dir, monitor_metrics)
            # save epoch_ranking to keep info for inference.
            np.save(os.path.join(self.cf.fold_dir, 'epoch_ranking'), top_k_epochs)
            np.save(os.path.join(save_dir, 'epoch_ranking'), top_k_epochs)
            self.logger.info(
                "saving current epoch {} at rank {}".format(epoch, np.argwhere(epoch_ranking == epoch)))

            # delete params of the epochs that just fell out of the top-k epochs.
            saved_epochs = [int(name.split('_')[0]) for name in os.listdir(self.cf.fold_dir)
                            if 'best_checkpoint' in name]
            for saved_epoch in saved_epochs:
                if saved_epoch in dropped_epochs:
                    # argument list instead of a shell string: the path is passed on to rm unchanged.
                    subprocess.call(['rm', '-rf',
                                     os.path.join(self.cf.fold_dir, '{}_best_checkpoint'.format(saved_epoch))])
                    self.logger.info('deleting epoch {} at rank {}'.format(
                        saved_epoch, np.argwhere(epoch_ranking == saved_epoch)))

        # save checkpoint of current epoch (including the optimizer state needed to resume training).
        state = {
            'epoch': epoch,
            'state_dict': net.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        save_dir = os.path.join(self.cf.fold_dir, 'last_checkpoint')
        self._ensure_dir(save_dir)
        torch.save(state, os.path.join(save_dir, 'params.pth'))
        np.save(os.path.join(save_dir, 'epoch_ranking'), top_k_epochs)
        self._dump_monitor_metrics(save_dir, monitor_metrics)
def load_checkpoint(checkpoint_path, net, optimizer):
    """
    restore the training state stored in a checkpoint directory.
    :param checkpoint_path: directory holding params.pth and monitor_metrics.pickle.
    :param net: model, into which the stored 'state_dict' is loaded.
    :param optimizer: optimizer, into which the stored 'optimizer' state is loaded.
    :return: starting_epoch: the epoch following the stored one.
    :return: monitor_metrics: the unpickled monitor metrics.
    """
    params_file = os.path.join(checkpoint_path, 'params.pth')
    metrics_file = os.path.join(checkpoint_path, 'monitor_metrics.pickle')

    stored_state = torch.load(params_file)
    net.load_state_dict(stored_state['state_dict'])
    optimizer.load_state_dict(stored_state['optimizer'])

    with open(metrics_file, 'rb') as metrics_handle:
        restored_metrics = pickle.load(metrics_handle)

    # the stored epoch is completed, so training continues with the next one.
    return stored_state['epoch'] + 1, restored_metrics
def prepare_monitoring(cf):
    """
    set up the dictionaries collecting train/val metrics as well as the plot monitoring them.
    :param cf: configs. read here: report_score_level, class_dict, num_epochs.
    :return: metrics: dict holding one OrderedDict of metric lists for 'train' and one for 'val'.
    :return: TrainingPlot: instance of plotting.TrainingPlot_2Panel.
    """
    splits = ('train', 'val')
    metrics = {}
    for split in splits:
        metrics[split] = OrderedDict()

    # collect the names, for which scores are reported: the class names and / or 'patient'.
    score_levels = []
    if 'rois' in cf.report_score_level:
        score_levels += list(cf.class_dict.values())
    if 'patient' in cf.report_score_level:
        score_levels.append('patient')

    for level in score_levels:
        score_names = [level + '_ap']
        # auc is tracked on patient level only.
        if level == 'patient':
            score_names.append(level + '_auc')
        for score_name in score_names:
            for split in splits:
                # the first entry is a placeholder, accounting for epochs starting at 1.
                metrics[split][score_name] = [None]

    # one (initially empty) list of monitor values per epoch, plus the placeholder entry at index 0.
    for split in splits:
        metrics[split]['monitor_values'] = [[] for _ in range(cf.num_epochs + 1)]

    # generate instance of monitor plot class.
    training_plot = plotting.TrainingPlot_2Panel(cf)
    return metrics, training_plot
def create_csv_output(results_list, cf, logger):
    """
    Write out test set predictions to .csv file. output format is one line per prediction:
    PatientID | PredictionID | [y1 x1 y2 x2 (z1) (z2)] | score | pred_classID
    Note, that prediction coordinates correspond to images as loaded for training/testing and need to be adapted when
    plotted over raw data (before preprocessing/resampling).
    The file is written to cf.exp_dir as results_<fold>.csv, where <fold> is cf.fold, or 'hold_out' if cf has
    no attribute fold. Only predictions with score >= cf.min_det_thresh are written.
    :param results_list: [[patient_results, patient_id], [patient_results, patient_id], ...]
    :param cf: configs. read here: exp_dir, min_det_thresh and (optionally) fold.
    :param logger: logger instance used to report the output path.
    """
    # only a missing attribute selects the default name; other errors are not swallowed.
    fold = getattr(cf, 'fold', 'hold_out')
    out_path = os.path.join(cf.exp_dir, 'results_{}.csv'.format(fold))
    logger.info('creating csv output file at {}'.format(out_path))

    # rows are collected in a list and converted once, instead of growing the DataFrame row by row.
    rows = []
    for patient_results, pid in (r[:2] for r in results_list):
        # NOTE: to match output predictions with raw data, the resampling info from preprocessing would have to be
        # loaded here (pickle at cf.exp_dir/test_resampling_info/<pid>) and applied to the coordinates:
        # y1, y2 * scale[0]; x1, x2 * scale[1]; z1, z2 * scale[2] + z_crop.
        for bix, box in enumerate(patient_results[0]):
            assert box['box_type'] == 'det', box['box_type']
            score = box['box_score']
            if score < cf.min_det_thresh:
                continue
            coords = box['box_coords']
            out_coords = [coords[0], coords[1], coords[2], coords[3]]
            if len(coords) > 4:
                # 3D boxes additionally carry z1, z2.
                out_coords.extend([coords[4], coords[5]])
            # bix is the index among all boxes of the patient, so predictionIDs of discarded boxes are left out.
            rows.append([pid, bix, out_coords, score, box['box_pred_class_id']])

    predictions_df = pd.DataFrame(rows, columns=['patientID', 'predictionID', 'coords', 'score', 'pred_classID'])
    predictions_df.to_csv(out_path, index=False)
class _AnsiColorizer(object):
    """
    A colorizer is an object that loosely wraps around a stream, allowing
    callers to write text to the stream in a particular color.
    Colorizer classes must implement C{supported()} and C{write(text, color)}.
    """
    # color label -> ANSI SGR foreground color code.
    _colors = dict(black=30, red=31, green=32, yellow=33,
                   blue=34, magenta=35, cyan=36, white=37, default=39)

    def __init__(self, stream):
        """
        @param stream: the stream to wrap. It needs to provide C{write(text)}.
        """
        self.stream = stream

    @classmethod
    def supported(cls, stream=sys.stdout):
        """
        A class method that returns True if the current platform supports
        coloring terminal output using this method.
        Returns False if the stream is not a TTY, if curses cannot be imported,
        or if the terminal reports no more than 2 colors. If querying curses still
        fails after C{curses.setupterm()}, that error is passed on to the caller.
        """
        if not stream.isatty():
            return False  # auto color only on TTYs
        try:
            import curses
        except ImportError:
            return False
        try:
            return curses.tigetnum("colors") > 2
        except curses.error:
            # the query failed, typically because setupterm() has not been called yet:
            # initialise the terminal and ask once more.
            curses.setupterm()
            return curses.tigetnum("colors") > 2

    def write(self, text, color):
        """
        Write the given text to the stream in the given color.
        @param text: Text to be written to the stream.
        @param color: A string label for a color. e.g. 'red', 'white'.
        @raise KeyError: if color is not one of the known color labels.
        """
        color = self._colors[color]
        # set the foreground color, write the text, then reset all attributes.
        self.stream.write('\x1b[%sm%s\x1b[0m' % (color, text))
class ColorHandler(logging.StreamHandler):
    """
    logging handler, which writes each record to a stream in a color depending on the level of the record.
    """
    # level -> color label. levels not listed here are written in blue.
    _msg_colors = {
        logging.DEBUG: "green",
        logging.INFO: "default",
        logging.WARNING: "red",
        logging.ERROR: "red"
    }

    def __init__(self, stream=sys.stdout):
        """
        :param stream: stream to write to. It is wrapped in an _AnsiColorizer, which takes the color as second
        argument of write().
        """
        super(ColorHandler, self).__init__(_AnsiColorizer(stream))

    def emit(self, record):
        """
        write the formatted record, followed by a newline, in the color assigned to its level.
        The record is formatted via self.format(), so that %-style arguments are merged into the message,
        non-string messages are converted and a formatter set on this handler is applied.
        :param record: logging.LogRecord to write.
        """
        color = self._msg_colors.get(record.levelno, "blue")
        try:
            self.stream.write(self.format(record) + "\n", color)
        except Exception:
            # as for the logging handlers of the standard library, a failing log call does not
            # interrupt the program but is reported via handleError.
            self.handleError(record)