diff --git a/Modules/Biophotonics/python/iMC/mc/batches.py b/Modules/Biophotonics/python/iMC/mc/batches.py index 0a8b26a0f0..58693ae906 100644 --- a/Modules/Biophotonics/python/iMC/mc/batches.py +++ b/Modules/Biophotonics/python/iMC/mc/batches.py @@ -1,227 +1,245 @@ ''' Created on Oct 15, 2015 @author: wirkert ''' import numpy as np from pandas import DataFrame import pandas as pd class AbstractBatch(object): """summarizes a batch of simulated mc spectra""" def __init__(self): self._nr_layers = 0 # internally keeps track of number of layers my_index = pd.MultiIndex(levels=[[], []], labels=[[], []]) self.df = DataFrame(columns=my_index) def create_parameters(self, nr_samples): """create the parameters for the batch, the simulation has to create the resulting reflectances""" pass def nr_elements(self): return self.df.shape[0] - - class GenericBatch(AbstractBatch): """generic 3-layer batch with each layer having the same oxygenation """ def __init__(self): super(GenericBatch, self).__init__() def append_one_layer(self, saO2, nr_samples): """helper function to create parameters for one layer""" # scales data to lie between maxi and mini instead of 0 and 1 scale = lambda x, mini, maxi: x * (maxi - mini) + mini # shortcut to random generator gen = np.random.random_sample gen_n = np.random.normal # create layer elements self.df["layer" + str(self._nr_layers), "vhb"] = \ scale(gen(nr_samples), 0, 1.) self.df["layer" + str(self._nr_layers), "sao2"] = \ saO2 self.df["layer" + str(self._nr_layers), "a_mie"] = \ np.clip(gen_n(loc=18.9, scale=10.2, size=nr_samples), 0.1, np.inf) * 100 # to 1/m self.df["layer" + str(self._nr_layers), "b_mie"] = \ np.clip(gen_n(loc=1.286, scale=0.521, size=nr_samples), 0, np.inf) self.df["layer" + str(self._nr_layers), "d"] = \ scale(gen(nr_samples), 0, 1.) self.df["layer" + str(self._nr_layers), "n"] = \ scale(gen(nr_samples), 1.33, 1.54) self.df["layer" + str(self._nr_layers), "g"] = \ scale(gen(nr_samples), 0.8, 0.95) self._nr_layers += 1 def create_parameters(self, nr_samples): """Create generic three layer batch with a total diameter of 2mm. saO2 is the same in all layers, but all other parameters vary randomly within each layer""" saO2 = np.random.random_sample(size=nr_samples) # create three layers with random samples self.append_one_layer(saO2, nr_samples) self.append_one_layer(saO2, nr_samples) self.append_one_layer(saO2, nr_samples) # "normalize" d to 2mm # first extract all layers from df self.df layers = [l for l in self.df.columns.levels[0] if "layer" in l] # summarize all ds sum_d = 0 for l in layers: sum_d += self.df[l, "d"] for l in layers: self.df[l, "d"] = self.df[l, "d"] / sum_d * 2000. * 10 ** -6 self.df[l, "d"] = np.clip(self.df[l, "d"], 25 * 10 ** -6, np.inf) class LessGenericBatch(AbstractBatch): """less generic three layer batch. This only varies blood volume fraction w.r.t. the ColonMuscleBatch. 
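The append_one_layer pattern above packs three sampling idioms into a few lines: uniform draws rescaled to a target range, normal draws clipped to physically plausible bounds, and storage under two-level (layer, parameter) columns. A minimal self-contained sketch of the same idioms on toy values — the `demo` frame and the chosen parameter names are illustrative only, and the MultiIndex constructor matches the pandas-0.17-era API this module uses:

```python
import numpy as np
import pandas as pd

nr_samples = 5
# rescale uniform [0, 1) samples to [mini, maxi), as scale() does above
scale = lambda x, mini, maxi: x * (maxi - mini) + mini

demo = pd.DataFrame(columns=pd.MultiIndex(levels=[[], []], labels=[[], []]))
# refractive index n: uniform draw in [1.33, 1.54)
demo["layer0", "n"] = scale(np.random.random_sample(nr_samples), 1.33, 1.54)
# scattering amplitude a_mie: normal draw, clipped to stay positive, in 1/m
demo["layer0", "a_mie"] = np.clip(
    np.random.normal(loc=18.9, scale=10.2, size=nr_samples),
    0.1, np.inf) * 100
print(demo)
```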
Let's see if DA works in this case.""" def __init__(self): super(LessGenericBatch, self).__init__() def append_one_layer(self, saO2, n, d_ranges, nr_samples): """helper function to create parameters for one layer""" # scales data to lie between maxi and mini instead of 0 and 1 scale = lambda x, mini, maxi: x * (maxi - mini) + mini # shortcut to random generator gen = np.random.random_sample # create as generic batch super(LessGenericBatch, self).append_one_layer(saO2, nr_samples) self._nr_layers -= 1 # we're not finished # but some changes in specific layer elements # more specific layer thicknesses self.df["layer" + str(self._nr_layers), "d"] = \ scale(gen(nr_samples), d_ranges[0], d_ranges[1]) # more specific n self.df["layer" + str(self._nr_layers), "n"] = \ n self._nr_layers += 1 def create_parameters(self, nr_samples): """Create generic three layer batch with a total diameter of 2mm. saO2 is the same in all layers, but all other parameters vary randomly within each layer""" saO2 = np.random.random_sample(size=nr_samples) n = np.ones_like(saO2) # create three layers with random samples # muscle self.append_one_layer(saO2, n * 1.36, (600.*10 ** -6, 1010.*10 ** -6), nr_samples) # submucosa self.append_one_layer(saO2, n * 1.36, (415.*10 ** -6, 847.*10 ** -6), nr_samples) # mucosa self.append_one_layer(saO2, n * 1.38, (395.*10 ** -6, 603.*10 ** -6), nr_samples) - - class ColonMuscleBatch(GenericBatch): """three layer batch simulating colonic tissue""" def __init__(self): super(ColonMuscleBatch, self).__init__() def append_one_layer(self, saO2, n, d_ranges, nr_samples): """helper function to create parameters for one layer""" # scales data to lie between maxi and mini instead of 0 and 1 scale = lambda x, mini, maxi: x * (maxi - mini) + mini # shortcut to random generator gen = np.random.random_sample # create as generic batch super(ColonMuscleBatch, self).append_one_layer(saO2, nr_samples) self._nr_layers -= 1 # we're not finished # but some changes in specific layer elements # less blood self.df["layer" + str(self._nr_layers), "vhb"] = \ scale(gen(nr_samples), 0, 0.1) # more specific layer thicknesses self.df["layer" + str(self._nr_layers), "d"] = \ scale(gen(nr_samples), d_ranges[0], d_ranges[1]) # more specific n self.df["layer" + str(self._nr_layers), "n"] = \ n self._nr_layers += 1 def create_parameters(self, nr_samples): """Create generic three layer batch with a total diameter of 2mm. saO2 is the same in all layers, but all other parameters vary randomly within each layer""" saO2 = np.random.random_sample(size=nr_samples) n = np.ones_like(saO2) # create three layers with random samples # muscle self.append_one_layer(saO2, n * 1.36, (600.*10 ** -6, 1010.*10 ** -6), nr_samples) # submucosa self.append_one_layer(saO2, n * 1.36, (415.*10 ** -6, 847.*10 ** -6), nr_samples) # mucosa self.append_one_layer(saO2, n * 1.38, (395.*10 ** -6, 603.*10 ** -6), nr_samples) + +class ColonMuscleMeanScatteringBatch(ColonMuscleBatch): + """three layer batch simulating colonic tissue""" + + def __init__(self): + super(ColonMuscleMeanScatteringBatch, self).__init__() + + def append_one_layer(self, saO2, n, d_ranges, nr_samples): + """helper function to create parameters for one layer""" + + # create as generic batch + super(ColonMuscleMeanScatteringBatch, self).append_one_layer(saO2, + n, + d_ranges, + nr_samples) + self._nr_layers -= 1 # we're not finished + + # restrict exponential scattering to mean value for soft tissue. 
+ self.df["layer" + str(self._nr_layers), "b_mie"] = 1.286 + + self._nr_layers += 1 + # class VisualizationBatch(AbstractBatch): # """batch used for visualization of different spectra. Feel free to adapt # for your visualization purposes.""" # # def __init__(self): # super(VisualizationBatch, self).__init__() # # self._wavelengths = np.arange(470, 680, 10) * 10 ** -9 # # def append_one_layer(self, bvf, saO2, a_mie, a_ray, d, n, g, nr_samples): # """helper function to create parameters for one layer""" # samples = np.zeros((nr_samples, 7)) # # scale samples to correct ranges # samples[:, 0] = bvf # samples[:, 1] = saO2 # samples[:, 2] = a_mie # samples[:, 3] = a_ray # # d will be normalized later to 2mm total depth # samples[:, 4] = d # samples[:, 5] = n # samples[:, 6] = g # # append as last layer # self.layers.append(samples) # # def create_parameters(self, nr_samples): # # bvf = np.linspace(0.0, .1, nr_samples) # # saO2 = np.linspace(0., 1., nr_samples) # # d = np.linspace(175, 735, nr_samples) * 10 ** -6 # # a_mie = np.linspace(5., 30., nr_samples) * 100 # # a_ray = np.linspace(0., 60., nr_samples) * 100 # # n = np.linspace(1.33, 1.54, nr_samples) # # g = np.linspace(0, 0.95, nr_samples) # # create three layers with random samples # self.append_one_layer(0.02, 0.1, 30.*100., 0., 500 * 10 ** -6, # 1.38, 0.9, # nr_samples) # self.append_one_layer(0.04, 0.7, 5.0 * 100, 0.*100, 500 * 10 ** -6, # 1.36, 0.9, # nr_samples) # self.append_one_layer(0.04, 0.7, 5.0 * 100, 0.*100, 500 * 10 ** -6, # 1.36, 0.9, # nr_samples) diff --git a/Modules/Biophotonics/python/iMC/mc/dfmanipulations.py b/Modules/Biophotonics/python/iMC/mc/dfmanipulations.py index a97696e8ab..d7c214578d 100644 --- a/Modules/Biophotonics/python/iMC/mc/dfmanipulations.py +++ b/Modules/Biophotonics/python/iMC/mc/dfmanipulations.py @@ -1,52 +1,39 @@ ''' Created on Oct 19, 2015 @author: wirkert ''' -import numpy as np from scipy.interpolate import interp1d +import pandas as pd -def fold_by_sliding_average(batch, window_size): +def fold_by_sliding_average(df, window_size): """take a batch and apply a sliding average with given window size to the reflectances. window_size is elements to the left and to the right. There will be some boundary effect on the edges.""" - sliding_average = lambda x: np.convolve(x, - np.ones(window_size, dtype=float) / - float(window_size), mode="same") - batch.reflectances = np.apply_along_axis(sliding_average, - axis=1, - arr=batch.reflectances) + # next line does the folding. + df.reflectances = pd.rolling_mean(df.reflectances.T, window_size, + center=True).T + # let's get rid of NaN columns which are created at the boundaries + df.dropna(axis="columns", inplace=True) -def interpolate_wavelengths(batch, new_wavelengths): +def interpolate_wavelengths(df, new_wavelengths): """ interpolate image data to fit new_wavelengths. Current implementation performs simple linear interpolation. Neither existing nor new _wavelengths need to be sorted. 
""" - interpolator = interp1d(batch._wavelengths, - batch.reflectances, assume_sorted=False, + # build an interpolator using the inormation provided by the dataframes + # reflectance column + interpolator = interp1d(df.reflectances.columns.astype(float), + df.reflectances.as_matrix(), assume_sorted=False, bounds_error=False) - batch.reflectances = interpolator(new_wavelengths) - batch._wavelengths = new_wavelengths + # use this to create new reflectances + new_reflectances = interpolator(new_wavelengths) + # build a new dataframe out of this information and set the original df + # to the new information. This seems hacky, can't it be done easier? + df.drop(df["reflectances"].columns, axis=1, level=1, inplace=True) + for i, nw in enumerate(new_wavelengths): + df["reflectances", nw] = new_reflectances[:, i] - -def sortout_bands(batch, bands_to_sortout=None): - """ TODO: Documentation and test """ - if bands_to_sortout is not None: - # get rid of sorted out bands - batch.reflectances = np.delete(batch.reflectances, - bands_to_sortout, axis=1) - batch._wavelengths = np.delete(batch._wavelengths, - bands_to_sortout) - - -def select_n(batch, n): - """ randomly select n elements from batch. TODO: Test """ - perms = np.random.permutation(batch.reflectances.shape[0]) - first_n_perms = perms[0:n] - batch.reflectances = batch.reflectances[first_n_perms, :] - for i, l in enumerate(batch.layers): - batch.layers[i] = np.atleast_2d(l)[first_n_perms, :] - return batch diff --git a/Modules/Biophotonics/python/iMC/mc/factories.py b/Modules/Biophotonics/python/iMC/mc/factories.py index 3e91ca64d6..822b0e3532 100644 --- a/Modules/Biophotonics/python/iMC/mc/factories.py +++ b/Modules/Biophotonics/python/iMC/mc/factories.py @@ -1,74 +1,88 @@ ''' Created on Oct 15, 2015 @author: wirkert ''' from mc.tissuemodels import AbstractTissue, GenericTissue -from mc.batches import AbstractBatch, GenericBatch, ColonMuscleBatch +from mc.batches import AbstractBatch, GenericBatch, LessGenericBatch +from mc.batches import ColonMuscleBatch, ColonMuscleMeanScatteringBatch class AbstractMcFactory(object): ''' Monte Carlo Factory. 
Will create fitting models and batches, dependent on your task ''' def create_tissue_model(self): return AbstractTissue() def create_batch_to_simulate(self): return AbstractBatch() def __init__(self): ''' Constructor ''' + class GenericMcFactory(AbstractMcFactory): def create_tissue_model(self): return GenericTissue() def create_batch_to_simulate(self): return GenericBatch() def __init__(self): ''' Constructor ''' class LessGenericMcFactory(GenericMcFactory): def create_batch_to_simulate(self): return LessGenericBatch() def __init__(self): ''' Constructor ''' class ColonMuscleMcFactory(GenericMcFactory): def create_batch_to_simulate(self): return ColonMuscleBatch() def __init__(self): ''' Constructor ''' + +class ColonMuscleMeanScatteringFactory(GenericMcFactory): + + def create_batch_to_simulate(self): + return ColonMuscleMeanScatteringBatch() + + def __init__(self): + ''' + Constructor + ''' + + class VisualizationMcFactory(AbstractMcFactory): def create_tissue_model(self): return GenericTissue() def create_batch_to_simulate(self): return VisualizationBatch() def __init__(self): ''' Constructor ''' diff --git a/Modules/Biophotonics/python/iMC/script_FAP_DA_analyzer.py b/Modules/Biophotonics/python/iMC/script_FAP_DA_analyzer.py index 9369de787a..df8f60058e 100644 --- a/Modules/Biophotonics/python/iMC/script_FAP_DA_analyzer.py +++ b/Modules/Biophotonics/python/iMC/script_FAP_DA_analyzer.py @@ -1,89 +1,89 @@ # -*- coding: utf-8 -*- """ Created on Fri Aug 14 11:09:18 2015 @author: wirkert """ import os import luigi import tasks_regression as rt import tasks_preprocessing as ppt import tasks_analyze as at import scriptpaths as sp sp.FINALS_FOLDER = "FAP_DA" def resort_wavelengths(msi): """somehow the wavelength sorting got mixed up. This function fixes the mess.""" collapsed_image = imgmani.collapse_image(msi.get_image()) collapsed_image = collapsed_image[:, [2, 1, 0, 3, 4, 5, 6, 7]] msi.set_image(np.reshape(collapsed_image, msi.get_image().shape)) # rebind this method since in this recoding the wavelengths got messed up sp.resort_wavelengths = resort_wavelengths class EstimateTissueParametersTaskDA(at.EstimateTissueParametersTask): def requires(self): return ppt.PreprocessMSI(imageName=self.imageName), \ rt.TrainForestDA(imageName=self.imageName, file_name_prefix_training= - self.batch_prefix) + self.df_prefix) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, "processed", sp.FINALS_FOLDER, self.imageName + "_" + - self.batch_prefix + + self.df_prefix + "estimate.nrrd")) class CreateNiceParametricImagesTaskDA(at.CreateNiceParametricImagesTask): def requires(self): return EstimateTissueParametersTaskDA(imageName=self.imageName, file_name_prefix_training= - self.batch_prefix), \ + self.df_prefix), \ ppt.CorrectImagingSetupTask(imageName=self.imageName), \ ppt.SegmentMSI(imageName=self.imageName), \ ppt.PreprocessMSI(imageName=self.imageName), \ at.ReprojectReflectancesTask(imageName=self.imageName, file_name_prefix_training= - self.batch_prefix) + self.df_prefix) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, self.imageName + "_" + - self.batch_prefix + + self.df_prefix + "_summary.png")) if __name__ == '__main__': # root folder there the data lies luigi.interface.setup_interface_logging() sch = luigi.scheduler.CentralPlannerScheduler() w = luigi.worker.Worker(scheduler=sch) # run over all subfolders (non-recursively) # to collect the data and generate the results for root, dirs, files in 
os.walk(os.path.join(sp.ROOT_FOLDER, sp.DATA_FOLDER)): for name in files: main_task = CreateNiceParametricImagesTaskDA( imageName=name, file_name_prefix_training= "2015June0809:51PMNoisyRandomTraining") w.add(main_task) w.run() diff --git a/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_silico.py b/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_silico.py index 8f4ee0040d..2584732b68 100644 --- a/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_silico.py +++ b/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_silico.py @@ -1,663 +1,326 @@ # -*- coding: utf-8 -*- """ Created on Fri Aug 14 11:09:18 2015 @author: wirkert """ - import os -import pickle +import logging +import datetime from collections import namedtuple + +import numpy as np +import pandas as pd +from pandas import DataFrame import luigi import matplotlib.pyplot as plt -import matplotlib from sklearn.ensemble.forest import RandomForestRegressor +import seaborn as sns import tasks_mc import scriptpaths as sp from regression.preprocessing import preprocess, preprocess2 from regression.linear import LinearSaO2Unmixing -from regression.domain_adaptation import * -from regression.estimation import standard_score - sp.ROOT_FOLDER = "/media/wirkert/data/Data/" + \ - "2015_11_12_IPCAI_in_silico" -sp.FINALS_FOLDER = "Images" + "2016_02_22_IPCAI_revision_in_silico" +sp.FINALS_FOLDER = "finals" sp.RECORDED_WAVELENGTHS = np.arange(470, 680, 10) * 10 ** -9 -sp.MC_DATA_FOLDER = "mc_data2" - -w_standard = 0.1 # for this evaluation we add 5% noise - - -matplotlib.rcParams.update({'font.size': 22}) - -EvaluationStruct = namedtuple("EvaluationStruct", - "name regressor data color alpha") +sp.MC_DATA_FOLDER = "mc_data" +w_standard = 0.1 # for this evaluation we add 10% noise +sns.set_style("whitegrid") +# sns.set_context("poster") # setup standard random forest rf = RandomForestRegressor(10, min_samples_leaf=10, max_depth=9, n_jobs=-1) +EvaluationStruct = namedtuple("EvaluationStruct", + "name regressor color alpha") +# standard evaluation setup +standard_evaluation_setups = [EvaluationStruct("Linear Beer-Lambert", + LinearSaO2Unmixing(), + sns.xkcd_rgb["pale red"], 0.5) + , EvaluationStruct("Proposed", rf, + sns.xkcd_rgb["medium green"], 0.5)] -class TrainingSamplePlots(luigi.Task): +# standard noise levels +noise_levels = np.arange(0.00, 0.18, 0.02) - which = luigi.Parameter() + +class TrainingSamplePlot(luigi.Task): + which_train = luigi.Parameter() + which_test = luigi.Parameter() def requires(self): - return tasks_mc.CameraBatch("ipcai_" + self.which + "_train"), \ - tasks_mc.CameraBatch("ipcai_" + self.which + "_test") + return tasks_mc.CameraBatch(self.which_train), \ + tasks_mc.CameraBatch(self.which_test) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, - sp.RESULTS_FOLDER, - sp.FINALS_FOLDER, - "samples_plot_" + self.which + ".png")) + sp.RESULTS_FOLDER, + sp.FINALS_FOLDER, + "sample_plot_train_" + self.which_train + + "_test_" + self.which_test + ".png")) + def run(self): # get data - f = open(self.input()[0].path, "r") - batch_train = pickle.load(f) - f = open(self.input()[1].path, "r") - batch_test = pickle.load(f) - # setup testing function and data - X_test, y_test = preprocess(batch_test, w_percent=w_standard) - X_train, y_train = preprocess(batch_train, w_percent=w_standard) - # setup structures to save test data - evaluation_setups = [ -# EvaluationStruct("Linear Beer-Lambert", -# LinearSaO2Unmixing(), [], "red", 0.5), - EvaluationStruct("Proposed", rf, "green", 0.5)] - # do 
validation + df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) + df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) + + # for this plot we write a custom evaluation function as it is built + # a little different + + # create a new dataframe which will hold all the generated errors + df = pd.DataFrame() + nr_training_samples = np.arange(10, 15060, 50).astype(int) # not very pythonic, don't care for n in nr_training_samples: - print "number samples ", str(n) - for e in evaluation_setups: - X_j = X_train[0:n] - y_j = y_train[0:n] - lr = e.regressor - lr.fit(X_j, y_j) - # save results - e.data.append(standard_score(lr, X_test, y_test) * 100.) - # make a nice plot for results - plt.figure() - legend_handles = [] - for e in evaluation_setups: - h, = plt.plot(nr_training_samples, - e.data, color=e.color, - label=e.name) - legend_handles.append(h) - # minor_ticks_x = np.arange(0, 1000, 50) - # minor_ticks_y = np.arange(5, 16, 0.5) - # plt.gca().set_xticks(minor_ticks_x, minor=True) - # plt.gca().set_yticks(minor_ticks_y, minor=True) - plt.grid() - # plt.legend(handles=legend_handles) - # plt.title("dependency on number of training samples") + X_test, y_test = preprocess(df_test, w_percent=w_standard) + # only take n samples for training + X_train, y_train = preprocess(df_train, nr_samples=n, + w_percent=w_standard) + + regressor = rf + regressor.fit(X_train, y_train) + y_pred = regressor.predict(X_test) + # save results to a dataframe + errors = np.abs(y_pred - y_test) + errors = errors.reshape(len(errors), 1) + current_df = DataFrame(errors * 100, + columns=["Errors"]) + current_df["Method"] = "Proposed" + current_df["Number Samples"] = n + df = pd.concat([df, current_df], ignore_index=True) + logging.info( + "Finished training classifier with {0} samples".format( + str(n))) + + df = df.groupby("Number Samples").describe() + # get the error description in the rows: + df = df.unstack(-1) + # get rid of multiindex by dropping "Error" level + df.columns = df.columns.droplevel(0) + + plt.plot(df.index, df["50%"], color=sns.xkcd_rgb["medium green"]) + + # tidy up the plot plt.xlabel("number of training samples") plt.ylabel("absolute error [%]") - - for tick in plt.gca().xaxis.get_major_ticks(): - tick.label.set_fontsize(14) - plt.ylim((2, 10)) - plt.xlim((0, 15000)) - plt.savefig(self.output().path, - dpi=500, bbox_inches='tight') - - -class WrongNoisePlots(luigi.Task): - - which = luigi.Parameter() - - def requires(self): - return tasks_mc.CameraBatch("ipcai_" + self.which + "_train"), \ - tasks_mc.CameraBatch("ipcai_" + self.which + "_test") - - def output(self): - return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, - sp.RESULTS_FOLDER, - sp.FINALS_FOLDER, - "noise_wrong_plot_" + self.which + ".png")) - - - def run(self): - # get data - f = open(self.input()[0].path, "r") - batch_train = pickle.load(f) - f = open(self.input()[1].path, "r") - batch_test = pickle.load(f) - - evaluation_setups = [ - EvaluationStruct("Linear Beer-Lambert", - LinearSaO2Unmixing(), [], "red", 0.25) - , EvaluationStruct("Proposed", rf, [], "green", 0.5) - ] - # iterate over different levels of noise - noise_levels = np.arange(0.00, 0.17, 0.02) - for w in noise_levels: - # setup testing function - X_test, y_test = preprocess(batch_test, w_percent=w) - # extract noisy data - X_train, y_train = preprocess(batch_train, w_percent=0.1) - for e in evaluation_setups: - lr = e.regressor - lr.fit(X_train, y_train) - y_pred = lr.predict(X_test) - # save results - errors = np.abs(y_pred - y_test) - 
e.data.append(errors) - # make a nice plot for results - plt.figure() - legend_handles = [] - median0 = 0 - for e in evaluation_setups: - p25 = lambda x: np.percentile(x, 25) * 100. - p75 = lambda x: np.percentile(x, 75) * 100. - m = lambda x: np.median(x) * 100. - plt.fill_between(np.array(noise_levels) * 100., - map(p25, e.data), - map(p75, e.data), - edgecolor=e.color, facecolor=e.color, - alpha=e.alpha) - h, = plt.plot(np.array(noise_levels) * 100., - map(m, e.data), color=e.color, - label=e.name) - legend_handles.append(h) - if e.name is "Proposed": - median0 = m(e.data[0]) - minor_ticks = np.arange(0, 15, 5) - plt.gca().set_xticks(minor_ticks, minor=True) - plt.gca().set_yticks(minor_ticks, minor=True) - - plt.gca().annotate('error: ' + - "{0:.2f}".format( - np.median(evaluation_setups[-1].data[0]) - * 100) + "%", - xy=(0, median0), xytext=(5, 10), size=15, - va="center", - ha="center", bbox=dict(boxstyle="round4", - fc="g", alpha=0.5), - arrowprops=dict(arrowstyle="-|>", - connectionstyle="arc3,rad=-0.2", - fc="w"), - ) - plt.grid() - plt.legend(handles=legend_handles) - # plt.title("dependency on wrong training noise [w_training = 10%]") - plt.xlabel("noise added for testing [sigma %]") - plt.ylabel("absolute error [%]") - plt.ylim((0, 20)) + plt.ylim((0, 15)) plt.xlim((0, 15)) + + # finally save the figure plt.savefig(self.output().path, dpi=500, bbox_inches='tight') - -class NoisePlotsBVF(luigi.Task): - - which = luigi.Parameter() +class VhbPlot(luigi.Task): + which_train = luigi.Parameter() + which_test = luigi.Parameter() def requires(self): - return tasks_mc.CameraBatch("ipcai_" + self.which + "_train"), \ - tasks_mc.CameraBatch("ipcai_" + self.which + "_test") + return tasks_mc.CameraBatch(self.which_train), \ + tasks_mc.CameraBatch(self.which_test) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, - sp.RESULTS_FOLDER, - sp.FINALS_FOLDER, - "noise_plot_bvf_" + self.which + ".png")) + sp.RESULTS_FOLDER, + sp.FINALS_FOLDER, + "vhb_noise_plot_train_" + self.which_train + + "_test_" + self.which_test + ".png")) + + @staticmethod + def preprocess_vhb(batch, nr_samples=None, w_percent=None, + magnification=None, bands_to_sortout=None): + """ For evaluating vhb we extract labels for vhb instead of sao2""" + X, y = preprocess2(batch, nr_samples, w_percent, + magnification, bands_to_sortout) + + return X, y["vhb"] def run(self): # get data - f = open(self.input()[0].path, "r") - batch_train = pickle.load(f) - f = open(self.input()[1].path, "r") - batch_test = pickle.load(f) - - evaluation_setups = [ - EvaluationStruct("Proposed", rf, [], "green", 0.5) - ] - # iterate over different levels of noise - noise_levels = np.arange(0.00, 0.17, 0.02) - for w in noise_levels: - # setup testing function - X_test, y_test = preprocess2(batch_test, w_percent=w) - # extract noisy data - X_train, y_train = preprocess2(batch_train, w_percent=w) - for e in evaluation_setups: - lr = e.regressor - lr.fit(X_train, y_train) - y_pred = lr.predict(X_test) - # save results - errors = np.abs(y_pred[:, 0] - y_test[:, 0]) - e.data.append(errors) - # make a nice plot for results - plt.figure() - legend_handles = [] - for e in evaluation_setups: - p25 = lambda x: np.percentile(x, 25) * 100. - p75 = lambda x: np.percentile(x, 75) * 100. - m = lambda x: np.median(x) * 100. 
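The hand-rolled percentile lambdas in the removed blocks above are replaced throughout by a groupby/describe idiom (see evaluate_data and standard_plotting below). On toy data the idiom looks like this — the numbers are made up, and the unstack(-1)/droplevel(0) steps match the pandas-0.17-era layout, where describe() on a groupby puts the statistics in the innermost row index:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"Errors": np.random.rand(100) * 10,
                   "Method": "Proposed",
                   "noise added [sigma %]": np.repeat([0, 2, 4, 6], 25)})
df = df.groupby(["Method", "noise added [sigma %]"]).describe()
df = df.unstack(-1)                   # statistics become columns
df.columns = df.columns.droplevel(0)  # drop the "Errors" level
print(df[["25%", "50%", "75%"]])      # quartiles drawn as the error tube
```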
- plt.fill_between(np.array(noise_levels) * 100., - map(p25, e.data), - map(p75, e.data), - edgecolor=e.color, facecolor=e.color, - alpha=e.alpha) - h, = plt.plot(np.array(noise_levels) * 100., - map(m, e.data), color=e.color, - label=e.name) - legend_handles.append(h) - minor_ticks = np.arange(0, 15, 5) - plt.gca().set_xticks(minor_ticks, minor=True) - - plt.grid() - # plt.legend(handles=legend_handles) - # plt.title("dependency on noise") - plt.xlabel("noise added [sigma %]") - plt.ylabel("absolute error [%]") + df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) + df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) + + # for vhb we only evaluate the proposed method since the linear + # beer-lambert is not applicable + evaluation_setups = [EvaluationStruct("Proposed", rf, + sns.xkcd_rgb["medium green"], + 0.5)] + standard_plotting(df_train, noise_levels, df_test, noise_levels, + evaluation_setups=evaluation_setups, + preprocessing=self.preprocess_vhb, + xytext_position=(2, 3)) plt.ylim((0, 4)) - plt.xlim((0, 15)) + + # finally save the figure plt.savefig(self.output().path, dpi=500, bbox_inches='tight') - -class DomainNoisePlot(luigi.Task): +class NoisePlot(luigi.Task): + which_train = luigi.Parameter() + which_test = luigi.Parameter() def requires(self): - return tasks_mc.CameraBatch("ipcai_colon_muscle_train"), \ - tasks_mc.CameraBatch("ipcai_generic") + return tasks_mc.CameraBatch(self.which_train), \ + tasks_mc.CameraBatch(self.which_test) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, - sp.RESULTS_FOLDER, - sp.FINALS_FOLDER, - "noise_da_plot_.png")) + sp.RESULTS_FOLDER, + sp.FINALS_FOLDER, + "noise_plot_train_" + self.which_train + + "_test_" + self.which_test + ".png")) def run(self): # get data - f = open(self.input()[0].path, "r") - batch_train = pickle.load(f) - f = open(self.input()[1].path, "r") - batch_test = pickle.load(f) - - evaluation_setups = [ - EvaluationStruct("Linear Beer-Lambert", - LinearSaO2Unmixing(), [], "red", 0.25) - , EvaluationStruct("Proposed", rf, [], "green", 0.5) - ] - # iterate over different levels of noise - noise_levels = np.arange(0.00, 0.17, 0.02) - for w in noise_levels: - # setup testing function - X_test, y_test = preprocess(batch_test, w_percent=w) - # extract noisy data - X_train, y_train = preprocess(batch_train, w_percent=w) - for e in evaluation_setups: - lr = e.regressor - lr.fit(X_train, y_train) - y_pred = lr.predict(X_test) - # save results - errors = np.abs(y_pred - y_test) - e.data.append(errors) - # make a nice plot for results - plt.figure() - legend_handles = [] - for e in evaluation_setups: - p25 = lambda x: np.percentile(x, 25) * 100. - p75 = lambda x: np.percentile(x, 75) * 100. - m = lambda x: np.median(x) * 100. 
- plt.fill_between(np.array(noise_levels) * 100., - map(p25, e.data), - map(p75, e.data), - edgecolor=e.color, facecolor=e.color, - alpha=e.alpha) - h, = plt.plot(np.array(noise_levels) * 100., - map(m, e.data), color=e.color, - label=e.name) - legend_handles.append(h) - if e.name is "Proposed": - median0 = m(e.data[0]) - minor_ticks = np.arange(0, 15, 5) - plt.gca().set_xticks(minor_ticks, minor=True) - plt.gca().set_yticks(minor_ticks, minor=True) - - plt.gca().annotate('error: ' + - "{0:.2f}".format( - np.median(evaluation_setups[-1].data[0]) - * 100) + "%", - xy=(0, median0), xytext=(5, 12), size=15, - va="center", ha="center", bbox=dict( - boxstyle="round4", fc="g", alpha=0.5), - arrowprops=dict(arrowstyle="-|>", - connectionstyle="arc3,rad=-0.2", - fc="w"), - ) - plt.grid() - plt.legend(handles=legend_handles) - - # plt.title("dependency on noise") - plt.xlabel("noise added [sigma %]") - plt.ylabel("absolute error [%]") - plt.ylim((0, 20)) - plt.xlim((0, 15)) - plt.savefig(self.output().path, dpi=500, - bbox_inches='tight') + df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) + df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) + standard_plotting(df_train, noise_levels, df_test, noise_levels) - -class NoisePlots(luigi.Task): - - which = luigi.Parameter() - - def requires(self): - return tasks_mc.CameraBatch("ipcai_" + self.which + "_train"), \ - tasks_mc.CameraBatch("ipcai_" + self.which + "_test") - - def output(self): - return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, - sp.RESULTS_FOLDER, - sp.FINALS_FOLDER, - "noise_plot_" + self.which + ".png")) - - def run(self): - # get data - f = open(self.input()[0].path, "r") - batch_train = pickle.load(f) - f = open(self.input()[1].path, "r") - batch_test = pickle.load(f) - - evaluation_setups = [ - EvaluationStruct("Linear Beer-Lambert", - LinearSaO2Unmixing(), [], "red", 0.25) - , EvaluationStruct("Proposed", rf, [], "green", 0.5) - ] - # iterate over different levels of noise - noise_levels = np.arange(0.00, 0.17, 0.02) - for w in noise_levels: - # setup testing function - X_test, y_test = preprocess(batch_test, w_percent=w) - # extract noisy data - X_train, y_train = preprocess(batch_train, w_percent=w) - for e in evaluation_setups: - lr = e.regressor - lr.fit(X_train, y_train) - y_pred = lr.predict(X_test) - # save results - errors = np.abs(y_pred - y_test) - e.data.append(errors) - # make a nice plot for results - plt.figure() - legend_handles = [] - for e in evaluation_setups: - p25 = lambda x: np.percentile(x, 25) * 100. - p75 = lambda x: np.percentile(x, 75) * 100. - m = lambda x: np.median(x) * 100. 
- plt.fill_between(np.array(noise_levels) * 100., - map(p25, e.data), - map(p75, e.data), - edgecolor=e.color, facecolor=e.color, - alpha=e.alpha) - h, = plt.plot(np.array(noise_levels) * 100., - map(m, e.data), color=e.color, - label=e.name) - legend_handles.append(h) - if e.name is "Proposed": - median0 = m(e.data[0]) - minor_ticks = np.arange(0, 15, 5) - plt.gca().set_xticks(minor_ticks, minor=True) - plt.gca().set_yticks(minor_ticks, minor=True) - - plt.gca().annotate('error: ' + - "{0:.2f}".format( - np.median(evaluation_setups[-1].data[0]) - * 100) + "%", - xy=(0, median0), xytext=(5, 10), size=15, va="center", - ha="center", bbox=dict(boxstyle="round4", - fc="g", alpha=0.5), - arrowprops=dict(arrowstyle="-|>", - connectionstyle="arc3,rad=-0.2", - fc="w"), - ) - plt.grid() - plt.legend(handles=legend_handles) - # plt.title("dependency on noise") - plt.xlabel("noise added [sigma %]") - plt.ylabel("absolute error [%]") - plt.ylim((0, 20)) - plt.xlim((0, 15)) + # finally save the figure plt.savefig(self.output().path, dpi=500, bbox_inches='tight') - -class WeightsCalculation(luigi.Task): - source_domain = luigi.Parameter() - target_domain = luigi.Parameter() +class WrongNoisePlot(luigi.Task): + which_train = luigi.Parameter() + which_test = luigi.Parameter() def requires(self): - return tasks_mc.CameraBatch(self.source_domain), \ - tasks_mc.CameraBatch(self.target_domain) + return tasks_mc.CameraBatch(self.which_train), \ + tasks_mc.CameraBatch(self.which_test) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, - sp.RESULTS_FOLDER, - sp.FINALS_FOLDER, - "weights_" + self.source_domain + "to" + - self.target_domain + ".npy")) + sp.RESULTS_FOLDER, + sp.FINALS_FOLDER, + "wrong_noise_plot_train_" + self.which_train + + "_test_" + self.which_test + ".png")) def run(self): # get data - f = open(self.input()[0].path, "r") - batch_train = pickle.load(f) - f = open(self.input()[1].path, "r") - batch_da_train = pickle.load(f) - X_train, y_train = preprocess(batch_train, w_percent=w_standard, - magnification=None) - X_da_train, y_da_train = preprocess(batch_da_train, - w_percent=w_standard, - magnification=None) - X_ws, y_ws = preprocess(batch_train, w_percent=w_standard) - weights = estimate_weights_random_forests(X_train, X_da_train, X_ws) - - - no_zero = ~(batch_train.reflectances <= 0).any(axis=1) - bvfs = batch_train.layers[0][no_zero, 0] - saO2s = batch_train.layers[0][no_zero, 1] - a_mies = batch_train.layers[0][no_zero, 2] - ds = batch_train.layers[0][no_zero, 4] - - correct_high_weights = np.logical_and(bvfs < 0.10, ds < 1000 * 10 ** -6, ds > 600 * 10 ** -6) - - plt.figure() - plt.hist(bvfs, weights=weights, alpha=0.5, label="found_bvfs") - plt.hist(bvfs[correct_high_weights], alpha=0.5, label="true_bvfs") - plt.title("weight distr bvf") - plt.legend(loc='upper right') - plt.figure() - plt.hist(saO2s, weights=weights, alpha=0.5, label="found_saO2s") - plt.hist(saO2s[correct_high_weights], alpha=0.5, label="true_saO2s") - plt.title("weight distr saO2") - plt.legend(loc='upper right') - plt.figure() - plt.hist(a_mies, weights=weights, alpha=0.5, label="found_a_mies") - plt.hist(a_mies[correct_high_weights], alpha=0.5, label="true_a_mies") - plt.title("weight distr a_mie") - plt.legend(loc='upper right') - plt.figure() - plt.hist(ds, weights=weights, alpha=0.5, label="found_ds") - plt.hist(ds[correct_high_weights], alpha=0.5, label="true_ds") - plt.title("weight distr d") - plt.legend(loc='upper right') - plt.show() - - np.save(self.output().path, weights) - - -class 
NewDAPlot(luigi.Task): - source_train_set = luigi.Parameter() - target_train_set = luigi.Parameter() - target_test_set = luigi.Parameter() + df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) + df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) - def requires(self): - return tasks_mc.CameraBatch(self.source_train_set), \ - tasks_mc.CameraBatch(self.target_test_set), \ - tasks_mc.CameraBatch(self.target_train_set), \ - WeightsCalculation(self.source_train_set, self.target_train_set) - - def output(self): - return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, - sp.RESULTS_FOLDER, - sp.FINALS_FOLDER, - "new_da.png")) + # do same as in NoisePlot but with standard noise input + standard_plotting(df_train, np.ones_like(noise_levels) * w_standard, + df_test, noise_levels) - def run(self): - # get data - f = open(self.input()[0].path, "r") - batch_train = pickle.load(f) - f = open(self.input()[1].path, "r") - batch_test = pickle.load(f) - f = open(self.input()[2].path, "r") - batch_target_train = pickle.load(f) - f = open(self.input()[3].path, "r") - weights = np.load(f) - - evaluation_setups = [ - EvaluationStruct("Proposed no DS", - rf, [], "red", 0.25) - , EvaluationStruct("Proposed", rf, [], "yellow", 0.5) - , EvaluationStruct("Proposed DA", rf, [], "green", 0.5) - , EvaluationStruct("Proposed Fake", rf, [], "black", 0.1) - ] - - X_train, y_train = preprocess(batch_train, w_percent=w_standard) - X_da_test, y_da_test = preprocess(batch_test, w_percent=w_standard) - X_target_train, y_target_train = preprocess(batch_target_train, - w_percent=w_standard) -# X_train_sampled, y_train_sampled, weights_sampled = \ -# resample(X_train, y_train, weights, 8000) - - - f, axarr = plt.subplots(X_train.shape[1], 1) - for i in range(X_train.shape[1]): - y, binEdges = np.histogram(X_train[:, i], weights=weights) - bincenters = 0.5 * (binEdges[1:] + binEdges[:-1]) - axarr[i].plot(bincenters, y, '-+', label="adapted") - - y, binEdges = np.histogram(X_train[:, i]) - bincenters = 0.5 * (binEdges[1:] + binEdges[:-1]) - axarr[i].plot(bincenters, y, '-o', label="original") - - y, binEdges = np.histogram(X_da_test[:, i], - weights=np.ones(X_da_test.shape[0]) * - X_train.shape[0] / X_da_test.shape[0]) - bincenters = 0.5 * (binEdges[1:] + binEdges[:-1]) - axarr[i].plot(bincenters, y, '-*', label="searched") -# -# y, binEdges = np.histogram(X_train_sampled[:, i], -# weights=np.ones(X_train_sampled.shape[0]) * -# X_train.shape[0] / X_train_sampled.shape[0]) -# bincenters = 0.5 * (binEdges[1:] + binEdges[:-1]) -# axarr[i].plot(bincenters, y, '-^', label="sampled") - - axarr[i].xaxis.set_visible(False) - axarr[i].yaxis.set_visible(False) - axarr[i].set_title("feature " + str(i) + " distribution") - - plt.legend(loc="best") - # plt.tight_layout(h_pad=.5) - plt.show() - - # iterate over different levels of noise - noise_levels = np.arange(0.00, 0.17, 0.02) - for w in noise_levels: - # setup testing function - X_test, y_test = preprocess(batch_test, w_percent=w) - - # extract noisy data - X_train, y_train = preprocess(batch_train, w_percent=w) - - X_fake_train, y_fake_train = preprocess2(batch_train, - w_percent=w) - correct_bvf_train = y_fake_train[:, 0] < 0.1 - X_fake_train = X_fake_train[correct_bvf_train, :] - y_fake_train = y_fake_train[correct_bvf_train, 1] - - for e in evaluation_setups: - lr = e.regressor - if "DA" in e.name: - lr.fit(X_train, y_train, weights) - elif "DS" in e.name: - lr.fit(X_target_train, y_target_train) - elif "Fake" in e.name: - lr.fit(X_fake_train, y_fake_train) - else: - 
lr.fit(X_train, y_train) - y_pred = lr.predict(X_test) - # save results - errors = np.abs(y_pred - y_test) - e.data.append(errors) - # make a nice plot for results - plt.figure() - legend_handles = [] - for e in evaluation_setups: - p25 = lambda x: np.percentile(x, 25) * 100. - p75 = lambda x: np.percentile(x, 75) * 100. - m = lambda x: np.median(x) * 100. - plt.fill_between(np.array(noise_levels) * 100., - map(p25, e.data), - map(p75, e.data), - edgecolor=e.color, facecolor=e.color, - alpha=e.alpha) - h, = plt.plot(np.array(noise_levels) * 100., - map(m, e.data), color=e.color, - label=e.name) - legend_handles.append(h) - median0 = m(e.data[0]) - minor_ticks = np.arange(0, 15, 5) - plt.gca().set_xticks(minor_ticks, minor=True) - plt.gca().set_yticks(minor_ticks, minor=True) - - plt.gca().annotate('error: ' + - "{0:.2f}".format( - np.median(evaluation_setups[-1].data[0]) - * 100) + "%", - xy=(0, median0), xytext=(5, 10), size=15, va="center", - ha="center", bbox=dict(boxstyle="round4", - fc="g", alpha=0.5), - arrowprops=dict(arrowstyle="-|>", - connectionstyle="arc3,rad=-0.2", - fc="w"), - ) - plt.grid() - plt.legend(handles=legend_handles) - - # plt.title("dependency on noise") - plt.xlabel("noise added [sigma %]") - plt.ylabel("absolute error [%]") - plt.ylim((0, 20)) - plt.xlim((0, 15)) - plt.savefig(self.output().path + "temp.png", dpi=500, + # finally save the figure + plt.savefig(self.output().path, dpi=500, bbox_inches='tight') +def evaluate_data(df_train, w_train, df_test, w_test, + evaluation_setups=None, preprocessing=None): + """ Our standard method to evaluate the data. It will fill a DataFrame df + which saves the errors for each evaluated setup""" + if evaluation_setups is None: + evaluation_setups = standard_evaluation_setups + if preprocessing is None: + preprocessing = preprocess + + # create a new dataframe which will hold all the generated errors + df = pd.DataFrame() + for one_w_train, one_w_test in zip(w_train, w_test): + # setup testing function + X_test, y_test = preprocessing(df_test, w_percent=one_w_test) + # extract noisy data + X_train, y_train = preprocessing(df_train, w_percent=one_w_train) + for e in evaluation_setups: + regressor = e.regressor + regressor.fit(X_train, y_train) + y_pred = regressor.predict(X_test) + # save results to a dataframe + errors = np.abs(y_pred - y_test) + errors = errors.reshape(len(errors), 1) + current_df = DataFrame(errors * 100, + columns=["Errors"]) + current_df["Method"] = e.name + current_df["noise added [sigma %]"] = int(one_w_test * 100) + df = pd.concat([df, current_df], ignore_index=True) + + return df + + +def standard_plotting(df_train, w_train, df_test, w_test, + evaluation_setups=None, preprocessing=None, + xytext_position=None): + if evaluation_setups is None: + evaluation_setups = standard_evaluation_setups + if xytext_position is None: + xytext_position = (2, 15) + + plt.figure() + + # evaluate the data which is used to generate the plot + df = evaluate_data(df_train, w_train, df_test, w_test, + evaluation_setups, preprocessing) + + # group it by method and noise level and get description on the errors + df = df.groupby(['Method', 'noise added [sigma %]']).describe() + # get the error description in the rows: + df = df.unstack(-1) + # get rid of multiindex by dropping "Error" level + df.columns = df.columns.droplevel(0) + + # iterate over methods to plot linegraphs with error tube + # probably this can be done nicer, but no idea how exactly + for e in evaluation_setups: + df_method = df.loc[e.name] + 
plt.plot(df_method.index, df_method["50%"], + color=e.color, label=e.name) + plt.fill_between(df_method.index, + df_method["25%"], df_method["75%"], + facecolor=e.color, edgecolor=e.color, + alpha=e.alpha) + plt.xlabel("noise added [sigma %]") + plt.ylabel("absolute error [%]") + # add annotation: + median_proposed = df.T["Proposed", 0].loc["50%"] + plt.gca().annotate('error: ' + + "{0:.1f}".format(median_proposed) + "%", + xy=(0, median_proposed), xytext=xytext_position, + va="center", ha="center", + bbox=dict(boxstyle="round4", + fc=sns.xkcd_rgb["medium green"], + alpha=0.5), + arrowprops=dict(arrowstyle="-|>", + connectionstyle="arc3,rad=-0.2", + fc="w")) + # tidy up the plot + plt.ylim((0, 20)) + plt.xlim((0, 15)) + plt.legend() if __name__ == '__main__': - # root folder there the data lies + logging.basicConfig(filename='log/in_silico_plots_' + + str(datetime.datetime.now()) + + '.log', level=logging.INFO) + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + logger = logging.getLogger() + logger.addHandler(ch) luigi.interface.setup_interface_logging() + sch = luigi.scheduler.CentralPlannerScheduler() w = luigi.worker.Worker(scheduler=sch) - w.add(WrongNoisePlots(which="colon_muscle")) - # w.add(WrongNoisePlots(which="generic")) - w.add(TrainingSamplePlots(which="colon_muscle")) - w.add(TrainingSamplePlots(which="generic")) - w.add(NoisePlots(which="colon_muscle")) - w.add(DomainNoisePlot()) - w.add(NoisePlotsBVF(which="colon_muscle")) - # w.add(NoisePlots(which="generic")) - w.add(NewDAPlot(source_train_set="ipcai_less_generic", - target_train_set="ipcai_colon_muscle_train", - target_test_set="ipcai_colon_muscle_test")) + w.add(TrainingSamplePlot(which_train="ipcai_revision_colon_train", + which_test="ipcai_revision_colon_test")) + w.add(NoisePlot(which_train="ipcai_revision_colon_train", + which_test="ipcai_revision_colon_test")) + w.add(WrongNoisePlot(which_train="ipcai_revision_colon_train", + which_test="ipcai_revision_colon_test")) + # Set a different testing domain to evaluate domain sensitivity + w.add(NoisePlot(which_train="ipcai_revision_colon_train", + which_test="ipcai_revision_generic")) + w.add(VhbPlot(which_train="ipcai_revision_colon_train", + which_test="ipcai_revision_colon_test")) w.run() -
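For reference, the new ColonMuscleMeanScatteringBatch introduced in this change is reached through its factory like the other batches. A short usage sketch, run from the iMC directory; the sample count is arbitrary:

```python
from mc.factories import ColonMuscleMeanScatteringFactory

factory = ColonMuscleMeanScatteringFactory()
batch = factory.create_batch_to_simulate()
batch.create_parameters(nr_samples=1000)
# one row per simulated sample, columns indexed by (layer, parameter);
# b_mie is fixed to the soft-tissue mean of 1.286 in this batch
print(batch.df["layer0"].head())
```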
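preprocess and preprocess2 come from regression.preprocessing and are not part of this diff. For exercising evaluate_data in isolation, a rough stand-in that only mirrors how they are called here might look as follows — the noise model and band normalization are assumptions for illustration, not the project's actual implementation:

```python
import numpy as np

def preprocess_standin(df, nr_samples=None, w_percent=None,
                       magnification=None, bands_to_sortout=None):
    """Hypothetical substitute for regression.preprocessing.preprocess."""
    X = df["reflectances"].values.astype(float)
    y = df["layer0", "sao2"].values.astype(float)
    if nr_samples is not None:          # optionally subsample
        X, y = X[:nr_samples], y[:nr_samples]
    if w_percent is not None and w_percent > 0.:
        # assumed noise model: multiplicative Gaussian of relative width w
        X = X * np.random.normal(1., w_percent, size=X.shape)
    X = X / X.sum(axis=1, keepdims=True)  # assumed band normalization
    return X, y
```

Passing this as evaluate_data(df_train, noise_levels, df_test, noise_levels, preprocessing=preprocess_standin) then runs the evaluation pipeline without the real preprocessing module.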