diff --git a/Modules/Biophotonics/python/iMC/mc/batches.py b/Modules/Biophotonics/python/iMC/mc/batches.py index 0a8b26a0f0..58693ae906 100644 --- a/Modules/Biophotonics/python/iMC/mc/batches.py +++ b/Modules/Biophotonics/python/iMC/mc/batches.py @@ -1,227 +1,245 @@ ''' Created on Oct 15, 2015 @author: wirkert ''' import numpy as np from pandas import DataFrame import pandas as pd class AbstractBatch(object): """summarizes a batch of simulated mc spectra""" def __init__(self): self._nr_layers = 0 # internally keeps track of number of layers my_index = pd.MultiIndex(levels=[[], []], labels=[[], []]) self.df = DataFrame(columns=my_index) def create_parameters(self, nr_samples): """create the parameters for the batch, the simulation has to create the resulting reflectances""" pass def nr_elements(self): return self.df.shape[0] - - class GenericBatch(AbstractBatch): """generic 3-layer batch with each layer having the same oxygenation """ def __init__(self): super(GenericBatch, self).__init__() def append_one_layer(self, saO2, nr_samples): """helper function to create parameters for one layer""" # scales data to lie between maxi and mini instead of 0 and 1 scale = lambda x, mini, maxi: x * (maxi - mini) + mini # shortcut to random generator gen = np.random.random_sample gen_n = np.random.normal # create layer elements self.df["layer" + str(self._nr_layers), "vhb"] = \ scale(gen(nr_samples), 0, 1.) self.df["layer" + str(self._nr_layers), "sao2"] = \ saO2 self.df["layer" + str(self._nr_layers), "a_mie"] = \ np.clip(gen_n(loc=18.9, scale=10.2, size=nr_samples), 0.1, np.inf) * 100 # to 1/m self.df["layer" + str(self._nr_layers), "b_mie"] = \ np.clip(gen_n(loc=1.286, scale=0.521, size=nr_samples), 0, np.inf) self.df["layer" + str(self._nr_layers), "d"] = \ scale(gen(nr_samples), 0, 1.) self.df["layer" + str(self._nr_layers), "n"] = \ scale(gen(nr_samples), 1.33, 1.54) self.df["layer" + str(self._nr_layers), "g"] = \ scale(gen(nr_samples), 0.8, 0.95) self._nr_layers += 1 def create_parameters(self, nr_samples): """Create generic three layer batch with a total diameter of 2mm. saO2 is the same in all layers, but all other parameters vary randomly within each layer""" saO2 = np.random.random_sample(size=nr_samples) # create three layers with random samples self.append_one_layer(saO2, nr_samples) self.append_one_layer(saO2, nr_samples) self.append_one_layer(saO2, nr_samples) # "normalize" d to 2mm # first extract all layers from df self.df layers = [l for l in self.df.columns.levels[0] if "layer" in l] # summarize all ds sum_d = 0 for l in layers: sum_d += self.df[l, "d"] for l in layers: self.df[l, "d"] = self.df[l, "d"] / sum_d * 2000. * 10 ** -6 self.df[l, "d"] = np.clip(self.df[l, "d"], 25 * 10 ** -6, np.inf) class LessGenericBatch(AbstractBatch): """less generic three layer batch. This only varies blood volume fraction w.r.t. the ColonMuscleBatch. 
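The append_one_layer pattern above packs three sampling idioms into a few lines: uniform draws rescaled to a target range, normal draws clipped to physically plausible bounds, and storage under two-level (layer, parameter) columns. A minimal self-contained sketch of the same idioms on toy values — the `demo` frame and the chosen parameter names are illustrative only, and the MultiIndex constructor matches the pandas-0.17-era API this module uses:

```python
import numpy as np
import pandas as pd

nr_samples = 5
# rescale uniform [0, 1) samples to [mini, maxi), as scale() does above
scale = lambda x, mini, maxi: x * (maxi - mini) + mini

demo = pd.DataFrame(columns=pd.MultiIndex(levels=[[], []], labels=[[], []]))
# refractive index n: uniform draw in [1.33, 1.54)
demo["layer0", "n"] = scale(np.random.random_sample(nr_samples), 1.33, 1.54)
# scattering amplitude a_mie: normal draw, clipped to stay positive, in 1/m
demo["layer0", "a_mie"] = np.clip(
    np.random.normal(loc=18.9, scale=10.2, size=nr_samples),
    0.1, np.inf) * 100
print(demo)
```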
Let's see if DA works in this case.""" def __init__(self): super(LessGenericBatch, self).__init__() def append_one_layer(self, saO2, n, d_ranges, nr_samples): """helper function to create parameters for one layer""" # scales data to lie between maxi and mini instead of 0 and 1 scale = lambda x, mini, maxi: x * (maxi - mini) + mini # shortcut to random generator gen = np.random.random_sample # create as generic batch super(LessGenericBatch, self).append_one_layer(saO2, nr_samples) self._nr_layers -= 1 # we're not finished # but some changes in specific layer elements # more specific layer thicknesses self.df["layer" + str(self._nr_layers), "d"] = \ scale(gen(nr_samples), d_ranges[0], d_ranges[1]) # more specific n self.df["layer" + str(self._nr_layers), "n"] = \ n self._nr_layers += 1 def create_parameters(self, nr_samples): """Create generic three layer batch with a total diameter of 2mm. saO2 is the same in all layers, but all other parameters vary randomly within each layer""" saO2 = np.random.random_sample(size=nr_samples) n = np.ones_like(saO2) # create three layers with random samples # muscle self.append_one_layer(saO2, n * 1.36, (600.*10 ** -6, 1010.*10 ** -6), nr_samples) # submucosa self.append_one_layer(saO2, n * 1.36, (415.*10 ** -6, 847.*10 ** -6), nr_samples) # mucosa self.append_one_layer(saO2, n * 1.38, (395.*10 ** -6, 603.*10 ** -6), nr_samples) - - class ColonMuscleBatch(GenericBatch): """three layer batch simulating colonic tissue""" def __init__(self): super(ColonMuscleBatch, self).__init__() def append_one_layer(self, saO2, n, d_ranges, nr_samples): """helper function to create parameters for one layer""" # scales data to lie between maxi and mini instead of 0 and 1 scale = lambda x, mini, maxi: x * (maxi - mini) + mini # shortcut to random generator gen = np.random.random_sample # create as generic batch super(ColonMuscleBatch, self).append_one_layer(saO2, nr_samples) self._nr_layers -= 1 # we're not finished # but some changes in specific layer elements # less blood self.df["layer" + str(self._nr_layers), "vhb"] = \ scale(gen(nr_samples), 0, 0.1) # more specific layer thicknesses self.df["layer" + str(self._nr_layers), "d"] = \ scale(gen(nr_samples), d_ranges[0], d_ranges[1]) # more specific n self.df["layer" + str(self._nr_layers), "n"] = \ n self._nr_layers += 1 def create_parameters(self, nr_samples): """Create generic three layer batch with a total diameter of 2mm. saO2 is the same in all layers, but all other parameters vary randomly within each layer""" saO2 = np.random.random_sample(size=nr_samples) n = np.ones_like(saO2) # create three layers with random samples # muscle self.append_one_layer(saO2, n * 1.36, (600.*10 ** -6, 1010.*10 ** -6), nr_samples) # submucosa self.append_one_layer(saO2, n * 1.36, (415.*10 ** -6, 847.*10 ** -6), nr_samples) # mucosa self.append_one_layer(saO2, n * 1.38, (395.*10 ** -6, 603.*10 ** -6), nr_samples) + +class ColonMuscleMeanScatteringBatch(ColonMuscleBatch): + """three layer batch simulating colonic tissue""" + + def __init__(self): + super(ColonMuscleMeanScatteringBatch, self).__init__() + + def append_one_layer(self, saO2, n, d_ranges, nr_samples): + """helper function to create parameters for one layer""" + + # create as generic batch + super(ColonMuscleMeanScatteringBatch, self).append_one_layer(saO2, + n, + d_ranges, + nr_samples) + self._nr_layers -= 1 # we're not finished + + # restrict exponential scattering to mean value for soft tissue. 
+ self.df["layer" + str(self._nr_layers), "b_mie"] = 1.286 + + self._nr_layers += 1 + # class VisualizationBatch(AbstractBatch): # """batch used for visualization of different spectra. Feel free to adapt # for your visualization purposes.""" # # def __init__(self): # super(VisualizationBatch, self).__init__() # # self._wavelengths = np.arange(470, 680, 10) * 10 ** -9 # # def append_one_layer(self, bvf, saO2, a_mie, a_ray, d, n, g, nr_samples): # """helper function to create parameters for one layer""" # samples = np.zeros((nr_samples, 7)) # # scale samples to correct ranges # samples[:, 0] = bvf # samples[:, 1] = saO2 # samples[:, 2] = a_mie # samples[:, 3] = a_ray # # d will be normalized later to 2mm total depth # samples[:, 4] = d # samples[:, 5] = n # samples[:, 6] = g # # append as last layer # self.layers.append(samples) # # def create_parameters(self, nr_samples): # # bvf = np.linspace(0.0, .1, nr_samples) # # saO2 = np.linspace(0., 1., nr_samples) # # d = np.linspace(175, 735, nr_samples) * 10 ** -6 # # a_mie = np.linspace(5., 30., nr_samples) * 100 # # a_ray = np.linspace(0., 60., nr_samples) * 100 # # n = np.linspace(1.33, 1.54, nr_samples) # # g = np.linspace(0, 0.95, nr_samples) # # create three layers with random samples # self.append_one_layer(0.02, 0.1, 30.*100., 0., 500 * 10 ** -6, # 1.38, 0.9, # nr_samples) # self.append_one_layer(0.04, 0.7, 5.0 * 100, 0.*100, 500 * 10 ** -6, # 1.36, 0.9, # nr_samples) # self.append_one_layer(0.04, 0.7, 5.0 * 100, 0.*100, 500 * 10 ** -6, # 1.36, 0.9, # nr_samples) diff --git a/Modules/Biophotonics/python/iMC/mc/dfmanipulations.py b/Modules/Biophotonics/python/iMC/mc/dfmanipulations.py index a97696e8ab..d7c214578d 100644 --- a/Modules/Biophotonics/python/iMC/mc/dfmanipulations.py +++ b/Modules/Biophotonics/python/iMC/mc/dfmanipulations.py @@ -1,52 +1,39 @@ ''' Created on Oct 19, 2015 @author: wirkert ''' -import numpy as np from scipy.interpolate import interp1d +import pandas as pd -def fold_by_sliding_average(batch, window_size): +def fold_by_sliding_average(df, window_size): """take a batch and apply a sliding average with given window size to the reflectances. window_size is elements to the left and to the right. There will be some boundary effect on the edges.""" - sliding_average = lambda x: np.convolve(x, - np.ones(window_size, dtype=float) / - float(window_size), mode="same") - batch.reflectances = np.apply_along_axis(sliding_average, - axis=1, - arr=batch.reflectances) + # next line does the folding. + df.reflectances = pd.rolling_mean(df.reflectances.T, window_size, + center=True).T + # let's get rid of NaN columns which are created at the boundaries + df.dropna(axis="columns", inplace=True) -def interpolate_wavelengths(batch, new_wavelengths): +def interpolate_wavelengths(df, new_wavelengths): """ interpolate image data to fit new_wavelengths. Current implementation performs simple linear interpolation. Neither existing nor new _wavelengths need to be sorted. 
""" - interpolator = interp1d(batch._wavelengths, - batch.reflectances, assume_sorted=False, + # build an interpolator using the inormation provided by the dataframes + # reflectance column + interpolator = interp1d(df.reflectances.columns.astype(float), + df.reflectances.as_matrix(), assume_sorted=False, bounds_error=False) - batch.reflectances = interpolator(new_wavelengths) - batch._wavelengths = new_wavelengths + # use this to create new reflectances + new_reflectances = interpolator(new_wavelengths) + # build a new dataframe out of this information and set the original df + # to the new information. This seems hacky, can't it be done easier? + df.drop(df["reflectances"].columns, axis=1, level=1, inplace=True) + for i, nw in enumerate(new_wavelengths): + df["reflectances", nw] = new_reflectances[:, i] - -def sortout_bands(batch, bands_to_sortout=None): - """ TODO: Documentation and test """ - if bands_to_sortout is not None: - # get rid of sorted out bands - batch.reflectances = np.delete(batch.reflectances, - bands_to_sortout, axis=1) - batch._wavelengths = np.delete(batch._wavelengths, - bands_to_sortout) - - -def select_n(batch, n): - """ randomly select n elements from batch. TODO: Test """ - perms = np.random.permutation(batch.reflectances.shape[0]) - first_n_perms = perms[0:n] - batch.reflectances = batch.reflectances[first_n_perms, :] - for i, l in enumerate(batch.layers): - batch.layers[i] = np.atleast_2d(l)[first_n_perms, :] - return batch diff --git a/Modules/Biophotonics/python/iMC/mc/factories.py b/Modules/Biophotonics/python/iMC/mc/factories.py index 3e91ca64d6..822b0e3532 100644 --- a/Modules/Biophotonics/python/iMC/mc/factories.py +++ b/Modules/Biophotonics/python/iMC/mc/factories.py @@ -1,74 +1,88 @@ ''' Created on Oct 15, 2015 @author: wirkert ''' from mc.tissuemodels import AbstractTissue, GenericTissue -from mc.batches import AbstractBatch, GenericBatch, ColonMuscleBatch +from mc.batches import AbstractBatch, GenericBatch, LessGenericBatch +from mc.batches import ColonMuscleBatch, ColonMuscleMeanScatteringBatch class AbstractMcFactory(object): ''' Monte Carlo Factory. 
Will create fitting models and batches, dependent on your task ''' def create_tissue_model(self): return AbstractTissue() def create_batch_to_simulate(self): return AbstractBatch() def __init__(self): ''' Constructor ''' + class GenericMcFactory(AbstractMcFactory): def create_tissue_model(self): return GenericTissue() def create_batch_to_simulate(self): return GenericBatch() def __init__(self): ''' Constructor ''' class LessGenericMcFactory(GenericMcFactory): def create_batch_to_simulate(self): return LessGenericBatch() def __init__(self): ''' Constructor ''' class ColonMuscleMcFactory(GenericMcFactory): def create_batch_to_simulate(self): return ColonMuscleBatch() def __init__(self): ''' Constructor ''' + +class ColonMuscleMeanScatteringFactory(GenericMcFactory): + + def create_batch_to_simulate(self): + return ColonMuscleMeanScatteringBatch() + + def __init__(self): + ''' + Constructor + ''' + + class VisualizationMcFactory(AbstractMcFactory): def create_tissue_model(self): return GenericTissue() def create_batch_to_simulate(self): return VisualizationBatch() def __init__(self): ''' Constructor ''' diff --git a/Modules/Biophotonics/python/iMC/script_FAP_DA_analyzer.py b/Modules/Biophotonics/python/iMC/script_FAP_DA_analyzer.py index 9369de787a..df8f60058e 100644 --- a/Modules/Biophotonics/python/iMC/script_FAP_DA_analyzer.py +++ b/Modules/Biophotonics/python/iMC/script_FAP_DA_analyzer.py @@ -1,89 +1,89 @@ # -*- coding: utf-8 -*- """ Created on Fri Aug 14 11:09:18 2015 @author: wirkert """ import os import luigi import tasks_regression as rt import tasks_preprocessing as ppt import tasks_analyze as at import scriptpaths as sp sp.FINALS_FOLDER = "FAP_DA" def resort_wavelengths(msi): """somehow the wavelength sorting got mixed up. This function fixes the mess.""" collapsed_image = imgmani.collapse_image(msi.get_image()) collapsed_image = collapsed_image[:, [2, 1, 0, 3, 4, 5, 6, 7]] msi.set_image(np.reshape(collapsed_image, msi.get_image().shape)) # rebind this method since in this recoding the wavelengths got messed up sp.resort_wavelengths = resort_wavelengths class EstimateTissueParametersTaskDA(at.EstimateTissueParametersTask): def requires(self): return ppt.PreprocessMSI(imageName=self.imageName), \ rt.TrainForestDA(imageName=self.imageName, file_name_prefix_training= - self.batch_prefix) + self.df_prefix) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, "processed", sp.FINALS_FOLDER, self.imageName + "_" + - self.batch_prefix + + self.df_prefix + "estimate.nrrd")) class CreateNiceParametricImagesTaskDA(at.CreateNiceParametricImagesTask): def requires(self): return EstimateTissueParametersTaskDA(imageName=self.imageName, file_name_prefix_training= - self.batch_prefix), \ + self.df_prefix), \ ppt.CorrectImagingSetupTask(imageName=self.imageName), \ ppt.SegmentMSI(imageName=self.imageName), \ ppt.PreprocessMSI(imageName=self.imageName), \ at.ReprojectReflectancesTask(imageName=self.imageName, file_name_prefix_training= - self.batch_prefix) + self.df_prefix) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, self.imageName + "_" + - self.batch_prefix + + self.df_prefix + "_summary.png")) if __name__ == '__main__': # root folder there the data lies luigi.interface.setup_interface_logging() sch = luigi.scheduler.CentralPlannerScheduler() w = luigi.worker.Worker(scheduler=sch) # run over all subfolders (non-recursively) # to collect the data and generate the results for root, dirs, files in 
os.walk(os.path.join(sp.ROOT_FOLDER, sp.DATA_FOLDER)): for name in files: main_task = CreateNiceParametricImagesTaskDA( imageName=name, file_name_prefix_training= "2015June0809:51PMNoisyRandomTraining") w.add(main_task) w.run() diff --git a/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_silico.py b/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_silico.py index 8f4ee0040d..2584732b68 100644 --- a/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_silico.py +++ b/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_silico.py @@ -1,663 +1,326 @@ # -*- coding: utf-8 -*- """ Created on Fri Aug 14 11:09:18 2015 @author: wirkert """ - import os -import pickle +import logging +import datetime from collections import namedtuple + +import numpy as np +import pandas as pd +from pandas import DataFrame import luigi import matplotlib.pyplot as plt -import matplotlib from sklearn.ensemble.forest import RandomForestRegressor +import seaborn as sns import tasks_mc import scriptpaths as sp from regression.preprocessing import preprocess, preprocess2 from regression.linear import LinearSaO2Unmixing -from regression.domain_adaptation import * -from regression.estimation import standard_score - sp.ROOT_FOLDER = "/media/wirkert/data/Data/" + \ - "2015_11_12_IPCAI_in_silico" -sp.FINALS_FOLDER = "Images" + "2016_02_22_IPCAI_revision_in_silico" +sp.FINALS_FOLDER = "finals" sp.RECORDED_WAVELENGTHS = np.arange(470, 680, 10) * 10 ** -9 -sp.MC_DATA_FOLDER = "mc_data2" - -w_standard = 0.1 # for this evaluation we add 5% noise - - -matplotlib.rcParams.update({'font.size': 22}) - -EvaluationStruct = namedtuple("EvaluationStruct", - "name regressor data color alpha") +sp.MC_DATA_FOLDER = "mc_data" +w_standard = 0.1 # for this evaluation we add 10% noise +sns.set_style("whitegrid") +# sns.set_context("poster") # setup standard random forest rf = RandomForestRegressor(10, min_samples_leaf=10, max_depth=9, n_jobs=-1) +EvaluationStruct = namedtuple("EvaluationStruct", + "name regressor color alpha") +# standard evaluation setup +standard_evaluation_setups = [EvaluationStruct("Linear Beer-Lambert", + LinearSaO2Unmixing(), + sns.xkcd_rgb["pale red"], 0.5) + , EvaluationStruct("Proposed", rf, + sns.xkcd_rgb["medium green"], 0.5)] -class TrainingSamplePlots(luigi.Task): +# standard noise levels +noise_levels = np.arange(0.00, 0.18, 0.02) - which = luigi.Parameter() + +class TrainingSamplePlot(luigi.Task): + which_train = luigi.Parameter() + which_test = luigi.Parameter() def requires(self): - return tasks_mc.CameraBatch("ipcai_" + self.which + "_train"), \ - tasks_mc.CameraBatch("ipcai_" + self.which + "_test") + return tasks_mc.CameraBatch(self.which_train), \ + tasks_mc.CameraBatch(self.which_test) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, - sp.RESULTS_FOLDER, - sp.FINALS_FOLDER, - "samples_plot_" + self.which + ".png")) + sp.RESULTS_FOLDER, + sp.FINALS_FOLDER, + "sample_plot_train_" + self.which_train + + "_test_" + self.which_test + ".png")) + def run(self): # get data - f = open(self.input()[0].path, "r") - batch_train = pickle.load(f) - f = open(self.input()[1].path, "r") - batch_test = pickle.load(f) - # setup testing function and data - X_test, y_test = preprocess(batch_test, w_percent=w_standard) - X_train, y_train = preprocess(batch_train, w_percent=w_standard) - # setup structures to save test data - evaluation_setups = [ -# EvaluationStruct("Linear Beer-Lambert", -# LinearSaO2Unmixing(), [], "red", 0.5), - EvaluationStruct("Proposed", rf, "green", 0.5)] - # do 
validation + df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) + df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) + + # for this plot we write a custom evaluation function as it is built + # a little different + + # create a new dataframe which will hold all the generated errors + df = pd.DataFrame() + nr_training_samples = np.arange(10, 15060, 50).astype(int) # not very pythonic, don't care for n in nr_training_samples: - print "number samples ", str(n) - for e in evaluation_setups: - X_j = X_train[0:n] - y_j = y_train[0:n] - lr = e.regressor - lr.fit(X_j, y_j) - # save results - e.data.append(standard_score(lr, X_test, y_test) * 100.) - # make a nice plot for results - plt.figure() - legend_handles = [] - for e in evaluation_setups: - h, = plt.plot(nr_training_samples, - e.data, color=e.color, - label=e.name) - legend_handles.append(h) - # minor_ticks_x = np.arange(0, 1000, 50) - # minor_ticks_y = np.arange(5, 16, 0.5) - # plt.gca().set_xticks(minor_ticks_x, minor=True) - # plt.gca().set_yticks(minor_ticks_y, minor=True) - plt.grid() - # plt.legend(handles=legend_handles) - # plt.title("dependency on number of training samples") + X_test, y_test = preprocess(df_test, w_percent=w_standard) + # only take n samples for training + X_train, y_train = preprocess(df_train, nr_samples=n, + w_percent=w_standard) + + regressor = rf + regressor.fit(X_train, y_train) + y_pred = regressor.predict(X_test) + # save results to a dataframe + errors = np.abs(y_pred - y_test) + errors = errors.reshape(len(errors), 1) + current_df = DataFrame(errors * 100, + columns=["Errors"]) + current_df["Method"] = "Proposed" + current_df["Number Samples"] = n + df = pd.concat([df, current_df], ignore_index=True) + logging.info( + "Finished training classifier with {0} samples".format( + str(n))) + + df = df.groupby("Number Samples").describe() + # get the error description in the rows: + df = df.unstack(-1) + # get rid of multiindex by dropping "Error" level + df.columns = df.columns.droplevel(0) + + plt.plot(df.index, df["50%"], color=sns.xkcd_rgb["medium green"]) + + # tidy up the plot plt.xlabel("number of training samples") plt.ylabel("absolute error [%]") - - for tick in plt.gca().xaxis.get_major_ticks(): - tick.label.set_fontsize(14) - plt.ylim((2, 10)) - plt.xlim((0, 15000)) - plt.savefig(self.output().path, - dpi=500, bbox_inches='tight') - - -class WrongNoisePlots(luigi.Task): - - which = luigi.Parameter() - - def requires(self): - return tasks_mc.CameraBatch("ipcai_" + self.which + "_train"), \ - tasks_mc.CameraBatch("ipcai_" + self.which + "_test") - - def output(self): - return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, - sp.RESULTS_FOLDER, - sp.FINALS_FOLDER, - "noise_wrong_plot_" + self.which + ".png")) - - - def run(self): - # get data - f = open(self.input()[0].path, "r") - batch_train = pickle.load(f) - f = open(self.input()[1].path, "r") - batch_test = pickle.load(f) - - evaluation_setups = [ - EvaluationStruct("Linear Beer-Lambert", - LinearSaO2Unmixing(), [], "red", 0.25) - , EvaluationStruct("Proposed", rf, [], "green", 0.5) - ] - # iterate over different levels of noise - noise_levels = np.arange(0.00, 0.17, 0.02) - for w in noise_levels: - # setup testing function - X_test, y_test = preprocess(batch_test, w_percent=w) - # extract noisy data - X_train, y_train = preprocess(batch_train, w_percent=0.1) - for e in evaluation_setups: - lr = e.regressor - lr.fit(X_train, y_train) - y_pred = lr.predict(X_test) - # save results - errors = np.abs(y_pred - y_test) - 
e.data.append(errors) - # make a nice plot for results - plt.figure() - legend_handles = [] - median0 = 0 - for e in evaluation_setups: - p25 = lambda x: np.percentile(x, 25) * 100. - p75 = lambda x: np.percentile(x, 75) * 100. - m = lambda x: np.median(x) * 100. - plt.fill_between(np.array(noise_levels) * 100., - map(p25, e.data), - map(p75, e.data), - edgecolor=e.color, facecolor=e.color, - alpha=e.alpha) - h, = plt.plot(np.array(noise_levels) * 100., - map(m, e.data), color=e.color, - label=e.name) - legend_handles.append(h) - if e.name is "Proposed": - median0 = m(e.data[0]) - minor_ticks = np.arange(0, 15, 5) - plt.gca().set_xticks(minor_ticks, minor=True) - plt.gca().set_yticks(minor_ticks, minor=True) - - plt.gca().annotate('error: ' + - "{0:.2f}".format( - np.median(evaluation_setups[-1].data[0]) - * 100) + "%", - xy=(0, median0), xytext=(5, 10), size=15, - va="center", - ha="center", bbox=dict(boxstyle="round4", - fc="g", alpha=0.5), - arrowprops=dict(arrowstyle="-|>", - connectionstyle="arc3,rad=-0.2", - fc="w"), - ) - plt.grid() - plt.legend(handles=legend_handles) - # plt.title("dependency on wrong training noise [w_training = 10%]") - plt.xlabel("noise added for testing [sigma %]") - plt.ylabel("absolute error [%]") - plt.ylim((0, 20)) + plt.ylim((0, 15)) plt.xlim((0, 15)) + + # finally save the figure plt.savefig(self.output().path, dpi=500, bbox_inches='tight') - -class NoisePlotsBVF(luigi.Task): - - which = luigi.Parameter() +class VhbPlot(luigi.Task): + which_train = luigi.Parameter() + which_test = luigi.Parameter() def requires(self): - return tasks_mc.CameraBatch("ipcai_" + self.which + "_train"), \ - tasks_mc.CameraBatch("ipcai_" + self.which + "_test") + return tasks_mc.CameraBatch(self.which_train), \ + tasks_mc.CameraBatch(self.which_test) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, - sp.RESULTS_FOLDER, - sp.FINALS_FOLDER, - "noise_plot_bvf_" + self.which + ".png")) + sp.RESULTS_FOLDER, + sp.FINALS_FOLDER, + "vhb_noise_plot_train_" + self.which_train + + "_test_" + self.which_test + ".png")) + + @staticmethod + def preprocess_vhb(batch, nr_samples=None, w_percent=None, + magnification=None, bands_to_sortout=None): + """ For evaluating vhb we extract labels for vhb instead of sao2""" + X, y = preprocess2(batch, nr_samples, w_percent, + magnification, bands_to_sortout) + + return X, y["vhb"] def run(self): # get data - f = open(self.input()[0].path, "r") - batch_train = pickle.load(f) - f = open(self.input()[1].path, "r") - batch_test = pickle.load(f) - - evaluation_setups = [ - EvaluationStruct("Proposed", rf, [], "green", 0.5) - ] - # iterate over different levels of noise - noise_levels = np.arange(0.00, 0.17, 0.02) - for w in noise_levels: - # setup testing function - X_test, y_test = preprocess2(batch_test, w_percent=w) - # extract noisy data - X_train, y_train = preprocess2(batch_train, w_percent=w) - for e in evaluation_setups: - lr = e.regressor - lr.fit(X_train, y_train) - y_pred = lr.predict(X_test) - # save results - errors = np.abs(y_pred[:, 0] - y_test[:, 0]) - e.data.append(errors) - # make a nice plot for results - plt.figure() - legend_handles = [] - for e in evaluation_setups: - p25 = lambda x: np.percentile(x, 25) * 100. - p75 = lambda x: np.percentile(x, 75) * 100. - m = lambda x: np.median(x) * 100. 
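The hand-rolled percentile lambdas in the removed blocks above are replaced throughout by a groupby/describe idiom (see evaluate_data and standard_plotting below). On toy data the idiom looks like this — the numbers are made up, and the unstack(-1)/droplevel(0) steps match the pandas-0.17-era layout, where describe() on a groupby puts the statistics in the innermost row index:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"Errors": np.random.rand(100) * 10,
                   "Method": "Proposed",
                   "noise added [sigma %]": np.repeat([0, 2, 4, 6], 25)})
df = df.groupby(["Method", "noise added [sigma %]"]).describe()
df = df.unstack(-1)                   # statistics become columns
df.columns = df.columns.droplevel(0)  # drop the "Errors" level
print(df[["25%", "50%", "75%"]])      # quartiles drawn as the error tube
```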
- plt.fill_between(np.array(noise_levels) * 100., - map(p25, e.data), - map(p75, e.data), - edgecolor=e.color, facecolor=e.color, - alpha=e.alpha) - h, = plt.plot(np.array(noise_levels) * 100., - map(m, e.data), color=e.color, - label=e.name) - legend_handles.append(h) - minor_ticks = np.arange(0, 15, 5) - plt.gca().set_xticks(minor_ticks, minor=True) - - plt.grid() - # plt.legend(handles=legend_handles) - # plt.title("dependency on noise") - plt.xlabel("noise added [sigma %]") - plt.ylabel("absolute error [%]") + df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) + df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) + + # for vhb we only evaluate the proposed method since the linear + # beer-lambert is not applicable + evaluation_setups = [EvaluationStruct("Proposed", rf, + sns.xkcd_rgb["medium green"], + 0.5)] + standard_plotting(df_train, noise_levels, df_test, noise_levels, + evaluation_setups=evaluation_setups, + preprocessing=self.preprocess_vhb, + xytext_position=(2, 3)) plt.ylim((0, 4)) - plt.xlim((0, 15)) + + # finally save the figure plt.savefig(self.output().path, dpi=500, bbox_inches='tight') - -class DomainNoisePlot(luigi.Task): +class NoisePlot(luigi.Task): + which_train = luigi.Parameter() + which_test = luigi.Parameter() def requires(self): - return tasks_mc.CameraBatch("ipcai_colon_muscle_train"), \ - tasks_mc.CameraBatch("ipcai_generic") + return tasks_mc.CameraBatch(self.which_train), \ + tasks_mc.CameraBatch(self.which_test) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, - sp.RESULTS_FOLDER, - sp.FINALS_FOLDER, - "noise_da_plot_.png")) + sp.RESULTS_FOLDER, + sp.FINALS_FOLDER, + "noise_plot_train_" + self.which_train + + "_test_" + self.which_test + ".png")) def run(self): # get data - f = open(self.input()[0].path, "r") - batch_train = pickle.load(f) - f = open(self.input()[1].path, "r") - batch_test = pickle.load(f) - - evaluation_setups = [ - EvaluationStruct("Linear Beer-Lambert", - LinearSaO2Unmixing(), [], "red", 0.25) - , EvaluationStruct("Proposed", rf, [], "green", 0.5) - ] - # iterate over different levels of noise - noise_levels = np.arange(0.00, 0.17, 0.02) - for w in noise_levels: - # setup testing function - X_test, y_test = preprocess(batch_test, w_percent=w) - # extract noisy data - X_train, y_train = preprocess(batch_train, w_percent=w) - for e in evaluation_setups: - lr = e.regressor - lr.fit(X_train, y_train) - y_pred = lr.predict(X_test) - # save results - errors = np.abs(y_pred - y_test) - e.data.append(errors) - # make a nice plot for results - plt.figure() - legend_handles = [] - for e in evaluation_setups: - p25 = lambda x: np.percentile(x, 25) * 100. - p75 = lambda x: np.percentile(x, 75) * 100. - m = lambda x: np.median(x) * 100. 
- plt.fill_between(np.array(noise_levels) * 100., - map(p25, e.data), - map(p75, e.data), - edgecolor=e.color, facecolor=e.color, - alpha=e.alpha) - h, = plt.plot(np.array(noise_levels) * 100., - map(m, e.data), color=e.color, - label=e.name) - legend_handles.append(h) - if e.name is "Proposed": - median0 = m(e.data[0]) - minor_ticks = np.arange(0, 15, 5) - plt.gca().set_xticks(minor_ticks, minor=True) - plt.gca().set_yticks(minor_ticks, minor=True) - - plt.gca().annotate('error: ' + - "{0:.2f}".format( - np.median(evaluation_setups[-1].data[0]) - * 100) + "%", - xy=(0, median0), xytext=(5, 12), size=15, - va="center", ha="center", bbox=dict( - boxstyle="round4", fc="g", alpha=0.5), - arrowprops=dict(arrowstyle="-|>", - connectionstyle="arc3,rad=-0.2", - fc="w"), - ) - plt.grid() - plt.legend(handles=legend_handles) - - # plt.title("dependency on noise") - plt.xlabel("noise added [sigma %]") - plt.ylabel("absolute error [%]") - plt.ylim((0, 20)) - plt.xlim((0, 15)) - plt.savefig(self.output().path, dpi=500, - bbox_inches='tight') + df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) + df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) + standard_plotting(df_train, noise_levels, df_test, noise_levels) - -class NoisePlots(luigi.Task): - - which = luigi.Parameter() - - def requires(self): - return tasks_mc.CameraBatch("ipcai_" + self.which + "_train"), \ - tasks_mc.CameraBatch("ipcai_" + self.which + "_test") - - def output(self): - return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, - sp.RESULTS_FOLDER, - sp.FINALS_FOLDER, - "noise_plot_" + self.which + ".png")) - - def run(self): - # get data - f = open(self.input()[0].path, "r") - batch_train = pickle.load(f) - f = open(self.input()[1].path, "r") - batch_test = pickle.load(f) - - evaluation_setups = [ - EvaluationStruct("Linear Beer-Lambert", - LinearSaO2Unmixing(), [], "red", 0.25) - , EvaluationStruct("Proposed", rf, [], "green", 0.5) - ] - # iterate over different levels of noise - noise_levels = np.arange(0.00, 0.17, 0.02) - for w in noise_levels: - # setup testing function - X_test, y_test = preprocess(batch_test, w_percent=w) - # extract noisy data - X_train, y_train = preprocess(batch_train, w_percent=w) - for e in evaluation_setups: - lr = e.regressor - lr.fit(X_train, y_train) - y_pred = lr.predict(X_test) - # save results - errors = np.abs(y_pred - y_test) - e.data.append(errors) - # make a nice plot for results - plt.figure() - legend_handles = [] - for e in evaluation_setups: - p25 = lambda x: np.percentile(x, 25) * 100. - p75 = lambda x: np.percentile(x, 75) * 100. - m = lambda x: np.median(x) * 100. 
- plt.fill_between(np.array(noise_levels) * 100., - map(p25, e.data), - map(p75, e.data), - edgecolor=e.color, facecolor=e.color, - alpha=e.alpha) - h, = plt.plot(np.array(noise_levels) * 100., - map(m, e.data), color=e.color, - label=e.name) - legend_handles.append(h) - if e.name is "Proposed": - median0 = m(e.data[0]) - minor_ticks = np.arange(0, 15, 5) - plt.gca().set_xticks(minor_ticks, minor=True) - plt.gca().set_yticks(minor_ticks, minor=True) - - plt.gca().annotate('error: ' + - "{0:.2f}".format( - np.median(evaluation_setups[-1].data[0]) - * 100) + "%", - xy=(0, median0), xytext=(5, 10), size=15, va="center", - ha="center", bbox=dict(boxstyle="round4", - fc="g", alpha=0.5), - arrowprops=dict(arrowstyle="-|>", - connectionstyle="arc3,rad=-0.2", - fc="w"), - ) - plt.grid() - plt.legend(handles=legend_handles) - # plt.title("dependency on noise") - plt.xlabel("noise added [sigma %]") - plt.ylabel("absolute error [%]") - plt.ylim((0, 20)) - plt.xlim((0, 15)) + # finally save the figure plt.savefig(self.output().path, dpi=500, bbox_inches='tight') - -class WeightsCalculation(luigi.Task): - source_domain = luigi.Parameter() - target_domain = luigi.Parameter() +class WrongNoisePlot(luigi.Task): + which_train = luigi.Parameter() + which_test = luigi.Parameter() def requires(self): - return tasks_mc.CameraBatch(self.source_domain), \ - tasks_mc.CameraBatch(self.target_domain) + return tasks_mc.CameraBatch(self.which_train), \ + tasks_mc.CameraBatch(self.which_test) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, - sp.RESULTS_FOLDER, - sp.FINALS_FOLDER, - "weights_" + self.source_domain + "to" + - self.target_domain + ".npy")) + sp.RESULTS_FOLDER, + sp.FINALS_FOLDER, + "wrong_noise_plot_train_" + self.which_train + + "_test_" + self.which_test + ".png")) def run(self): # get data - f = open(self.input()[0].path, "r") - batch_train = pickle.load(f) - f = open(self.input()[1].path, "r") - batch_da_train = pickle.load(f) - X_train, y_train = preprocess(batch_train, w_percent=w_standard, - magnification=None) - X_da_train, y_da_train = preprocess(batch_da_train, - w_percent=w_standard, - magnification=None) - X_ws, y_ws = preprocess(batch_train, w_percent=w_standard) - weights = estimate_weights_random_forests(X_train, X_da_train, X_ws) - - - no_zero = ~(batch_train.reflectances <= 0).any(axis=1) - bvfs = batch_train.layers[0][no_zero, 0] - saO2s = batch_train.layers[0][no_zero, 1] - a_mies = batch_train.layers[0][no_zero, 2] - ds = batch_train.layers[0][no_zero, 4] - - correct_high_weights = np.logical_and(bvfs < 0.10, ds < 1000 * 10 ** -6, ds > 600 * 10 ** -6) - - plt.figure() - plt.hist(bvfs, weights=weights, alpha=0.5, label="found_bvfs") - plt.hist(bvfs[correct_high_weights], alpha=0.5, label="true_bvfs") - plt.title("weight distr bvf") - plt.legend(loc='upper right') - plt.figure() - plt.hist(saO2s, weights=weights, alpha=0.5, label="found_saO2s") - plt.hist(saO2s[correct_high_weights], alpha=0.5, label="true_saO2s") - plt.title("weight distr saO2") - plt.legend(loc='upper right') - plt.figure() - plt.hist(a_mies, weights=weights, alpha=0.5, label="found_a_mies") - plt.hist(a_mies[correct_high_weights], alpha=0.5, label="true_a_mies") - plt.title("weight distr a_mie") - plt.legend(loc='upper right') - plt.figure() - plt.hist(ds, weights=weights, alpha=0.5, label="found_ds") - plt.hist(ds[correct_high_weights], alpha=0.5, label="true_ds") - plt.title("weight distr d") - plt.legend(loc='upper right') - plt.show() - - np.save(self.output().path, weights) - - -class 
NewDAPlot(luigi.Task): - source_train_set = luigi.Parameter() - target_train_set = luigi.Parameter() - target_test_set = luigi.Parameter() + df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) + df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) - def requires(self): - return tasks_mc.CameraBatch(self.source_train_set), \ - tasks_mc.CameraBatch(self.target_test_set), \ - tasks_mc.CameraBatch(self.target_train_set), \ - WeightsCalculation(self.source_train_set, self.target_train_set) - - def output(self): - return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, - sp.RESULTS_FOLDER, - sp.FINALS_FOLDER, - "new_da.png")) + # do same as in NoisePlot but with standard noise input + standard_plotting(df_train, np.ones_like(noise_levels) * w_standard, + df_test, noise_levels) - def run(self): - # get data - f = open(self.input()[0].path, "r") - batch_train = pickle.load(f) - f = open(self.input()[1].path, "r") - batch_test = pickle.load(f) - f = open(self.input()[2].path, "r") - batch_target_train = pickle.load(f) - f = open(self.input()[3].path, "r") - weights = np.load(f) - - evaluation_setups = [ - EvaluationStruct("Proposed no DS", - rf, [], "red", 0.25) - , EvaluationStruct("Proposed", rf, [], "yellow", 0.5) - , EvaluationStruct("Proposed DA", rf, [], "green", 0.5) - , EvaluationStruct("Proposed Fake", rf, [], "black", 0.1) - ] - - X_train, y_train = preprocess(batch_train, w_percent=w_standard) - X_da_test, y_da_test = preprocess(batch_test, w_percent=w_standard) - X_target_train, y_target_train = preprocess(batch_target_train, - w_percent=w_standard) -# X_train_sampled, y_train_sampled, weights_sampled = \ -# resample(X_train, y_train, weights, 8000) - - - f, axarr = plt.subplots(X_train.shape[1], 1) - for i in range(X_train.shape[1]): - y, binEdges = np.histogram(X_train[:, i], weights=weights) - bincenters = 0.5 * (binEdges[1:] + binEdges[:-1]) - axarr[i].plot(bincenters, y, '-+', label="adapted") - - y, binEdges = np.histogram(X_train[:, i]) - bincenters = 0.5 * (binEdges[1:] + binEdges[:-1]) - axarr[i].plot(bincenters, y, '-o', label="original") - - y, binEdges = np.histogram(X_da_test[:, i], - weights=np.ones(X_da_test.shape[0]) * - X_train.shape[0] / X_da_test.shape[0]) - bincenters = 0.5 * (binEdges[1:] + binEdges[:-1]) - axarr[i].plot(bincenters, y, '-*', label="searched") -# -# y, binEdges = np.histogram(X_train_sampled[:, i], -# weights=np.ones(X_train_sampled.shape[0]) * -# X_train.shape[0] / X_train_sampled.shape[0]) -# bincenters = 0.5 * (binEdges[1:] + binEdges[:-1]) -# axarr[i].plot(bincenters, y, '-^', label="sampled") - - axarr[i].xaxis.set_visible(False) - axarr[i].yaxis.set_visible(False) - axarr[i].set_title("feature " + str(i) + " distribution") - - plt.legend(loc="best") - # plt.tight_layout(h_pad=.5) - plt.show() - - # iterate over different levels of noise - noise_levels = np.arange(0.00, 0.17, 0.02) - for w in noise_levels: - # setup testing function - X_test, y_test = preprocess(batch_test, w_percent=w) - - # extract noisy data - X_train, y_train = preprocess(batch_train, w_percent=w) - - X_fake_train, y_fake_train = preprocess2(batch_train, - w_percent=w) - correct_bvf_train = y_fake_train[:, 0] < 0.1 - X_fake_train = X_fake_train[correct_bvf_train, :] - y_fake_train = y_fake_train[correct_bvf_train, 1] - - for e in evaluation_setups: - lr = e.regressor - if "DA" in e.name: - lr.fit(X_train, y_train, weights) - elif "DS" in e.name: - lr.fit(X_target_train, y_target_train) - elif "Fake" in e.name: - lr.fit(X_fake_train, y_fake_train) - else: - 
lr.fit(X_train, y_train) - y_pred = lr.predict(X_test) - # save results - errors = np.abs(y_pred - y_test) - e.data.append(errors) - # make a nice plot for results - plt.figure() - legend_handles = [] - for e in evaluation_setups: - p25 = lambda x: np.percentile(x, 25) * 100. - p75 = lambda x: np.percentile(x, 75) * 100. - m = lambda x: np.median(x) * 100. - plt.fill_between(np.array(noise_levels) * 100., - map(p25, e.data), - map(p75, e.data), - edgecolor=e.color, facecolor=e.color, - alpha=e.alpha) - h, = plt.plot(np.array(noise_levels) * 100., - map(m, e.data), color=e.color, - label=e.name) - legend_handles.append(h) - median0 = m(e.data[0]) - minor_ticks = np.arange(0, 15, 5) - plt.gca().set_xticks(minor_ticks, minor=True) - plt.gca().set_yticks(minor_ticks, minor=True) - - plt.gca().annotate('error: ' + - "{0:.2f}".format( - np.median(evaluation_setups[-1].data[0]) - * 100) + "%", - xy=(0, median0), xytext=(5, 10), size=15, va="center", - ha="center", bbox=dict(boxstyle="round4", - fc="g", alpha=0.5), - arrowprops=dict(arrowstyle="-|>", - connectionstyle="arc3,rad=-0.2", - fc="w"), - ) - plt.grid() - plt.legend(handles=legend_handles) - - # plt.title("dependency on noise") - plt.xlabel("noise added [sigma %]") - plt.ylabel("absolute error [%]") - plt.ylim((0, 20)) - plt.xlim((0, 15)) - plt.savefig(self.output().path + "temp.png", dpi=500, + # finally save the figure + plt.savefig(self.output().path, dpi=500, bbox_inches='tight') +def evaluate_data(df_train, w_train, df_test, w_test, + evaluation_setups=None, preprocessing=None): + """ Our standard method to evaluate the data. It will fill a DataFrame df + which saves the errors for each evaluated setup""" + if evaluation_setups is None: + evaluation_setups = standard_evaluation_setups + if preprocessing is None: + preprocessing = preprocess + + # create a new dataframe which will hold all the generated errors + df = pd.DataFrame() + for one_w_train, one_w_test in zip(w_train, w_test): + # setup testing function + X_test, y_test = preprocessing(df_test, w_percent=one_w_test) + # extract noisy data + X_train, y_train = preprocessing(df_train, w_percent=one_w_train) + for e in evaluation_setups: + regressor = e.regressor + regressor.fit(X_train, y_train) + y_pred = regressor.predict(X_test) + # save results to a dataframe + errors = np.abs(y_pred - y_test) + errors = errors.reshape(len(errors), 1) + current_df = DataFrame(errors * 100, + columns=["Errors"]) + current_df["Method"] = e.name + current_df["noise added [sigma %]"] = int(one_w_test * 100) + df = pd.concat([df, current_df], ignore_index=True) + + return df + + +def standard_plotting(df_train, w_train, df_test, w_test, + evaluation_setups=None, preprocessing=None, + xytext_position=None): + if evaluation_setups is None: + evaluation_setups = standard_evaluation_setups + if xytext_position is None: + xytext_position = (2, 15) + + plt.figure() + + # evaluate the data which is used to generate the plot + df = evaluate_data(df_train, w_train, df_test, w_test, + evaluation_setups, preprocessing) + + # group it by method and noise level and get description on the errors + df = df.groupby(['Method', 'noise added [sigma %]']).describe() + # get the error description in the rows: + df = df.unstack(-1) + # get rid of multiindex by dropping "Error" level + df.columns = df.columns.droplevel(0) + + # iterate over methods to plot linegraphs with error tube + # probably this can be done nicer, but no idea how exactly + for e in evaluation_setups: + df_method = df.loc[e.name] + 
plt.plot(df_method.index, df_method["50%"], + color=e.color, label=e.name) + plt.fill_between(df_method.index, + df_method["25%"], df_method["75%"], + facecolor=e.color, edgecolor=e.color, + alpha=e.alpha) + plt.xlabel("noise added [sigma %]") + plt.ylabel("absolute error [%]") + # add annotation: + median_proposed = df.T["Proposed", 0].loc["50%"] + plt.gca().annotate('error: ' + + "{0:.1f}".format(median_proposed) + "%", + xy=(0, median_proposed), xytext=xytext_position, + va="center", ha="center", + bbox=dict(boxstyle="round4", + fc=sns.xkcd_rgb["medium green"], + alpha=0.5), + arrowprops=dict(arrowstyle="-|>", + connectionstyle="arc3,rad=-0.2", + fc="w")) + # tidy up the plot + plt.ylim((0, 20)) + plt.xlim((0, 15)) + plt.legend() if __name__ == '__main__': - # root folder there the data lies + logging.basicConfig(filename='log/in_silico_plots_' + + str(datetime.datetime.now()) + + '.log', level=logging.INFO) + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + logger = logging.getLogger() + logger.addHandler(ch) luigi.interface.setup_interface_logging() + sch = luigi.scheduler.CentralPlannerScheduler() w = luigi.worker.Worker(scheduler=sch) - w.add(WrongNoisePlots(which="colon_muscle")) - # w.add(WrongNoisePlots(which="generic")) - w.add(TrainingSamplePlots(which="colon_muscle")) - w.add(TrainingSamplePlots(which="generic")) - w.add(NoisePlots(which="colon_muscle")) - w.add(DomainNoisePlot()) - w.add(NoisePlotsBVF(which="colon_muscle")) - # w.add(NoisePlots(which="generic")) - w.add(NewDAPlot(source_train_set="ipcai_less_generic", - target_train_set="ipcai_colon_muscle_train", - target_test_set="ipcai_colon_muscle_test")) + w.add(TrainingSamplePlot(which_train="ipcai_revision_colon_train", + which_test="ipcai_revision_colon_test")) + w.add(NoisePlot(which_train="ipcai_revision_colon_train", + which_test="ipcai_revision_colon_test")) + w.add(WrongNoisePlot(which_train="ipcai_revision_colon_train", + which_test="ipcai_revision_colon_test")) + # Set a different testing domain to evaluate domain sensitivity + w.add(NoisePlot(which_train="ipcai_revision_colon_train", + which_test="ipcai_revision_generic")) + w.add(VhbPlot(which_train="ipcai_revision_colon_train", + which_test="ipcai_revision_colon_test")) w.run() -
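For reference, the new ColonMuscleMeanScatteringBatch introduced in this change is reached through its factory like the other batches. A short usage sketch, run from the iMC directory; the sample count is arbitrary:

```python
from mc.factories import ColonMuscleMeanScatteringFactory

factory = ColonMuscleMeanScatteringFactory()
batch = factory.create_batch_to_simulate()
batch.create_parameters(nr_samples=1000)
# one row per simulated sample, columns indexed by (layer, parameter);
# b_mie is fixed to the soft-tissue mean of 1.286 in this batch
print(batch.df["layer0"].head())
```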
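preprocess and preprocess2 come from regression.preprocessing and are not part of this diff. For exercising evaluate_data in isolation, a rough stand-in that only mirrors how they are called here might look as follows — the noise model and band normalization are assumptions for illustration, not the project's actual implementation:

```python
import numpy as np

def preprocess_standin(df, nr_samples=None, w_percent=None,
                       magnification=None, bands_to_sortout=None):
    """Hypothetical substitute for regression.preprocessing.preprocess."""
    X = df["reflectances"].values.astype(float)
    y = df["layer0", "sao2"].values.astype(float)
    if nr_samples is not None:          # optionally subsample
        X, y = X[:nr_samples], y[:nr_samples]
    if w_percent is not None and w_percent > 0.:
        # assumed noise model: multiplicative Gaussian of relative width w
        X = X * np.random.normal(1., w_percent, size=X.shape)
    X = X / X.sum(axis=1, keepdims=True)  # assumed band normalization
    return X, y
```

Passing this as evaluate_data(df_train, noise_levels, df_test, noise_levels, preprocessing=preprocess_standin) then runs the evaluation pipeline without the real preprocessing module.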