diff --git a/Modules/Biophotonics/python/iMC/regression/linear.py b/Modules/Biophotonics/python/iMC/regression/linear.py
index e25e70c687..0905b513cc 100644
--- a/Modules/Biophotonics/python/iMC/regression/linear.py
+++ b/Modules/Biophotonics/python/iMC/regression/linear.py
@@ -1,125 +1,125 @@
 '''
 Created on Oct 19, 2015
 
 @author: wirkert
 '''
 
 import numpy as np
 from scipy.interpolate import interp1d
 
 from mc.usuag import get_haemoglobin_extinction_coefficients
 
 
 class LinearSaO2Unmixing(object):
     '''
     classdocs
     '''
 
     def __init__(self, use_LCTF=False):
         eHbO2 = 0
         eHb = 0
         if use_LCTF:
             # oxygenated haemoglobin extinction coefficients
             eHbO2 = np.array([34772.8, 27840.93333, 23748.8, 21550.8,
                               21723.46667, 28064.8, 39131.73333, 45402.93333,
                               42955.06667, 40041.73333, 42404.4, 36333.6,
                               22568.26667, 6368.933333, 1882.666667,
                               1019.333333, 664.6666667, 473.3333333,
                               376.5333333, 327.2, 297.0666667])
             # deoxygenated haemoglobin extinction coefficients
             eHb = [18031.73333, 15796.8, 17365.33333, 21106.53333,
                    26075.06667, 32133.2, 39072.66667, 46346.8, 51264,
                    50757.33333, 45293.33333, 36805.46667, 26673.86667,
                    17481.73333, 10210.13333, 7034, 5334.533333,
                    4414.706667, 3773.96, 3257.266667, 2809.866667]
         else:
             window_size = 6
             eHbO2_map, eHb_map = get_haemoglobin_extinction_coefficients()
             parse_wavelengths = np.arange(450, 720, 2) * 10 ** -9
             eHbO2_parsed = eHbO2_map(parse_wavelengths)
             eHb_parsed = eHb_map(parse_wavelengths)
             eHbO2 = np.convolve(eHbO2_parsed,
                                 np.ones(window_size, dtype=float) /
                                 float(window_size),
                                 mode="same")
             eHb = np.convolve(eHb_parsed,
                               np.ones(window_size, dtype=float) /
                               float(window_size),
                               mode="same")
             eHbO2_map2 = interp1d(parse_wavelengths, eHbO2)
             eHb_map2 = interp1d(parse_wavelengths, eHb)
             wavelengths = np.arange(470, 680, 10) * 10 ** -9
             eHbO2 = eHbO2_map2(wavelengths)
             eHb = eHb_map2(wavelengths)
         nr_total_wavelengths = len(eHb)
         # to account for scattering losses we allow a constant offset
         scattering = np.ones(nr_total_wavelengths)
         # put eHbO2, eHb and the scattering term into one measurement matrix
         self.H = np.vstack((eHbO2, eHb, scattering)).T
         self.selected_features = np.arange(0, nr_total_wavelengths, 1)
 
     def set_selected_features(self, selected_features):
         """
         Parameters:
             selected_features: index array with the indices of the selected
                 features. Example: np.array([0, 3, 19]) would select the
                 1st, 4th and 20th feature.
                 Initialized to all features by default.
         """
         self.selected_features = selected_features
 
-    def fit(self, X, y):
+    def fit(self, X, y, weights=None):
         """only implemented to fit into the standard sklearn framework."""
         pass
 
     def predict(self, X):
         """predict like in sklearn:
 
         Parameters:
             X: nr_samples x nr_features matrix of samples to predict for
                 regression
 
         Returns:
             y: array of shape [nr_samples] with the predicted oxygenation
                 values
         """
         # keep only the selected features
         H_sel = self.H[self.selected_features, :]
         X_sel = X[:, self.selected_features]
         # do the least squares estimation
         oxy_test, deoxy, s = np.linalg.lstsq(H_sel, X_sel.T)[0]
         # calculate oxygenation = oxygenated blood / total blood
         saO2 = oxy_test / (oxy_test + deoxy)
 
         return np.clip(saO2, 0., 1.)
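For orientation when reading the patch: LinearSaO2Unmixing.predict solves H * c = x per sample by least squares and reports oxygenation as c_HbO2 / (c_HbO2 + c_Hb). A minimal standalone sketch of just that step, with an invented measurement matrix (only the algebra matches the class; the rcond keyword assumes a recent NumPy):

import numpy as np

# Toy measurement matrix H: one row per wavelength; the columns hold the
# eHbO2 and eHb extinction coefficients plus a constant scattering offset.
# These numbers are made up for illustration, not the coefficients above.
H = np.array([[30000., 18000., 1.],
              [40000., 46000., 1.],
              [22000., 26000., 1.],
              [1800., 10000., 1.]])

# simulate a measured spectrum from known concentrations (70% oxygenation)
c_true = np.array([0.7, 0.3, 0.05])
x = np.dot(H, c_true)

# least squares estimation, as in LinearSaO2Unmixing.predict
oxy, deoxy, scattering = np.linalg.lstsq(H, x, rcond=None)[0]
sao2 = np.clip(oxy / (oxy + deoxy), 0., 1.)
print(sao2)  # recovers ~0.7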
diff --git a/Modules/Biophotonics/python/iMC/script_analyze_da_in_silico.py b/Modules/Biophotonics/python/iMC/script_analyze_da_in_silico.py
new file mode 100644
index 0000000000..383a925ce3
--- /dev/null
+++ b/Modules/Biophotonics/python/iMC/script_analyze_da_in_silico.py
@@ -0,0 +1,289 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Aug 14 11:09:18 2015
+
+@author: wirkert
+"""
+
+# import everything as done in the IPCAI in silico evaluation
+from script_analyze_ipcai_in_silico import *
+# additionally we need the weights estimation functionality
+from regression.domain_adaptation import estimate_weights_random_forests
+
+# the folder where we store the data has also changed
+sp.ROOT_FOLDER = "/media/wirkert/data/Data/" + \
+                 "2016_02_22_DA_in_silico"
+
+
+class WeightedBatch(luigi.Task):
+    which_source = luigi.Parameter()
+    which_target = luigi.Parameter()
+
+    def requires(self):
+        return tasks_mc.CameraBatch(self.which_source), \
+               tasks_mc.CameraBatch(self.which_target)
+
+    def output(self):
+        return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER,
+                                              sp.RESULTS_FOLDER,
+                                              "adapted_" +
+                                              self.which_source +
+                                              "_with_" + self.which_target +
+                                              ".txt"))
+
+    def run(self):
+        # get data
+        df_source = pd.read_csv(self.input()[0].path, header=[0, 1])
+        df_target = pd.read_csv(self.input()[1].path, header=[0, 1])
+
+        # first extract X_source and X_target, preprocessed at the standard
+        # noise level
+        X_source, y_source = preprocess(df_source, w_percent=w_standard)
+        X_target, y_target = preprocess(df_target, w_percent=w_standard)
+
+        # train a classifier to determine the probability for the specific
+        # class
+        weights = estimate_weights_random_forests(X_source, X_target,
+                                                  X_source)
+        # add the weights to the dataframe
+        df_source["weights"] = weights
+
+        # finally save the dataframe with the added weights
+        df_source.to_csv(self.output().path, index=False)
+
+
+class DaNoisePlot(luigi.Task):
+    """
+    Very similar to the NoisePlot in the IPCAI in silico evaluation, but
+    with weighted data coming in.
+    """
+    which_train = luigi.Parameter()
+    which_test = luigi.Parameter()
+
+    def requires(self):
+        return WeightedBatch(self.which_train, self.which_test), \
+               tasks_mc.CameraBatch(self.which_test)
+
+    def output(self):
+        return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER,
+                                              sp.RESULTS_FOLDER,
+                                              sp.FINALS_FOLDER,
+                                              "noise_da_plot_train_" +
+                                              self.which_train +
+                                              "_test_" + self.which_test +
+                                              ".png"))
+
+    def run(self):
+        # get data
+        df_train = pd.read_csv(self.input()[0].path, header=[0, 1])
+        df_test = pd.read_csv(self.input()[1].path, header=[0, 1])
+
+        df = evaluate_data(df_train, noise_levels, df_test, noise_levels)
+        standard_plotting(df)
+
+        # finally save the figure
+        plt.savefig(self.output().path, dpi=500,
+                    bbox_inches='tight')
+
+
+class GeneratingDistributionPlot(luigi.Task):
+    which_source = luigi.Parameter()
+    which_target = luigi.Parameter()
+
+    def requires(self):
+        return WeightedBatch(self.which_source, self.which_target), \
+               tasks_mc.CameraBatch(self.which_target)
+
+    def output(self):
+        return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER,
+                                              sp.RESULTS_FOLDER,
+                                              sp.FINALS_FOLDER,
+                                              "generating_distribution_" +
+                                              self.which_source +
+                                              "_adapted_to_" +
+                                              self.which_target +
+                                              ".png"))
+
+    def run(self):
+        # get data
+        df_source = pd.read_csv(self.input()[0].path, header=[0, 1])
+        df_target = pd.read_csv(self.input()[1].path, header=[0, 1])
+
+        # create a dataframe suited for plotting
+        # we do weighted sampling with replacement to be able to create
+        # plots where the data distribution is visible.
+        nr_samples = 100
+        # first the data weighted by domain adaptation
+        df_source_adapted = df_source["layer0"].copy()
+        df_source_adapted["weights"] = df_source["weights"]
+        df_source_adapted["data"] = "adapted"
+        df_source_adapted = df_source_adapted.sample(n=nr_samples,
+                                                     replace=True,
+                                                     weights="weights")
+        # now the original source data
+        df_source = df_source["layer0"].copy()
+        df_source["weights"] = 1  # we set the weights here to 1
+        df_source["data"] = "source"
+        df_source = df_source.sample(n=nr_samples, replace=True,
+                                     weights="weights")
+        # now the target data
+        df_target = df_target["layer0"]
+        df_target["weights"] = 1
+        df_target["data"] = "target"
+        df_target = df_target.sample(n=nr_samples, replace=True,
+                                     weights="weights")
+        # now merge all three dataframes into the dataframe used for the
+        # plotting
+        df = pd.concat([df_source, df_source_adapted, df_target])
+        # since we already sampled we can get rid of the weights
+        df.drop("weights", axis=1, inplace=True)
+        # d to um
+        df["d"] *= 10 ** 6
+        # vhb and sao2 to %
+        df["vhb"] *= 100
+        df["sao2"] *= 100
+
+        # do some serious plotting
+        g = sns.pairplot(df, vars=["vhb", "sao2", "d"],
+                         hue="data", markers=["o", "s", "D"])
+
+        # tidy up the plot
+        g.add_legend()
+
+        # finally save the figure
+        # (note: the output path already ends in ".png"; appending another
+        # ".png" here would mean the luigi target is never created)
+        plt.savefig(self.output().path, dpi=500,
+                    bbox_inches='tight')
+
+
+class WeightDistributionPlot(luigi.Task):
+    which_source = luigi.Parameter()
+    which_target = luigi.Parameter()
+
+    def requires(self):
+        return WeightedBatch(self.which_source, self.which_target)
+
+    def output(self):
+        return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER,
+                                              sp.RESULTS_FOLDER,
+                                              sp.FINALS_FOLDER,
+                                              "weight_distribution_" +
+                                              self.which_source +
+                                              "_adapted_to_" +
+                                              self.which_target +
+                                              ".png"))
+
+    def run(self):
+        # get data
+        df_source = pd.read_csv(self.input().path, header=[0, 1])
+
+        df_source["weights"].plot.hist(bins=100)
+        plt.axvline(x=1, ymin=0, ymax=df_source.shape[0])
+        # TODO: add cumsum on top
+
+        # finally save the figure
+        plt.savefig(self.output().path, dpi=500,
+                    bbox_inches='tight')
+
+
+class FeatureDistributionPlot(luigi.Task):
+    which_source = luigi.Parameter()
+    which_target = luigi.Parameter()
+
+    def requires(self):
+        return WeightedBatch(self.which_source, self.which_target), \
+               tasks_mc.CameraBatch(self.which_target)
+
+    def output(self):
+        return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER,
+                                              sp.RESULTS_FOLDER,
+                                              sp.FINALS_FOLDER,
+                                              "feature_distribution_" +
+                                              self.which_source +
+                                              "_adapted_to_" +
+                                              self.which_target +
+                                              ".png"))
+
+    def run(self):
+        # get data
+        df_source = pd.read_csv(self.input()[0].path, header=[0, 1])
+        df_target = pd.read_csv(self.input()[1].path, header=[0, 1])
+
+        df_f_source = format_dataframe_for_distribution_plotting(df_source)
+        df_f_target = format_dataframe_for_distribution_plotting(df_target)
+        df_f_adapted = format_dataframe_for_distribution_plotting(
+            df_source,
+            weights=df_source["weights"].values.squeeze())
+
+        # build a combined df
+        df_f_source["data"] = "source"
+        df_f_target["data"] = "target"
+        df_f_adapted["data"] = "adapted"
+        df = pd.concat([df_f_source, df_f_target, df_f_adapted])
+
+        # do the plotting
+        grid = sns.FacetGrid(df, col="w", hue="data", col_wrap=3, size=1.5)
+        grid.map(plt.plot, "bins", "frequency")
+
+        # tidy up the plot
+        grid.fig.tight_layout(w_pad=1)
+        grid.add_legend()
+        grid.set(xticks=(0, 1))
+
+        # finally save the figure
+        plt.savefig(self.output().path, dpi=500)
+
+
+def format_dataframe_for_distribution_plotting(df, weights=None):
+    if weights is None:
+        weights = np.ones(df.shape[0])
+
+    bins = np.arange(0, 1, 0.01)
+
+    # we're only interested in the reflectance information
+    df_formatted = df.loc[:, "reflectances"]
+    # to [nm] for plotting
+    df_formatted.rename(columns=lambda x: float(x) * 10 ** 9, inplace=True)
+
+    # transform the data to a histogram
+    df_formatted = df_formatted.apply(lambda x:
+                                      pd.Series(np.histogram(x, bins=bins,
+                                                             weights=weights,
+                                                             normed=True)[0]),
+                                      axis=0)
+    # convert to long form using bins as identifier
+    df_formatted["bins"] = bins[1:]
+    df_formatted = pd.melt(df_formatted, id_vars=["bins"],
+                           var_name="w", value_name="frequency")
+
+    return df_formatted
+
+
+if __name__ == '__main__':
+    logging.basicConfig(filename='log/da_in_silico_plots_' +
+                        str(datetime.datetime.now()) +
+                        '.log', level=logging.INFO)
+    ch = logging.StreamHandler()
+    ch.setLevel(logging.INFO)
+    logger = logging.getLogger()
+    logger.addHandler(ch)
+    luigi.interface.setup_interface_logging()
+
+    source_domain = "ipcai_revision_generic"
+    target_domain = "ipcai_revision_colon_test"
+
+    sch = luigi.scheduler.CentralPlannerScheduler()
+    w = luigi.worker.Worker(scheduler=sch)
+    # check how the graph looks with the same domains for training and
+    # testing
+    w.add(DaNoisePlot(which_train="ipcai_revision_colon_train",
+                      which_test="ipcai_revision_colon_test"))
+    # check how the graph looks without domain adaptation
+    w.add(NoisePlot(which_train=source_domain, which_test=target_domain))
+    # set a different testing domain to evaluate domain sensitivity
+    w.add(DaNoisePlot(which_train=source_domain, which_test=target_domain))
+
+    w.add(WeightDistributionPlot(which_source=source_domain,
+                                 which_target=target_domain))
+    w.add(FeatureDistributionPlot(which_source=source_domain,
+                                  which_target=target_domain))
+    # also plot the distributions for equal domains to check for errors in
+    # the data
+    w.add(FeatureDistributionPlot(which_source="ipcai_revision_colon_train",
+                                  which_target="ipcai_revision_colon_test"))
+    # plot how the generating model data (e.g. sao2 and vhb) is distributed
+    w.add(GeneratingDistributionPlot(which_source=source_domain,
+                                     which_target=target_domain))
+    w.run()
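The domain adaptation tasks above lean on weighted resampling: rows of the source batch are drawn with replacement, with probability proportional to their estimated importance weight. A toy sketch of that mechanism (the data and weights below are invented; in WeightedBatch the weights come from estimate_weights_random_forests):

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
# toy source batch: 1000 samples with a uniformly distributed "vhb"
df_source = pd.DataFrame({"vhb": rng.uniform(0, 1, 1000)})
# invented importance weights favouring large vhb values; in the script
# they are density ratios estimated by a random forest classifier
df_source["weights"] = df_source["vhb"] ** 2

# weighted sampling with replacement, as in GeneratingDistributionPlot:
# the resampled rows approximate the reweighted (adapted) distribution
df_adapted = df_source.sample(n=100, replace=True, weights="weights",
                              random_state=0)
print(df_source["vhb"].mean())   # ~0.5
print(df_adapted["vhb"].mean())  # clearly shifted towards 1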
diff --git a/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_silico.py b/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_silico.py
index 2584732b68..25be6462a0 100644
--- a/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_silico.py
+++ b/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_silico.py
@@ -1,326 +1,336 @@
 # -*- coding: utf-8 -*-
 """
 Created on Fri Aug 14 11:09:18 2015
 
 @author: wirkert
 """
 
 import os
 import logging
 import datetime
 from collections import namedtuple
 
 import numpy as np
 import pandas as pd
 from pandas import DataFrame
 import luigi
 import matplotlib.pyplot as plt
 from sklearn.ensemble.forest import RandomForestRegressor
 import seaborn as sns
 
 import tasks_mc
 import scriptpaths as sp
 from regression.preprocessing import preprocess, preprocess2
 from regression.linear import LinearSaO2Unmixing
 
 sp.ROOT_FOLDER = "/media/wirkert/data/Data/" + \
                  "2016_02_22_IPCAI_revision_in_silico"
 sp.FINALS_FOLDER = "finals"
 sp.RECORDED_WAVELENGTHS = np.arange(470, 680, 10) * 10 ** -9
 sp.MC_DATA_FOLDER = "mc_data"
 
 w_standard = 0.1  # for this evaluation we add 10% noise
 
 sns.set_style("whitegrid")
 # sns.set_context("poster")
 
 # setup standard random forest
 rf = RandomForestRegressor(10, min_samples_leaf=10, max_depth=9, n_jobs=-1)
 EvaluationStruct = namedtuple("EvaluationStruct",
-                              "name regressor color alpha")
+                              "name regressor")
 # standard evaluation setup
 standard_evaluation_setups = [EvaluationStruct("Linear Beer-Lambert",
-                                               LinearSaO2Unmixing(),
-                                               sns.xkcd_rgb["pale red"], 0.5)
-                              , EvaluationStruct("Proposed", rf,
-                                                 sns.xkcd_rgb["medium green"],
-                                                 0.5)]
+                                               LinearSaO2Unmixing())
+                              , EvaluationStruct("Proposed", rf)]
+
+# color palette
+my_colors = [sns.xkcd_rgb["pale red"], sns.xkcd_rgb["medium green"]]
 
 # standard noise levels
 noise_levels = np.arange(0.00, 0.18, 0.02)
 
 
 class TrainingSamplePlot(luigi.Task):
     which_train = luigi.Parameter()
     which_test = luigi.Parameter()
 
     def requires(self):
         return tasks_mc.CameraBatch(self.which_train), \
                tasks_mc.CameraBatch(self.which_test)
 
     def output(self):
         return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER,
                                               sp.RESULTS_FOLDER,
                                               sp.FINALS_FOLDER,
-                                              "sample_plot_train_" + self.which_train +
-                                              "_test_" + self.which_test + ".png"))
+                                              "sample_plot_train_" +
+                                              self.which_train +
+                                              "_test_" + self.which_test +
+                                              ".png"))
 
     def run(self):
         # get data
         df_train = pd.read_csv(self.input()[0].path, header=[0, 1])
         df_test = pd.read_csv(self.input()[1].path, header=[0, 1])
 
         # for this plot we write a custom evaluation function as it is
         # built a little differently
 
         # create a new dataframe which will hold all the generated errors
         df = pd.DataFrame()
 
         nr_training_samples = np.arange(10, 15060, 50).astype(int)
         # not very pythonic, don't care
         for n in nr_training_samples:
             X_test, y_test = preprocess(df_test, w_percent=w_standard)
             # only take n samples for training
             X_train, y_train = preprocess(df_train, nr_samples=n,
                                           w_percent=w_standard)
 
             regressor = rf
             regressor.fit(X_train, y_train)
             y_pred = regressor.predict(X_test)
             # save results to a dataframe
             errors = np.abs(y_pred - y_test)
             errors = errors.reshape(len(errors), 1)
             current_df = DataFrame(errors * 100,
                                    columns=["Errors"])
             current_df["Method"] = "Proposed"
             current_df["Number Samples"] = n
             df = pd.concat([df, current_df], ignore_index=True)
             logging.info(
                 "Finished training classifier with {0} samples".format(
-                        str(n)))
+                    str(n)))
 
Samples").describe() # get the error description in the rows: df = df.unstack(-1) # get rid of multiindex by dropping "Error" level df.columns = df.columns.droplevel(0) + plt.figure() plt.plot(df.index, df["50%"], color=sns.xkcd_rgb["medium green"]) # tidy up the plot plt.xlabel("number of training samples") plt.ylabel("absolute error [%]") - plt.ylim((0, 15)) - plt.xlim((0, 15)) + plt.ylim((0, 20)) + plt.xlim((0, 15000)) # finally save the figure plt.savefig(self.output().path, dpi=500, bbox_inches='tight') class VhbPlot(luigi.Task): which_train = luigi.Parameter() which_test = luigi.Parameter() def requires(self): return tasks_mc.CameraBatch(self.which_train), \ tasks_mc.CameraBatch(self.which_test) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, - "vhb_noise_plot_train_" + self.which_train + - "_test_" + self.which_test + ".png")) + "vhb_noise_plot_train_" + + self.which_train + + "_test_" + self.which_test + + ".png")) @staticmethod def preprocess_vhb(batch, nr_samples=None, w_percent=None, magnification=None, bands_to_sortout=None): """ For evaluating vhb we extract labels for vhb instead of sao2""" X, y = preprocess2(batch, nr_samples, w_percent, magnification, bands_to_sortout) return X, y["vhb"] def run(self): # get data df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) # for vhb we only evaluate the proposed method since the linear # beer-lambert is not applicable - evaluation_setups = [EvaluationStruct("Proposed", rf, - sns.xkcd_rgb["medium green"], - 0.5)] - standard_plotting(df_train, noise_levels, df_test, noise_levels, - evaluation_setups=evaluation_setups, - preprocessing=self.preprocess_vhb, + evaluation_setups = [EvaluationStruct("Proposed", rf)] + df = evaluate_data(df_train, noise_levels, df_test, noise_levels, + evaluation_setups=evaluation_setups, + preprocessing=self.preprocess_vhb, ) + standard_plotting(df, color_palette = sns.xkcd_rgb["medium green"], xytext_position=(2, 3)) plt.ylim((0, 4)) # finally save the figure plt.savefig(self.output().path, dpi=500, bbox_inches='tight') class NoisePlot(luigi.Task): which_train = luigi.Parameter() which_test = luigi.Parameter() def requires(self): return tasks_mc.CameraBatch(self.which_train), \ tasks_mc.CameraBatch(self.which_test) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, - "noise_plot_train_" + self.which_train + - "_test_" + self.which_test + ".png")) + "noise_plot_train_" + + self.which_train + + "_test_" + self.which_test + + ".png")) def run(self): # get data df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) - standard_plotting(df_train, noise_levels, df_test, noise_levels) + df = evaluate_data(df_train, noise_levels, df_test, noise_levels) + standard_plotting(df) # finally save the figure plt.savefig(self.output().path, dpi=500, bbox_inches='tight') class WrongNoisePlot(luigi.Task): which_train = luigi.Parameter() which_test = luigi.Parameter() def requires(self): return tasks_mc.CameraBatch(self.which_train), \ tasks_mc.CameraBatch(self.which_test) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, - "wrong_noise_plot_train_" + self.which_train + - "_test_" + self.which_test + ".png")) + "wrong_noise_plot_train_" + + self.which_train + + "_test_" + self.which_test + + ".png")) def run(self): # get 
         # get data
         df_train = pd.read_csv(self.input()[0].path, header=[0, 1])
         df_test = pd.read_csv(self.input()[1].path, header=[0, 1])
 
         # do the same as in NoisePlot, but train at the standard noise level
-        standard_plotting(df_train, np.ones_like(noise_levels) * w_standard,
-                          df_test, noise_levels)
+        df = evaluate_data(df_train, np.ones_like(noise_levels) * w_standard,
+                           df_test, noise_levels)
+        standard_plotting(df)
 
         # finally save the figure
         plt.savefig(self.output().path, dpi=500,
                     bbox_inches='tight')
 
 
 def evaluate_data(df_train, w_train, df_test, w_test,
                   evaluation_setups=None, preprocessing=None):
     """ our standard method to evaluate the data. It will fill a DataFrame df
     which saves the errors for each evaluated setup"""
     if evaluation_setups is None:
         evaluation_setups = standard_evaluation_setups
     if preprocessing is None:
         preprocessing = preprocess
+    if "weights" in df_train:
+        weights = df_train["weights"].as_matrix().squeeze()
+    else:
+        weights = np.ones(df_train.shape[0])
 
     # create a new dataframe which will hold all the generated errors
     df = pd.DataFrame()
     for one_w_train, one_w_test in zip(w_train, w_test):
         # setup the testing function
         X_test, y_test = preprocessing(df_test, w_percent=one_w_test)
         # extract noisy data
         X_train, y_train = preprocessing(df_train, w_percent=one_w_train)
         for e in evaluation_setups:
             regressor = e.regressor
-            regressor.fit(X_train, y_train)
+            regressor.fit(X_train, y_train, weights)
             y_pred = regressor.predict(X_test)
             # save results to a dataframe
             errors = np.abs(y_pred - y_test)
             errors = errors.reshape(len(errors), 1)
             current_df = DataFrame(errors * 100,
                                    columns=["Errors"])
             current_df["Method"] = e.name
             current_df["noise added [sigma %]"] = int(one_w_test * 100)
             df = pd.concat([df, current_df], ignore_index=True)
 
     return df
 
 
-def standard_plotting(df_train, w_train, df_test, w_test,
-                      evaluation_setups=None, preprocessing=None,
-                      xytext_position=None):
-    if evaluation_setups is None:
-        evaluation_setups = standard_evaluation_setups
+def standard_plotting(df, color_palette=None, xytext_position=None):
+    if color_palette is None:
+        color_palette = my_colors
     if xytext_position is None:
         xytext_position = (2, 15)
 
     plt.figure()
 
-    # evaluate the data which is used to generate the plot
-    df = evaluate_data(df_train, w_train, df_test, w_test,
-                       evaluation_setups, preprocessing)
     # group it by method and noise level and get a description of the errors
-    df = df.groupby(['Method', 'noise added [sigma %]']).describe()
+    df_statistics = df.groupby(['Method',
+                                'noise added [sigma %]']).describe()
     # get the error description in the rows:
-    df = df.unstack(-1)
+    df_statistics = df_statistics.unstack(-1)
     # get rid of the multiindex by dropping the "Error" level
-    df.columns = df.columns.droplevel(0)
+    df_statistics.columns = df_statistics.columns.droplevel(0)
 
     # iterate over the methods to plot line graphs with an error tube
     # probably this can be done nicer, but no idea how exactly
-    for e in evaluation_setups:
-        df_method = df.loc[e.name]
+    for color, method in zip(
+            color_palette,
+            df_statistics.index.get_level_values("Method").unique()):
+        df_method = df_statistics.loc[method]
         plt.plot(df_method.index, df_method["50%"],
-                 color=e.color, label=e.name)
-        plt.fill_between(df_method.index,
-                         df_method["25%"], df_method["75%"],
-                         facecolor=e.color, edgecolor=e.color,
-                         alpha=e.alpha)
+                 color=color, label=method)
+        plt.fill_between(df_method.index, df_method["25%"], df_method["75%"],
+                         facecolor=color, edgecolor=color,
+                         alpha=0.5)
 
     plt.xlabel("noise added [sigma %]")
     plt.ylabel("absolute error [%]")
     # add annotation:
-    median_proposed = df.T["Proposed", 0].loc["50%"]
+    median_proposed = df_statistics.T["Proposed", 0].loc["50%"]
     plt.gca().annotate('error: ' +
                        "{0:.1f}".format(median_proposed) + "%",
                        xy=(0, median_proposed), xytext=xytext_position,
                        va="center", ha="center",
                        bbox=dict(boxstyle="round4",
                                  fc=sns.xkcd_rgb["medium green"],
                                  alpha=0.5),
                        arrowprops=dict(arrowstyle="-|>",
                                        connectionstyle="arc3,rad=-0.2",
                                        fc="w"))
 
     # tidy up the plot
     plt.ylim((0, 20))
     plt.xlim((0, 15))
     plt.legend()
 
 
 if __name__ == '__main__':
     logging.basicConfig(filename='log/in_silico_plots_' +
                         str(datetime.datetime.now()) +
                         '.log', level=logging.INFO)
     ch = logging.StreamHandler()
     ch.setLevel(logging.INFO)
     logger = logging.getLogger()
     logger.addHandler(ch)
     luigi.interface.setup_interface_logging()
 
     sch = luigi.scheduler.CentralPlannerScheduler()
     w = luigi.worker.Worker(scheduler=sch)
     w.add(TrainingSamplePlot(which_train="ipcai_revision_colon_train",
                              which_test="ipcai_revision_colon_test"))
     w.add(NoisePlot(which_train="ipcai_revision_colon_train",
                     which_test="ipcai_revision_colon_test"))
     w.add(WrongNoisePlot(which_train="ipcai_revision_colon_train",
                          which_test="ipcai_revision_colon_test"))
     # set a different testing domain to evaluate domain sensitivity
     w.add(NoisePlot(which_train="ipcai_revision_colon_train",
                     which_test="ipcai_revision_generic"))
     w.add(VhbPlot(which_train="ipcai_revision_colon_train",
                   which_test="ipcai_revision_colon_test"))
     w.run()
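A note on the aggregation in standard_plotting: the groupby/describe/unstack/droplevel chain targets the pandas of that era. With a recent pandas the quartile columns can be obtained more directly; a small sketch on invented error data shaped like the output of evaluate_data (all values made up):

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
# invented error table shaped like the DataFrame evaluate_data returns
df = pd.DataFrame({
    "Errors": np.abs(rng.randn(400)) * 5,
    "Method": np.repeat(["Proposed", "Linear Beer-Lambert"], 200),
    "noise added [sigma %]": list(range(0, 20, 2)) * 40,
})

# in recent pandas, describe() on the grouped Series directly yields the
# "25%"/"50%"/"75%" columns used for the median line and the error tube
stats = df.groupby(["Method", "noise added [sigma %]"])["Errors"].describe()
df_proposed = stats.loc["Proposed"]
print(df_proposed[["25%", "50%", "75%"]])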
diff --git a/Modules/Biophotonics/python/iMC/script_calculate_spectra.py b/Modules/Biophotonics/python/iMC/script_calculate_spectra.py
index e8a9c310c5..a32e77545c 100644
--- a/Modules/Biophotonics/python/iMC/script_calculate_spectra.py
+++ b/Modules/Biophotonics/python/iMC/script_calculate_spectra.py
@@ -1,136 +1,133 @@
 '''
 Created on Sep 9, 2015
 
 @author: wirkert
 '''
 
 import logging
 import datetime
 import os
 import time
 
 import numpy as np
 import luigi
 
 import scriptpaths as sp
 import mc.factories as mcfac
 from mc.sim import SimWrapper, get_diffuse_reflectance
 
 # parameter setting
 NR_BATCHES = 100
 NR_ELEMENTS_IN_BATCH = 1000
 # the wavelengths to be simulated
 WAVELENGHTS = np.arange(450, 720, 2) * 10 ** -9
 NR_PHOTONS = 10 ** 6
 
 # general output path config
 sp.ROOT_FOLDER = \
     "/media/wirkert/data/Data/2016_02_22_IPCAI_revision_in_silico"
 sp.RESULTS_FOLDER = "mc_data"
 
 # experiment configuration
 MCI_FILENAME = "./temp.mci"
 MCO_FILENAME = "temp.mco"
 # this path definitely needs to be adapted by you
 PATH_TO_MCML = "/home/wirkert/workspace/monteCarlo/gpumcml/fast-gpumcml/"
 EXEC_MCML = "gpumcml.sm_20"
 
 
 class CreateSpectraTask(luigi.Task):
     df_prefix = luigi.Parameter()
     batch_nr = luigi.IntParameter()
     nr_samples = luigi.IntParameter()
     factory = luigi.Parameter()
 
     def output(self):
         return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER,
                                               sp.RESULTS_FOLDER,
                                               self.df_prefix + "_" +
                                               str(self.batch_nr) + ".txt"))
 
     def run(self):
         start = time.time()
         # setup the simulation wrapper
         self.sim_wrapper = SimWrapper()
         self.sim_wrapper.set_mci_filename(MCI_FILENAME)
         self.sim_wrapper.set_mcml_executable(PATH_TO_MCML + EXEC_MCML)
         # setup the model
         self.tissue_model = self.factory.create_tissue_model()
         self.tissue_model.set_mci_filename(self.sim_wrapper.mci_filename)
         self.tissue_model.set_mco_filename(MCO_FILENAME)
         self.tissue_model.set_nr_photons(NR_PHOTONS)
         # setup the array in which the data shall be stored
         batch = self.factory.create_batch_to_simulate()
         batch.create_parameters(self.nr_samples)
         # dataframe created by the batch:
         df = batch.df
         # add a reflectance column to the dataframe
         for w in WAVELENGHTS:
             df["reflectances", w] = np.NAN
 
         # for each instance of our tissue model
         for i in range(df.shape[0]):
             self.tissue_model.set_dataframe_element(df, i)
             logging.info("running simulation " + str(i) + " for\n" +
                          str(self.tissue_model))
             start = time.time()
             # map the _wavelengths array to the reflectance list
             reflectances = map(self.wavelength_to_reflectance,
                                WAVELENGHTS)
             end = time.time()
             # store in dataframe
             for r, w in zip(reflectances, WAVELENGHTS):
                 df["reflectances", w][i] = r
             # success!
             logging.info("successfully ran simulation in " +
                          "{:.2f}".format(end - start) + " seconds")
         # clean up temporarily created files
         os.remove(MCI_FILENAME)
         created_mco_file = PATH_TO_MCML + "/" + MCO_FILENAME
         if os.path.isfile(created_mco_file):
             os.remove(created_mco_file)
         # save the created output
         f = open(self.output().path, 'w')
         df.to_csv(f)
 
         end = time.time()
         logging.info("time for creating batch of mc data: %.f s" %
                      (end - start))
 
     def wavelength_to_reflectance(self, wavelength):
         """helper function to determine the reflectance for a given
         wavelength using the current model and simulation """
         self.tissue_model.set_wavelength(wavelength)
         self.tissue_model.create_mci_file()
         if os.path.isfile(PATH_TO_MCML + EXEC_MCML):
             self.sim_wrapper.run_simulation()
             return get_diffuse_reflectance(PATH_TO_MCML + MCO_FILENAME)
         else:
             raise IOError("path to gpumcml not valid")
 
 
 if __name__ == '__main__':
     logging.basicConfig(filename='log/calculate_spectra_' +
                         str(datetime.datetime.now()) + '.log',
                         level=logging.INFO)
     ch = logging.StreamHandler()
     ch.setLevel(logging.INFO)
     logger = logging.getLogger()
     logger.addHandler(ch)
     luigi.interface.setup_interface_logging()
 
     sch = luigi.scheduler.CentralPlannerScheduler()
     w = luigi.worker.Worker(scheduler=sch)
     BATCH_NUMBERS = np.arange(0, NR_BATCHES, 1)
     for i in BATCH_NUMBERS:
-        colon_task = CreateSpectraTask("ipcai_revision_colon_train", i,
+        colon_task = CreateSpectraTask("ipcai_revision_colon_mean_scattering",
+                                       i,
                                        NR_ELEMENTS_IN_BATCH,
-                                       mcfac.ColonMuscleMcFactory())
-        generic_task = CreateSpectraTask("ipcai_revision_generic", i,
-                                         NR_ELEMENTS_IN_BATCH,
-                                         mcfac.GenericMcFactory())
+                                       mcfac.ColonMuscleMeanScatteringFactory())
         w.add(colon_task)
-        w.add(generic_task)
     w.run()
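One portability caveat in CreateSpectraTask.run: under Python 3, map is lazy, so end - start would be measured before any simulation has actually run, and the work would silently happen later inside the zip that stores the results. A minimal sketch of the safer pattern, with a dummy stand-in for the real gpumcml call:

import time

WAVELENGHTS = [450e-9, 452e-9, 454e-9]


def wavelength_to_reflectance(wavelength):
    """stand-in for the real call, which writes the .mci file, runs
    gpumcml and parses the diffuse reflectance from the .mco output"""
    time.sleep(0.1)  # pretend the simulation takes a while
    return 0.1


start = time.time()
# list() forces evaluation here, so the measured duration covers the
# simulations; a bare map() under Python 3 would defer the work
reflectances = list(map(wavelength_to_reflectance, WAVELENGHTS))
end = time.time()
print("{} reflectances in {:.2f}s".format(len(reflectances), end - start))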