diff --git a/Modules/Biophotonics/python/iMC/scripts/da/__init__.py b/Modules/Biophotonics/python/iMC/scripts/da/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Modules/Biophotonics/python/iMC/scripts/da/script_analyze_da_in_silico.py b/Modules/Biophotonics/python/iMC/scripts/da/script_analyze_da_in_silico.py
new file mode 100644
index 0000000000..34baa658bf
--- /dev/null
+++ b/Modules/Biophotonics/python/iMC/scripts/da/script_analyze_da_in_silico.py
@@ -0,0 +1,357 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Aug 14 11:09:18 2015
+
+@author: wirkert
+"""
+
+import seaborn as sns
+
+# import everything as done in IPCAI in silico evaluation
+from ipcai2016.script_analyze_ipcai_in_silico import *
+# additionally we need the weights estimation functionality
+from regression.domain_adaptation import estimate_weights_random_forests
+
+# the folder where we store the data has also changed
+sp.ROOT_FOLDER = "/media/wirkert/data/Data/" + \
+                 "2016_02_22_DA_in_silico"
+
+
+class WeightedBatch(luigi.Task):
+    which_source = luigi.Parameter()
+    which_target = luigi.Parameter()
+    # the noise level only enters the output file name; the weights are
+    # always estimated at the standard noise level, so a default is provided
+    noise = luigi.FloatParameter(default=w_standard)
+
+    def requires(self):
+        return tasks_mc.CameraBatch(self.which_source), \
+               tasks_mc.CameraBatch(self.which_target)
+
+    def output(self):
+        return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER,
+                                              sp.RESULTS_FOLDER,
+                                              "adapted_" +
+                                              self.which_source +
+                                              "_with_" + self.which_target +
+                                              "_noise_" + str(self.noise) +
+                                              ".txt"))
+
+    def run(self):
+        # get data
+        df_source = pd.read_csv(self.input()[0].path, header=[0, 1])
+        df_target = pd.read_csv(self.input()[1].path, header=[0, 1])
+
+        # first extract X_source and X_target, preprocessed at standard noise
+        # level
+        X_source, y_source = preprocess(df_source, snr=w_standard)
+        X_target, y_target = preprocess(df_target, snr=w_standard)
+
+        # train a random forest domain classifier and turn its estimated
+        # probability of each source sample belonging to the target domain
+        # into an importance weight
+        weights = estimate_weights_random_forests(X_source, X_target, X_source)
+        # add weight to dataframe
+        df_source["weights"] = weights
+
+        # finally save the dataframe with the added weights
+        df_source.to_csv(self.output().path, index=False)
+
+
+class DaNoisePlot(luigi.Task):
+    """
+    Very similar to NoisePlot in IPCAI in silico evaluation but with
+    weighted data coming in.
+ """ + which_train = luigi.Parameter() + which_test = luigi.Parameter() + + def requires(self): + # for each noise level we need to create the weights + NecessaryBatches = map(lambda noise: WeightedBatch(self.which_train, + self.which_test, + noise), + noise_levels) + return NecessaryBatches(self.which_train, self.which_test), \ + tasks_mc.CameraBatch(self.which_test) + + def output(self): + return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, + sp.RESULTS_FOLDER, + sp.FINALS_FOLDER, + "noise_da_plot_train_" + + self.which_train + + "_test_" + self.which_test + + ".png")) + + def run(self): + # get data + df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) + df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) + + df = evaluate_data(df_train, noise_levels, df_test, noise_levels) + standard_plotting(df) + + # finally save the figure + plt.savefig(self.output().path, dpi=500, + bbox_inches='tight') + + +class GeneratingDistributionPlot(luigi.Task): + which_source = luigi.Parameter() + which_target = luigi.Parameter() + + def requires(self): + return WeightedBatch(self.which_source, self.which_target), \ + tasks_mc.CameraBatch(self.which_target) + + def output(self): + return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, + sp.RESULTS_FOLDER, + sp.FINALS_FOLDER, + "generating_distribution_" + + self.which_source + + "_adapted_to_" + + self.which_target + + ".png")) + + def run(self): + # get data + df_source = pd.read_csv(self.input()[0].path, header=[0, 1]) + df_target = pd.read_csv(self.input()[1].path, header=[0, 1]) + + # create dataframe suited for plotting + # we do a weighted sampling with replacement to be able to create some + # plots there the data distribution is visible. + nr_samples = 100 + # first data weighted by domain adaptation + df_source_adapted = df_source["layer0"].copy() + df_source_adapted["weights"] = df_source["weights"] + df_source_adapted["data"] = "adapted" + df_source_adapted = df_source_adapted.sample(n=nr_samples, replace=True, + weights="weights") + # now original source data + df_source = df_source["layer0"].copy() + df_source["weights"] = 1 # we set the weights here to 1 + df_source["data"] = "source" + df_source = df_source.sample(n=nr_samples, replace=True, + weights="weights") + # now the target data + df_target = df_target["layer0"] + df_target["weights"] = 1 + df_target["data"] = "target" + df_target = df_target.sample(n=nr_samples, replace=True, + weights="weights") + # now merge all three dataframes to the dataframe used for the plotting + df = pd.concat([df_source, df_source_adapted, df_target]) + # since we already sampled we can get rid of weights + df.drop("weights", axis=1, inplace=True) + # d to um + df["d"] *= 10**6 + # vhb and sao2 to % + df["vhb"] *= 100 + df["sao2"] *= 100 + + # do some serious plotting + g = sns.pairplot(df, vars=["vhb", "sao2", "d"], + hue="data", markers=["o", "s", "D"]) + + # tidy up plot + g.add_legend() + + # finally save the figure + plt.savefig(self.output().path, dpi=500, + bbox_inches='tight') + + +class WeightDistributionPlot(luigi.Task): + which_source = luigi.Parameter() + which_target = luigi.Parameter() + + def requires(self): + return WeightedBatch(self.which_source, self.which_target) + + def output(self): + return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, + sp.RESULTS_FOLDER, + sp.FINALS_FOLDER, + "weight_distribution_" + + self.which_source + + "_adapted_to_" + + self.which_target + + ".png")) + + def run(self): + # get data + df_source = pd.read_csv(self.input().path, header=[0, 1]) + 
+ df_source["weights"].plot.hist(bins=100) + plt.axvline(x=1, ymin=0, ymax=df_source.shape[0]) + # TODO: add cumsum on top + + # finally save the figure + plt.savefig(self.output().path, dpi=500, + bbox_inches='tight') + + +class FeatureDistributionPlot(luigi.Task): + which_source = luigi.Parameter() + which_target = luigi.Parameter() + + def requires(self): + return WeightedBatch(self.which_source, self.which_target), \ + tasks_mc.CameraBatch(self.which_target) + + def output(self): + return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, + sp.RESULTS_FOLDER, + sp.FINALS_FOLDER, + "feature_distribution_" + + self.which_source + + "_adapted_to_" + + self.which_target + + ".png")) + + def run(self): + # get data + df_source = pd.read_csv(self.input()[0].path, header=[0, 1]) + df_target = pd.read_csv(self.input()[1].path, header=[0, 1]) + + df_f_source = format_dataframe_for_distribution_plotting(df_source) + df_f_target = format_dataframe_for_distribution_plotting(df_target) + df_f_adapted = format_dataframe_for_distribution_plotting(df_source, + weights=df_source["weights"].values.squeeze()) + + # build a combined df + df_f_source["data"] = "source" + df_f_target["data"] = "target" + df_f_adapted["data"] = "adapted" + df = pd.concat([df_f_source, df_f_target, df_f_adapted]) + + # do the plotting + grid = sns.FacetGrid(df, col="w", hue="data", col_wrap=3, size=1.5) + grid.map(plt.plot, "bins", "frequency") + + # tidy up plot + grid.fig.tight_layout(w_pad=1) + grid.add_legend() + grid.set(xticks=(0, 1)) + + # finally save the figure + plt.savefig(self.output().path, dpi=500) + + +class DAvNormalPlot(luigi.Task): + which_train = luigi.Parameter() + which_test = luigi.Parameter() + which_train_no_covariance_shift = luigi.Parameter() + + def requires(self): + return WeightedBatch(self.which_train, self.which_test), \ + tasks_mc.CameraBatch(self.which_test), \ + tasks_mc.CameraBatch(self.which_train_no_covariance_shift) + + def output(self): + return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, + sp.RESULTS_FOLDER, + sp.FINALS_FOLDER, + "da_v_normal_train_" + + self.which_train + + "_test_" + self.which_test + + ".png")) + + def run(self): + # get data + df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) + df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) + df_train_no_covariance_shift = pd.read_csv(self.input()[2].path, + header=[0, 1]) + + evaluation_setups = [EvaluationStruct("Proposed", rf)] + # evaluate the different methods + df_adapted = evaluate_data(df_train, noise_levels, + df_test, noise_levels, + evaluation_setups=evaluation_setups) + df_adapted["data"] = "adapted" + df_no_adaptation = evaluate_data( + df_train.drop("weights", axis=1), noise_levels, + df_test, noise_levels, + evaluation_setups=evaluation_setups) + df_no_adaptation["data"] = "source" + df_no_covariance_shift = evaluate_data( + df_train_no_covariance_shift, noise_levels, + df_test, noise_levels, + evaluation_setups=evaluation_setups) + df_no_covariance_shift["data"] = "target" + df = pd.concat([df_adapted, df_no_adaptation, df_no_covariance_shift]) + + # plot it + sns.boxplot(data=df, x="noise added [sigma %]", y="Errors", hue="data", + hue_order=["source", "adapted", "target"], fliersize=0) + # tidy up plot + plt.ylim((0, 40)) + plt.legend(loc='upper left') + + # finally save the figure + plt.savefig(self.output().path, dpi=500) + + +def format_dataframe_for_distribution_plotting(df, weights=None): + if weights is None: + weights = np.ones(df.shape[0]) + + bins = np.arange(0, 1, 0.01) + + # we're 
only interested in reflectance information + df_formatted = df.loc[:, "reflectances"] + # to [nm] for plotting + df_formatted.rename(columns=lambda x: float(x)*10**9, inplace=True) + + # transform data to a histogram + df_formatted = df_formatted.apply(lambda x: + pd.Series(np.histogram(x, bins=bins, + weights=weights, + normed=True)[0]), + axis=0) + # convert to long form using bins as identifier + df_formatted["bins"] = bins[1:] + df_formatted = pd.melt(df_formatted, id_vars=["bins"], + var_name="w", value_name="frequency") + + return df_formatted + + +if __name__ == '__main__': + logging.basicConfig(filename=os.path.join(sp.LOG_FOLDER, + "da_in_silico_plots" + + str(datetime.datetime.now()) + + '.log'), + level=logging.INFO) + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + logger = logging.getLogger() + logger.addHandler(ch) + luigi.interface.setup_interface_logging() + + source_domain = "ipcai_revision_generic" + target_domain = "ipcai_revision_colon_test" + + sch = luigi.scheduler.CentralPlannerScheduler() + w = luigi.worker.Worker(scheduler=sch) + # check how the graph looks with same domains for training and testing + w.add(DaNoisePlot(which_train="ipcai_revision_colon_train", + which_test="ipcai_revision_colon_test")) + # check how the graph looks without domain adaptation + w.add(NoisePlot(which_train=source_domain, which_test=target_domain)) + # Set a different testing domain to evaluate domain sensitivity + w.add(DaNoisePlot(which_train=source_domain, which_test=target_domain)) + + w.add(WeightDistributionPlot(which_source=source_domain, + which_target=target_domain)) + w.add(FeatureDistributionPlot(which_source=source_domain, + which_target=target_domain)) + # also plot distributions for equal domains to check for errors in data + w.add(FeatureDistributionPlot(which_source="ipcai_revision_colon_mean_scattering_train", + which_target="ipcai_revision_colon_mean_scattering_test")) + # plot how the generating model data (e.g. sao2 and vhb) is distributed + w.add(GeneratingDistributionPlot(which_source=source_domain, + which_target=target_domain)) + w.add(DAvNormalPlot(which_train=source_domain, which_test=target_domain, + which_train_no_covariance_shift="ipcai_revision_colon_train")) + w.run() diff --git a/Modules/Biophotonics/python/iMC/scripts/ipcai2016/__init__.py b/Modules/Biophotonics/python/iMC/scripts/ipcai2016/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Modules/Biophotonics/python/iMC/scripts/ipcai2016/script_analyze_ipcai_in_silico.py b/Modules/Biophotonics/python/iMC/scripts/ipcai2016/script_analyze_ipcai_in_silico.py new file mode 100644 index 0000000000..52ba2fe6a7 --- /dev/null +++ b/Modules/Biophotonics/python/iMC/scripts/ipcai2016/script_analyze_ipcai_in_silico.py @@ -0,0 +1,333 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Aug 14 11:09:18 2015 + +@author: wirkert +""" + +import os +import logging +import datetime +from collections import namedtuple + +import matplotlib +import numpy as np +import pandas as pd +from pandas import DataFrame +import luigi +import matplotlib.pyplot as plt +from sklearn.ensemble.forest import RandomForestRegressor + +import tasks_mc +from regression.preprocessing import preprocess, preprocess2 +from regression.linear import LinearSaO2Unmixing + +import scriptpaths as sp + +FINALS_FOLDER = "in_silico" +in_silico_results_path = os.path.join(sp.RESULTS_FOLDER, + FINALS_FOLDER) + +sp.RECORDED_WAVELENGTHS = np.arange(470, 680, 10) * 10 ** -9 + +w_standard = 10. 
# for this evaluation we add 10% noise + +font = {'family' : 'normal', + 'size' : 20} + +matplotlib.rc('font', **font) + + +# setup standard random forest +rf = RandomForestRegressor(10, min_samples_leaf=10, max_depth=9, n_jobs=-1) +EvaluationStruct = namedtuple("EvaluationStruct", + "name regressor") +# standard evaluation setup +standard_evaluation_setups = [EvaluationStruct("Linear Beer-Lambert", + LinearSaO2Unmixing()) + , EvaluationStruct("Proposed", rf)] + +# color palette +my_colors = ["red", "green"] + +# standard noise levels +noise_levels = np.array([1,2,3,4,5,6,7,8,9,10, + 15,20,30,40,50,100,150,200]).astype("float") + + +class TrainingSamplePlot(luigi.Task): + which_train = luigi.Parameter() + which_test = luigi.Parameter() + + def requires(self): + return tasks_mc.CameraBatch(self.which_train), \ + tasks_mc.CameraBatch(self.which_test) + + def output(self): + return luigi.LocalTarget(os.path.join(in_silico_results_path, + "sample_plot_train_" + + self.which_train + + "_test_" + self.which_test + + ".pdf")) + + def run(self): + # get data + df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) + df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) + + # for this plot we write a custom evaluation function as it is built + # a little different + + # create a new dataframe which will hold all the generated errors + df = pd.DataFrame() + + nr_training_samples = np.arange(10, 15010, 50).astype(int) + # not very pythonic, don't care + for n in nr_training_samples: + X_test, y_test = preprocess(df_test, snr=w_standard) + # only take n samples for training + X_train, y_train = preprocess(df_train, nr_samples=n, + snr=w_standard) + + regressor = rf + regressor.fit(X_train, y_train) + y_pred = regressor.predict(X_test) + # save results to a dataframe + errors = np.abs(y_pred - y_test) + errors = errors.reshape(len(errors), 1) + current_df = DataFrame(errors * 100, + columns=["Errors"]) + current_df["Method"] = "Proposed" + current_df["Number Samples"] = n / 10**3. 
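+            # note: the sample count is stored in thousands so it directly
+            # matches the "number of training samples / 1000" axis label below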
+ df = pd.concat([df, current_df], ignore_index=True) + logging.info( + "Finished training classifier with {0} samples".format( + str(n))) + + df = df.groupby("Number Samples").describe() + # get the error description in the rows: + df = df.unstack(-1) + # get rid of multiindex by dropping "Error" level + df.columns = df.columns.droplevel(0) + + plt.figure() + plt.plot(df.index, df["50%"], color="green") + + # tidy up the plot + plt.xlabel("number of training samples / 1000") + plt.ylabel("absolute error [%]") + plt.ylim((0, 20)) + plt.xlim((0, 15)) + plt.grid() + + # finally save the figure + plt.savefig(self.output().path, mode="pdf", dpi=500, + bbox_inches='tight') + + +class VhbPlot(luigi.Task): + which_train = luigi.Parameter() + which_test = luigi.Parameter() + + def requires(self): + return tasks_mc.CameraBatch(self.which_train), \ + tasks_mc.CameraBatch(self.which_test) + + def output(self): + return luigi.LocalTarget(os.path.join(in_silico_results_path, + "vhb_noise_plot_train_" + + self.which_train + + "_test_" + self.which_test + + ".pdf")) + + @staticmethod + def preprocess_vhb(batch, nr_samples=None, snr=None, + magnification=None, bands_to_sortout=None): + """ For evaluating vhb we extract labels for vhb instead of sao2""" + X, y = preprocess2(batch, nr_samples, snr, + magnification, bands_to_sortout) + + return X, y["vhb"].values + + def run(self): + # get data + df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) + df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) + + # for vhb we only evaluate the proposed method since the linear + # beer-lambert is not applicable + evaluation_setups = [EvaluationStruct("Proposed", rf)] + df = evaluate_data(df_train, noise_levels, df_test, noise_levels, + evaluation_setups=evaluation_setups, + preprocessing=self.preprocess_vhb) + standard_plotting(df, color_palette=["green"], + xytext_position=(2, 3)) + plt.ylim((0, 4)) + + # finally save the figure + plt.savefig(self.output().path, dpi=500, + bbox_inches='tight') + + +class NoisePlot(luigi.Task): + which_train = luigi.Parameter() + which_test = luigi.Parameter() + + def requires(self): + return tasks_mc.CameraBatch(self.which_train), \ + tasks_mc.CameraBatch(self.which_test) + + def output(self): + return luigi.LocalTarget(os.path.join(in_silico_results_path, + "noise_plot_train_" + + self.which_train + + "_test_" + self.which_test + + ".pdf")) + + def run(self): + # get data + df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) + df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) + + df = evaluate_data(df_train, noise_levels, df_test, noise_levels) + standard_plotting(df) + + # finally save the figure + plt.savefig(self.output().path, mode="pdf", dpi=500, + bbox_inches='tight') + + +class WrongNoisePlot(luigi.Task): + which_train = luigi.Parameter() + which_test = luigi.Parameter() + train_snr = luigi.FloatParameter() + + def requires(self): + return tasks_mc.CameraBatch(self.which_train), \ + tasks_mc.CameraBatch(self.which_test) + + def output(self): + return luigi.LocalTarget(os.path.join(in_silico_results_path, + str(self.train_snr) + + "_wrong_noise_plot_train_" + + self.which_train + + "_test_" + self.which_test + + ".pdf")) + + def run(self): + # get data + df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) + df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) + + # do same as in NoisePlot but with standard noise input + df = evaluate_data(df_train, + np.ones_like(noise_levels) * self.train_snr, + df_test, noise_levels) + 
standard_plotting(df) + + # finally save the figure + plt.savefig(self.output().path, mode="pdf", dpi=500, + bbox_inches='tight') + + +def evaluate_data(df_train, w_train, df_test, w_test, + evaluation_setups=None, preprocessing=None): + """ Our standard method to evaluate the data. It will fill a DataFrame df + which saves the errors for each evaluated setup""" + if evaluation_setups is None: + evaluation_setups = standard_evaluation_setups + if preprocessing is None: + preprocessing = preprocess + if ("weights" in df_train) and df_train["weights"].size > 0: + weights = df_train["weights"].as_matrix().squeeze() + else: + weights = np.ones(df_train.shape[0]) + + # create a new dataframe which will hold all the generated errors + df = pd.DataFrame() + for one_w_train, one_w_test in zip(w_train, w_test): + # setup testing function + X_test, y_test = preprocessing(df_test, snr=one_w_test) + # extract noisy data + X_train, y_train = preprocessing(df_train, snr=one_w_train) + for e in evaluation_setups: + regressor = e.regressor + regressor.fit(X_train, y_train, weights) + y_pred = regressor.predict(X_test) + # save results to a dataframe + errors = np.abs(y_pred - y_test) + errors = errors.reshape(len(errors), 1) + current_df = DataFrame(errors * 100, + columns=["Errors"]) + current_df["Method"] = e.name + current_df["SNR"] = int(one_w_test) + df = pd.concat([df, current_df], ignore_index=True) + + return df + + +def standard_plotting(df, color_palette=None, xytext_position=None): + if color_palette is None: + color_palette = my_colors + if xytext_position is None: + xytext_position = (2, 15) + + plt.figure() + + # group it by method and noise level and get description on the errors + df_statistics = df.groupby(['Method', 'SNR']).describe() + # get the error description in the rows: + df_statistics = df_statistics.unstack(-1) + # get rid of multiindex by dropping "Error" level + df_statistics.columns = df_statistics.columns.droplevel(0) + + # iterate over methods to plot linegraphs with error tube + # probably this can be done nicer, but no idea how exactly + + for color, method in zip( + color_palette, df_statistics.index.get_level_values("Method").unique()): + df_method = df_statistics.loc[method] + plt.plot(df_method.index, df_method["50%"], + color=color, label=method) + plt.fill_between(df_method.index, df_method["25%"], df_method["75%"], + facecolor=color, edgecolor=color, + alpha=0.5) + # tidy up the plot + plt.ylim((0, 40)) + plt.gca().set_xticks(np.arange(0, 200, 10), minor=True) + plt.xlabel("SNR") + plt.ylabel("absolute error [%]") + plt.grid() + plt.legend() + + +if __name__ == '__main__': + logging.basicConfig(filename=os.path.join(sp.LOG_FOLDER, + "in_silico_plots_" + + str(datetime.datetime.now()) + + '.log'), + level=logging.INFO) + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + logger = logging.getLogger() + logger.addHandler(ch) + luigi.interface.setup_interface_logging() + + # create a folder for the results if necessary + sp.create_folder_if_necessary(in_silico_results_path) + + train = "ipcai_revision_colon_mean_scattering_train" + test = "ipcai_revision_colon_mean_scattering_test" + + sch = luigi.scheduler.CentralPlannerScheduler() + w = luigi.worker.Worker(scheduler=sch) + w.add(TrainingSamplePlot(which_train=train, which_test=test)) + w.add(NoisePlot(which_train=train, which_test=test)) + w.add(WrongNoisePlot(which_train=train, which_test=test, train_snr=10.)) + w.add(WrongNoisePlot(which_train=train, which_test=test, train_snr=50.)) + 
w.add(WrongNoisePlot(which_train=train, which_test=test, train_snr=200.)) + # Set a different testing domain to evaluate domain sensitivity + w.add(NoisePlot(which_train=train, + which_test="ipcai_revision_generic_mean_scattering_test")) + w.add(VhbPlot(which_train=train, which_test=test)) + w.run() diff --git a/Modules/Biophotonics/python/iMC/scripts/ipcai2016/script_analyze_ipcai_in_vivo_liver.py b/Modules/Biophotonics/python/iMC/scripts/ipcai2016/script_analyze_ipcai_in_vivo_liver.py new file mode 100644 index 0000000000..a5bce62852 --- /dev/null +++ b/Modules/Biophotonics/python/iMC/scripts/ipcai2016/script_analyze_ipcai_in_vivo_liver.py @@ -0,0 +1,289 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Aug 14 11:09:18 2015 + +@author: wirkert +""" + + +import Image +import ImageEnhance +import logging +import datetime + +import matplotlib.pyplot as plt +import SimpleITK as sitk +import matplotlib + +from msi.io.nrrdreader import NrrdReader +import msi.normalize as norm +from regression.estimation import estimate_image +from tasks_common import * + + +import sklearn.datasets +sklearn.datasets.get_data_home(data_home=None) + +FINALS_FOLDER = "liver" +liver_results_path = os.path.join(sp.RESULTS_FOLDER, FINALS_FOLDER) +data_folder = sp.LIVER_FOLDER + +sp.RECORDED_WAVELENGTHS = \ + np.array([580, 470, 660, 560, 480, 511, 600, 700]) * 10 ** -9 + +font = {'family' : 'normal', + 'size' : 30} +matplotlib.rc('font', **font) + + +class ResultsFile(luigi.Task): + + def output(self): + return luigi.LocalTarget(os.path.join(liver_results_path, + "results.csv")) + + +class OxyAndVhbOverTimeTask(luigi.Task): + + def output(self): + return luigi.LocalTarget(os.path.join(liver_results_path, + "liver_oxy_over_time.pdf")) + + def requires(self): + return ResultsFile() + + def run(self): + df = pd.read_csv(self.input().path, index_col=0) + # determine times from start: + image_name_strings = df["image name"].values + time_strings = map(lambda s: s[ + s.find("2014-08-03_")+11:s.find("2014-08-03_")+19], + image_name_strings) + time_in_s = map(lambda s: int(s[0:2]) * 3600 + + int(s[3:5]) * 60 + + int(s[6:]), time_strings) + df["time since drug delivery [s]"] = np.array(time_in_s) - time_in_s[0] + + # print oxy over time as scatterplot. + ax = df.plot.scatter(x="time since drug delivery [s]", + y="oxygenation mean [%]", + s=100, alpha=0.5, + fontsize=30) + ax.set_xlim((-1, 70)) + + plt.axvline(x=0, ymin=0, ymax=1, linewidth=2) + plt.axvline(x=56, ymin=0, ymax=1, linewidth=2) + ax.annotate('drug delivery', xy=(0, ax.get_ylim()[1]), + xycoords='data', xytext=(0, 0), + fontsize=30, + textcoords='offset points') + ax.annotate('porcine death', xy=(56, ax.get_ylim()[1]), + xycoords='data', xytext=(-100, 0), + fontsize=30, + textcoords='offset points') + ax.yaxis.label.set_size(30) + ax.xaxis.label.set_size(30) + plt.grid() + + df.to_csv(self.input().path) + + # create and save vhb plot + plt.savefig(self.output().path, + dpi=250, bbox_inches='tight', mode="pdf") + + # print vhb over time as scatterplot. 
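+        # (it is saved alongside the oxygenation plot, under the output file
+        # name plus the suffix "_vhb_mean.pdf")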
+ ax = df.plot.scatter(x="time since drug delivery [s]", + y="blood volume fraction mean [%]", + s=100, alpha=0.5, + fontsize=30) + ax.set_xlim((-1, 70)) + + plt.axvline(x=0, ymin=0, ymax=1, linewidth=2) + plt.axvline(x=56, ymin=0, ymax=1, linewidth=2) + ax.annotate('drug delivery', xy=(0, ax.get_ylim()[1]), + xycoords='data', xytext=(0, 0), + fontsize=30, + textcoords='offset points') + ax.annotate('porcine death', xy=(56, ax.get_ylim()[1]), + xycoords='data', xytext=(-100, 0), + fontsize=30, + textcoords='offset points') + ax.yaxis.label.set_size(30) + ax.xaxis.label.set_size(30) + plt.grid() + + plt.savefig(self.output().path + "_vhb_mean.pdf", + dpi=250, bbox_inches='tight', mode="pdf") + + +class IPCAICreateOxyImageTask(luigi.Task): + image_name = luigi.Parameter() + df_prefix = luigi.Parameter() + + def requires(self): + return IPCAITrainRegressor(df_prefix=self.df_prefix), \ + Flatfield(flatfield_folder=sp.FLAT_FOLDER), \ + SingleMultispectralImage(image=self.image_name), \ + Dark(dark_folder=sp.DARK_FOLDER) + + def output(self): + return luigi.LocalTarget(os.path.join(liver_results_path, + self.image_name + "_" + + self.df_prefix + + "_oxy_summary" + ".png")) + + def run(self): + nrrd_reader = NrrdReader() + tiff_ring_reader = TiffRingReader() + # read the flatfield + flat = nrrd_reader.read(self.input()[1].path) + dark = nrrd_reader.read(self.input()[3].path) + # read the msi + nr_filters = len(sp.RECORDED_WAVELENGTHS) + msi, segmentation = tiff_ring_reader.read(self.input()[2].path, + nr_filters) + + # only take into account not saturated pixels. + segmentation = np.max(msi.get_image(), axis=-1) < 2000. + + # read the regressor + e_file = open(self.input()[0].path, 'r') + e = pickle.load(e_file) + + # correct image setup + position_filter_nr_in_string = self.image_name.find(" 2014") - 1 + filter_nr = int(self.image_name[ + position_filter_nr_in_string:position_filter_nr_in_string+1]) + original_order = np.arange(nr_filters) + new_image_order = np.concatenate(( + original_order[nr_filters - filter_nr:], + original_order[:nr_filters - filter_nr])) + # resort msi to restore original order + msimani.get_bands(msi, new_image_order) + # correct by flatfield + msimani.image_correction(msi, flat, dark) + + # create artificial rgb + rgb_image = msi.get_image()[:, :, [2, 3, 1]] + rgb_image /= np.max(rgb_image) + rgb_image *= 255. + + # preprocess the image + # sortout unwanted bands + print "1" + # zero values would lead to infinity logarithm, thus clip. + msi.set_image(np.clip(msi.get_image(), 0.00001, 2. ** 64)) + # normalize to get rid of lighting intensity + norm.standard_normalizer.normalize(msi) + # transform to absorption + msi.set_image(-np.log(msi.get_image())) + # normalize by l2 for stability + norm.standard_normalizer.normalize(msi, "l2") + print "2" + # estimate + sitk_image, time = estimate_image(msi, e) + image = sitk.GetArrayFromImage(sitk_image) + + plt.figure() + print "3" + rgb_image = rgb_image.astype(np.uint8) + im = Image.fromarray(rgb_image, 'RGB') + enh_brightness = ImageEnhance.Brightness(im) + im = enh_brightness.enhance(5.) 
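+        # note: the raw rgb estimate is quite dark; the (empirically chosen)
+        # brightness enhancement only serves to make the anatomical context
+        # recognizable in the summary figure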
+ plotted_image = np.array(im) + top_left_axis = plt.gca() + top_left_axis.imshow(plotted_image, interpolation='nearest') + top_left_axis.xaxis.set_visible(False) + top_left_axis.yaxis.set_visible(False) + + plt.set_cmap("jet") + print "4" + # plot parametric maps + # first oxygenation + plt.figure() + oxy_image = image[:, :, 0] + segmentation[0, 0] = 1 + segmentation[0, 1] = 1 + oxy_image = np.ma.masked_array(oxy_image, ~segmentation) + oxy_image[np.isnan(oxy_image)] = 0. + oxy_image[np.isinf(oxy_image)] = 0. + oxy_mean = np.mean(oxy_image) + oxy_image[0, 0] = 0.0 + oxy_image[0, 1] = 1. + plot_image(oxy_image[:, :], plt.gca()) + plt.savefig(self.output().path, + dpi=250, bbox_inches='tight') + # second blood volume fraction + plt.figure() + vhb_image = image[:, :, 1] + vhb_image = np.ma.masked_array(vhb_image, ~segmentation) + vhb_image[np.isnan(vhb_image)] = 0. + vhb_image[np.isinf(vhb_image)] = 0. + vhb_image[0, 0] = 0.0 + vhb_image[0, 1] = 0.1 + vhb_image = np.clip(vhb_image, 0.0, 0.1) + vhb_mean = np.mean(vhb_image) + plot_image(vhb_image, plt.gca()) + plt.savefig(self.output().path + "vhb.png", + dpi=250, bbox_inches='tight') + + # store results summary in dataframe + df_image_results = pd.DataFrame(data=np.expand_dims([self.image_name, + oxy_mean * 100., + vhb_mean * 100., + time], 0), + columns=["image name", + "oxygenation mean [%]", + "blood volume fraction mean [%]", + "time to estimate"]) + + results_file = os.path.join(liver_results_path, "results.csv") + if os.path.isfile(results_file): + df_results = pd.read_csv(results_file, index_col=0) + df_results = pd.concat((df_results, df_image_results)).reset_index( + drop=True + ) + else: + df_results = df_image_results + + df_results.to_csv(results_file) + + print "5" + plt.close("all") + + +if __name__ == '__main__': + + # root folder there the data lies + logging.basicConfig(filename=os.path.join(sp.LOG_FOLDER, + "liver" + + str(datetime.datetime.now()) + + '.log'), level=logging.INFO) + luigi.interface.setup_interface_logging() + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + logger = logging.getLogger() + logger.addHandler(ch) + + sch = luigi.scheduler.CentralPlannerScheduler() + w = luigi.worker.Worker(scheduler=sch) + + # create a folder for the results if necessary + sp.create_folder_if_necessary(liver_results_path) + + onlyfiles = get_image_files_from_folder(sp.LIVER_FOLDER) + + first_invivo_image_files = filter(lambda image_name: "0 2014" in image_name, + onlyfiles) + + for f in first_invivo_image_files: + main_task = IPCAICreateOxyImageTask(image_name=f, + df_prefix="ipcai_revision_colon_mean_scattering_train") + w.add(main_task) + + w.run() + + oxygenation_over_time_task = OxyAndVhbOverTimeTask() + w.add(oxygenation_over_time_task) + w.run() + diff --git a/Modules/Biophotonics/python/iMC/scripts/ipcai2016/script_analyze_ipcai_in_vivo_small_bowel.py b/Modules/Biophotonics/python/iMC/scripts/ipcai2016/script_analyze_ipcai_in_vivo_small_bowel.py new file mode 100644 index 0000000000..d022820729 --- /dev/null +++ b/Modules/Biophotonics/python/iMC/scripts/ipcai2016/script_analyze_ipcai_in_vivo_small_bowel.py @@ -0,0 +1,275 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Aug 14 11:09:18 2015 + +@author: wirkert +""" + +import Image +import ImageEnhance +import logging +import datetime + +import matplotlib.pyplot as plt +import SimpleITK as sitk +import matplotlib + +from msi.io.nrrdreader import NrrdReader +import msi.normalize as norm +from regression.estimation import estimate_image +from tasks_common import * + 
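+# the star import from tasks_common provides, among others, os, pickle,
+# numpy as np, pandas as pd, luigi, scriptpaths as sp, msimani,
+# TiffRingReader and the shared tasks IPCAITrainRegressor, Flatfield and Dark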
+ +FINALS_FOLDER = "small_bowel" +small_bowel_results_path = os.path.join(sp.RESULTS_FOLDER, + FINALS_FOLDER) +data_folder = sp.SMALL_BOWEL_FOLDER + +sp.RECORDED_WAVELENGTHS = \ + np.array([580, 470, 660, 560, 480, 511, 600, 700]) * 10 ** -9 + +font = {'family' : 'normal', + 'size' : 25} +matplotlib.rc('font', **font) + + +def get_image_files_from_folder(folder): + # small helper function to get all the image files in a folder + image_files = [f for f in os.listdir(folder) if + os.path.isfile(os.path.join(folder, f))] + image_files.sort() + image_files = [f for f in image_files if f.endswith(".tiff")] + return image_files + + +class ResultsFile(luigi.Task): + + def output(self): + return luigi.LocalTarget(os.path.join(small_bowel_results_path, + "results.csv")) + + +class OxyOverTimeTask(luigi.Task): + + def output(self): + return luigi.LocalTarget(os.path.join(small_bowel_results_path, + "colon_oxy_over_time.pdf")) + + def requires(self): + return ResultsFile() + + def run(self): + df = pd.read_csv(self.input().path, index_col=0) + + # determine times from start: + image_name_strings = df["image name"].values + time_strings = map(lambda s: s[ + s.find("2015-10-08_")+11:s.find("2015-10-08_")+19], + image_name_strings) + time_in_s = map(lambda s: int(s[0:2]) * 3600 + + int(s[3:5]) * 60 + + int(s[6:]), time_strings) + df["time since first frame [s]"] = np.array(time_in_s) - time_in_s[0] + df["time since applying first clip [s]"] = df["time since first frame [s]"] - 4 + # print oxy over time as scatterplot. + plt.figure() + ax = df.plot.scatter(x="time since applying first clip [s]", + y="oxygenation mean [%]", fontsize=30, + s=50, alpha=0.5) + ax.set_xlim((-5, 600)) + + plt.axvline(x=0, ymin=0, ymax=1, linewidth=2) + plt.axvline(x=39, ymin=0, ymax=1, linewidth=2) + plt.axvline(x=100, ymin=0, ymax=1, linewidth=2) + plt.axvline(x=124, ymin=0, ymax=1, linewidth=2) + ax.annotate('1', xy=(0, ax.get_ylim()[1]), + fontsize=18, + color="blue", + xycoords='data', xytext=(-5, 0), + textcoords='offset points') + ax.annotate('2', xy=(39, ax.get_ylim()[1]), + fontsize=18, + color="blue", + xycoords='data', xytext=(-5, 0), + textcoords='offset points') + ax.annotate('3', xy=(100, ax.get_ylim()[1]), + fontsize=18, + color="blue", + xycoords='data', xytext=(-5, 0), + textcoords='offset points') + ax.annotate('4', xy=(124, ax.get_ylim()[1]), + fontsize=18, + color="blue", + xycoords='data', xytext=(-5, 0), + textcoords='offset points') + + plt.grid() + + df.to_csv(self.input().path) + + # save + plt.savefig(self.output().path, + dpi=250, bbox_inches='tight', mode="pdf") + + +def plot_image(image, axis): + im2 = axis.imshow(image, interpolation='nearest', alpha=1.0) + # axis.set_title(title, fontsize=5) + # divider = make_axes_locatable(axis) + # cax = divider.append_axes("right", size="10%", pad=0.05) + # cbar = plt.colorbar(im2, cax=cax) + # cbar.ax.tick_params(labelsize=5) + axis.xaxis.set_visible(False) + + +class IPCAICreateOxyImageTask(luigi.Task): + image_name = luigi.Parameter() + df_prefix = luigi.Parameter() + + def requires(self): + return IPCAITrainRegressor(df_prefix=self.df_prefix), \ + Flatfield(flatfield_folder=sp.FLAT_FOLDER), \ + SingleMultispectralImage(image=self.image_name), \ + Dark(dark_folder=sp.DARK_FOLDER) + + def output(self): + return luigi.LocalTarget(os.path.join(small_bowel_results_path, + self.image_name + "_" + + self.df_prefix + + "_summary" + ".png")) + + def run(self): + nrrd_reader = NrrdReader() + tiff_ring_reader = TiffRingReader() + # read the flatfield + flat = 
nrrd_reader.read(self.input()[1].path) + dark = nrrd_reader.read(self.input()[3].path) + # read the msi + nr_filters = len(sp.RECORDED_WAVELENGTHS) + msi, segmentation = tiff_ring_reader.read(self.input()[2].path, + nr_filters) + # only take into account not saturated pixels. + segmentation = np.logical_and(segmentation, + (np.max(msi.get_image(), + axis=-1) < 1000.)) + + # read the regressor + e_file = open(self.input()[0].path, 'r') + e = pickle.load(e_file) + + # correct image setup + filter_nr = int(self.image_name[-6:-5]) + original_order = np.arange(nr_filters) + new_image_order = np.concatenate(( + original_order[nr_filters - filter_nr:], + original_order[:nr_filters - filter_nr])) + # resort msi to restore original order + msimani.get_bands(msi, new_image_order) + # correct by flatfield + msimani.image_correction(msi, flat, dark) + + # create artificial rgb + rgb_image = msi.get_image()[:, :, [2, 3, 1]] + rgb_image /= np.max(rgb_image) + rgb_image *= 255. + + # preprocess the image + # sortout unwanted bands + print "1" + # zero values would lead to infinity logarithm, thus clip. + msi.set_image(np.clip(msi.get_image(), 0.00001, 2. ** 64)) + # normalize to get rid of lighting intensity + norm.standard_normalizer.normalize(msi) + # transform to absorption + msi.set_image(-np.log(msi.get_image())) + # normalize by l2 for stability + norm.standard_normalizer.normalize(msi, "l2") + print "2" + # estimate + sitk_image, time = estimate_image(msi, e) + image = sitk.GetArrayFromImage(sitk_image) + + plt.figure() + print "3" + rgb_image = rgb_image.astype(np.uint8) + im = Image.fromarray(rgb_image, 'RGB') + enh_brightness = ImageEnhance.Brightness(im) + im = enh_brightness.enhance(10.) + plotted_image = np.array(im) + top_left_axis = plt.gca() + top_left_axis.imshow(plotted_image, interpolation='nearest') + top_left_axis.xaxis.set_visible(False) + top_left_axis.yaxis.set_visible(False) + + plt.set_cmap("jet") + print "4" + # plot parametric maps + segmentation[0, 0] = 1 + segmentation[0, 1] = 1 + oxy_image = np.ma.masked_array(image[:, :], ~segmentation) + oxy_image[np.isnan(oxy_image)] = 0. + oxy_image[np.isinf(oxy_image)] = 0. + oxy_mean = np.mean(oxy_image) + oxy_image[0, 0] = 0.0 + oxy_image[0, 1] = 1. 
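+        # pinning two corner pixels to 0 and 1 (they were marked as valid in
+        # the segmentation above) fixes the color scale to the full
+        # oxygenation range, so the colormap stays comparable across images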
+ + plot_image(oxy_image[:, :], plt.gca()) + + df_image_results = pd.DataFrame(data=np.expand_dims([self.image_name, + oxy_mean * 100., + time], 0), + columns=["image name", + "oxygenation mean [%]", + "time to estimate"]) + + results_file = os.path.join(small_bowel_results_path, "results.csv") + if os.path.isfile(results_file): + df_results = pd.read_csv(results_file, index_col=0) + df_results = pd.concat((df_results, df_image_results)).reset_index( + drop=True + ) + else: + df_results = df_image_results + + df_results.to_csv(results_file) + + plt.savefig(self.output().path, + dpi=250, bbox_inches='tight') + plt.close("all") + + +if __name__ == '__main__': + + # root folder there the data lies + logging.basicConfig(filename=os.path.join(sp.LOG_FOLDER, + "small_bowel_images" + + str(datetime.datetime.now()) + + '.log'), + level=logging.INFO) + luigi.interface.setup_interface_logging() + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + logger = logging.getLogger() + logger.addHandler(ch) + + sch = luigi.scheduler.CentralPlannerScheduler() + w = luigi.worker.Worker(scheduler=sch) + + # create a folder for the results if necessary + sp.create_folder_if_necessary(small_bowel_results_path) + + # determine files to process + onlyfiles = get_image_files_from_folder(sp.SMALL_BOWEL_FOLDER) + first_invivo_image_files = filter(lambda image_name: "F0" in image_name, + onlyfiles) + + for f in first_invivo_image_files: + main_task = IPCAICreateOxyImageTask(image_name=f, + df_prefix="ipcai_revision_colon_mean_scattering_train") + w.add(main_task) + w.run() + + oxygenation_over_time_task = OxyOverTimeTask() + w.add(oxygenation_over_time_task) + w.run() + diff --git a/Modules/Biophotonics/python/iMC/scripts/ipcai2016/script_calculate_spectra.py b/Modules/Biophotonics/python/iMC/scripts/ipcai2016/script_calculate_spectra.py new file mode 100644 index 0000000000..34abeb1d76 --- /dev/null +++ b/Modules/Biophotonics/python/iMC/scripts/ipcai2016/script_calculate_spectra.py @@ -0,0 +1,132 @@ +''' +Created on Sep 9, 2015 + +@author: wirkert +''' + + +import logging +import datetime +import os +import time + +import numpy as np +import luigi + +import scriptpaths as sp +import mc.factories as mcfac +from mc.sim import SimWrapper, get_diffuse_reflectance + +# parameter setting +NR_BATCHES = 100 +NR_ELEMENTS_IN_BATCH = 1000 +# the wavelengths to be simulated +WAVELENGHTS = np.arange(450, 720, 2) * 10 ** -9 +NR_PHOTONS = 10 ** 6 + +# experiment configuration +MCI_FILENAME = "./temp.mci" +MCO_FILENAME = "temp.mco" +# this path definitly needs to be adapted by you +PATH_TO_MCML = "/home/wirkert/workspace/monteCarlo/gpumcml/fast-gpumcml/" +EXEC_MCML = "gpumcml.sm_20" + + +class CreateSpectraTask(luigi.Task): + df_prefix = luigi.Parameter() + batch_nr = luigi.IntParameter() + nr_samples = luigi.IntParameter() + factory = luigi.Parameter() + + def output(self): + return luigi.LocalTarget(os.path.join(sp.MC_DATA_FOLDER, + self.df_prefix + "_" + + str(self.batch_nr) + ".txt")) + + def run(self): + start = time.time() + # setup simulation wrapper + self.sim_wrapper = SimWrapper() + self.sim_wrapper.set_mci_filename(MCI_FILENAME) + self.sim_wrapper.set_mcml_executable(PATH_TO_MCML + EXEC_MCML) + # setup model + self.tissue_model = self.factory.create_tissue_model() + self.tissue_model.set_mci_filename(self.sim_wrapper.mci_filename) + self.tissue_model.set_mco_filename(MCO_FILENAME) + self.tissue_model.set_nr_photons(NR_PHOTONS) + # setup array in which data shall be stored + batch = 
self.factory.create_batch_to_simulate()
+        batch.create_parameters(self.nr_samples)
+        # dataframe created by batch:
+        df = batch.df
+        # add reflectance column to dataframe
+        for w in WAVELENGHTS:
+            df["reflectances", w] = np.NAN
+
+        # for each instance of our tissue model
+        for i in range(df.shape[0]):
+            self.tissue_model.set_dataframe_element(df, i)
+            logging.info("running simulation " + str(i) + " for\n" +
+                         str(self.tissue_model))
+            # use separate timing variables here so the batch timing at the
+            # end of this method stays intact
+            sim_start = time.time()
+            # map the wavelengths array to reflectance list
+            reflectances = map(self.wavelength_to_reflectance,
+                               WAVELENGHTS)
+            sim_end = time.time()
+            # store in dataframe (use .loc to avoid chained assignment)
+            for r, w in zip(reflectances, WAVELENGHTS):
+                df.loc[i, ("reflectances", w)] = r
+            # success!
+            logging.info("successfully ran simulation in " +
+                         "{:.2f}".format(sim_end - sim_start) + " seconds")
+        # clean up temporarily created files
+        os.remove(MCI_FILENAME)
+        created_mco_file = PATH_TO_MCML + "/" + MCO_FILENAME
+        if os.path.isfile(created_mco_file):
+            os.remove(created_mco_file)
+        # save the created output
+        f = open(self.output().path, 'w')
+        df.to_csv(f)
+
+        end = time.time()
+        logging.info("time for creating batch of mc data: %.f s" %
+                     (end - start))
+
+    def wavelength_to_reflectance(self, wavelength):
+        """helper function to determine the reflectance for a given
+        wavelength using the current model and simulation """
+        self.tissue_model.set_wavelength(wavelength)
+        self.tissue_model.create_mci_file()
+        if os.path.isfile(PATH_TO_MCML + EXEC_MCML):
+            self.sim_wrapper.run_simulation()
+            return get_diffuse_reflectance(PATH_TO_MCML + MCO_FILENAME)
+        else:
+            raise IOError("path to gpumcml not valid")
+
+
+if __name__ == '__main__':
+    logging.basicConfig(filename=os.path.join(sp.LOG_FOLDER,
+                                              "calculate_spectra" +
+                                              str(datetime.datetime.now()) +
+                                              '.log'),
+                        level=logging.INFO)
+    ch = logging.StreamHandler()
+    ch.setLevel(logging.INFO)
+    logger = logging.getLogger()
+    logger.addHandler(ch)
+    luigi.interface.setup_interface_logging()
+
+    # create a folder for the results if necessary
+    sp.create_folder_if_necessary(sp.INTERMEDIATES_FOLDER)
+
+    sch = luigi.scheduler.CentralPlannerScheduler()
+    w = luigi.worker.Worker(scheduler=sch)
+    BATCH_NUMBERS = np.arange(0, NR_BATCHES, 1)
+    for i in BATCH_NUMBERS:
+        generic_task = CreateSpectraTask("ipcai_revision_generic_mean_scattering",
+                                         i,
+                                         NR_ELEMENTS_IN_BATCH,
+                                         mcfac.GenericMeanScatteringFactory())
+        w.add(generic_task)
+        w.run()
diff --git a/Modules/Biophotonics/python/iMC/scripts/ipcai2016/scriptpaths.py b/Modules/Biophotonics/python/iMC/scripts/ipcai2016/scriptpaths.py
new file mode 100644
index 0000000000..ca71627a88
--- /dev/null
+++ b/Modules/Biophotonics/python/iMC/scripts/ipcai2016/scriptpaths.py
@@ -0,0 +1,27 @@
+'''
+Created on Aug 27, 2015
+
+@author: wirkert
+'''
+
+import os
+
+ROOT_FOLDER = "../.."
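+# all folders below are derived from ROOT_FOLDER, so this is the only line
+# that has to be adapted when the data lives somewhere else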
+LOG_FOLDER = os.path.join(ROOT_FOLDER, "log") +DATA_FOLDER = os.path.join(ROOT_FOLDER, "data") +FLAT_FOLDER = os.path.join(DATA_FOLDER, "flatfields") +DARK_FOLDER = os.path.join(DATA_FOLDER, "dark") +SMALL_BOWEL_FOLDER = os.path.join(DATA_FOLDER, + "small_bowel_images") +LIVER_FOLDER = os.path.join(DATA_FOLDER, "liver_images") +SPECTROMETER_FOLDER = os.path.join(DATA_FOLDER, "spectrometer_measurements") +RESULTS_FOLDER = os.path.join(ROOT_FOLDER, "results") +INTERMEDIATES_FOLDER = os.path.join(RESULTS_FOLDER, "intermediate") +MC_DATA_FOLDER = os.path.join(INTERMEDIATES_FOLDER, "mc_data") + + +def create_folder_if_necessary(folder): + if not os.path.exists(folder): + os.makedirs(folder) + +create_folder_if_necessary(INTERMEDIATES_FOLDER) diff --git a/Modules/Biophotonics/python/iMC/scripts/ipcai2016/tasks_common.py b/Modules/Biophotonics/python/iMC/scripts/ipcai2016/tasks_common.py new file mode 100644 index 0000000000..66e1a81398 --- /dev/null +++ b/Modules/Biophotonics/python/iMC/scripts/ipcai2016/tasks_common.py @@ -0,0 +1,141 @@ +import os +import pickle + +import numpy as np +import pandas as pd +import luigi +from sklearn.ensemble.forest import RandomForestRegressor + +import tasks_mc +import scriptpaths as sp +from msi.msi import Msi +from msi.io.nrrdwriter import NrrdWriter +import msi.msimanipulations as msimani +from regression.preprocessing import preprocess +from msi.io.tiffringreader import TiffRingReader + + +""" +Collection of functions and luigi.Task s which are used by more than one script +""" + + +def get_image_files_from_folder(folder): + # small helper function to get all the image files in a folder + image_files = [f for f in os.listdir(folder) if + os.path.isfile(os.path.join(folder, f))] + image_files.sort() + image_files = [f for f in image_files if f.endswith(".tiff")] + return image_files + + +def plot_image(image, axis): + im2 = axis.imshow(image, interpolation='nearest', alpha=1.0) + axis.xaxis.set_visible(False) + + +class IPCAITrainRegressor(luigi.Task): + df_prefix = luigi.Parameter() + + def output(self): + return luigi.LocalTarget(os.path.join(sp.INTERMEDIATES_FOLDER, + "reg_small_bowel_" + + self.df_prefix)) + + def requires(self): + return tasks_mc.SpectroCamBatch(self.df_prefix) + + def run(self): + # extract data from the batch + df_train = pd.read_csv(self.input().path, header=[0, 1]) + + X, y = preprocess(df_train, snr=10.) + # train regressor + reg = RandomForestRegressor(10, min_samples_leaf=10, max_depth=9, + n_jobs=-1) + # reg = KNeighborsRegressor(algorithm="auto") + # reg = LinearRegression() + # reg = sklearn.svm.SVR(kernel="rbf", degree=3, C=100., gamma=10.) 
+ # reg = LinearSaO2Unmixing() + reg.fit(X, y) + # reg = LinearSaO2Unmixing() + # save regressor + regressor_file = self.output().open('w') + pickle.dump(reg, regressor_file) + regressor_file.close() + + +class SingleMultispectralImage(luigi.Task): + + image = luigi.Parameter() + + def output(self): + return luigi.LocalTarget(os.path.join(sp.DATA_FOLDER, self.image)) + + +class Flatfield(luigi.Task): + flatfield_folder = luigi.Parameter() + + def output(self): + return luigi.LocalTarget(os.path.join(sp.INTERMEDIATES_FOLDER, + "flatfield" + + ".nrrd")) + + def run(self): + tiff_ring_reader = TiffRingReader() + nr_filters = len(sp.RECORDED_WAVELENGTHS) + + # analyze all the first image files + image_files = get_image_files_from_folder(self.flatfield_folder) + image_files = filter(lambda image_name: "F0" in image_name, image_files) + + # helper function to take maximum of two images + def maximum_of_two_images(image_1, image_name_2): + image_2 = tiff_ring_reader.read(os.path.join(self.flatfield_folder, + image_name_2), + nr_filters)[0].get_image() + return np.maximum(image_1, image_2) + + # now reduce to maximum of all the single images + flat_maximum = reduce(lambda x, y: maximum_of_two_images(x, y), + image_files, 0) + msi = Msi(image=flat_maximum) + msi.set_wavelengths(sp.RECORDED_WAVELENGTHS) + + # write flatfield as nrrd + writer = NrrdWriter(msi) + writer.write(self.output().path) + + +class Dark(luigi.Task): + dark_folder = luigi.Parameter() + + def output(self): + return luigi.LocalTarget(os.path.join(sp.INTERMEDIATES_FOLDER, + "dark" + + ".nrrd")) + + def run(self): + tiff_ring_reader = TiffRingReader() + nr_filters = len(sp.RECORDED_WAVELENGTHS) + + # analyze all the first image files + image_files = get_image_files_from_folder(self.dark_folder) + image_files = filter(lambda image_name: "F0" in image_name, image_files) + + # returns the mean dark image vector of all inputted dark image + # overly complicated TODO SW: make this simple code readable. 
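+        # in two steps: 1. compute the mean spectrum of every dark image;
+        # 2. sum these mean spectra and divide by their count to obtain the
+        # average dark spectrum used for the image correction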
+        dark_means = map(lambda image_name:
+                         msimani.calculate_mean_spectrum(
+                             tiff_ring_reader.read(
+                                 os.path.join(self.dark_folder, image_name),
+                                 nr_filters)[0]),
+                         image_files)
+        dark_means_sum = reduce(lambda x, y: x + y.get_image(), dark_means, 0)
+        final_dark_mean = dark_means_sum / len(dark_means)
+
+        msi = Msi(image=final_dark_mean)
+        msi.set_wavelengths(sp.RECORDED_WAVELENGTHS)
+
+        # write the dark image as nrrd
+        writer = NrrdWriter(msi)
+        writer.write(self.output().path)
\ No newline at end of file
diff --git a/Modules/Biophotonics/python/iMC/scripts/ipcai2016/tasks_mc.py b/Modules/Biophotonics/python/iMC/scripts/ipcai2016/tasks_mc.py
new file mode 100644
index 0000000000..84cd406e91
--- /dev/null
+++ b/Modules/Biophotonics/python/iMC/scripts/ipcai2016/tasks_mc.py
@@ -0,0 +1,154 @@
+'''
+Created on Sep 10, 2015
+
+@author: wirkert
+'''
+
+
+
+import os
+
+import pandas as pd
+import numpy as np
+import luigi
+from scipy.interpolate import interp1d
+from sklearn.preprocessing import normalize
+
+import scriptpaths as sp
+import mc.dfmanipulations as dfmani
+from msi.io.spectrometerreader import SpectrometerReader
+from msi.io.msireader import MsiReader
+from msi.normalize import NormalizeMean
+import msi.msimanipulations as msimani
+
+
+class SpectrometerFile(luigi.Task):
+    input_file = luigi.Parameter()
+
+    def output(self):
+        return luigi.LocalTarget(os.path.join(sp.SPECTROMETER_FOLDER,
+                                              self.input_file))
+
+
+class FilterTransmission(luigi.Task):
+    input_file = luigi.Parameter()
+
+    def requires(self):
+        return SpectrometerFile(self.input_file)
+
+    def output(self):
+        return luigi.LocalTarget(os.path.join(sp.INTERMEDIATES_FOLDER,
+                                              "processed_transmission" +
+                                              self.input_file))
+
+    def run(self):
+        reader = SpectrometerReader()
+        filter_transmission = reader.read(self.input().path)
+        # filter high and low wavelengths
+        wavelengths = filter_transmission.get_wavelengths()
+        fi_image = filter_transmission.get_image()
+        fi_image[wavelengths < 450 * 10 ** -9] = 0.0
+        fi_image[wavelengths > 720 * 10 ** -9] = 0.0
+        # filter elements farther away than +- 30nm
+        name_to_float = float(os.path.splitext(self.input_file)[0])
+        fi_image[wavelengths < (name_to_float - 30) * 10 ** -9] = 0.0
+        fi_image[wavelengths > (name_to_float + 30) * 10 ** -9] = 0.0
+        # elements < 0 are set to 0.
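+        # (negative transmission values are physically impossible and stem
+        # from spectrometer noise)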
+ fi_image[fi_image < 0.0] = 0.0 + + # write it to a dataframe + df = pd.DataFrame() + df["wavelengths"] = wavelengths + df["reflectances"] = fi_image + df.to_csv(self.output().path, index=False) + + +class JoinBatches(luigi.Task): + df_prefix = luigi.Parameter() + + def output(self): + return luigi.LocalTarget(os.path.join(sp.INTERMEDIATES_FOLDER, + self.df_prefix + "_" + + "all" + ".txt")) + + def run(self): + # get all files in the search path + files = [f for f in os.listdir(sp.MC_DATA_FOLDER) + if os.path.isfile(os.path.join(sp.MC_DATA_FOLDER, f))] + # from these get only those who start with correct batch prefix + df_file_names = [os.path.join(sp.MC_DATA_FOLDER, f) for f in files + if f.startswith(self.df_prefix)] + # load these files + dfs = [pd.read_csv(f, header=[0, 1]) for f in df_file_names] + # now join them to one batch + joined_df = pd.concat(dfs, ignore_index=True) + # write it + joined_df.to_csv(self.output().path, index=False) + + +class CameraBatch(luigi.Task): + """takes a batch of reference data and converts it to the spectra + processed by a camera with the specified wavelengths assuming a 10nm FWHM""" + df_prefix = luigi.Parameter() + + def requires(self): + return JoinBatches(self.df_prefix) + + def output(self): + return luigi.LocalTarget(os.path.join(sp.INTERMEDIATES_FOLDER, + self.df_prefix + + "_all_virtual_camera.txt")) + + def run(self): + # load dataframe + df = pd.read_csv(self.input().path, header=[0, 1]) + # camera batch creation: + dfmani.fold_by_sliding_average(df, 6) + dfmani.interpolate_wavelengths(df, sp.RECORDED_WAVELENGTHS) + # write it + df.to_csv(self.output().path, index=False) + + +class SpectroCamBatch(luigi.Task): + """ + Same as CameraBatch in purpose but adapts the batch to our very specific + SpectroCam set-up. + """ + df_prefix = luigi.Parameter() + + def requires(self): + # all wavelengths must have been measured for transmission and stored in + # wavelength.txt files (e.g. 470.txt) + filenames = ((sp.RECORDED_WAVELENGTHS * 10**9).astype(int)).astype(str) + filenames = map(lambda name: FilterTransmission(name + ".txt"), + filenames) + + return JoinBatches(self.df_prefix), filenames + + def output(self): + return luigi.LocalTarget(os.path.join(sp.INTERMEDIATES_FOLDER, + self.df_prefix + + "_all_spectrocam.txt")) + + def run(self): + # load dataframe + df = pd.read_csv(self.input()[0].path, header=[0, 1]) + # camera batch creation: + new_reflectances = [] + for band in self.input()[1]: + df_filter = pd.read_csv(band.path) + interpolator = interp1d(df_filter["wavelengths"], + df_filter["reflectances"], + assume_sorted=False, bounds_error=False) + # use this to create new reflectances + interpolated_filter = interpolator(df["reflectances"]. + columns.astype(float)) + # normalize band response + normalize(interpolated_filter.reshape(1, -1), norm='l1', copy=False) + folded_reflectance = np.dot(df["reflectances"].values, + interpolated_filter) + new_reflectances.append(folded_reflectance) + new_reflectances = np.array(new_reflectances).T + dfmani.switch_reflectances(df, sp.RECORDED_WAVELENGTHS, + new_reflectances) + # write it + df.to_csv(self.output().path, index=False)
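+
+# a minimal, self-contained sketch of the band folding performed above
+# (toy numbers, not part of the pipeline): each camera band value is the
+# dot product of a simulated spectrum with the l1-normalized filter
+# transmission, i.e. a transmission-weighted average of the spectrum.
+#
+#     import numpy as np
+#     spectra = np.random.rand(5, 100)      # 5 simulated spectra, 100 wavelengths
+#     transmission = np.random.rand(100)    # one interpolated filter curve
+#     transmission /= transmission.sum()    # l1 normalization, as above
+#     folded_band = np.dot(spectra, transmission)  # shape (5,), one value per spectrum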