diff --git a/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_silico.py b/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_silico.py index 25be6462a0..41766af8fb 100644 --- a/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_silico.py +++ b/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_silico.py @@ -1,336 +1,334 @@ # -*- coding: utf-8 -*- """ Created on Fri Aug 14 11:09:18 2015 @author: wirkert """ import os import logging import datetime from collections import namedtuple import numpy as np import pandas as pd from pandas import DataFrame import luigi import matplotlib.pyplot as plt from sklearn.ensemble.forest import RandomForestRegressor import seaborn as sns import tasks_mc import scriptpaths as sp from regression.preprocessing import preprocess, preprocess2 from regression.linear import LinearSaO2Unmixing sp.ROOT_FOLDER = "/media/wirkert/data/Data/" + \ "2016_02_22_IPCAI_revision_in_silico" sp.FINALS_FOLDER = "finals" sp.RECORDED_WAVELENGTHS = np.arange(470, 680, 10) * 10 ** -9 sp.MC_DATA_FOLDER = "mc_data" w_standard = 0.1 # for this evaluation we add 10% noise sns.set_style("whitegrid") # sns.set_context("poster") # setup standard random forest rf = RandomForestRegressor(10, min_samples_leaf=10, max_depth=9, n_jobs=-1) EvaluationStruct = namedtuple("EvaluationStruct", "name regressor") # standard evaluation setup standard_evaluation_setups = [EvaluationStruct("Linear Beer-Lambert", LinearSaO2Unmixing()) , EvaluationStruct("Proposed", rf)] # color palette my_colors = [sns.xkcd_rgb["pale red"], sns.xkcd_rgb["medium green"]] # standard noise levels noise_levels = np.arange(0.00, 0.18, 0.02) class TrainingSamplePlot(luigi.Task): which_train = luigi.Parameter() which_test = luigi.Parameter() def requires(self): return tasks_mc.CameraBatch(self.which_train), \ tasks_mc.CameraBatch(self.which_test) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, "sample_plot_train_" + self.which_train + "_test_" + self.which_test + ".png")) def run(self): # get data df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) # for this plot we write a custom evaluation function as it is built # a little different # create a new dataframe which will hold all the generated errors df = pd.DataFrame() - nr_training_samples = np.arange(10, 15060, 50).astype(int) + nr_training_samples = np.arange(10, 10010, 50).astype(int) # not very pythonic, don't care for n in nr_training_samples: X_test, y_test = preprocess(df_test, w_percent=w_standard) # only take n samples for training X_train, y_train = preprocess(df_train, nr_samples=n, w_percent=w_standard) regressor = rf regressor.fit(X_train, y_train) y_pred = regressor.predict(X_test) # save results to a dataframe errors = np.abs(y_pred - y_test) errors = errors.reshape(len(errors), 1) current_df = DataFrame(errors * 100, columns=["Errors"]) current_df["Method"] = "Proposed" current_df["Number Samples"] = n df = pd.concat([df, current_df], ignore_index=True) logging.info( "Finished training classifier with {0} samples".format( str(n))) df = df.groupby("Number Samples").describe() # get the error description in the rows: df = df.unstack(-1) # get rid of multiindex by dropping "Error" level df.columns = df.columns.droplevel(0) plt.figure() plt.plot(df.index, df["50%"], color=sns.xkcd_rgb["medium green"]) # tidy up the plot plt.xlabel("number of training samples") plt.ylabel("absolute error [%]") plt.ylim((0, 20)) plt.xlim((0, 15000)) # finally save the figure plt.savefig(self.output().path, dpi=500, bbox_inches='tight') class VhbPlot(luigi.Task): which_train = luigi.Parameter() which_test = luigi.Parameter() def requires(self): return tasks_mc.CameraBatch(self.which_train), \ tasks_mc.CameraBatch(self.which_test) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, "vhb_noise_plot_train_" + self.which_train + "_test_" + self.which_test + ".png")) @staticmethod def preprocess_vhb(batch, nr_samples=None, w_percent=None, magnification=None, bands_to_sortout=None): """ For evaluating vhb we extract labels for vhb instead of sao2""" X, y = preprocess2(batch, nr_samples, w_percent, magnification, bands_to_sortout) return X, y["vhb"] def run(self): # get data df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) # for vhb we only evaluate the proposed method since the linear # beer-lambert is not applicable evaluation_setups = [EvaluationStruct("Proposed", rf)] df = evaluate_data(df_train, noise_levels, df_test, noise_levels, evaluation_setups=evaluation_setups, preprocessing=self.preprocess_vhb, ) - standard_plotting(df, color_palette = sns.xkcd_rgb["medium green"], + standard_plotting(df, color_palette=[sns.xkcd_rgb["medium green"]], xytext_position=(2, 3)) plt.ylim((0, 4)) # finally save the figure plt.savefig(self.output().path, dpi=500, bbox_inches='tight') class NoisePlot(luigi.Task): which_train = luigi.Parameter() which_test = luigi.Parameter() def requires(self): return tasks_mc.CameraBatch(self.which_train), \ tasks_mc.CameraBatch(self.which_test) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, "noise_plot_train_" + self.which_train + "_test_" + self.which_test + ".png")) def run(self): # get data df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) df = evaluate_data(df_train, noise_levels, df_test, noise_levels) standard_plotting(df) # finally save the figure plt.savefig(self.output().path, dpi=500, bbox_inches='tight') class WrongNoisePlot(luigi.Task): which_train = luigi.Parameter() which_test = luigi.Parameter() def requires(self): return tasks_mc.CameraBatch(self.which_train), \ tasks_mc.CameraBatch(self.which_test) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, "wrong_noise_plot_train_" + self.which_train + "_test_" + self.which_test + ".png")) def run(self): # get data df_train = pd.read_csv(self.input()[0].path, header=[0, 1]) df_test = pd.read_csv(self.input()[1].path, header=[0, 1]) # do same as in NoisePlot but with standard noise input df = evaluate_data(df_train, np.ones_like(noise_levels) * w_standard, df_test, noise_levels) standard_plotting(df) # finally save the figure plt.savefig(self.output().path, dpi=500, bbox_inches='tight') def evaluate_data(df_train, w_train, df_test, w_test, evaluation_setups=None, preprocessing=None): """ Our standard method to evaluate the data. It will fill a DataFrame df which saves the errors for each evaluated setup""" if evaluation_setups is None: evaluation_setups = standard_evaluation_setups if preprocessing is None: preprocessing = preprocess - if "weights" in df_train: + if ("weights" in df_train) and df_train["weights"].size > 0: weights = df_train["weights"].as_matrix().squeeze() else: weights = np.ones(df_train.shape[0]) # create a new dataframe which will hold all the generated errors df = pd.DataFrame() for one_w_train, one_w_test in zip(w_train, w_test): # setup testing function X_test, y_test = preprocessing(df_test, w_percent=one_w_test) # extract noisy data X_train, y_train = preprocessing(df_train, w_percent=one_w_train) for e in evaluation_setups: regressor = e.regressor regressor.fit(X_train, y_train, weights) y_pred = regressor.predict(X_test) # save results to a dataframe errors = np.abs(y_pred - y_test) errors = errors.reshape(len(errors), 1) current_df = DataFrame(errors * 100, columns=["Errors"]) current_df["Method"] = e.name current_df["noise added [sigma %]"] = int(one_w_test * 100) df = pd.concat([df, current_df], ignore_index=True) return df def standard_plotting(df, color_palette=None, xytext_position=None): if color_palette is None: color_palette = my_colors if xytext_position is None: xytext_position = (2, 15) plt.figure() # group it by method and noise level and get description on the errors df_statistics = df.groupby(['Method', 'noise added [sigma %]']).describe() # get the error description in the rows: df_statistics = df_statistics.unstack(-1) # get rid of multiindex by dropping "Error" level df_statistics.columns = df_statistics.columns.droplevel(0) # iterate over methods to plot linegraphs with error tube # probably this can be done nicer, but no idea how exactly for color, method in zip( color_palette, df_statistics.index.get_level_values("Method").unique()): df_method = df_statistics.loc[method] plt.plot(df_method.index, df_method["50%"], color=color, label=method) plt.fill_between(df_method.index, df_method["25%"], df_method["75%"], facecolor=color, edgecolor=color, alpha=0.5) plt.xlabel("noise added [sigma %]") plt.ylabel("absolute error [%]") # add annotation: median_proposed = df_statistics.T["Proposed", 0].loc["50%"] plt.gca().annotate('error: ' + "{0:.1f}".format(median_proposed) + "%", xy=(0, median_proposed), xytext=xytext_position, va="center", ha="center", bbox=dict(boxstyle="round4", fc=sns.xkcd_rgb["medium green"], alpha=0.5), arrowprops=dict(arrowstyle="-|>", connectionstyle="arc3,rad=-0.2", fc="w")) # tidy up the plot plt.ylim((0, 20)) plt.xlim((0, 15)) plt.legend() if __name__ == '__main__': logging.basicConfig(filename='log/in_silico_plots_' + str(datetime.datetime.now()) + '.log', level=logging.INFO) ch = logging.StreamHandler() ch.setLevel(logging.INFO) logger = logging.getLogger() logger.addHandler(ch) luigi.interface.setup_interface_logging() + train = "ipcai_revision_colon_mean_scattering_train" + test = "ipcai_revision_colon_mean_scattering_test" + sch = luigi.scheduler.CentralPlannerScheduler() w = luigi.worker.Worker(scheduler=sch) - w.add(TrainingSamplePlot(which_train="ipcai_revision_colon_train", - which_test="ipcai_revision_colon_test")) - w.add(NoisePlot(which_train="ipcai_revision_colon_train", - which_test="ipcai_revision_colon_test")) - w.add(WrongNoisePlot(which_train="ipcai_revision_colon_train", - which_test="ipcai_revision_colon_test")) + w.add(TrainingSamplePlot(which_train=train, which_test=test)) + w.add(NoisePlot(which_train=train, which_test=test)) + w.add(WrongNoisePlot(which_train=train, which_test=test)) # Set a different testing domain to evaluate domain sensitivity - w.add(NoisePlot(which_train="ipcai_revision_colon_train", - which_test="ipcai_revision_generic")) - w.add(VhbPlot(which_train="ipcai_revision_colon_train", - which_test="ipcai_revision_colon_test")) + w.add(NoisePlot(which_train=train, which_test="ipcai_revision_generic")) + w.add(VhbPlot(which_train=train, which_test=test)) w.run() diff --git a/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_vivo.py b/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_vivo.py index 8fcc647551..d0647b7fb0 100644 --- a/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_vivo.py +++ b/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_vivo.py @@ -1,424 +1,424 @@ # -*- coding: utf-8 -*- """ Created on Fri Aug 14 11:09:18 2015 @author: wirkert """ import os import Image import ImageEnhance import pickle import copy import logging import datetime import numpy as np import luigi import matplotlib.pyplot as plt import SimpleITK as sitk from sklearn.ensemble.forest import RandomForestRegressor from scipy.ndimage.filters import gaussian_filter import tasks_analyze as at import tasks_preprocessing as pre import tasks_mc import scriptpaths as sp from msi.io.pngreader import PngReader from msi.io.nrrdreader import NrrdReader from msi.io.nrrdwriter import NrrdWriter from msi.msi import Msi import msi.msimanipulations as msimani import msi.imgmani as imgmani import msi.normalize as norm from regression.estimation import estimate_image from regression.linear import LinearSaO2Unmixing from regression.preprocessing import preprocess, normalize sp.ROOT_FOLDER = "/media/wirkert/data/Data/" + \ "2015_11_12_IPCAI_Neil_InVivo" sp.DATA_FOLDER = "colon_images/not_registered" sp.MC_DATA_FOLDER = "mc_data2" sp.FINALS_FOLDER = "OxySecondCamera" sp.RECORDED_WAVELENGTHS = np.arange(400, 730, 10) * 10 ** -9 def resort_image_wavelengths(collapsed_image): return collapsed_image def resort_wavelengths(msi): """Neil organized his wavelengths differently. This function fixes this.""" collapsed_image = imgmani.collapse_image(msi.get_image()) collapsed_image = resort_image_wavelengths(collapsed_image) msi.set_image(np.reshape(collapsed_image, msi.get_image().shape)) # rebind this method since in this recoding the wavelengths got messed up sp.resort_wavelengths = resort_wavelengths sp.bands_to_sortout = np.array([0, 1, 2, 3, 4, 5, 6 , 7, 8, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]) class IPCAICreateOxyImageTask(luigi.Task): folder_name = luigi.Parameter() - batch_prefix = luigi.Parameter() + df_prefix = luigi.Parameter() def requires(self): return IPCAIEstimateTissueParametersTask(folder_name=self.folder_name, batch_prefix= - self.batch_prefix), \ + self.df_prefix), \ MultiSpectralImageFromPNGFiles(folder_name=self.folder_name) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, self.folder_name + "_" + - self.batch_prefix + + self.df_prefix + "_summary.png")) def run(self): nrrdReader = NrrdReader() # read in the parametric image param_image = nrrdReader.read(self.input()[0].path) image = param_image.get_image() # read the multispectral image msi = nrrdReader.read(self.input()[1].path) # segmentation window: seg_windows = np.array([ [135, 332, 227, 154 ], [434, 280, 161, 161 ], [428, 381, 99 , 99 ], [701, 368, 161, 148 ], [756, 363, 151, 139 ], [630, 340, 150, 129 ], [468, 505, 97 , 108 ], [679, 499, 114, 114 ], [493, 513, 89 , 89 ], [336, 72 , 177 , 152 ], [313, 122, 132, 139 ], [697, 311, 113, 103 ], [881, 400, 128, 112 ], [634, 452, 124, 103 ], [632, 452, 120, 108 ], [675, 478, 122, 126 ], [774, 577, 128, 126 ], [490, 501, 130, 99 ], [495, 597, 112, 91 ], [499, 482, 103, 95 ], [681, 408, 122, 85 ], [747, 439, 120, 99 ], [634, 509, 99 , 103 ], [571, 499, 108, 85 ], [760, 495, 97 , 85 ], [636, 415, 93 , 77 ], [872, 445, 112, 103 ], [659, 462, 118, 103 ], [610, 452, 89 , 73 ], [840, 616, 89 , 75 ], [620, 419, 97 , 81 ], [597, 404, 79 , 83 ], [657, 400, 97 , 81 ], [501, 341, 95 , 77 ], [612, 398, 89 , 83 ], [655, 437, 79 , 68 ], [776, 394, 89 , 83 ], [597, 462, 93 , 71 ], [681, 505, 81 , 81 ], [566, 427, 83 , 71 ], [727, 427, 81 , 69 ], [772, 486, 103, 89 ], [515, 511, 83 , 73 ], [673, 412, 108, 91 ], [895, 505, 101, 93 ], [474, 365, 120, 101 ]]) # plot f, axarr = plt.subplots(1, 2) # create artificial rgb rgb_image = msi.get_image()[:, :, [-1, 6, 0]] rgb_image /= np.max(rgb_image) rgb_image *= 255. rgb_image = rgb_image.astype(np.uint8) im = Image.fromarray(rgb_image, 'RGB') enh_brightness = ImageEnhance.Brightness(im) im = enh_brightness.enhance(2.) plot_image = np.array(im) top_left_axis = axarr[0] top_left_axis.imshow(plot_image, interpolation='nearest') top_left_axis.set_title("synthetic rgb", fontsize=5) top_left_axis.xaxis.set_visible(False) top_left_axis.yaxis.set_visible(False) plt.set_cmap("jet") # postprocess parametric maps image[np.isnan(image)] = 0. image[np.isinf(image)] = 0. image[image > 1.] = 1. image[image < 0.] = 0. image[0, 0] = 0. image[0, 1] = 1. try: folder_nr = int(self.folder_name[-2:]) except ValueError: folder_nr = int(self.folder_name[-1]) seg_window = seg_windows[folder_nr - 1, :] evaluation_image = copy.deepcopy(image) # set out-segmented pixels to -1 evaluation_image[(msi.get_image() >= np.max(msi.get_image())).any(axis=2)] = -1 evaluation_image[(msi.get_image() <= 0).any(axis=2)] = -1 # only take elements in window evaluation_image = evaluation_image[ seg_window[1]:seg_window[1] + seg_window[3], seg_window[0]:seg_window[0] + seg_window[2]] # throw away elements not in window result = np.mean(evaluation_image[evaluation_image >= 0]) logging.info("mean of image is: " + str(result)) fd = open(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, 'results_summary.csv'), 'a') fd.write(str(folder_nr) + ", " + str(result) + "\n") fd.close() # write the windowed evaluation image sitk_img = sitk.GetImageFromArray(evaluation_image, isVector=True) sitk.WriteImage(sitk_img, self.output().path + "window.nrrd") # plot parametric maps at.plot_axis(axarr[1], image[:, :], "oxygenation [%]") plt.savefig(self.output().path, dpi=1000, bbox_inches='tight') class IPCAIEstimateTissueParametersTask(luigi.Task): folder_name = luigi.Parameter() - batch_prefix = luigi.Parameter() + df_prefix = luigi.Parameter() def requires(self): return IPCAIPreprocessMSI(folder_name=self.folder_name), \ IPCAITrainRegressor(batch_prefix= - self.batch_prefix) + self.df_prefix) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, "processed", sp.FINALS_FOLDER, self.folder_name + "_" + - self.batch_prefix + + self.df_prefix + "estimate.nrrd")) def run(self): # load data reader = NrrdReader() msi = reader.read(self.input()[0].path) e_file = open(self.input()[1].path, 'r') e = pickle.load(e_file) # estimate sitk_image = estimate_image(msi, e) # save data outFile = self.output().open('w') outFile.close() sitk.WriteImage(sitk_image, self.output().path) class IPCAITrainRegressor(luigi.Task): - batch_prefix = luigi.Parameter() + df_prefix = luigi.Parameter() def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, "reg_" + - self.batch_prefix)) + self.df_prefix)) def requires(self): - return tasks_mc.CameraBatch(self.batch_prefix) + return tasks_mc.CameraBatch(self.df_prefix) def run(self): # extract data from the batch f = open(self.input().path, "r") batch_train = pickle.load(f) X, y = preprocess(batch_train, w_percent=0.1, bands_to_sortout=sp.bands_to_sortout) # train regressor reg = RandomForestRegressor(max_depth=10, n_estimators=10, min_samples_leaf=1, n_jobs=-1) # reg = LinearSaO2Unmixing() reg.fit(X, y) # reg = LinearSaO2Unmixing() # save regressor f = self.output().open('w') pickle.dump(reg, f) f.close() class IPCAIPreprocessMSI(luigi.Task): folder_name = luigi.Parameter() def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, self.folder_name + "_preprocessed.nrrd")) def requires(self): return IPCAICorrectImagingSetupTask(folder_name=self.folder_name) def run(self): reader = NrrdReader() msi = reader.read(self.input().path) # sortout unwanted bands msi.set_image(imgmani.sortout_bands(msi.get_image(), sp.bands_to_sortout)) # zero values would lead to infinity logarithm, thus clip. msi.set_image(np.clip(msi.get_image(), 0.00001, 2. ** 64)) # normalize to get rid of lighting intensity norm.standard_normalizer.normalize(msi) # transform to absorption msi.set_image(-np.log(msi.get_image())) # normalize by l2 for stability norm.standard_normalizer.normalize(msi, "l2") pre.touch_and_save_msi(msi, self.output()) class IPCAICorrectImagingSetupTask(luigi.Task): """corrects for dark image and flatfield""" folder_name = luigi.Parameter() def requires(self): return MultiSpectralImageFromPNGFiles(folder_name=self.folder_name), \ FlatfieldFromMeasurement() def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, self.folder_name + "_image_setup_corrected.nrrd")) def run(self): """sort wavelengths and normalize by respective integration times""" # read inputs reader = NrrdReader() msi = reader.read(self.input()[0].path) flatfield = reader.read(self.input()[1].path) # msi.set_image(gaussian_filter(msi.get_image().astype(np.float), (1, 1, 0))) # correct image by flatfield msimani.flatfield_correction(msi, flatfield) sp.resort_wavelengths(msi) pre.touch_and_save_msi(msi, self.output()) class FlatfieldFromMeasurement(luigi.Task): def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, "flatfield" + ".nrrd")) def run(self): msi = Msi() msi.set_image(np.array([ 0.90733563 , 3.6583764 , 27.30637258, 52.35073931, 103.0408631, 176.812168 , 392.361853 , 620.0531964, 712.2926615, 1052.643461, 1202.266783, 1483.629833, 1730.168541, 2041.730527, 2296.256574, 2399.004343, 2536.580738, 2736.328832, 2760.046341, 2927.307208, 3539.747558, 3420.653065, 3229.74619 , 3435.280705, 3108.530329, 2817.607701, 2612.795573, 2053.155475, 1343.305582, 721.058596 , 340.6692005, 189.2180019, 77.61268264 ])) writer = NrrdWriter(msi) writer.write(self.output().path) class MultiSpectralImageFromPNGFiles(luigi.Task): folder_name = luigi.Parameter() def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, self.folder_name + ".nrrd")) def run(self): reader = PngReader() msi = reader.read(os.path.join(sp.ROOT_FOLDER, sp.DATA_FOLDER, self.folder_name, "image sample")) writer = NrrdWriter(msi) writer.write(self.output().path) if __name__ == '__main__': # configure logging logging.basicConfig(filename='invivo_neil_colon' + str(datetime.datetime.now()) + '.log', level=logging.INFO) ch = logging.StreamHandler() ch.setLevel(logging.INFO) logger = logging.getLogger() logger.addHandler(ch) # configure luigi luigi.interface.setup_interface_logging() sch = luigi.scheduler.CentralPlannerScheduler() w = luigi.worker.Worker(scheduler=sch) # run over all subfolders (non-recursively) # to collect the data and generate the results image_file_folder = os.path.join(sp.ROOT_FOLDER, sp.DATA_FOLDER) all_folders = os.listdir(image_file_folder) # each complete stack has elements F0 - F7 only_colon_folders = [ f for f in all_folders if f.startswith("SaO2") ] for f in only_colon_folders: main_task = IPCAICreateOxyImageTask( folder_name=f, batch_prefix= "ipcai_colon_muscle_train") w.add(main_task) w.run() diff --git a/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_vivo_small_bowel.py b/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_vivo_small_bowel.py index 4e894cae80..115acd2a0c 100644 --- a/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_vivo_small_bowel.py +++ b/Modules/Biophotonics/python/iMC/script_analyze_ipcai_in_vivo_small_bowel.py @@ -1,297 +1,294 @@ # -*- coding: utf-8 -*- """ Created on Fri Aug 14 11:09:18 2015 @author: wirkert """ import os import Image import ImageEnhance import pickle import logging import datetime import numpy as np import luigi import matplotlib.pyplot as plt import SimpleITK as sitk from sklearn.ensemble.forest import RandomForestRegressor from scipy.ndimage.filters import gaussian_filter import tasks_analyze as at import tasks_mc import scriptpaths as sp from msi.io.pngreader import PngReader from msi.io.nrrdreader import NrrdReader from msi.io.nrrdwriter import NrrdWriter import msi.msimanipulations as msimani import msi.imgmani as imgmani import msi.normalize as norm from regression.estimation import estimate_image from regression.linear import LinearSaO2Unmixing from regression.preprocessing import preprocess, preprocess2 from msi.io.tiffringreader import TiffRingReader from sklearn.linear_model.base import LinearRegression sp.ROOT_FOLDER = "/media/wirkert/data/Data/" + \ "2015_11_12_IPCAI_in_vivo" sp.DATA_FOLDER = "small_bowel_images" -sp.FINALS_FOLDER = "small_bowel_images_colorbar20_80" +sp.FINALS_FOLDER = "small_bowel_images_jet" sp.FLAT_FOLDER = "flatfields_liver" # sp.RECORDED_WAVELENGTHS = np.arange(470, 680, 10) * 10 ** -9 new_order = [0, 1, 2, 3, 4, 5, 6, 7] + def resort_image_wavelengths(collapsed_image): return collapsed_image[:, new_order] + def resort_wavelengths(msi): """Neil organized his _wavelengths differently. This function fixes this.""" collapsed_image = imgmani.collapse_image(msi.get_image()) collapsed_image = resort_image_wavelengths(collapsed_image) msi.set_image(np.reshape(collapsed_image, msi.get_image().shape)) # rebind this method since in this recoding the _wavelengths got messed up sp.resort_wavelengths = resort_wavelengths sp.bands_to_sortout = np.array([]) # [0, 1, 2, 20, 19, 18, 17, 16]) class IPCAICreateOxyImageTask(luigi.Task): image_name = luigi.Parameter() - batch_prefix = luigi.Parameter() + df_prefix = luigi.Parameter() def requires(self): - return IPCAITrainRegressor(batch_prefix=self.batch_prefix), \ + return IPCAITrainRegressor(df_prefix=self.df_prefix), \ FlatfieldFromPNGFiles() + def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, self.image_name + "_" + - self.batch_prefix + + self.df_prefix + "_summary.png.png")) def run(self): nrrd_reader = NrrdReader() tiff_ring_reader = TiffRingReader() # read the flatfield flat = nrrd_reader.read(self.input()[1].path) # read the msi nr_filters = len(sp.RECORDED_WAVELENGTHS) full_image_name = os.path.join(sp.ROOT_FOLDER, sp.DATA_FOLDER, self.image_name) msi, segmentation = tiff_ring_reader.read(full_image_name, nr_filters) segmentation = np.logical_and(segmentation, (np.max(msi.get_image(), axis=-1) < 1000.)) # read the segmentation # # seg_path = os.path.join(sp.ROOT_FOLDER, sp.DATA_FOLDER, # "segmentation_" + self.image_name + ".nrrd") # segmentation = nrrd_reader.read(seg_path) # segmentation = segmentation.get_image() # read the regressor e_file = open(self.input()[0].path, 'r') e = pickle.load(e_file) # correct image setup filter_nr = int(self.image_name[-6:-5]) original_order = np.arange(nr_filters) new_image_order = np.concatenate(( original_order[nr_filters - filter_nr:], original_order[:nr_filters - filter_nr])) # resort msi to restore original order msimani.get_bands(msi, new_image_order) # correct by flatfield msimani.flatfield_correction(msi, flat) # create artificial rgb rgb_image = msi.get_image()[:, :, [2, 3, 1]] rgb_image /= np.max(rgb_image) rgb_image *= 255. # preprocess the image # sortout unwanted bands print "1" msi.set_image(imgmani.sortout_bands(msi.get_image(), sp.bands_to_sortout)) # zero values would lead to infinity logarithm, thus clip. msi.set_image(np.clip(msi.get_image(), 0.00001, 2. ** 64)) # normalize to get rid of lighting intensity norm.standard_normalizer.normalize(msi) # transform to absorption msi.set_image(-np.log(msi.get_image())) # normalize by l2 for stability norm.standard_normalizer.normalize(msi, "l2") print "2" # estimate sitk_image = estimate_image(msi, e) image = sitk.GetArrayFromImage(sitk_image) plt.figure() print "3" rgb_image = rgb_image.astype(np.uint8) im = Image.fromarray(rgb_image, 'RGB') enh_brightness = ImageEnhance.Brightness(im) im = enh_brightness.enhance(5.) plot_image = np.array(im) top_left_axis = plt.gca() top_left_axis.imshow(plot_image, interpolation='nearest') top_left_axis.xaxis.set_visible(False) top_left_axis.yaxis.set_visible(False) plt.set_cmap("jet") print "4" # plot parametric maps oxy_image = image[:, :] segmentation[0, 0] = 1 segmentation[0, 1] = 1 oxy_image = np.ma.masked_array(image[:, :], (segmentation < 1)) oxy_image[np.isnan(oxy_image)] = 0. oxy_image[np.isinf(oxy_image)] = 0. oxy_mean = np.mean(oxy_image) - oxy_image[0, 0] = 0.2 - oxy_image[0, 1] = 0.8 - oxy_image = np.clip(oxy_image, 0.2, 0.8) + oxy_image[0, 0] = 0.0 + oxy_image[0, 1] = 1. +# oxy_image = np.clip(oxy_image, 0.2, 0.8) # oxy_image[oxy_image > 0.8] = 0.8 # oxy_image[oxy_image < 0.2] = 0.2 at.plot_axis(plt.gca(), oxy_image[:, :], "oxygenation [%]. Mean " + "{0:.1f}".format((oxy_mean * 100.)) + "%") plt.savefig(self.output().path, dpi=500, bbox_inches='tight') # plt.figure() # bvf_image = np.ma.masked_array(image[:, :, 0], (segmentation < 1)) # # bvf_image = image[:, :, 0] # bvf_image[np.isnan(bvf_image)] = 0. # bvf_image[np.isinf(bvf_image)] = 0. # bvf_image[bvf_image > 1.] = 1. # bvf_image[bvf_image < 0.] = 0. # bvf_mean = np.mean(bvf_image) # bvf_image[0, 0] = 0. # bvf_image[1, 1] = 0.1 # at.plot_axis(plt.gca(), bvf_image[:, :] * 100, # "blood volume fraction [%]. Mean " + "{0:.1f}".format((bvf_mean * 100.)) + "%") # # plt.savefig(self.output().path + "_bvf.png", dpi=500, bbox_inches='tight') print "4.5" # fd = open(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, # sp.FINALS_FOLDER, # 'results_summary.csv'), 'a') # fd.write(self.image_name + ", " + str(int(oxy_mean * 100)) + # ", " + "{0:.1f}".format((bvf_mean * 100.)) + "\n") # fd.close() print "5" plt.close("all") - - class IPCAITrainRegressor(luigi.Task): - batch_prefix = luigi.Parameter() + df_prefix = luigi.Parameter() def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, "reg_" + - self.batch_prefix)) + self.df_prefix)) def requires(self): - return tasks_mc.CameraBatch(self.batch_prefix) + return tasks_mc.CameraBatch(self.df_prefix) def run(self): # extract data from the batch f = open(self.input().path, "r") batch_train = pickle.load(f) # TODO: fix hack by sorting always to ascending _wavelengths batch_train.reflectances = resort_image_wavelengths( batch_train.reflectances) batch_train._wavelengths = batch_train._wavelengths[new_order] X, y = preprocess2(batch_train, w_percent=0.1, bands_to_sortout=sp.bands_to_sortout) # train regressor reg = RandomForestRegressor(10, min_samples_leaf=10, max_depth=9, n_jobs=-1) # reg = KNeighborsRegressor(algorithm="auto") # reg = LinearRegression() # reg = sklearn.svm.SVR(kernel="rbf", degree=3, C=100., gamma=10.) # reg = LinearSaO2Unmixing() reg.fit(X, y[:, 1]) # reg = LinearSaO2Unmixing() # save regressor f = self.output().open('w') pickle.dump(reg, f) f.close() class FlatfieldFromPNGFiles(luigi.Task): def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, "flatfield" + ".nrrd")) def run(self): reader = PngReader() msi = reader.read(os.path.join(sp.ROOT_FOLDER, sp.FLAT_FOLDER, "Flatfield9cm_I000x_F")) writer = NrrdWriter(msi) msi.set_image(msi.get_image().astype(float)) seg_reader = NrrdReader() segmentation = seg_reader.read(os.path.join(sp.ROOT_FOLDER, sp.FLAT_FOLDER, "segmentation.nrrd")) # msimani.apply_segmentation(msi, segmentation) # msimani.calculate_mean_spectrum(msi) # apply gaussian smoothing for error reduction img = gaussian_filter(msi.get_image(), sigma=(5, 5, 0), order=0) msi.set_image(img) writer.write(self.output().path) - if __name__ == '__main__': # root folder there the data lies logging.basicConfig(filename='small_bowel' + str(datetime.datetime.now()) + '.log', level=logging.INFO) luigi.interface.setup_interface_logging() ch = logging.StreamHandler() ch.setLevel(logging.INFO) logger = logging.getLogger() logger.addHandler(ch) sch = luigi.scheduler.CentralPlannerScheduler() w = luigi.worker.Worker(scheduler=sch) # run over all subfolders (non-recursively) # to collect the data and generate the results image_file_folder = os.path.join(sp.ROOT_FOLDER, sp.DATA_FOLDER) - onlyfiles = [ f for f in os.listdir(image_file_folder) if - os.path.isfile(os.path.join(image_file_folder, f)) ] + onlyfiles = [f for f in os.listdir(image_file_folder) if + os.path.isfile(os.path.join(image_file_folder, f))] onlyfiles.sort() - onlyfiles = [ f for f in onlyfiles if - f.endswith(".tiff") ] + onlyfiles = [f for f in onlyfiles if f.endswith(".tiff") ] for f in onlyfiles: - main_task = IPCAICreateOxyImageTask( - image_name=f, - batch_prefix= - "ipcai_colon_muscle_train") + main_task = IPCAICreateOxyImageTask(image_name=f, + df_prefix="ipcai_colon_muscle_train") w.add(main_task) w.run() diff --git a/Modules/Biophotonics/python/iMC/tasks_mc.py b/Modules/Biophotonics/python/iMC/tasks_mc.py index 97f51f41dd..ca08e6b058 100644 --- a/Modules/Biophotonics/python/iMC/tasks_mc.py +++ b/Modules/Biophotonics/python/iMC/tasks_mc.py @@ -1,119 +1,143 @@ ''' Created on Sep 10, 2015 @author: wirkert ''' import os import pandas as pd import luigi import scriptpaths as sp import mc.dfmanipulations as dfmani from msi.io.spectrometerreader import SpectrometerReader from msi.io.msiwriter import MsiWriter from msi.io.msireader import MsiReader from msi.normalize import NormalizeMean import msi.msimanipulations as msimani class SpectrometerFile(luigi.Task): input_file = luigi.Parameter() def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.SPECTROMETER_FOLDER, self.input_file)) class FilterTransmission(luigi.Task): input_file = luigi.Parameter() def requires(self): return SpectrometerFile(self.input_file) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, "processed_transmission" + self.input_file + ".msi")) def run(self): reader = SpectrometerReader() filter_transmission = reader.read(self.input().path) # filter high and low _wavelengths wavelengths = filter_transmission.get_wavelengths() fi_image = filter_transmission.get_image() fi_image[wavelengths < 450 * 10 ** -9] = 0.0 fi_image[wavelengths > 720 * 10 ** -9] = 0.0 # filter elements farther away than +- 30nm name_to_float = float(os.path.splitext(self.input_file)[0]) fi_image[wavelengths < (name_to_float - 30) * 10 ** -9] = 0.0 fi_image[wavelengths > (name_to_float + 30) * 10 ** -9] = 0.0 # elements < 0 are set to 0. fi_image[fi_image < 0.0] = 0.0 - # write it - writer = MsiWriter(filter_transmission) - writer.write(self.output().path) + + # write it to a dataframe + df = pd.DataFrame() + df["wavelengths"] = wavelengths + df["reflectances"] = fi_image + df.to_csv(self.output().path, index=False) class JoinBatches(luigi.Task): df_prefix = luigi.Parameter() def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, self.df_prefix + "_" + "all" + ".txt")) def run(self): path = os.path.join(sp.ROOT_FOLDER, sp.MC_DATA_FOLDER) # get all files in the search path files = [ f for f in os.listdir(path) \ if os.path.isfile(os.path.join(path, f)) ] # from these get only those who start with correct batch prefix - df_file_names = \ - [ os.path.join(path, f) for f in files if f.startswith(self.df_prefix)] + df_file_names = [os.path.join(path, f) for f in files + if f.startswith(self.df_prefix)] # load these files dfs = [pd.read_csv(f, header=[0, 1]) for f in df_file_names] # now join them to one batch joined_df = pd.concat(dfs, ignore_index=True) # write it joined_df.to_csv(self.output().path, index=False) class CameraBatch(luigi.Task): """takes a batch of reference data and converts it to the spectra - processed by the camera""" + processed by a camera with the specified wavelengths assuming a 10nm FWHM""" df_prefix = luigi.Parameter() def requires(self): return JoinBatches(self.df_prefix) def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, - self.df_prefix + "_all_camera.txt")) + self.df_prefix + + "_all_camera.txt")) def run(self): # load dataframe df = pd.read_csv(self.input().path, header=[0, 1]) # camera batch creation: dfmani.fold_by_sliding_average(df, 6) dfmani.interpolate_wavelengths(df, sp.RECORDED_WAVELENGTHS) # write it df.to_csv(self.output().path, index=False) +class SpectroCamBatch(luigi.Task): + """ + Same as CameraBatch in purpose but adapts the batch to our very specific + SpectroCam set-up. + """ + df_prefix = luigi.Parameter() + + def requires(self): + return JoinBatches(self.df_prefix) + + def output(self): + return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, + sp.RESULTS_FOLDER, + self.df_prefix + + "_all_spectrocam.txt")) + + def run(self): + pass + + def get_transmission_data(input_path, desired_wavelengths): """small helper method to get filter transmission from file at input_path. The _wavelengths are interpolated to the desired ones""" reader = MsiReader() filter_transmission = reader.read(input_path) msimani.interpolate_wavelengths(filter_transmission, desired_wavelengths) # normalize to one normalizer = NormalizeMean() normalizer.normalize(filter_transmission) return filter_transmission diff --git a/Modules/Biophotonics/python/iMC/tasks_regression.py b/Modules/Biophotonics/python/iMC/tasks_regression.py index e10197b89c..aaba47567b 100644 --- a/Modules/Biophotonics/python/iMC/tasks_regression.py +++ b/Modules/Biophotonics/python/iMC/tasks_regression.py @@ -1,306 +1,306 @@ ''' Created on Aug 26, 2015 @author: wirkert ''' import os import glob import pickle import numpy as np from sklearn.ensemble import RandomForestRegressor from sklearn.grid_search import GridSearchCV from sklearn.cross_validation import KFold from sklearn import tree from sklearn.linear_model import LogisticRegression import luigi from msi.normalize import standard_normalizer from msi.msi import Msi from msi.imgmani import select_n_reflectances from msi.io.nrrdreader import NrrdReader import msi.msimanipulations as msimani import msi.imgmani as imgmani import tasks_preprocessing as ppt import scriptpaths as sp import tasks_mc as mc def extract_mc_data(batch): batch = pickle.load(open(batch, 'r')) # in case there are bands to sort out, discard them here batch.reflectances = imgmani.sortout_bands(batch.reflectances, sp.bands_to_sortout) # create fake msi to be able to use normalizer fake_msi = Msi(image=batch.reflectances) # if normalizer which requires wavelengths is needed these have to be # added here standard_normalizer.normalize(fake_msi) batch.reflectances = fake_msi.get_image() # scaled_reflectances = preprocessing.scale(reflectances) return batch.mucosa, batch.reflectances def calc_weights(sourceReflectances, targetReflectances): targetReflectances = select_n_reflectances(targetReflectances, sourceReflectances.shape[0]) labelsSource = np.zeros(sourceReflectances.shape[0]) labelsTarget = np.ones(targetReflectances.shape[0]) assert(labelsSource.shape == labelsTarget.shape) allReflectances = np.concatenate((sourceReflectances, targetReflectances)) allLabels = np.concatenate((labelsSource, labelsTarget)) # train logistic regression kf = KFold(allReflectances.shape[0], 5, shuffle=True) # todo include intercept scaling paramter param_grid = [ {'C': np.logspace(-3, 6, 10), 'fit_intercept':['True', 'False']} ] best_lr = GridSearchCV(LogisticRegression(), param_grid, cv=kf, n_jobs=-1) best_lr.fit(allReflectances, allLabels) sourceProbabilities = best_lr.predict_proba(sourceReflectances) return sourceProbabilities[:, 1] / sourceProbabilities[:, 0] class ParameterFile(luigi.Task): file_name_prefix = luigi.Parameter() def output(self): filename = glob.glob(os.path.join(sp.ROOT_FOLDER, sp.MC_DATA_FOLDER, self.file_name_prefix + "_*")) return luigi.LocalTarget(os.path.abspath(filename[0])) class ReflectanceFile(luigi.Task): file_name_prefix = luigi.Parameter() def output(self): filename = glob.glob(os.path.join(sp.ROOT_FOLDER, sp.MC_DATA_FOLDER, self.file_name_prefix + "reflectances*")) return luigi.LocalTarget(os.path.abspath(filename[0])) class RegressorFile(luigi.Task): regressor = luigi.Parameter() def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, self.regressor)) def train(x, y): # set up cross-validation and grid search kf = KFold(x.shape[0], 5, shuffle=True) param_grid = [ {"n_estimators": np.array([50]), "max_depth": np.arange(5, 15, 2), "min_samples_leaf": np.array([1, 10, 15, 20]) * x.shape[0] / 1000}] # train forest rfs = GridSearchCV(RandomForestRegressor(50, max_depth=8, n_jobs=-1), param_grid, cv=kf, n_jobs=-1) rfs.fit(x, y) rf = rfs.best_estimator_ tree.export_graphviz(rf.estimators_[0], out_file='tree.dot') # rf = RandomForestRegressor(50, max_depth=13, # min_samples_leaf=5 * x.shape[0] / 1000, n_jobs=-1) # rf.fit(x, y) print("best random forest parameters: " + str(rf.get_params())) # return the best forest return rf class TrainForest(luigi.Task): - batch_prefix = luigi.Parameter() + df_prefix = luigi.Parameter() def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, "rf_" + - self.batch_prefix)) + self.df_prefix)) def requires(self): - return mc.CameraBatch(self.batch_prefix) + return mc.CameraBatch(self.df_prefix) def run(self): train_params, train_refl = extract_mc_data(self.input().path) rf = train(train_refl, train_params) # save the forest f = self.output().open('w') pickle.dump(rf, f) f.close() class TrainForestForwardModel(luigi.Task): - batch_prefix = luigi.Parameter() + df_prefix = luigi.Parameter() def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, "rf_forward_" + - self.batch_prefix)) + self.df_prefix)) def requires(self): - return mc.CameraBatch(self.batch_prefix) + return mc.CameraBatch(self.df_prefix) def run(self): train_params, train_refl = extract_mc_data(self.input().path) rf = train(train_params, train_refl) # save the forest f = open(self.output().path, 'w') pickle.dump(rf, f) f.close() class DataAugmentation(luigi.Task): - batch_prefix = luigi.Parameter() + df_prefix = luigi.Parameter() def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, "augmented_" + - self.batch_prefix + + self.df_prefix + "_params" + ".npy")), \ luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, "augmented_" + - self.batch_prefix + + self.df_prefix + "reflectances" + ".npy")) def requires(self): - return TrainForestForwardModel(self.batch_prefix), \ - ParameterFile(self.batch_prefix) + return TrainForestForwardModel(self.df_prefix), \ + ParameterFile(self.df_prefix) def run(self): # n samples shall be created n = 10 ** 5 # load forward forest rf_file = self.input()[0].open('r') forward_rf = pickle.load(rf_file) # find ranges in which samples shall be drawn params = np.load(self.input()[1].path) params = params[:, 0:4] params_min = np.min(params, axis=0) params_max = np.max(params, axis=0) # create parameter array enh_params = np.zeros((n, params.shape[1])) for i in range(n): # draw uniform sample in parameter range enh_param = (params_max - params_min) * \ np.random.random_sample(params_min.shape) + params_min for e in enh_param: noise = np.random.normal(e, e * 0.1) e += noise enh_params[i, :] = enh_param enh_refls = forward_rf.predict(enh_params) np.save(self.output()[0].path, enh_params) np.save(self.output()[1].path, enh_refls) class TrainForestAugmentedData(luigi.Task): - batch_prefix = luigi.Parameter() + df_prefix = luigi.Parameter() def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, "rf_augmented_" + - self.batch_prefix)) + self.df_prefix)) def requires(self): - return DataAugmentation(self.batch_prefix) + return DataAugmentation(self.df_prefix) def run(self): train_params, train_refl = extract_mc_data(self.input()[0].path, self.input()[1].path) train_params = train_params[:, 0:4] rf = train(train_refl, train_params) # save the forest f = open(self.output().path, 'w') pickle.dump(rf, f) f.close() class CalculateWeightsFromSegmentation(luigi.Task): imageName = luigi.Parameter() - batch_prefix = luigi.Parameter() + df_prefix = luigi.Parameter() def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, "weights_" + - self.batch_prefix + + self.df_prefix + "_image_" + self.imageName + ".npy")) def requires(self): - return ParameterFile(self.batch_prefix), \ - ReflectanceFile(self.batch_prefix), \ + return ParameterFile(self.df_prefix), \ + ReflectanceFile(self.df_prefix), \ ppt.PreprocessMSI(imageName=self.imageName), \ ppt.SegmentMSI(imageName=self.imageName) def run(self): train_params, train_refl = extract_mc_data(self.input()[0].path, self.input()[1].path) # get weights reader = NrrdReader() msi = reader.read(self.input()[2].path) segmentation = reader.read(self.input()[3].path).get_image() msimani.apply_segmentation(msi, segmentation) non_masked_elemnts = imgmani.remove_masked_elements(msi.get_image()) # collapsed_image = np.ma.compress_rows(collapsed_image) weights = calc_weights(train_refl, non_masked_elemnts) assert(weights.shape[0] == train_refl.shape[0]) np.save(self.output().path, weights) class TrainForestDA(luigi.Task): imageName = luigi.Parameter() - batch_prefix = luigi.Parameter() + df_prefix = luigi.Parameter() def output(self): return luigi.LocalTarget(os.path.join(sp.ROOT_FOLDER, sp.RESULTS_FOLDER, sp.FINALS_FOLDER, "rf_" + self.imageName + "_" + - self.batch_prefix)) + self.df_prefix)) def requires(self): - return mc.CameraBatch(self.batch_prefix), \ + return mc.CameraBatch(self.df_prefix), \ CalculateWeightsFromSegmentation(imageName=self.imageName, - file_name_prefix_training=self.batch_prefix) + file_name_prefix_training=self.df_prefix) def run(self): train_params, train_refl = extract_mc_data(self.input()[0].path) # get weights weights = np.load(self.input()[2].path) # unfortunately no grid search can be used, because it doesn't # allow the additional weights paramter rf = RandomForestRegressor(50, max_depth=13, max_features=8, min_samples_leaf=5, n_jobs=-1) rf.fit(train_refl, train_params, weights) # save the forest f = open(self.output().path, 'w') pickle.dump(rf, f) f.close()