diff --git a/__main__.py b/__main__.py index 3643368..8b13789 100644 --- a/__main__.py +++ b/__main__.py @@ -1,91 +1 @@ -#!/usr/bin/env python -# -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. -# -# Author: Sven Wanner (s.wanner@dkfz.de) -import os -import sys -import time -import argparse -ROOT = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) -sys.path.append(ROOT) - -from hyppopy.projectmanager import ProjectManager -from hyppopy.workflows.svc_usecase.svc_usecase import svc_usecase -from hyppopy.workflows.knc_usecase.knc_usecase import knc_usecase -from hyppopy.workflows.adaboost_usecase.adaboost_usecase import adaboost_usecase -from hyppopy.workflows.unet_usecase.unet_usecase import unet_usecase -from hyppopy.workflows.randomforest_usecase.randomforest_usecase import randomforest_usecase -from hyppopy.workflows.imageregistration_usecase.imageregistration_usecase import imageregistration_usecase - - -def print_warning(msg): - print("\n!!!!! WARNING !!!!!") - print(msg) - sys.exit() - - -def args_check(args): - if not args.workflow: - print_warning("No workflow specified, check --help") - if not args.config: - print_warning("Missing config parameter, check --help") - if not os.path.isfile(args.config): - print_warning(f"Couldn't find configfile ({args.config}), please check your input --config") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Hyppopy UseCase Examples Executable.') - parser.add_argument('-w', '--workflow', type=str, help='workflow to be executed') - parser.add_argument('-o', '--output', type=str, default=None, help='output path to store result') - parser.add_argument('-c', '--config', type=str, help='config filename, .xml or .json formats are supported.' - 'pass a full path filename or the filename only if the' - 'configfile is in the data folder') - - args = parser.parse_args() - args_check(args) - - ProjectManager.read_config(args.config) - - if args.output is not None: - ProjectManager.register_member("output_dir", args.output) - - if args.workflow == "svc_usecase": - uc = svc_usecase() - elif args.workflow == "randomforest_usecase": - uc = randomforest_usecase() - elif args.workflow == "knc_usecase": - uc = knc_usecase() - elif args.workflow == "adaboost_usecase": - uc = adaboost_usecase() - elif args.workflow == "unet_usecase": - uc = unet_usecase() - elif args.workflow == "imageregistration_usecase": - uc = imageregistration_usecase() - else: - print("No workflow called {} found!".format(args.workflow)) - sys.exit() - - print("\nStart optimization...") - start = time.process_time() - uc.run(save=True) - end = time.process_time() - - print("Finished optimization!\n") - print("Total Time: {}s\n".format(end-start)) - res, best = uc.get_results() - print("---- Optimal Parameter -----\n") - for p in best.items(): - print(" - {}\t:\t{}".format(p[0], p[1])) diff --git a/examples/quality_tests.py b/examples/quality_tests.py index d1085ca..584cdba 100644 --- a/examples/quality_tests.py +++ b/examples/quality_tests.py @@ -1,345 +1,345 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. 
# # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os import sys import time import argparse import tempfile import numpy as np import pandas as pd try: import hyppopy as hp from hyppopy.globals import ROOT - from hyppopy.virtualfunction import VirtualFunction + from hyppopy.VirtualFunction import VirtualFunction except Exception as e: sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) import hyppopy as hp from hyppopy.globals import ROOT - from hyppopy.virtualfunction import VirtualFunction + from hyppopy.VirtualFunction import VirtualFunction TEMP = tempfile.gettempdir() DATADIR = os.path.join(os.path.join(ROOT, os.path.join('hyppopy', 'virtualparameterspace')), "6D") vfunc = VirtualFunction() vfunc.load_images(DATADIR) minima = vfunc.minima() # for i in range(6): # mini = minima[i] # vfunc.plot(i, title="axis_{} min_x={} min_loss={}".format(str(i).zfill(2), np.mean(mini[0]), mini[1])) def blackboxfunction(data, params): return sum(vfunc(*params.values())) def getConfig(*args, **kwargs): if 'output_dir' in kwargs.keys() and kwargs['output_dir'] is not None: output_dir = kwargs['output_dir'] else: output_dir = TEMP if 'plugin' in kwargs.keys(): plugin = kwargs['plugin'] else: plugin = 'hyperopt' max_iterations = 0 if 'max_iterations' in kwargs.keys(): max_iterations = kwargs['max_iterations'] if len(args) < 6: print("Missing hyperparameter abortion!") sys.exit() config = { "hyperparameter": { "axis_0": { "domain": "uniform", "data": args[0], "type": "float" }, "axis_1": { "domain": "uniform", "data": args[1], "type": "float" }, "axis_2": { "domain": "uniform", "data": args[2], "type": "float" }, "axis_3": { "domain": "uniform", "data": args[3], "type": "float" }, "axis_4": { "domain": "uniform", "data": args[4], "type": "float" }, "axis_5": { "domain": "uniform", "data": args[5], "type": "float" } }, "settings": { "solver_plugin": { "max_iterations": max_iterations, "use_plugin": plugin, "output_dir": output_dir } } } return config def test_randomsearch(output_dir): print("#" * 30) print("# RANDOMSEARCH") print("# output_dir={}".format(output_dir)) print("#" * 30) ranges = [[0, 1], [0, 800], [-1, 1], [0, 5], [0, 10000], [0, 10]] args = {'plugin': 'randomsearch', 'output_dir': output_dir} config = getConfig(*ranges, **args) return config def test_hyperopt(output_dir): print("#" * 30) print("# HYPEROPT") print("# output_dir={}".format(output_dir)) print("#" * 30) ranges = [[0, 1], [0, 800], [-1, 1], [0, 5], [0, 10000], [0, 10]] args = {'plugin': 'hyperopt', 'output_dir': output_dir} config = getConfig(*ranges, **args) return config def test_optunity(output_dir): print("#" * 30) print("# OPTUNITY") print("# output_dir={}".format(output_dir)) print("#" * 30) ranges = [[0, 1], [0, 800], [-1, 1], [0, 5], [0, 10000], [0, 10]] args = {'plugin': 'optunity', 'output_dir': output_dir} config = getConfig(*ranges, **args) return config def analyse_iteration_characteristics(configs): N = 50 num_of_iterations = [5, 10, 25, 50, 100, 250, 500, 750, 1000, 1500, 2000] results = {'iteration': [], 'time_overhead': [], 'time_overhead_std': [], 'accuracy': [], 'accuracy_std': [], 'plugin': []} accuracies = {} time_overheads = {} for plugin in configs.keys(): accuracies[plugin] = [] time_overheads[plugin] = [] for it in num_of_iterations: for plugin, config in configs.items(): 
print("\riteration loop: {} for plugin {}".format(it, plugin)) for p, v in accuracies.items(): v.clear() for p, v in time_overheads.items(): v.clear() for n in range(N): print("\rrepeat loop: {}".format(n), end="") config["settings"]["solver_plugin"]["max_iterations"] = it if not hp.ProjectManager.set_config(config): print("Invalid config dict!") sys.exit() solver = hp.SolverFactory.get_solver() solver.set_loss_function(blackboxfunction) solver.set_data(None) start = time.process_time() solver.run() end = time.process_time() time_overheads[plugin].append(end-start) res, best = solver.get_results() best_loss = 0 for i, p in enumerate(best.items()): best_loss += minima[i][1] reached_loss = np.min(res["losses"].values) accuracies[plugin].append(100.0/best_loss*reached_loss) print("\r") results['iteration'].append(it) results['time_overhead'].append(np.mean(time_overheads[plugin])) results['accuracy'].append(np.mean(accuracies[plugin])) results['time_overhead_std'].append(np.std(time_overheads[plugin])) results['accuracy_std'].append(np.std(accuracies[plugin])) results['plugin'].append(plugin) return results def analyse_random_normal_search(output_dir): config = { "hyperparameter": { "axis_0": { "domain": "normal", "data": [0.0, 0.2], "type": "float" }, "axis_1": { "domain": "normal", "data": [500, 700], "type": "float" }, "axis_2": { "domain": "normal", "data": [-0.2, 0.9], "type": "float" }, "axis_3": { "domain": "normal", "data": [0.0, 3.0], "type": "float" }, "axis_4": { "domain": "normal", "data": [6000, 10000], "type": "float" }, "axis_5": { "domain": "normal", "data": [3, 7], "type": "float" } }, "settings": { "solver_plugin": { "max_iterations": 0, "use_plugin": 'randomsearch', "output_dir": output_dir } } } N = 50 num_of_iterations = [5, 10, 25, 50, 100, 250, 500, 750, 1000, 1500, 2000] results = {'iteration': [], 'time_overhead': [], 'time_overhead_std': [], 'accuracy': [], 'accuracy_std': []} accuracies = [] time_overheads = [] for it in num_of_iterations: config["settings"]["solver_plugin"]["max_iterations"] = it print("\riteration loop: {}".format(it)) accuracies.clear() time_overheads.clear() for n in range(N): print("\rrepeat loop: {}".format(n), end="") if not hp.ProjectManager.set_config(config): print("Invalid config dict!") sys.exit() solver = hp.SolverFactory.get_solver() solver.set_loss_function(blackboxfunction) solver.set_data(None) start = time.process_time() solver.run() end = time.process_time() time_overheads.append(end - start) res, best = solver.get_results() best_loss = 0 for i, p in enumerate(best.items()): best_loss += minima[i][1] reached_loss = np.min(res["losses"].values) accuracies.append(100.0 / best_loss * reached_loss) print("\r") results['iteration'].append(it) results['time_overhead'].append(np.mean(time_overheads)) results['accuracy'].append(np.mean(accuracies)) results['time_overhead_std'].append(np.std(time_overheads)) results['accuracy_std'].append(np.std(accuracies)) return results if __name__ == "__main__": print("") parser = argparse.ArgumentParser(description='Hyppopy Quality Test Executable') parser.add_argument('-o', '--output', type=str, default=None, help='output path to store result') parser.add_argument('-p', '--plugin', type=str, default=None, help='if set analysis is only executed on this plugin') args = parser.parse_args() do_analyse_iteration_characteristics = True do_analyse_random_normal_search = False funcs = [x for x in locals().keys() if x.startswith("test_")] configs = {} for f in funcs: if args.plugin is not None: if not 
f.endswith(args.plugin): continue configs[f.split("_")[1]] = locals()[f](args.output) if do_analyse_iteration_characteristics: start = time.process_time() data = analyse_iteration_characteristics(configs) end = time.process_time() print("Total duration analyse_iteration_characteristics: {} min".format((end-start)/60)) df = pd.DataFrame.from_dict(data) fname = os.path.join(args.output, "analyse_iteration_characteristics.csv") df.to_csv(fname, index=False) if do_analyse_random_normal_search: start = time.process_time() data = analyse_random_normal_search(args.output) end = time.process_time() print("Total duration analyse_random_normal_search: {} min".format((end - start) / 60)) df = pd.DataFrame.from_dict(data) fname = os.path.join(args.output, "analyse_random_normal_search.csv") df.to_csv(fname, index=False) diff --git a/examples/solver_tutorial.py b/examples/solver_tutorial.py index 1650b68..2a2300f 100644 --- a/examples/solver_tutorial.py +++ b/examples/solver_tutorial.py @@ -1,110 +1,128 @@ -# coding: utf-8 +# DKFZ +# +# +# Copyright (c) German Cancer Research Center, +# Division of Medical and Biological Informatics. +# All rights reserved. +# +# This software is distributed WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# See LICENSE.txt or http://www.mitk.org for details. +# +# Author: Sven Wanner (s.wanner@dkfz.de) + import os -import time +import sys import tempfile -import hyppopy as hp - +from hyppopy.HyppopyProject import HyppopyProject +from hyppopy.solver.HyperoptSolver import HyperoptSolver +from hyppopy.solver.OptunitySolver import OptunitySolver +from hyppopy.solver.RandomsearchSolver import RandomsearchSolver +from hyppopy.solver.GridsearchSolver import GridsearchSolver +from hyppopy.BlackboxFunction import BlackboxFunction -# ### Get Some Data +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +from sklearn.svm import SVC from sklearn.datasets import load_iris -iris_data = load_iris() -input_data = [iris_data.data, iris_data.target] - +from sklearn.model_selection import cross_val_score -# ### Setup ProjectManager -# -# We could read the configuration from .json or .xml using ProjectManager.read_config, but parameters can can also be set via a dictionary. -# All subsections in the section hyperparameter represent a hyperparameter. Top-level of each hyperparameter is the name. Additionally one -# needs to specifiy a domain [uniform, categorical, loguniform, normal], a type [str, int, float] and the data, wich is either a range [from, to] -# or a list of categories. All key value pairs in the section settings are added to the ProjectManager as member variables. If you add the -# section custom you can add your own workflow specific parameter. The ProjectManager is a Singleton, thus need no instanciation and can be used -# everywhere but exists only once! 
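The config that follows switches the numeric "data" entries from the two-element form [low, high] to a three-element form [low, high, N]: the GridsearchSolver added later in this patch reads data[2] as the number of grid samples per axis. A minimal sketch of the discretization this implies (uniform_axis is a hypothetical helper mirroring get_uniform_axis_sample in hyppopy/Solver/GridsearchSolver.py below):

import numpy as np

def uniform_axis(low, high, n_samples):
    # evenly spaced grid over [low, high], as the gridsearch uniform axis sampling does
    return list(np.linspace(low, high, n_samples))

# "C": {"domain": "uniform", "data": [0.0001, 20, 20], "type": "float"}
# yields 20 grid candidates for C:
print(uniform_axis(0.0001, 20, 20))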
config = { "hyperparameter": { "C": { "domain": "uniform", - "data": [0, 20], + "data": [0.0001, 20, 20], "type": "float" }, "gamma": { "domain": "uniform", - "data": [0.0001, 20.0], + "data": [0.0001, 20.0, 20], "type": "float" }, "kernel": { "domain": "categorical", "data": ["linear", "sigmoid", "poly", "rbf"], "type": "str" }, "decision_function_shape": { "domain": "categorical", "data": ["ovo", "ovr"], "type": "str" } }, "settings": { - "solver_plugin": { + "solver": { "max_iterations": 300, - "use_plugin" : "hyperopt", + "plugin": "gridsearch", "output_dir": os.path.join(tempfile.gettempdir(), 'results') }, "custom": { "the_answer": 42 } }} -if hp.ProjectManager.set_config(config): - print("Valid config dict set!") -else: - print("Invalid config dict!") +project = HyppopyProject(config=config) print("--------------------------------------------------------------") -print("max_iterations:\t{}".format(hp.ProjectManager.max_iterations)) -print("use_plugin:\t{}".format(hp.ProjectManager.use_plugin)) -print("output_dir:\t{}".format(hp.ProjectManager.output_dir)) -print("the_answer:\t{}".format(hp.ProjectManager.the_answer)) - +print("max_iterations:\t{}".format(project.solver_max_iterations)) +print("plugin:\t{}".format(project.solver_plugin)) +print("output_dir:\t{}".format(project.solver_output_dir)) +print("the_answer:\t{}".format(project.custom_the_answer)) -# ### Define the problem -# -# We define a blackbox function with the signature func(data, params). The first parameter data is whatever we tell the solver later. -# So we are free in defining the type of data we want to give our blackbox function. However, the parameter params is fixed and of type -# dict. Each iteration, the solver will create a sample of each of the hyperparameter defined via the config set and throw it into our -# blackbox function. -# E.g. in our case above params in one round could look like {"C": 0.3, "gamma": 2.8, "kernel": "poly", "decision_function_shape", "ovo"}. -from sklearn.svm import SVC -from sklearn.model_selection import cross_val_score - -def my_blackbox_function(data, params): +def my_loss_function(data, params): clf = SVC(**params) return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean() -# ### Feeding the Solver -# -# Now everything is prepared to set up the solver. First we request the solver from the SolverFactory which assembled a solver from the -# plugin parts we specified via the use_plugin parameter. Then we only need to set the our blackbox function and the data. - -solver = hp.SolverFactory.get_solver() -solver.set_loss_function(my_blackbox_function) -solver.set_data(input_data) - - -# ### Start the Solver and get the results - -print("\nStart optimization...") -start = time.process_time() +def my_dataloader_function(**kwargs): + print("Dataloading...") + # kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input. + print("my loading argument: {}".format(kwargs['params']['my_dataloader_input'])) + iris_data = load_iris() + return [iris_data.data, iris_data.target] + + +def my_preprocess_function(**kwargs): + print("Preprocessing...") + # kwargs['data'] allows accessing the input data + print("data:", kwargs['data'][0].shape, kwargs['data'][1].shape) + # kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input. 
+ print("kwargs['params']['my_preproc_param']={}".format(kwargs['params']['my_preproc_param'])) + # if the preprocessing function returns something, + # the input data will be replaced with the data returned by this function. + x = kwargs['data'][0] + y = kwargs['data'][1] + for i in range(x.shape[0]): + x[i, :] += kwargs['params']['my_preproc_param'] + return [x, y] + + +def my_callback_function(**kwargs): + print("\r{}".format(kwargs), end="") + + +blackbox = BlackboxFunction(blackbox_func=my_loss_function, + dataloader_func=my_dataloader_function, + preprocess_func=my_preprocess_function, + callback_func=my_callback_function, + #data=input_data, # data can be set directly or via a dataloader function + my_preproc_param=1, + my_dataloader_input='could/be/a/path') + + +if project.solver_plugin == "hyperopt": + solver = HyperoptSolver(project) +elif project.solver_plugin == "optunity": + solver = OptunitySolver(project) +elif project.solver_plugin == "randomsearch": + solver = RandomsearchSolver(project) +elif project.solver_plugin == "gridsearch": + solver = GridsearchSolver(project) + +if solver is not None: + solver.blackbox = blackbox solver.run() -end = time.process_time() -print("Finished optimization!\n") -print("Total Time: {}s\n".format(end-start)) -res, best = solver.get_results() -print("---- Optimal Parameter -----\n") -for p in best.items(): - print(" - {} : {}".format(p[0], p[1])) - -solver.save_results() - diff --git a/hyppopy/BlackboxFunction.py b/hyppopy/BlackboxFunction.py new file mode 100644 index 0000000..0a7bdc6 --- /dev/null +++ b/hyppopy/BlackboxFunction.py @@ -0,0 +1,98 @@ +# DKFZ +# +# +# Copyright (c) German Cancer Research Center, +# Division of Medical and Biological Informatics. +# All rights reserved. +# +# This software is distributed WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# See LICENSE.txt or http://www.mitk.org for details. +# +# Author: Sven Wanner (s.wanner@dkfz.de) + +import os +import logging +import functools +from hyppopy.globals import DEBUGLEVEL + +LOG = logging.getLogger(os.path.basename(__file__)) +LOG.setLevel(DEBUGLEVEL) + + +def default_kwargs(**defaultKwargs): + def actual_decorator(fn): + @functools.wraps(fn) + def g(*args, **kwargs): + defaultKwargs.update(kwargs) + return fn(*args, **defaultKwargs) + return g + return actual_decorator + + +class BlackboxFunction(object): + + @default_kwargs(blackbox_func=None, dataloader_func=None, preprocess_func=None, callback_func=None, data=None) + def __init__(self, **kwargs): + self._blackbox_func = None + self._preprocess_func = None + self._dataloader_func = None + self._callback_func = None + self._raw_data = None + self._data = None + self.setup(kwargs) + + def __call__(self, **kwargs): + return self.blackbox_func(self.data, kwargs) + + def setup(self, kwargs): + self._blackbox_func = kwargs['blackbox_func'] + self._preprocess_func = kwargs['preprocess_func'] + self._dataloader_func = kwargs['dataloader_func'] + self._callback_func = kwargs['callback_func'] + self._raw_data = kwargs['data'] + self._data = self._raw_data + del kwargs['blackbox_func'] + del kwargs['preprocess_func'] + del kwargs['dataloader_func'] + del kwargs['data'] + params = kwargs + + if self.dataloader_func is not None: + self._raw_data = self.dataloader_func(params=params) + assert self._raw_data is not None, "Missing data exception!" + assert self.blackbox_func is not None, "Missing blackbox fucntion exception!" 
+ if self.preprocess_func is not None: + result = self.preprocess_func(data=self._raw_data, params=params) + if result is not None: + self._data = result + else: + self._data = self._raw_data + else: + self._data = self._raw_data + + @property + def blackbox_func(self): + return self._blackbox_func + + @property + def preprocess_func(self): + return self._preprocess_func + + @property + def dataloader_func(self): + return self._dataloader_func + + @property + def callback_func(self): + return self._callback_func + + @property + def raw_data(self): + return self._raw_data + + @property + def data(self): + return self._data diff --git a/hyppopy/HyppopyProject.py b/hyppopy/HyppopyProject.py new file mode 100644 index 0000000..0446aa3 --- /dev/null +++ b/hyppopy/HyppopyProject.py @@ -0,0 +1,74 @@ +# DKFZ +# +# +# Copyright (c) German Cancer Research Center, +# Division of Medical and Biological Informatics. +# All rights reserved. +# +# This software is distributed WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# See LICENSE.txt or http://www.mitk.org for details. +# +# Author: Sven Wanner (s.wanner@dkfz.de) + +from .globals import * + +LOG = logging.getLogger(os.path.basename(__file__)) +LOG.setLevel(DEBUGLEVEL) + + +class HyppopyProject(object): + + def __init__(self, config=None): + self._hyperparameter = None + self._settings = None + self._extmembers = [] + if config is not None: + self.set_config(config) + + def clear(self): + self._hyperparameter = None + self._settings = None + for added in self._extmembers: + if added in self.__dict__.keys(): + del self.__dict__[added] + self._extmembers = [] + + def set_config(self, config): + self.clear() + assert isinstance(config, dict), "Input Error, config of type {} not supported!".format(type(config)) + assert HYPERPARAMETERPATH in config.keys(), "Missing hyperparameter section in config dict" + assert SETTINGSPATH in config.keys(), "Missing settings section in config dict" + self._hyperparameter = config[HYPERPARAMETERPATH] + self._settings = config[SETTINGSPATH] + self.parse_members() + + def parse_members(self): + for section_name, content in self.settings.items(): + for name, value in content.items(): + member_name = section_name + "_" + name + setattr(self, member_name, value) + self._extmembers.append(member_name) + + def get_typeof(self, hyperparametername): + if not hyperparametername in self.hyperparameter.keys(): + return None + dtype = self.hyperparameter[hyperparametername]["type"] + if dtype == 'str': + return str + if dtype == 'int': + return int + if dtype == 'float': + return float + + @property + def hyperparameter(self): + return self._hyperparameter + + @property + def settings(self): + return self._settings + + diff --git a/hyppopy/singleton.py b/hyppopy/Singleton.py similarity index 100% rename from hyppopy/singleton.py rename to hyppopy/Singleton.py diff --git a/hyppopy/Solver/GridsearchSolver.py b/hyppopy/Solver/GridsearchSolver.py new file mode 100644 index 0000000..382be65 --- /dev/null +++ b/hyppopy/Solver/GridsearchSolver.py @@ -0,0 +1,466 @@ +# DKFZ +# +# +# Copyright (c) German Cancer Research Center, +# Division of Medical and Biological Informatics. +# All rights reserved. +# +# This software is distributed WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# See LICENSE.txt or http://www.mitk.org for details. 
+#
+# Author: Sven Wanner (s.wanner@dkfz.de)
+
+import os
+import copy
+import logging
+import datetime
+import numpy as np
+from hyperopt import Trials
+from scipy.stats import norm
+from itertools import product
+from hyppopy.globals import DEBUGLEVEL
+from .HyppopySolver import HyppopySolver
+
+LOG = logging.getLogger(os.path.basename(__file__))
+LOG.setLevel(DEBUGLEVEL)
+
+
+def get_uniform_axis_sample(a, b, N, dtype):
+    """
+    returns a uniform sample x(n) in the range [a,b] sampled at N points
+    :param a: left value range bound
+    :param b: right value range bound
+    :param N: discretization of interval [a,b]
+    :param dtype: data type
+    :return: [list] axis range
+    """
+    assert a < b, "condition a < b violated!"
+    assert isinstance(N, int), "condition N of type int violated!"
+    assert isinstance(dtype, str), "condition dtype of type str violated!"
+    if dtype == "int":
+        return list(np.linspace(a, b, N).astype(int))
+    elif dtype == "float" or dtype == "double":
+        return list(np.linspace(a, b, N))
+    else:
+        raise AssertionError("dtype {} not supported for uniform sampling!".format(dtype))
+
+
+def get_norm_cdf(N):
+    """
+    returns a normed gaussian cdf (range [0,1]) with N sampling points
+    :param N: sampling points
+    :return: [ndarray] gaussian cdf function values
+    """
+    assert isinstance(N, int), "condition N of type int violated!"
+    even = True
+    if N % 2 != 0:
+        N -= 1
+        even = False
+    N = int(N/2)
+    sigma = 1/3
+    x = np.linspace(0, 1, N)
+    y1 = norm.cdf(x, loc=0, scale=sigma)-0.5
+    if not even:
+        y1 = np.append(y1, [0.5])
+    y2 = 1-(norm.cdf(x, loc=0, scale=sigma)-0.5)
+    y2 = np.flip(y2, axis=0)
+    y = np.concatenate((y1, y2), axis=0)
+    return y
+
+
+def get_gaussian_axis_sample(a, b, N, dtype):
+    """
+    returns a function value f(n) where f is a gaussian cdf in range [a, b] with N sampling points
+    :param a: left value range bound
+    :param b: right value range bound
+    :param N: discretization of interval [a,b]
+    :param dtype: data type
+    :return: [list] axis range
+    """
+    assert a < b, "condition a < b violated!"
+    assert isinstance(N, int), "condition N of type int violated!"
+    assert isinstance(dtype, str), "condition dtype of type str violated!"
+
+    data = []
+    for n in range(N):
+        x = a + get_norm_cdf(N)[n]*(b-a)
+        if dtype == "int":
+            data.append(int(x))
+        elif dtype == "float" or dtype == "double":
+            data.append(x)
+        else:
+            raise AssertionError("dtype {} not supported for gaussian sampling!".format(dtype))
+    return data
+
+
+def get_logarithmic_axis_sample(a, b, N, dtype):
+    """
+    returns a function value f(n) where f is a logarithmic function e^x sampling
+    the exponent range [log(a), log(b)] linearly at N sampling points.
+    The function values returned are in the range [a, b].
+    :param a: left value range bound
+    :param b: right value range bound
+    :param N: discretization of interval [a,b]
+    :param dtype: data type
+    :return: [list] axis range
+    """
+    assert a < b, "condition a < b violated!"
+    assert isinstance(N, int), "condition N of type int violated!"
+    assert isinstance(dtype, str), "condition dtype of type str violated!"
+    lexp = np.log(a)
+    rexp = np.log(b)
+    exp_range = np.linspace(lexp, rexp, N)
+
+    data = []
+    for n in range(exp_range.shape[0]):
+        x = np.exp(exp_range[n])
+        if dtype == "int":
+            data.append(int(x))
+        elif dtype == "float" or dtype == "double":
+            data.append(x)
+        else:
+            raise AssertionError("dtype {} not supported for logarithmic sampling!".format(dtype))
+    return data
+
+
+class GridsearchSolver(HyppopySolver):
+
+    def __init__(self, project=None):
+        HyppopySolver.__init__(self, project)
+        self._tid = None
+
+    def loss_function(self, params):
+        loss = None
+        vals = {}
+        idx = {}
+        for key, value in params.items():
+            vals[key] = [value]
+            idx[key] = [self._tid]
+        trial = {'tid': self._tid,
+                 'result': {'loss': None, 'status': 'ok'},
+                 'misc': {
+                     'tid': self._tid,
+                     'idxs': idx,
+                     'vals': vals
+                 },
+                 'book_time': datetime.datetime.now(),
+                 'refresh_time': None
+                 }
+        try:
+            loss = self.blackbox(**params)
+            if loss is None:
+                trial['result']['loss'] = np.nan
+                trial['result']['status'] = 'failed'
+            else:
+                trial['result']['loss'] = loss
+        except Exception as e:
+            LOG.error("execution of self.blackbox(**params) failed due to:\n {}".format(e))
+            trial['result']['loss'] = np.nan
+            trial['result']['status'] = 'failed'
+        trial['refresh_time'] = datetime.datetime.now()
+        self._trials.trials.append(trial)
+        if self.blackbox.callback_func is not None:
+            cbd = copy.deepcopy(params)
+            cbd['iterations'] = self._tid + 1
+            cbd['loss'] = loss
+            cbd['status'] = trial['result']['status']
+            self.blackbox.callback_func(**cbd)
+        return
+
+    def execute_solver(self, searchspace):
+        self._tid = 0
+        self._trials = Trials()
+
+        for x in product(*searchspace[1]):
+            params = {}
+            for name, value in zip(searchspace[0], x):
+                params[name] = value
+            try:
+                self.loss_function(params)
+                self._tid += 1
+            except Exception as e:
+                msg = "internal error in gridsearch execute_solver occurred. {}".format(e)
+                LOG.error(msg)
+                raise BrokenPipeError(msg)
+        self.best = self._trials.argmin
+
+    def convert_searchspace(self, hyperparameter):
+        searchspace = [[], []]
+        for name, param in hyperparameter.items():
+            if param["domain"] == "categorical":
+                searchspace[0].append(name)
+                searchspace[1].append(param["data"])
+            elif param["domain"] == "uniform":
+                searchspace[0].append(name)
+                searchspace[1].append(get_uniform_axis_sample(param["data"][0], param["data"][1], param["data"][2], param["type"]))
+            elif param["domain"] == "normal":
+                searchspace[0].append(name)
+                searchspace[1].append(get_gaussian_axis_sample(param["data"][0], param["data"][1], param["data"][2], param["type"]))
+            elif param["domain"] == "loguniform":
+                searchspace[0].append(name)
+                searchspace[1].append(get_logarithmic_axis_sample(param["data"][0], param["data"][1], param["data"][2], param["type"]))
+        return searchspace
+
+
+
+
+# def get_uniform_axis_sample(n, a, b, N):
+#     """
+#     returns a uniform sample x(n) in the range [a,b] sampled at N points
+#     :param n: input position within range [0,N]
+#     :param a: left value range bound
+#     :param b: right value range bound
+#     :param N: discretization of interval [a,b]
+#     :return: [float] x(n)
+#     """
+#     assert a < b, "condition a < b violated!"
+#     assert n >= 0, "condition n >= 0 violated!"
+#     assert n < N, "condition n < N violated!"
+#     assert isinstance(n, int), "condition n of type int violated!"
+#     assert isinstance(N, int), "condition N of type int violated!"
+# return np.linspace(a, b, N)[n] +# +# +# def get_norm_cdf(N): +# """ +# returns a normed gaussian cdf (range [0,1]) with N sampling points +# :param N: sampling points +# :return: [ndarray] gaussian cdf function values +# """ +# assert isinstance(N, int), "condition N of type int violated!" +# even = True +# if N % 2 != 0: +# N -= 1 +# even = False +# N = int(N/2) +# sigma = 1/3 +# x = np.linspace(0, 1, N) +# y1 = norm.cdf(x, loc=0, scale=sigma)-0.5 +# if not even: +# y1 = np.append(y1, [0.5]) +# y2 = 1-(norm.cdf(x, loc=0, scale=sigma)-0.5) +# y2 = np.flip(y2, axis=0) +# y = np.concatenate((y1, y2), axis=0) +# return y +# +# +# def get_gaussian_axis_sample(n, a, b, N): +# """ +# returns a function value f(n) where f is a gaussian cdf in range [a, b] and N sampling points +# :param n: input position within range [0,N] +# :param a: left value range bound +# :param b: right value range bound +# :param N: discretization of intervall [a,b] +# :return: [float] f(n) +# """ +# assert a < b, "condition a < b violated!" +# assert n >= 0, "condition n >= 0 violated!" +# assert n < N, "condition n < N violated!" +# assert isinstance(n, int), "condition n of type int violated!" +# assert isinstance(N, int), "condition N of type int violated!" +# return a + get_norm_cdf(N)[n]*(b-a) +# +# +# def get_logarithmic_axis_sample(n, a, b, N): +# """ +# returns a function value f(n) where f is logarithmic function e^x sampling +# the exponent range [log(a), log(b)] linear at N sampling points. +# The function values returned are in the range [a, b]. +# :param n: sampling point [0, N-1] +# :param a: left range bound +# :param b: right range bound +# :param N: discretization of intervall [log(a),log(b)] +# :return: [float] f(x) +# """ +# assert a < b, "condition a < b violated!" +# assert n >= 0, "condition n >= 0 violated!" +# assert n < N, "condition n < N violated!" +# assert isinstance(n, int), "condition n of type int violated!" +# assert isinstance(N, int), "condition N of type int violated!" +# lexp = np.log(a) +# rexp = np.log(b) +# exp_range = np.linspace(lexp, rexp, N) +# return np.exp(exp_range[n]) +# +# +# class GridAxis(object): +# _data = None +# _name = None +# _type = None +# _domain = None +# _sampling = None +# _is_categorical = False +# _current_pos = 0 +# +# def __init__(self, name, param): +# self._name = name +# self._domain = param["domain"] +# self.data = param["data"] +# self.type = param["type"] +# if param["domain"] == "categorical": +# self._is_categorical = True +# +# def elems_left(self): +# return self._sampling - self._current_pos - 1 +# +# def increment(self): +# self._current_pos += 1 +# if self._current_pos > self._sampling - 1: +# self._current_pos = 0 +# +# def get_value(self): +# if self._domain == "categorical": +# return self.data[self._current_pos] +# elif self._domain == "uniform": +# return get_uniform_axis_sample(self._current_pos, self.data[0], self.data[1], self._sampling) +# elif self._domain == "normal": +# return get_gaussian_axis_sample(self._current_pos, self.data[0], self.data[1], self._sampling) +# elif self._domain == "loguniform": +# return get_logarithmic_axis_sample(self._current_pos, self.data[0], self.data[1], self._sampling) +# +# @property +# def name(self): +# return self._name +# +# @property +# def data(self): +# return self._data +# +# @data.setter +# def data(self, value): +# if self._domain == "categorical": +# assert len(value) > 0, "Precondition violation, empty data cannot be handled!" 
+# self._data = value +# self._sampling = len(value) +# else: +# assert len(value) == 3, "precondition violation, gridsearch axis needs low, high and sampling value!" +# self._data = value[0:2] +# self._sampling = value[2] +# +# @property +# def sampling(self): +# return self._sampling +# +# @property +# def type(self): +# return self._type +# +# @type.setter +# def type(self, value): +# assert isinstance(value, str), "precondition violation, value expects a str!" +# if value == "str": +# self._type = str +# elif value == "int": +# self._type = int +# if value == "float" or value == "double": +# self._type = float +# +# +# class GridSampler(object): +# +# def __init__(self): +# self._axis = [] +# self._loops = [] +# +# def get_gridsize(self): +# n = 1 +# for ax in self._axis: +# n *= ax.sampling +# return n +# +# def add_axis(self, axis): +# self._axis.append(axis) +# self.update_loops() +# +# def update_loops(self): +# if len(self._axis) == 1: +# self._loops.append(1) +# else: +# lens = [] +# for ax in self._axis: +# lens.append(ax.sampling) +# self._loops.append(np.cumprod(lens)) +# +# def get_sample(self): +# sample = [] +# for ax in self._axis: +# sample.append(ax.get_value()) +# return sample +# +# +# class GridsearchSolver(HyppopySolver): +# +# def __init__(self, project=None): +# HyppopySolver.__init__(self, project) +# self._tid = None +# +# def loss_function(self, params): +# loss = None +# vals = {} +# idx = {} +# for key, value in params.items(): +# vals[key] = [value] +# idx[key] = [self._tid] +# trial = {'tid': self._tid, +# 'result': {'loss': None, 'status': 'ok'}, +# 'misc': { +# 'tid': self._tid, +# 'idxs': idx, +# 'vals': vals +# }, +# 'book_time': datetime.datetime.now(), +# 'refresh_time': None +# } +# try: +# loss = self.blackbox(**params) +# if loss is None: +# trial['result']['loss'] = np.nan +# trial['result']['status'] = 'failed' +# else: +# trial['result']['loss'] = loss +# except Exception as e: +# LOG.error("execution of self.blackbox(**params) failed due to:\n {}".format(e)) +# trial['result']['loss'] = np.nan +# trial['result']['status'] = 'failed' +# trial['refresh_time'] = datetime.datetime.now() +# self._trials.trials.append(trial) +# if self.blackbox.callback_func is not None: +# cbd = copy.deepcopy(params) +# cbd['iterations'] = self._tid + 1 +# cbd['loss'] = loss +# cbd['status'] = trial['result']['status'] +# self.blackbox.callback_func(**cbd) +# return +# +# def execute_solver(self, searchspace): +# self._tid = 0 +# self._trials = Trials() +# +# while True: +# params = {} +# for axis in searchspace: +# params[axis.name] = axis.next() +# if params[axis.name] is None: +# break +# try: +# self.loss_function(params) +# self._tid += 1 +# except Exception as e: +# msg = "internal error in randomsearch execute_solver occured. 
{}".format(e) +# LOG.error(msg) +# raise BrokenPipeError(msg) +# self.best = self._trials.argmin +# +# def convert_searchspace(self, hyperparameter): +# searchspace = [] +# for name, param in hyperparameter.items(): +# if param["domain"] != "categorical": +# searchspace.append(GridAxis(name, param)) +# for name, param in hyperparameter.items(): +# if param["domain"] == "categorical": +# searchspace.append(GridAxis(name, param)) +# searchspace[-1].is_looping = False +# return searchspace diff --git a/hyppopy/Solver/HyperoptSolver.py b/hyppopy/Solver/HyperoptSolver.py new file mode 100644 index 0000000..f49d623 --- /dev/null +++ b/hyppopy/Solver/HyperoptSolver.py @@ -0,0 +1,141 @@ +# DKFZ +# +# +# Copyright (c) German Cancer Research Center, +# Division of Medical and Biological Informatics. +# All rights reserved. +# +# This software is distributed WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# See LICENSE.txt or http://www.mitk.org for details. +# +# Author: Sven Wanner (s.wanner@dkfz.de) + +import os +import copy +import logging +import numpy as np +from pprint import pformat +from hyperopt import fmin, tpe, hp, STATUS_OK, STATUS_FAIL, Trials + +from hyppopy.globals import DEBUGLEVEL +from .HyppopySolver import HyppopySolver + +LOG = logging.getLogger(os.path.basename(__file__)) +LOG.setLevel(DEBUGLEVEL) + + +class HyperoptSolver(HyppopySolver): + + def __init__(self, project=None): + HyppopySolver.__init__(self, project) + + def loss_function(self, params): + status = STATUS_FAIL + try: + loss = self.blackbox(**params) + if loss is not None: + status = STATUS_OK + else: + loss = 1e9 + except Exception as e: + LOG.error("execution of self.blackbox(**params) failed due to:\n {}".format(e)) + status = STATUS_FAIL + loss = 1e9 + if self.blackbox.callback_func is not None: + cbd = copy.deepcopy(params) + cbd['iterations'] = self._trials.trials[-1]['tid'] + 1 + cbd['loss'] = loss + cbd['status'] = status + self.blackbox.callback_func(**cbd) + return {'loss': loss, 'status': status} + + def execute_solver(self, searchspace): + LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace))) + self.trials = Trials() + + try: + self.best = fmin(fn=self.loss_function, + space=searchspace, + algo=tpe.suggest, + max_evals=self.max_iterations, + trials=self.trials) + except Exception as e: + msg = "internal error in hyperopt.fmin occured. 
{}".format(e) + LOG.error(msg) + raise BrokenPipeError(msg) + + def convert_searchspace(self, hyperparameter): + + solution_space = {} + for name, content in hyperparameter.items(): + param_settings = {'name': name} + for key, value in content.items(): + if key == 'domain': + param_settings['domain'] = value + elif key == 'data': + param_settings['data'] = value + elif key == 'type': + param_settings['dtype'] = value + solution_space[name] = self.convert(param_settings) + return solution_space + + def convert(self, param_settings): + name = param_settings["name"] + domain = param_settings["domain"] + dtype = param_settings["dtype"] + data = param_settings["data"] + + if domain == "uniform": + if dtype == "float" or dtype == "double": + return hp.uniform(name, data[0], data[1]) + elif dtype == "int": + data = list(np.arange(int(data[0]), int(data[1] + 1))) + return hp.choice(name, data) + else: + msg = "cannot convert the type {} in domain {}".format(dtype, domain) + LOG.error(msg) + raise LookupError(msg) + elif domain == "loguniform": + if dtype == "float" or dtype == "double": + if data[0] == 0: + data[0] += 1e-23 + assert data[0] > 0, "Precondition Violation, a < 0!" + assert data[0] < data[1], "Precondition Violation, a > b!" + assert data[1] > 0, "Precondition Violation, b < 0!" + lexp = np.log(data[0]) + rexp = np.log(data[1]) + assert lexp is not np.nan, "Precondition violation, left bound input error, results in nan!" + assert rexp is not np.nan, "Precondition violation, right bound input error, results in nan!" + + return hp.loguniform(name, lexp, rexp) + else: + msg = "cannot convert the type {} in domain {}".format(dtype, domain) + LOG.error(msg) + raise LookupError(msg) + elif domain == "normal": + if dtype == "float" or dtype == "double": + mu = (data[1] - data[0]) / 2.0 + sigma = mu / 3 + return hp.normal(name, data[0] + mu, sigma) + else: + msg = "cannot convert the type {} in domain {}".format(dtype, domain) + LOG.error(msg) + raise LookupError(msg) + elif domain == "categorical": + if dtype == 'str': + return hp.choice(name, data) + elif dtype == 'bool': + data = [] + for elem in data: + if elem == "true" or elem == "True" or elem == 1 or elem == "1": + data.append(True) + elif elem == "false" or elem == "False" or elem == 0 or elem == "0": + data.append(False) + else: + msg = "cannot convert the type {} in domain {}, unknown bool type value".format(dtype, domain) + LOG.error(msg) + raise LookupError(msg) + return hp.choice(name, data) diff --git a/hyppopy/Solver/HyppopySolver.py b/hyppopy/Solver/HyppopySolver.py new file mode 100644 index 0000000..ac95f00 --- /dev/null +++ b/hyppopy/Solver/HyppopySolver.py @@ -0,0 +1,195 @@ +# DKFZ +# +# +# Copyright (c) German Cancer Research Center, +# Division of Medical and Biological Informatics. +# All rights reserved. +# +# This software is distributed WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# See LICENSE.txt or http://www.mitk.org for details. 
+#
+# Author: Sven Wanner (s.wanner@dkfz.de)
+
+import abc
+
+import os
+import types
+import logging
+import datetime
+import numpy as np
+import pandas as pd
+from ..globals import DEBUGLEVEL, DEFAULTITERATIONS
+from ..HyppopyProject import HyppopyProject
+from ..BlackboxFunction import BlackboxFunction
+
+LOG = logging.getLogger(os.path.basename(__file__))
+LOG.setLevel(DEBUGLEVEL)
+
+
+class HyppopySolver(object):
+
+    def __init__(self, project=None):
+        self._best = None
+        self._trials = None
+        self._blackbox = None
+        self._max_iterations = None
+        self._project = project
+        self._total_duration = None
+
+    @abc.abstractmethod
+    def execute_solver(self, searchspace):
+        raise NotImplementedError('users must define execute_solver to use this class')
+
+    # @abc.abstractmethod
+    # def convert_results(self):
+    #     raise NotImplementedError('users must define convert_results to use this class')
+
+    @abc.abstractmethod
+    def convert_searchspace(self, hyperparameter):
+        raise NotImplementedError('users must define convert_searchspace to use this class')
+
+    def run(self, print_stats=True):
+        if 'solver_max_iterations' not in self.project.__dict__:
+            msg = "Missing max_iteration entry in project, using default {}!".format(DEFAULTITERATIONS)
+            LOG.warning(msg)
+            print("WARNING: {}".format(msg))
+            setattr(self.project, 'solver_max_iterations', DEFAULTITERATIONS)
+        self._max_iterations = self.project.solver_max_iterations
+
+        start_time = datetime.datetime.now()
+        try:
+            self.execute_solver(self.convert_searchspace(self.project.hyperparameter))
+        except Exception as e:
+            raise e
+        end_time = datetime.datetime.now()
+        dt = end_time - start_time
+        days = divmod(dt.total_seconds(), 86400)
+        hours = divmod(days[1], 3600)
+        minutes = divmod(hours[1], 60)
+        seconds = divmod(minutes[1], 1)
+        milliseconds = divmod(seconds[1], 0.001)
+        self._total_duration = [int(days[0]), int(hours[0]), int(minutes[0]), int(seconds[0]), int(milliseconds[0])]
+        if print_stats:
+            self.print_best()
+            self.print_timestats()
+
+    def convert_results(self):
+        results = {'duration': [], 'losses': []}
+        pset = self.trials.trials[0]['misc']['vals']
+        for p in pset.keys():
+            results[p] = []
+
+        for n, trial in enumerate(self.trials.trials):
+            t1 = trial['book_time']
+            t2 = trial['refresh_time']
+            results['duration'].append((t2 - t1).total_seconds() * 1000.0)
+            results['losses'].append(trial['result']['loss'])
+            losses = np.array(results['losses'])
+            results['losses'] = list(losses)
+            pset = trial['misc']['vals']
+            for p in pset.items():
+                results[p[0]].append(p[1][0])
+        return pd.DataFrame.from_dict(results), self.best
+
+    def print_best(self):
+        print("\n")
+        print("#" * 40)
+        print("### Best Parameter Choice ###")
+        print("#" * 40)
+        for name, value in self.best.items():
+            print(" - {}\t:\t{}".format(name, value))
+        print("#" * 40)
+
+    def print_timestats(self):
+        dts = []
+        # total runtime in milliseconds, matching the per-trial overhead accounting below
+        tot = self._total_duration[0]*86400000 + \
+              self._total_duration[1]*3600000 + \
+              self._total_duration[2]*60000 + \
+              self._total_duration[3]*1000 + \
+              self._total_duration[4]
+        overhead = tot
+        for trial in self._trials.trials:
+            if 'book_time' in trial.keys() and 'refresh_time' in trial.keys():
+                dt = trial['refresh_time'] - trial['book_time']
+                dts.append(dt.total_seconds())
+                overhead -= dt.total_seconds()*1000.0
+        print("\n")
+        print("#" * 40)
+        print("### Timing Statistics ###")
+        print("#" * 40)
+        per_iter = int(np.mean(dts)*1e6)/1000.0
+        print(" - per iteration: {}ms".format(per_iter))
+        print(" - total time: 
{}d:{}h:{}m:{}s:{}ms".format(self._total_duration[0], + self._total_duration[1], + self._total_duration[2], + self._total_duration[3], + self._total_duration[4])) + print(" - overhead: {}%".format(int(np.round(100.0/tot*overhead)))) + print("#" * 40) + + @property + def project(self): + return self._project + + @project.setter + def project(self, value): + if not isinstance(value, HyppopyProject): + msg = "Input error, project_manager of type: {} not allowed!".format(type(value)) + LOG.error(msg) + raise IOError(msg) + self._project = value + + @property + def blackbox(self): + return self._blackbox + + @blackbox.setter + def blackbox(self, value): + if isinstance(value, types.FunctionType) or isinstance(value, BlackboxFunction): + self._blackbox = value + else: + self._blackbox = None + msg = "Input error, blackbox of type: {} not allowed!".format(type(value)) + LOG.error(msg) + raise IOError(msg) + + @property + def best(self): + return self._best + + @best.setter + def best(self, value): + if not isinstance(value, dict): + msg = "Input error, best of type: {} not allowed!".format(type(value)) + LOG.error(msg) + raise IOError(msg) + self._best = value + + @property + def trials(self): + return self._trials + + @trials.setter + def trials(self, value): + self._trials = value + + @property + def max_iterations(self): + return self._max_iterations + + @max_iterations.setter + def max_iterations(self, value): + if not isinstance(value, int): + msg = "Input error, max_iterations of type: {} not allowed!".format(type(value)) + LOG.error(msg) + raise IOError(msg) + if value < 1: + msg = "Precondition violation, max_iterations < 1!" + LOG.error(msg) + raise IOError(msg) + self._max_iterations = value diff --git a/hyppopy/Solver/OptunitySolver.py b/hyppopy/Solver/OptunitySolver.py new file mode 100644 index 0000000..b838e62 --- /dev/null +++ b/hyppopy/Solver/OptunitySolver.py @@ -0,0 +1,117 @@ +# DKFZ +# +# +# Copyright (c) German Cancer Research Center, +# Division of Medical and Biological Informatics. +# All rights reserved. +# +# This software is distributed WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# See LICENSE.txt or http://www.mitk.org for details. 
+# +# Author: Sven Wanner (s.wanner@dkfz.de) + +import os +import copy +import logging +import optunity +import datetime +import numpy as np +from pprint import pformat +from hyperopt import Trials +from hyppopy.globals import DEBUGLEVEL + +LOG = logging.getLogger(os.path.basename(__file__)) +LOG.setLevel(DEBUGLEVEL) + +from .HyppopySolver import HyppopySolver +from ..helpers import split_categorical + + +class OptunitySolver(HyppopySolver): + + def __init__(self, project=None): + HyppopySolver.__init__(self, project) + self._solver_info = None + self.opt_trials = None + self._idx = None + + def loss_function(self, **params): + self._idx += 1 + vals = {} + idx = {} + for key, value in params.items(): + vals[key] = [value] + idx[key] = [self._idx] + trial = {'tid': self._idx, + 'result': {'loss': None, 'status': 'ok'}, + 'misc': { + 'tid': self._idx, + 'idxs': idx, + 'vals': vals + }, + 'book_time': datetime.datetime.now(), + 'refresh_time': None + } + try: + for key in params.keys(): + if self.project.get_typeof(key) is int: + params[key] = int(round(params[key])) + loss = self.blackbox(**params) + trial['result']['loss'] = loss + trial['result']['status'] = 'ok' + except Exception as e: + LOG.error("computing loss failed due to:\n {}".format(e)) + loss = np.nan + trial['result']['loss'] = np.nan + trial['result']['status'] = 'failed' + trial['refresh_time'] = datetime.datetime.now() + self._trials.trials.append(trial) + if self.blackbox.callback_func is not None: + cbd = copy.deepcopy(params) + cbd['iterations'] = self._idx + cbd['loss'] = loss + cbd['status'] = trial['result']['status'] + self.blackbox.callback_func(**cbd) + return loss + + def execute_solver(self, searchspace): + LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace))) + self.trials = Trials() + self._idx = 0 + try: + self.best, self.opt_trials, self._solver_info = optunity.minimize_structured(f=self.loss_function, + num_evals=self.max_iterations, + search_space=searchspace) + except Exception as e: + LOG.error("internal error in optunity.minimize_structured occured. {}".format(e)) + raise BrokenPipeError("internal error in optunity.minimize_structured occured. {}".format(e)) + + def convert_searchspace(self, hyperparameter): + solution_space = {} + # split input in categorical and non-categorical data + cat, uni = split_categorical(hyperparameter) + # build up dictionary keeping all non-categorical data + uniforms = {} + for key, value in uni.items(): + for key2, value2 in value.items(): + if key2 == 'data': + uniforms[key] = value2 + + if len(cat) == 0: + return uniforms + # build nested categorical structure + inner_level = uniforms + for key, value in cat.items(): + tmp = {} + tmp2 = {} + for key2, value2 in value.items(): + if key2 == 'data': + for elem in value2: + tmp[elem] = inner_level + tmp2[key] = tmp + inner_level = tmp2 + solution_space = tmp2 + return solution_space diff --git a/hyppopy/Solver/RandomsearchSolver.py b/hyppopy/Solver/RandomsearchSolver.py new file mode 100644 index 0000000..2986b02 --- /dev/null +++ b/hyppopy/Solver/RandomsearchSolver.py @@ -0,0 +1,152 @@ +# DKFZ +# +# +# Copyright (c) German Cancer Research Center, +# Division of Medical and Biological Informatics. +# All rights reserved. +# +# This software is distributed WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# See LICENSE.txt or http://www.mitk.org for details. 
+# +# Author: Sven Wanner (s.wanner@dkfz.de) + +import os +import copy +import random +import logging +import datetime +import numpy as np +from pprint import pformat +from hyperopt import Trials +from hyppopy.globals import DEBUGLEVEL +from .HyppopySolver import HyppopySolver + +LOG = logging.getLogger(os.path.basename(__file__)) +LOG.setLevel(DEBUGLEVEL) + + +def draw_uniform_sample(param): + assert param['type'] != 'str', "Cannot sample a string list uniformly!" + assert param['data'][0] < param['data'][1], "Precondition violation: data[0] > data[1]!" + s = random.random() + s *= np.abs(param['data'][1] - param['data'][0]) + s += param['data'][0] + if param['type'] == 'int': + s = int(np.round(s)) + if s < param['data'][0]: + s = int(param['data'][0]) + if s > param['data'][1]: + s = int(param['data'][1]) + return s + + +def draw_normal_sample(param): + mu = (param['data'][1] - param['data'][0]) / 2 + sigma = mu / 3 + s = np.random.normal(loc=param['data'][0] + mu, scale=sigma) + if s > param['data'][1]: + s = param['data'][1] + if s < param['data'][0]: + s = param['data'][0] + return s + + +def draw_loguniform_sample(param): + p = copy.deepcopy(param) + p['data'][0] = np.log(param['data'][0]) + p['data'][1] = np.log(param['data'][1]) + assert p['data'][0] is not np.nan, "Precondition violation, left bound input error, results in nan!" + assert p['data'][1] is not np.nan, "Precondition violation, right bound input error, results in nan!" + x = draw_uniform_sample(p) + s = np.exp(x) + if s > param['data'][1]: + s = param['data'][1] + if s < param['data'][0]: + s = param['data'][0] + return s + + +def draw_categorical_sample(param): + return random.sample(param['data'], 1)[0] + + +def draw_sample(param): + if param['domain'] == "uniform": + return draw_uniform_sample(param) + elif param['domain'] == "normal": + return draw_normal_sample(param) + elif param['domain'] == "loguniform": + return draw_loguniform_sample(param) + elif param['domain'] == "categorical": + return draw_categorical_sample(param) + else: + raise LookupError("Unknown domain {}".format(param['domain'])) + + +class RandomsearchSolver(HyppopySolver): + + def __init__(self, project=None): + HyppopySolver.__init__(self, project) + self._tid = None + + def loss_function(self, params): + loss = None + vals = {} + idx = {} + for key, value in params.items(): + vals[key] = [value] + idx[key] = [self._tid] + trial = {'tid': self._tid, + 'result': {'loss': None, 'status': 'ok'}, + 'misc': { + 'tid': self._tid, + 'idxs': idx, + 'vals': vals + }, + 'book_time': datetime.datetime.now(), + 'refresh_time': None + } + try: + loss = self.blackbox(**params) + if loss is None: + trial['result']['loss'] = np.nan + trial['result']['status'] = 'failed' + else: + trial['result']['loss'] = loss + except Exception as e: + LOG.error("execution of self.blackbox(**params) failed due to:\n {}".format(e)) + trial['result']['loss'] = np.nan + trial['result']['status'] = 'failed' + trial['refresh_time'] = datetime.datetime.now() + self._trials.trials.append(trial) + if self.blackbox.callback_func is not None: + cbd = copy.deepcopy(params) + cbd['iterations'] = self._tid + 1 + cbd['loss'] = loss + cbd['status'] = trial['result']['status'] + self.blackbox.callback_func(**cbd) + return + + def execute_solver(self, searchspace): + self._tid = 0 + self._trials = Trials() + N = self.max_iterations + try: + for n in range(N): + params = {} + for name, p in searchspace.items(): + params[name] = draw_sample(p) + self.loss_function(params) + self._tid += 1 + 
except Exception as e: + msg = "internal error in randomsearch execute_solver occured. {}".format(e) + LOG.error(msg) + raise BrokenPipeError(msg) + self.best = self._trials.argmin + + def convert_searchspace(self, hyperparameter): + LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) + return hyperparameter diff --git a/hyppopy/plugins/__init__.py b/hyppopy/Solver/__init__.py similarity index 100% rename from hyppopy/plugins/__init__.py rename to hyppopy/Solver/__init__.py diff --git a/hyppopy/virtualfunction.py b/hyppopy/VirtualFunction.py similarity index 100% rename from hyppopy/virtualfunction.py rename to hyppopy/VirtualFunction.py diff --git a/hyppopy/__init__.py b/hyppopy/__init__.py index 86a11c3..1c6ff54 100644 --- a/hyppopy/__init__.py +++ b/hyppopy/__init__.py @@ -1,3 +1 @@ -__version__ = '0.1.2dev' -from hyppopy.solverfactory import SolverFactory -from hyppopy.projectmanager import ProjectManager \ No newline at end of file +__version__ = '0.4' \ No newline at end of file diff --git a/hyppopy/deepdict.py b/hyppopy/deepdict.py deleted file mode 100644 index 8c19412..0000000 --- a/hyppopy/deepdict.py +++ /dev/null @@ -1,437 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. -# -# Author: Sven Wanner (s.wanner@dkfz.de) - -import os -import re -import json -import types -import pprint -import xmltodict -from dicttoxml import dicttoxml -from collections import OrderedDict - -import logging -LOG = logging.getLogger('hyppopy') - -from hyppopy.globals import DEEPDICT_XML_ROOT - -def convert_ordered2std_dict(obj): - """ - Helper function converting an OrderedDict into a standard lib dict. - :param obj: [OrderedDict] - """ - for key, value in obj.items(): - if isinstance(value, OrderedDict): - obj[key] = dict(obj[key]) - convert_ordered2std_dict(obj[key]) - - -def check_dir_existance(dirname): - """ - Helper function to check if a directory exists, creating it if not. - :param dirname: [str] full path of the directory to check - """ - if not os.path.exists(dirname): - os.mkdir(dirname) - - -class DeepDict(object): - """ - The DeepDict class represents a nested dictionary with additional functionality compared to a standard - lib dict. The data can be accessed and changed vie a pathlike access and dumped or read to .json/.xml files. - - Initializing instances using defaults creates an empty DeepDict. Using in_data enables to initialize the - object instance with data, where in_data can be a dict, or a filepath to a json or xml file. Using path sep - the appearance of path passing can be changed, a default data access via path would look like my_dd['target/section/path'] with path_sep='.' 
like so my_dd['target.section.path'] - - :param in_data: [dict] or [str], input dict or filename - :param path_sep: [str] path separator character - """ - _data = None - _sep = "/" - - def __init__(self, in_data=None, path_sep="/"): - self.clear() - self._sep = path_sep - LOG.debug("path separator is: {}".format(self._sep)) - if in_data is not None: - if isinstance(in_data, str): - self.from_file(in_data) - elif isinstance(in_data, dict): - self.data = in_data - - def __str__(self): - """ - Enables print output for class instances, printing the instance data dict using pretty print - :return: [str] - """ - return pprint.pformat(self.data) - - def __eq__(self, other): - """ - Overloads the == operator comparing the instance data dictionaries for equality - :param other: [DeepDict] rhs - :return: [bool] - """ - return self.data == other.data - - def __getitem__(self, path): - """ - Overloads the return of the [] operator for data access. This enables access the DeepDict instance like so: - my_dd['target/section/path'] or my_dd[['target','section','path']] - :param path: [str] or [list(str)], the path to the target data structure level/content - :return: [object] - """ - return DeepDict.get_from_path(self.data, path, self.sep) - - def __setitem__(self, path, value=None): - """ - Overloads the setter for the [] operator for data assignment. - :param path: [str] or [list(str)], the path to the target data structure level/content - :param value: [object] rhs assignment object - """ - if isinstance(path, str): - path = path.split(self.sep) - if not isinstance(path, list) or isinstance(path, tuple): - raise IOError("Input Error, expect list[str] type for path") - if len(path) < 1: - raise IOError("Input Error, missing section strings") - - if not path[0] in self._data.keys(): - if value is not None and len(path) == 1: - self._data[path[0]] = value - else: - self._data[path[0]] = {} - - tmp = self._data[path[0]] - path.pop(0) - while True: - if len(path) == 0: - break - if path[0] not in tmp.keys(): - if value is not None and len(path) == 1: - tmp[path[0]] = value - else: - tmp[path[0]] = {} - tmp = tmp[path[0]] - else: - tmp = tmp[path[0]] - path.pop(0) - - def __len__(self): - return len(self._data) - - def items(self): - return self.data.items() - - def clear(self): - """ - clears the instance data - """ - LOG.debug("clear()") - self._data = {} - - def from_file(self, fname): - """ - Loads data from file. Currently implemented .json and .xml file reader. 
- :param fname: [str] filename - """ - if not isinstance(fname, str): - raise IOError("Input Error, expect str type for fname") - if fname.endswith(".json"): - self.read_json(fname) - elif fname.endswith(".xml"): - self.read_xml(fname) - else: - LOG.error("Unknown filetype, expect [.json, .xml]") - raise NotImplementedError("Unknown filetype, expect [.json, .xml]") - - def read_json(self, fname): - """ - Read json file - :param fname: [str] input filename - """ - if not isinstance(fname, str): - raise IOError("Input Error, expect str type for fname") - if not os.path.isfile(fname): - raise IOError("File {} not found!".format(fname)) - LOG.debug("read_json({})".format(fname)) - try: - with open(fname, "r") as read_file: - self._data = json.load(read_file) - DeepDict.value_traverse(self.data, callback=DeepDict.parse_type) - except Exception as e: - LOG.error("Error while reading json file {} or while converting types".format(fname)) - raise IOError("Error while reading json file {} or while converting types".format(fname)) - - def read_xml(self, fname): - """ - Read xml file - :param fname: [str] input filename - """ - if not isinstance(fname, str): - raise IOError("Input Error, expect str type for fname") - if not os.path.isfile(fname): - raise IOError("File {} not found!".format(fname)) - LOG.debug("read_xml({})".format(fname)) - try: - with open(fname, "r") as read_file: - xml = "".join(read_file.readlines()) - self._data = xmltodict.parse(xml, attr_prefix='') - DeepDict.value_traverse(self.data, callback=DeepDict.parse_type) - except Exception as e: - msg = "Error while reading xml file {} or while converting types".format(fname) - LOG.error(msg) - raise IOError(msg) - - # if written with DeepDict, the xml contains a root node called - # deepdict which should beremoved for consistency reasons - if DEEPDICT_XML_ROOT in self._data.keys(): - self._data = self._data[DEEPDICT_XML_ROOT] - self._data = dict(self.data) - # convert the orderes dict structure to a default dict for consistency reasons - convert_ordered2std_dict(self.data) - - def to_file(self, fname): - """ - Write to file, type is determined by checking the filename ending. - Currently implemented is writing to json and to xml. - :param fname: [str] filename - """ - if not isinstance(fname, str): - raise IOError("Input Error, expect str type for fname") - if fname.endswith(".json"): - self.write_json(fname) - elif fname.endswith(".xml"): - self.write_xml(fname) - else: - LOG.error("Unknown filetype, expect [.json, .xml]") - raise NotImplementedError("Unknown filetype, expect [.json, .xml]") - - def write_json(self, fname): - """ - Dump data to json file. - :param fname: [str] filename - """ - if not isinstance(fname, str): - raise IOError("Input Error, expect str type for fname") - check_dir_existance(os.path.dirname(fname)) - try: - LOG.debug("write_json({})".format(fname)) - with open(fname, "w") as write_file: - json.dump(self.data, write_file) - except Exception as e: - LOG.error("Failed dumping to json file: {}".format(fname)) - raise e - - def write_xml(self, fname): - """ - Dump data to json file. 
- :param fname: [str] filename - """ - if not isinstance(fname, str): - raise IOError("Input Error, expect str type for fname") - check_dir_existance(os.path.dirname(fname)) - xml = dicttoxml(self.data, custom_root=DEEPDICT_XML_ROOT, attr_type=False) - LOG.debug("write_xml({})".format(fname)) - try: - with open(fname, "w") as write_file: - write_file.write(xml.decode("utf-8")) - except Exception as e: - LOG.error("Failed dumping to xml file: {}".format(fname)) - raise e - - def has_section(self, section): - return DeepDict.has_key(self.data, section) - - @staticmethod - def get_from_path(data, path, sep="/"): - """ - Implements a nested dict access via a path like string like so path='target/section/path' - which is equivalent to my_dict['target']['section']['path']. - :param data: [dict] input dictionary - :param path: [str] pathlike string - :param sep: [str] path separator, default='/' - :return: [object] - """ - if not isinstance(data, dict): - LOG.error("Input Error, expect dict type for data") - raise IOError("Input Error, expect dict type for data") - if isinstance(path, str): - path = path.split(sep) - if not isinstance(path, list) or isinstance(path, tuple): - LOG.error("Input Error, expect list[str] type for path: {}".format(path)) - raise IOError("Input Error, expect list[str] type for path") - if not DeepDict.has_key(data, path[-1]): - LOG.error("Input Error, section {} does not exist in dictionary".format(path[-1])) - raise IOError("Input Error, section {} does not exist in dictionary".format(path[-1])) - try: - for k in path: - data = data[k] - except Exception as e: - LOG.error("Failed retrieving data from path {} due to {}".format(path, e)) - raise LookupError("Failed retrieving data from path {} due to {}".format(path, e)) - return data - - @staticmethod - def has_key(data, section, already_found=False): - """ - Checks if input dictionary has a key called section. The already_found parameter - is for internal recursion checks. - :param data: [dict] input dictionary - :param section: [str] key string to search for - :param already_found: recursion criteria check - :return: [bool] section found - """ - if not isinstance(data, dict): - LOG.error("Input Error, expect dict type for obj") - raise IOError("Input Error, expect dict type for obj") - if not isinstance(section, str): - LOG.error("Input Error, expect dict type for obj {}".format(section)) - raise IOError("Input Error, expect dict type for obj {}".format(section)) - if already_found: - return True - found = False - for key, value in data.items(): - if key == section: - found = True - if isinstance(value, dict): - found = DeepDict.has_key(data[key], section, found) - return found - - @staticmethod - def value_traverse(data, callback=None): - """ - Dictionary filter function, walks through the input dict (obj) calling the callback function for each value. - The callback function return is assigned the the corresponding dict value. 
- :param data: [dict] input dictionary - :param callback: - """ - if not isinstance(data, dict): - LOG.error("Input Error, expect dict type for obj") - raise IOError("Input Error, expect dict type for obj") - if not isinstance(callback, types.FunctionType): - LOG.error("Input Error, expect function type for callback") - raise IOError("Input Error, expect function type for callback") - for key, value in data.items(): - if isinstance(value, dict): - DeepDict.value_traverse(data[key], callback) - else: - data[key] = callback(value) - - def transfer_attrs(self, cls, target_section): - items_set = [] - - def set_item(item): - items_set.append(item[0]) - setattr(cls, item[0], item[1]) - DeepDict.sectionconstraint_item_traverse(self.data, target_section, callback=set_item, section=None) - return items_set - - @staticmethod - def sectionconstraint_item_traverse(data, target_section, callback=None, section=None): - """ - Dictionary filter function, walks through the input dict (obj) calling the callback function for each item. - The callback function then is called with the key value pair as tuple input but only for the target section. - :param data: [dict] input dictionary - :param callback: - """ - if not isinstance(data, dict): - LOG.error("Input Error, expect dict type for obj") - raise IOError("Input Error, expect dict type for obj") - if not isinstance(callback, types.FunctionType): - LOG.error("Input Error, expect function type for callback") - raise IOError("Input Error, expect function type for callback") - for key, value in data.items(): - if isinstance(value, dict): - DeepDict.sectionconstraint_item_traverse(data[key], target_section, callback, key) - else: - if target_section == section: - callback((key, value)) - - @staticmethod - def item_traverse(data, callback=None): - """ - Dictionary filter function, walks through the input dict (obj) calling the callback function for each item. - The callback function then is called with the key value pair as tuple input. 
- :param data: [dict] input dictionary - :param callback: - """ - if not isinstance(data, dict): - LOG.error("Input Error, expect dict type for obj") - raise IOError("Input Error, expect dict type for obj") - if not isinstance(callback, types.FunctionType): - LOG.error("Input Error, expect function type for callback") - raise IOError("Input Error, expect function type for callback") - for key, value in data.items(): - if isinstance(value, dict): - DeepDict.value_traverse(data[key], callback) - else: - callback((key, value)) - - @staticmethod - def parse_type(string): - """ - Type convert input string to float, int, list, tuple or string - :param string: [str] input string - :return: [T] converted output - """ - try: - a = float(string) - try: - b = int(string) - except ValueError: - return float(string) - if a == b: - return b - return a - except ValueError: - if string.startswith("[") and string.endswith("]"): - string = re.sub(' ', '', string) - elements = string[1:-1].split(",") - li = [] - for e in elements: - li.append(DeepDict.parse_type(e)) - return li - elif string.startswith("(") and string.endswith(")"): - elements = string[1:-1].split(",") - li = [] - for e in elements: - li.append(DeepDict.parse_type(e)) - return tuple(li) - return string - - @property - def data(self): - return self._data - - @data.setter - def data(self, value): - if not isinstance(value, dict): - LOG.error("Input Error, expect dict type for value, but got {}".format(type(value))) - raise IOError("Input Error, expect dict type for value, but got {}".format(type(value))) - self.clear() - self._data = value - - @property - def sep(self): - return self._sep - - @sep.setter - def sep(self, value): - if not isinstance(value, str): - LOG.error("Input Error, expect str type for value, but got {}".format(type(value))) - raise IOError("Input Error, expect str type for value, but got {}".format(type(value))) - self._sep = value diff --git a/hyppopy/globals.py b/hyppopy/globals.py index a00d280..cafe5b9 100644 --- a/hyppopy/globals.py +++ b/hyppopy/globals.py @@ -1,33 +1,30 @@ # DKFZ # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. import os import sys import logging ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) sys.path.insert(0, ROOT) LIBNAME = "hyppopy" -PLUGIN_DEFAULT_DIR = os.path.join(ROOT, *(LIBNAME, "plugins")) TESTDATA_DIR = os.path.join(ROOT, *(LIBNAME, "tests", "data")) -SETTINGSSOLVERPATH = "settings/solver_plugin" -SETTINGSCUSTOMPATH = "settings/custom" -DEEPDICT_XML_ROOT = LIBNAME -RANDOMSAMPLES = 10000 + +HYPERPARAMETERPATH = "hyperparameter" +SETTINGSPATH = "settings" + DEFAULTITERATIONS = 500 LOGFILENAME = os.path.join(ROOT, '{}_log.log'.format(LIBNAME)) DEBUGLEVEL = logging.DEBUG logging.basicConfig(filename=LOGFILENAME, filemode='w', format='%(levelname)s: %(name)s - %(message)s') - - diff --git a/hyppopy/helpers.py b/hyppopy/helpers.py index 3226c65..ca33a6a 100644 --- a/hyppopy/helpers.py +++ b/hyppopy/helpers.py @@ -1,246 +1,35 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. 
# # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) -import copy -import time -import itertools -import numpy as np -from numpy import argmin, argmax, unique -from collections import OrderedDict, abc - - -def gaussian(x, mu, sigma): - return 1.0/(sigma * np.sqrt(2*np.pi))*np.exp(-(x-mu)**2/(2*sigma**2)) - - -def gaussian_axis_sampling(a, b, N): - center = a + (b - a) / 2.0 - delta = (b - a) / N - bn = b - center - xn = np.arange(0, bn, delta) - dn = [] - for x in xn: - dn.append(1/gaussian(x, 0, bn/2.5)) - dn = np.array(dn) - dn /= np.sum(dn) - dn *= bn - - axis = [0] - for x in dn: - axis.append(x+axis[-1]) - axis.insert(0, -axis[-1]) - axis = np.array(axis) - axis += center - return axis - - -def log_axis_sampling(a, b, N): - if a == 0: - a += 1e-23 - assert a > 0, "Precondition Violation, a < 0!" - assert a < b, "Precondition Violation, a > b!" - assert b > 0, "Precondition Violation, b < 0!" - lexp = np.log(a) - rexp = np.log(b) - assert lexp is not np.nan, "Precondition violation, left bound input error, results in nan!" - assert rexp is not np.nan, "Precondition violation, right bound input error, results in nan!" - - delta = (rexp - lexp) / N - logrange = np.arange(lexp, rexp + delta, delta) - for n in range(logrange.shape[0]): - logrange[n] = np.exp(logrange[n]) - return logrange - - -def sample_domain(start, stop, count, ftype="uniform"): - assert stop > start, "Precondition Violation, stop <= start not allowed!" - assert count > 0, "Precondition Violation, N <= 0 not allowed!" - if ftype == 'uniform': - delta = (stop - start)/count - return np.arange(start, stop + delta, delta) - elif ftype == 'loguniform': - return log_axis_sampling(start, stop, count) - elif ftype == 'normal': - return gaussian_axis_sampling(start, stop, count) - raise IOError("Precondition Violation, unknown sampling function type!") - - -class Trials(object): - - def __init__(self): - self.loss = [] - self.duration = [] - self.status = [] - self.parameter = [] - self.best = None - self._tick = None - - def start_iteration(self): - self._tick = time.process_time() - - def stop_iteration(self): - if self._tick is None: - return - self.duration.append(time.process_time()-self._tick) - self._tick = None - - def set_status(self, status=True): - self.status.append(status) - - def set_parameter(self, params): - self.parameter.append(params) - - def set_loss(self, value): - self.loss.append(value) - - def get(self): - msg = None - if len(self.loss) <= 0: - msg = "Empty solver results!" - if len(self.loss) != len(self.duration): - msg = "Inconsistent results! len(self.loss) != len(self.duration) -> {} != {}".format(len(self.loss), len(self.duration)) - if len(self.loss) != len(self.parameter): - msg = "Inconsistent results! len(self.loss) != len(self.parameter) -> {} != {}".format(len(self.loss), len(self.parameter)) - if len(self.loss) != len(self.status): - msg = "Inconsistent results! 
len(self.loss) != len(self.status) -> {} != {}".format(len(self.loss), len(self.status))
-        if msg is not None:
-            raise Exception(msg)
-
-        best_index = argmin(self.loss)
-        best = self.parameter[best_index]
-        worst_loss = self.loss[argmax(self.loss)]
-        for n in range(len(self.status)):
-            if not self.status[n]:
-                self.loss[n] = worst_loss
-
-        res = {
-            'losses': self.loss,
-            'duration': self.duration
-        }
-        is_string = []
-        for key, value in self.parameter[0].items():
-            res[key] = []
-            if isinstance(value, str):
-                is_string.append(key)
-
-        for p in self.parameter:
-            for key, value in p.items():
-                res[key].append(value)
-
-        for key in is_string:
-            uniques = unique(res[key])
-            lookup = {}
-            for n, p in enumerate(uniques):
-                lookup[p] = n
-            for n in range(len(res[key])):
-                res[key][n] = lookup[res[key][n]]
-
-        return res, best
-
-
-class NestedDictUnfolder(object):
-
-    def __init__(self, nested_dict):
-        self._nested_dict = nested_dict
-        self._categories = []
-        self._values = OrderedDict()
-        self._tree_leafs = []
-
-        NestedDictUnfolder.nested_dict_iter(self._nested_dict, self)
-
-    @staticmethod
-    def nested_dict_iter(nested, unfolder):
-        for key, value in nested.items():
-            if isinstance(value, abc.Mapping):
-                unfolder.add_category(key)
-                NestedDictUnfolder.nested_dict_iter(value, unfolder)
-            else:
-                unfolder.add_values(key, value)
-                unfolder.mark_leaf()
-
-    def find_parent_nodes(self, nested, node, last_node=""):
-        for key, value in nested.items():
-            if key == node:
-                self._tree_leafs.append(last_node)
-                return
-            else:
-                last_node = key
-            if isinstance(value, abc.Mapping):
-                self.find_parent_nodes(value, node, last_node)
-            else:
-                return
-
-    def find_parent_node(self, leaf_names):
-        if not isinstance(leaf_names, list):
-            leaf_names = [leaf_names]
-        for ln in leaf_names:
-            try:
-                pos = self._categories.index(ln) - 1
-                candidate = self._categories[pos]
-                if candidate not in leaf_names:
-                    return candidate
-            except:
-                pass
-        return None
-
-    def add_category(self, name):
-        self._categories.append(name)
-
-    def add_values(self, name, values):
-        self._values[name] = values
-
-    def mark_leaf(self):
-        if len(self._categories) > 0:
-            if not self._categories[-1] in self._tree_leafs:
-                self._tree_leafs.append(self._categories[-1])
-
-    def permutate_values(self):
-        pset = list(self._values.values())
-        pset = list(itertools.product(*pset))
-        permutations = []
-        okeys = list(self._values.keys())
-        for ps in pset:
-            permutations.append({})
-            for i in range(len(okeys)):
-                permutations[-1][okeys[i]] = ps[i]
-        return permutations
-
-    def add_categories(self, values_permutated):
-        while True:
-            parent = self.find_parent_node(self._tree_leafs)
-            if parent is None:
-                return
-            result = []
-            for tl in self._tree_leafs:
-                for elem in values_permutated:
-                    new = copy.deepcopy(elem)
-                    new[parent] = tl
-                    result.append(new)
-                while tl in self._categories:
-                    self._categories.remove(tl)
-            while parent in self._categories:
-                self._categories.remove(parent)
-            self._tree_leafs = []
-            self.find_parent_nodes(self._nested_dict, parent)
-            if len(self._tree_leafs) == 1 and self._tree_leafs[0] == "":
-                break
-            values_permutated = copy.deepcopy(result)
-        return result
-
-    def unfold(self):
-        values_permutated = self.permutate_values()
-        if len(self._categories) > 0:
-            return self.add_categories(values_permutated)
-        return values_permutated
+import os
+import logging
+from hyppopy.globals import DEBUGLEVEL
+
+LOG = logging.getLogger(os.path.basename(__file__))
+LOG.setLevel(DEBUGLEVEL)
+
+
+# define function splitting input dict
+# into
categorical and non-categorical +def split_categorical(pdict): + categorical = {} + uniform = {} + for name, pset in pdict.items(): + for key, value in pset.items(): + if key == 'domain' and value == 'categorical': + categorical[name] = pset + elif key == 'domain': + uniform[name] = pset + return categorical, uniform diff --git a/hyppopy/plugins/gridsearch_settings_plugin.py b/hyppopy/plugins/gridsearch_settings_plugin.py deleted file mode 100644 index 846c2c9..0000000 --- a/hyppopy/plugins/gridsearch_settings_plugin.py +++ /dev/null @@ -1,101 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. -# -# Author: Sven Wanner (s.wanner@dkfz.de) - -import os -import logging -import numpy as np -from pprint import pformat -from hyppopy.globals import DEBUGLEVEL -LOG = logging.getLogger(os.path.basename(__file__)) -LOG.setLevel(DEBUGLEVEL) - -from yapsy.IPlugin import IPlugin - - -from hyppopy.helpers import sample_domain -from hyppopy.settingspluginbase import SettingsPluginBase -from hyppopy.settingsparticle import split_categorical -from hyppopy.settingsparticle import SettingsParticle - - -class gridsearch_Settings(SettingsPluginBase, IPlugin): - - def __init__(self): - SettingsPluginBase.__init__(self) - LOG.debug("initialized") - - def convert_parameter(self, input_dict): - LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(input_dict))) - - solution_space = {} - # split input in categorical and non-categorical data - cat, uni = split_categorical(input_dict) - # build up dictionary keeping all non-categorical data - uniforms = {} - for name, content in uni.items(): - particle = gridsearch_SettingsParticle(name=name) - for key, value in content.items(): - if key == 'domain': - particle.domain = value - elif key == 'data': - particle.data = value - elif key == 'type': - particle.dtype = value - uniforms[name] = particle.get() - - # build nested categorical structure - inner_level = uniforms - for key, value in cat.items(): - tmp = {} - tmp2 = {} - for key2, value2 in value.items(): - if key2 == 'data': - for elem in value2: - tmp[elem] = inner_level - tmp2[key] = tmp - inner_level = tmp2 - if len(cat) > 0: - solution_space = tmp2 - else: - solution_space = inner_level - return solution_space - - -class gridsearch_SettingsParticle(SettingsParticle): - - def __init__(self, name=None, domain=None, dtype=None, data=None): - SettingsParticle.__init__(self, name, domain, dtype, data) - - def convert(self): - assert isinstance(self.data, list), "Precondition Violation, invalid input type for data!" - if self.domain == "categorical": - return self.data - else: - assert len(self.data) >= 2, "Precondition Violation, invalid input data!" 
- if len(self.data) < 3: - self.data.append(10) - LOG.warning("Grid sampling has set number of samples automatically to 10!") - print("WARNING: Grid sampling has set number of samples automatically to 10!") - - samples = sample_domain(start=self.data[0], stop=self.data[1], count=self.data[2], ftype=self.domain) - if self.dtype == "int": - data = [] - for s in samples: - val = int(np.round(s)) - if len(data) > 0: - if val == data[-1]: continue - data.append(val) - return data - return list(samples) diff --git a/hyppopy/plugins/gridsearch_settings_plugin.yapsy-plugin b/hyppopy/plugins/gridsearch_settings_plugin.yapsy-plugin deleted file mode 100644 index 9981474..0000000 --- a/hyppopy/plugins/gridsearch_settings_plugin.yapsy-plugin +++ /dev/null @@ -1,9 +0,0 @@ -[Core] -Name = gridsearch -Module = gridsearch_settings_plugin - -[Documentation] -Author = Sven Wanner -Version = 0.1 -Website = -Description = GridSearch Settings Plugin \ No newline at end of file diff --git a/hyppopy/plugins/gridsearch_solver_plugin.py b/hyppopy/plugins/gridsearch_solver_plugin.py deleted file mode 100644 index 83a1be0..0000000 --- a/hyppopy/plugins/gridsearch_solver_plugin.py +++ /dev/null @@ -1,75 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. -# -# Author: Sven Wanner (s.wanner@dkfz.de) - -import os -import logging -from hyppopy.globals import DEBUGLEVEL -LOG = logging.getLogger(os.path.basename(__file__)) -LOG.setLevel(DEBUGLEVEL) - -from pprint import pformat -from yapsy.IPlugin import IPlugin - -from hyppopy.helpers import Trials -from hyppopy.helpers import NestedDictUnfolder -from hyppopy.solverpluginbase import SolverPluginBase - - -class gridsearch_Solver(SolverPluginBase, IPlugin): - trials = None - best = None - - def __init__(self): - SolverPluginBase.__init__(self) - LOG.debug("initialized") - - def blackbox_function(self, params): - loss = None - self.trials.set_parameter(params) - try: - self.trials.start_iteration() - loss = self.blackbox_function_template(self.data, params) - self.trials.stop_iteration() - if loss is None: - self.trials.set_status(False) - except Exception as e: - LOG.error("execution of self.loss(self.data, params) failed due to:\n {}".format(e)) - self.trials.set_status(False) - self.trials.stop_iteration() - self.trials.set_status(True) - self.trials.set_loss(loss) - return - - def execute_solver(self, parameter): - LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(parameter))) - - self.trials = Trials() - unfolder = NestedDictUnfolder(parameter) - parameter_set = unfolder.unfold() - N = len(parameter_set) - print("") - try: - for n, params in enumerate(parameter_set): - self.blackbox_function(params) - print("\r{}% done".format(int(round(100.0/N*n))), end="") - except Exception as e: - msg = "internal error in gridsearch execute_solver occured. 
{}".format(e) - LOG.error(msg) - raise BrokenPipeError(msg) - print("\r{}% done".format(100), end="") - print("") - - def convert_results(self): - return self.trials.get() diff --git a/hyppopy/plugins/gridsearch_solver_plugin.yapsy-plugin b/hyppopy/plugins/gridsearch_solver_plugin.yapsy-plugin deleted file mode 100644 index efef3f4..0000000 --- a/hyppopy/plugins/gridsearch_solver_plugin.yapsy-plugin +++ /dev/null @@ -1,9 +0,0 @@ -[Core] -Name = gridsearch -Module = gridsearch_solver_plugin - -[Documentation] -Author = Sven Wanner -Version = 0.1 -Website = -Description = GridSearch Solver Plugin \ No newline at end of file diff --git a/hyppopy/plugins/hyperopt_settings_plugin.py b/hyppopy/plugins/hyperopt_settings_plugin.py deleted file mode 100644 index f9cfcb5..0000000 --- a/hyppopy/plugins/hyperopt_settings_plugin.py +++ /dev/null @@ -1,115 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. -# -# Author: Sven Wanner (s.wanner@dkfz.de) - -import os -import logging -import numpy as np -from hyppopy.globals import DEBUGLEVEL -LOG = logging.getLogger(os.path.basename(__file__)) -LOG.setLevel(DEBUGLEVEL) - -from pprint import pformat - -try: - from hyperopt import hp - from yapsy.IPlugin import IPlugin -except: - LOG.warning("hyperopt package not installed, will ignore this plugin!") - print("hyperopt package not installed, will ignore this plugin!") - -from hyppopy.settingspluginbase import SettingsPluginBase -from hyppopy.settingsparticle import SettingsParticle - - -class hyperopt_Settings(SettingsPluginBase, IPlugin): - - def __init__(self): - SettingsPluginBase.__init__(self) - LOG.debug("initialized") - - def convert_parameter(self, input_dict): - LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(input_dict))) - - solution_space = {} - for name, content in input_dict.items(): - particle = hyperopt_SettingsParticle(name=name) - for key, value in content.items(): - if key == 'domain': - particle.domain = value - elif key == 'data': - particle.data = value - elif key == 'type': - particle.dtype = value - solution_space[name] = particle.get() - return solution_space - - -class hyperopt_SettingsParticle(SettingsParticle): - - def __init__(self, name=None, domain=None, dtype=None, data=None): - SettingsParticle.__init__(self, name, domain, dtype, data) - - def convert(self): - if self.domain == "uniform": - if self.dtype == "float" or self.dtype == "double": - return hp.uniform(self.name, self.data[0], self.data[1]) - elif self.dtype == "int": - data = list(np.arange(int(self.data[0]), int(self.data[1]+1))) - return hp.choice(self.name, data) - else: - msg = "cannot convert the type {} in domain {}".format(self.dtype, self.domain) - LOG.error(msg) - raise LookupError(msg) - elif self.domain == "loguniform": - if self.dtype == "float" or self.dtype == "double": - if self.data[0] == 0: - self.data[0] += 1e-23 - assert self.data[0] > 0, "Precondition Violation, a < 0!" - assert self.data[0] < self.data[1], "Precondition Violation, a > b!" - assert self.data[1] > 0, "Precondition Violation, b < 0!" - lexp = np.log(self.data[0]) - rexp = np.log(self.data[1]) - assert lexp is not np.nan, "Precondition violation, left bound input error, results in nan!" 
- assert rexp is not np.nan, "Precondition violation, right bound input error, results in nan!" - - return hp.loguniform(self.name, lexp, rexp) - else: - msg = "cannot convert the type {} in domain {}".format(self.dtype, self.domain) - LOG.error(msg) - raise LookupError(msg) - elif self.domain == "normal": - if self.dtype == "float" or self.dtype == "double": - mu = (self.data[1] - self.data[0])/2.0 - sigma = mu/3 - return hp.normal(self.name, self.data[0] + mu, sigma) - else: - msg = "cannot convert the type {} in domain {}".format(self.dtype, self.domain) - LOG.error(msg) - raise LookupError(msg) - elif self.domain == "categorical": - if self.dtype == 'str': - return hp.choice(self.name, self.data) - elif self.dtype == 'bool': - data = [] - for elem in self.data: - if elem == "true" or elem == "True" or elem == 1 or elem == "1": - data .append(True) - elif elem == "false" or elem == "False" or elem == 0 or elem == "0": - data .append(False) - else: - msg = "cannot convert the type {} in domain {}, unknown bool type value".format(self.dtype, self.domain) - LOG.error(msg) - raise LookupError(msg) - return hp.choice(self.name, data) diff --git a/hyppopy/plugins/hyperopt_settings_plugin.yapsy-plugin b/hyppopy/plugins/hyperopt_settings_plugin.yapsy-plugin deleted file mode 100644 index 7f41d11..0000000 --- a/hyppopy/plugins/hyperopt_settings_plugin.yapsy-plugin +++ /dev/null @@ -1,9 +0,0 @@ -[Core] -Name = hyperopt -Module = hyperopt_settings_plugin - -[Documentation] -Author = Sven Wanner -Version = 0.1 -Website = https://github.com/hyperopt/hyperopt -Description = Hyperopt Settings Plugin \ No newline at end of file diff --git a/hyppopy/plugins/hyperopt_solver_plugin.py b/hyppopy/plugins/hyperopt_solver_plugin.py deleted file mode 100644 index fe3d011..0000000 --- a/hyppopy/plugins/hyperopt_solver_plugin.py +++ /dev/null @@ -1,82 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. 
-# -# Author: Sven Wanner (s.wanner@dkfz.de) - -import os -import logging -from hyppopy.globals import DEBUGLEVEL -LOG = logging.getLogger(os.path.basename(__file__)) -LOG.setLevel(DEBUGLEVEL) - -from pprint import pformat -from hyperopt import fmin, tpe, hp, STATUS_OK, STATUS_FAIL, Trials -from yapsy.IPlugin import IPlugin - - -from hyppopy.projectmanager import ProjectManager -from hyppopy.solverpluginbase import SolverPluginBase - - -class hyperopt_Solver(SolverPluginBase, IPlugin): - trials = None - best = None - - def __init__(self): - SolverPluginBase.__init__(self) - LOG.debug("initialized") - - def blackbox_function(self, params): - status = STATUS_FAIL - try: - loss = self.blackbox_function_template(self.data, params) - if loss is not None: - status = STATUS_OK - except Exception as e: - LOG.error("execution of self.loss(self.data, params) failed due to:\n {}".format(e)) - status = STATUS_FAIL - return {'loss': loss, 'status': status} - - def execute_solver(self, parameter): - LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(parameter))) - self.trials = Trials() - - try: - self.best = fmin(fn=self.blackbox_function, - space=parameter, - algo=tpe.suggest, - max_evals=ProjectManager.max_iterations, - trials=self.trials) - except Exception as e: - msg = "internal error in hyperopt.fmin occured. {}".format(e) - LOG.error(msg) - raise BrokenPipeError(msg) - - def convert_results(self): - # currently converting results in a way that this function returns a dict - # keeping all useful parameter as key/list item. This will be automatically - # converted to a pandas dataframe in the solver class - results = {'duration': [], 'losses': []} - pset = self.trials.trials[0]['misc']['vals'] - for p in pset.keys(): - results[p] = [] - - for n, trial in enumerate(self.trials.trials): - t1 = trial['book_time'] - t2 = trial['refresh_time'] - results['duration'].append((t2 - t1).microseconds/1000.0) - results['losses'].append(trial['result']['loss']) - pset = trial['misc']['vals'] - for p in pset.items(): - results[p[0]].append(p[1][0]) - return results, self.best diff --git a/hyppopy/plugins/hyperopt_solver_plugin.yapsy-plugin b/hyppopy/plugins/hyperopt_solver_plugin.yapsy-plugin deleted file mode 100644 index 3a79f63..0000000 --- a/hyppopy/plugins/hyperopt_solver_plugin.yapsy-plugin +++ /dev/null @@ -1,9 +0,0 @@ -[Core] -Name = hyperopt -Module = hyperopt_solver_plugin - -[Documentation] -Author = Sven Wanner -Version = 0.1 -Website = https://github.com/hyperopt/hyperopt -Description = Hyperopt Solver Plugin \ No newline at end of file diff --git a/hyppopy/plugins/optunity_settings_plugin.py b/hyppopy/plugins/optunity_settings_plugin.py deleted file mode 100644 index ca7bb99..0000000 --- a/hyppopy/plugins/optunity_settings_plugin.py +++ /dev/null @@ -1,106 +0,0 @@ -# -*- coding: utf-8 -*- -# -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. 
-# -# Author: Sven Wanner (s.wanner@dkfz.de) - -import os -import logging -from hyppopy.globals import DEBUGLEVEL -LOG = logging.getLogger(os.path.basename(__file__)) -LOG.setLevel(DEBUGLEVEL) - -from pprint import pformat - -try: - import optunity - from yapsy.IPlugin import IPlugin -except: - LOG.warning("optunity package not installed, will ignore this plugin!") - print("optunity package not installed, will ignore this plugin!") - -from hyppopy.settingspluginbase import SettingsPluginBase -from hyppopy.settingsparticle import split_categorical - - -class optunity_Settings(SettingsPluginBase, IPlugin): - - def __init__(self): - SettingsPluginBase.__init__(self) - LOG.debug("initialized") - - def convert_parameter(self, input_dict): - LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(input_dict))) - - solution_space = {} - # split input in categorical and non-categorical data - cat, uni = split_categorical(input_dict) - # build up dictionary keeping all non-categorical data - uniforms = {} - for key, value in uni.items(): - for key2, value2 in value.items(): - if key2 == 'data': - uniforms[key] = value2 - - if len(cat) == 0: - return uniforms - # build nested categorical structure - inner_level = uniforms - for key, value in cat.items(): - tmp = {} - tmp2 = {} - for key2, value2 in value.items(): - if key2 == 'data': - for elem in value2: - tmp[elem] = inner_level - tmp2[key] = tmp - inner_level = tmp2 - solution_space = tmp2 - return solution_space - - -# class optunity_SettingsParticle(SettingsParticle): -# -# def __init__(self, name=None, domain=None, dtype=None, data=None): -# SettingsParticle.__init__(self, name, domain, dtype, data) -# -# def convert(self): -# if self.domain == "uniform": -# if self.dtype == "float" or self.dtype == "double": -# pass -# elif self.dtype == "int": -# pass -# else: -# msg = f"cannot convert the type {self.dtype} in domain {self.domain}" -# LOG.error(msg) -# raise LookupError(msg) -# elif self.domain == "loguniform": -# if self.dtype == "float" or self.dtype == "double": -# pass -# else: -# msg = f"cannot convert the type {self.dtype} in domain {self.domain}" -# LOG.error(msg) -# raise LookupError(msg) -# elif self.domain == "normal": -# if self.dtype == "float" or self.dtype == "double": -# pass -# else: -# msg = f"cannot convert the type {self.dtype} in domain {self.domain}" -# LOG.error(msg) -# raise LookupError(msg) -# elif self.domain == "categorical": -# if self.dtype == 'str': -# pass -# elif self.dtype == 'bool': -# pass diff --git a/hyppopy/plugins/optunity_settings_plugin.yapsy-plugin b/hyppopy/plugins/optunity_settings_plugin.yapsy-plugin deleted file mode 100644 index b2395aa..0000000 --- a/hyppopy/plugins/optunity_settings_plugin.yapsy-plugin +++ /dev/null @@ -1,9 +0,0 @@ -[Core] -Name = optunity -Module = optunity_settings_plugin - -[Documentation] -Author = Sven Wanner -Version = 0.1 -Website = https://optunity.readthedocs.io/en/latest/ -Description = Optunity Settings Plugin \ No newline at end of file diff --git a/hyppopy/plugins/optunity_solver_plugin.py b/hyppopy/plugins/optunity_solver_plugin.py deleted file mode 100644 index 82f4215..0000000 --- a/hyppopy/plugins/optunity_solver_plugin.py +++ /dev/null @@ -1,68 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. 
-# -# See LICENSE.txt or http://www.mitk.org for details. -# -# Author: Sven Wanner (s.wanner@dkfz.de) - -import os -import logging -from hyppopy.globals import DEBUGLEVEL -LOG = logging.getLogger(os.path.basename(__file__)) -LOG.setLevel(DEBUGLEVEL) - -from pprint import pformat - -import optunity -from yapsy.IPlugin import IPlugin -from hyppopy.projectmanager import ProjectManager -from hyppopy.solverpluginbase import SolverPluginBase - - -class optunity_Solver(SolverPluginBase, IPlugin): - - solver_info = None - trials = None - best = None - status = None - - def __init__(self): - SolverPluginBase.__init__(self) - LOG.debug("initialized") - - def blackbox_function(self, **params): - try: - for key in params.keys(): - if self.settings.get_type_of(key) == 'int': - params[key] = int(round(params[key])) - loss = self.blackbox_function_template(self.data, params) - self.status.append('ok') - return loss - except Exception as e: - LOG.error("computing loss failed due to:\n {}".format(e)) - self.status.append('fail') - return 1e9 - - def execute_solver(self, parameter): - LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(parameter))) - self.status = [] - try: - self.best, self.trials, self.solver_info = optunity.minimize_structured(f=self.blackbox_function, - num_evals=ProjectManager.max_iterations, - search_space=parameter) - except Exception as e: - LOG.error("internal error in optunity.minimize_structured occured. {}".format(e)) - raise BrokenPipeError("internal error in optunity.minimize_structured occured. {}".format(e)) - - def convert_results(self): - results = self.trials.call_log['args'] - results['losses'] = self.trials.call_log['values'] - return results, self.best diff --git a/hyppopy/plugins/optunity_solver_plugin.yapsy-plugin b/hyppopy/plugins/optunity_solver_plugin.yapsy-plugin deleted file mode 100644 index bf638e4..0000000 --- a/hyppopy/plugins/optunity_solver_plugin.yapsy-plugin +++ /dev/null @@ -1,9 +0,0 @@ -[Core] -Name = optunity -Module = optunity_solver_plugin - -[Documentation] -Author = Sven Wanner -Version = 0.1 -Website = https://optunity.readthedocs.io/en/latest/ -Description = Optunity Solver Plugin \ No newline at end of file diff --git a/hyppopy/plugins/randomsearch_settings_plugin.py b/hyppopy/plugins/randomsearch_settings_plugin.py deleted file mode 100644 index 8aa5827..0000000 --- a/hyppopy/plugins/randomsearch_settings_plugin.py +++ /dev/null @@ -1,35 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. 
-# -# Author: Sven Wanner (s.wanner@dkfz.de) - -import os -import logging -from pprint import pformat -from hyppopy.globals import DEBUGLEVEL -LOG = logging.getLogger(os.path.basename(__file__)) -LOG.setLevel(DEBUGLEVEL) - -from yapsy.IPlugin import IPlugin -from hyppopy.settingspluginbase import SettingsPluginBase - - -class randomsearch_Settings(SettingsPluginBase, IPlugin): - - def __init__(self): - SettingsPluginBase.__init__(self) - LOG.debug("initialized") - - def convert_parameter(self, input_dict): - LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(input_dict))) - return input_dict diff --git a/hyppopy/plugins/randomsearch_settings_plugin.yapsy-plugin b/hyppopy/plugins/randomsearch_settings_plugin.yapsy-plugin deleted file mode 100644 index 27d25fd..0000000 --- a/hyppopy/plugins/randomsearch_settings_plugin.yapsy-plugin +++ /dev/null @@ -1,9 +0,0 @@ -[Core] -Name = randomsearch -Module = randomsearch_settings_plugin - -[Documentation] -Author = Sven Wanner -Version = 0.1 -Website = https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html -Description = RandomSearch Settings Plugin \ No newline at end of file diff --git a/hyppopy/plugins/randomsearch_solver_plugin.py b/hyppopy/plugins/randomsearch_solver_plugin.py deleted file mode 100644 index 7291da8..0000000 --- a/hyppopy/plugins/randomsearch_solver_plugin.py +++ /dev/null @@ -1,146 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. -# -# Author: Sven Wanner (s.wanner@dkfz.de) - -import os -import copy -import random -import logging -import numpy as np -from hyppopy.globals import DEBUGLEVEL -LOG = logging.getLogger(os.path.basename(__file__)) -LOG.setLevel(DEBUGLEVEL) - -from pprint import pformat -from yapsy.IPlugin import IPlugin - -from hyppopy.helpers import Trials -from hyppopy.globals import DEFAULTITERATIONS -from hyppopy.projectmanager import ProjectManager -from hyppopy.solverpluginbase import SolverPluginBase - - -def drawUniformSample(param): - assert param['type'] != 'str', "Cannot sample a string list uniformly!" - assert param['data'][0] < param['data'][1], "Precondition violation: data[0] > data[1]!" - s = random.random() - s *= np.abs(param['data'][1]-param['data'][0]) - s += param['data'][0] - if param['type'] == 'int': - s = int(np.round(s)) - if s < param['data'][0]: - s = int(param['data'][0]) - if s > param['data'][1]: - s = int(param['data'][1]) - return s - - -def drawNormalSample(param): - mu = (param['data'][1]-param['data'][0])/2 - sigma = mu/3 - s = np.random.normal(loc=param['data'][0] + mu, scale=sigma) - if s > param['data'][1]: - s = param['data'][1] - if s < param['data'][0]: - s = param['data'][0] - return s - - -def drawLoguniformSample(param): - p = copy.deepcopy(param) - p['data'][0] = np.log(param['data'][0]) - p['data'][1] = np.log(param['data'][1]) - assert p['data'][0] is not np.nan, "Precondition violation, left bound input error, results in nan!" - assert p['data'][1] is not np.nan, "Precondition violation, right bound input error, results in nan!" 
- x = drawUniformSample(p) - s = np.exp(x) - if s > param['data'][1]: - s = param['data'][1] - if s < param['data'][0]: - s = param['data'][0] - return s - - -def drawCategoricalSample(param): - return random.sample(param['data'], 1)[0] - - -def drawSample(param): - if param['domain'] == "uniform": - return drawUniformSample(param) - elif param['domain'] == "normal": - return drawNormalSample(param) - elif param['domain'] == "loguniform": - return drawLoguniformSample(param) - elif param['domain'] == "categorical": - return drawCategoricalSample(param) - else: - raise LookupError("Unknown domain {}".format(param['domain'])) - - -class randomsearch_Solver(SolverPluginBase, IPlugin): - trials = None - best = None - - def __init__(self): - SolverPluginBase.__init__(self) - LOG.debug("initialized") - - def blackbox_function(self, params): - loss = None - self.trials.set_parameter(params) - try: - self.trials.start_iteration() - loss = self.blackbox_function_template(self.data, params) - self.trials.stop_iteration() - if loss is None: - self.trials.set_status(False) - self.trials.stop_iteration() - except Exception as e: - msg = "execution of self.loss(self.data, params) failed due to:\n {}".format(e) - LOG.error(msg) - self.trials.set_status(False) - self.trials.stop_iteration() - print("Exception occured for parameter set: {}".format(params)) - raise e - self.trials.set_status(True) - self.trials.set_loss(loss) - return - - def execute_solver(self, parameter): - LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(parameter))) - self.trials = Trials() - if 'max_iterations' not in ProjectManager.__dict__: - msg = "Missing max_iteration entry in config, used default {}!".format(DEFAULTITERATIONS) - LOG.warning(msg) - print("WARNING: {}".format(msg)) - setattr(ProjectManager, 'max_iterations', DEFAULTITERATIONS) - N = ProjectManager.max_iterations - #print("") - try: - for n in range(N): - params = {} - for name, p in parameter.items(): - params[name] = drawSample(p) - self.blackbox_function(params) - #print("\r{}% done".format(int(round(100.0 / N * n))), end="") - except Exception as e: - msg = "internal error in randomsearch execute_solver occured. {}".format(e) - LOG.error(msg) - raise BrokenPipeError(msg) - #print("\r{}% done".format(100), end="") - #print("") - - def convert_results(self): - return self.trials.get() diff --git a/hyppopy/plugins/randomsearch_solver_plugin.yapsy-plugin b/hyppopy/plugins/randomsearch_solver_plugin.yapsy-plugin deleted file mode 100644 index e465d93..0000000 --- a/hyppopy/plugins/randomsearch_solver_plugin.yapsy-plugin +++ /dev/null @@ -1,9 +0,0 @@ -[Core] -Name = randomsearch -Module = randomsearch_solver_plugin - -[Documentation] -Author = Sven Wanner -Version = 0.1 -Website = https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html -Description = RandomSearch Solver Plugin \ No newline at end of file diff --git a/hyppopy/projectmanager.py b/hyppopy/projectmanager.py index 6f135c7..1f036d3 100644 --- a/hyppopy/projectmanager.py +++ b/hyppopy/projectmanager.py @@ -1,150 +1,69 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. 
#
# Author: Sven Wanner (s.wanner@dkfz.de)
-from hyppopy.singleton import *
-from hyppopy.deepdict import DeepDict
-from hyppopy.globals import SETTINGSCUSTOMPATH, SETTINGSSOLVERPATH
+from .Singleton import *

import os
import logging
-import datetime
-from hyppopy.globals import DEBUGLEVEL
+from .HyppopyProject import HyppopyProject
+from .globals import DEBUGLEVEL
+
LOG = logging.getLogger(os.path.basename(__file__))
LOG.setLevel(DEBUGLEVEL)


@singleton_object
class ProjectManager(metaclass=Singleton):

    def __init__(self):
-        self.configfilename = None
-        self.config = None
-        self._extmembers = []
-        self._identifier = None
-
-    def clear(self):
-        self.configfilename = None
-        self.config = None
-        self.remove_externals()
-
-    def is_ready(self):
-        return self.config is not None
-
-    def remove_externals(self):
-        for added in self._extmembers:
-            if added in self.__dict__.keys():
-                del self.__dict__[added]
-        self._extmembers = []
-
-    def get_hyperparameter(self):
-        return self.config["hyperparameter"]
-
-    def test_config(self):
-        if not isinstance(self.config, DeepDict):
-            msg = "test_config failed, config is not of type DeepDict"
-            LOG.error(msg)
-            raise IOError(msg)
-        sections = ["hyperparameter"]
-        sections += [SETTINGSSOLVERPATH.split("/")[-1]]
-        sections += [SETTINGSCUSTOMPATH.split("/")[-1]]
-        sections_available = [True, True, True]
-        for n, sec in enumerate(sections):
-            if not self.config.has_section(sec):
-                msg = "WARNING: config has no section {}".format(sec)
-                LOG.warning(msg)
-                sections_available[n] = False
-        return sections_available
-
-
-    def set_config(self, config):
-        self.clear()
-        if isinstance(config, dict):
-            self.config = DeepDict()
-            self.config.data = config
-        elif isinstance(config, DeepDict):
-            self.config = config
+        self._current_project = None
+        self._projects = {}
+
+    def clear_all(self):
+        pass
+
+    def new_project(self, name="HyppopyProject", config=None):
+        if name in self._projects.keys():
+            name = self.check_projectname(name)
+        self._projects[name] = HyppopyProject(config)
+        self._current_project = self._projects[name]
+        return self._current_project
+
+    def check_projectname(self, name):
+        split = name.split(".")
+        # a name without a numeric suffix gets ".000" appended; note that
+        # str.split always returns at least one element, so test for length 1
+        if len(split) == 1:
+            return split[0] + "." + str(0).zfill(3)
        else:
-            msg = "unknown type ({}) for config passed, expected dict or DeepDict".format(type(config))
-            LOG.error(msg)
-            raise IOError(msg)
-
-        sections_available = self.test_config()
-        if not sections_available[0]:
-            msg = "Missing section {}".format("hyperparameter")
-            LOG.error(msg)
-            raise LookupError(msg)
-        if not sections_available[1]:
-            msg = "Missing section {}".format(SETTINGSSOLVERPATH)
-            LOG.error(msg)
-            raise LookupError(msg)
-        else:
-            try:
-                self._extmembers += self.config.transfer_attrs(self, SETTINGSSOLVERPATH.split("/")[-1])
-            except Exception as e:
-                msg = "transfering custom section as class attributes failed, " \
-                      "is the config path to your custom section correct? {}. Exception {}".format(SETTINGSCUSTOMPATH,
-                                                                                                   e)
-                LOG.error(msg)
-                raise LookupError(msg)
-        if sections_available[2]:
-            try:
-                self._extmembers += self.config.transfer_attrs(self, SETTINGSCUSTOMPATH.split("/")[-1])
-            except Exception as e:
-                msg = "transfering custom section as class attributes failed, " \
-                      "is the config path to your custom section correct? {}. 
Exception {}".format(SETTINGSCUSTOMPATH, - e) - LOG.error(msg) - raise LookupError(msg) - return True - - def read_config(self, configfile): - self.clear() - self.configfilename = configfile - self.config = DeepDict(configfile) - sections_available = self.test_config() - if not sections_available[0]: - msg = "Missing section {}".format("hyperparameter") - LOG.error(msg) - raise LookupError(msg) - if not sections_available[1]: - msg = "Missing section {}".format(SETTINGSSOLVERPATH) - LOG.error(msg) - raise LookupError(msg) - else: - try: - self._extmembers += self.config.transfer_attrs(self, SETTINGSSOLVERPATH.split("/")[-1]) - except Exception as e: - msg = "transfering custom section as class attributes failed, " \ - "is the config path to your custom section correct? {}. Exception {}".format(SETTINGSSOLVERPATH, e) - LOG.error(msg) - raise LookupError(msg) - if sections_available[2]: try: - self._extmembers += self.config.transfer_attrs(self, SETTINGSCUSTOMPATH.split("/")[-1]) - except Exception as e: - msg = "transfering custom section as class attributes failed, " \ - "is the config path to your custom section correct? {}. Exception {}".format(SETTINGSCUSTOMPATH, e) - LOG.error(msg) - raise LookupError(msg) - - return True - - def identifier(self, force=False): - if self._identifier is None or force: - self._identifier = datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S") - return self._identifier + number = int(split[-1]) + del split[-1] + except: + number = 0 + return '.'.join(split) + "." + str(number).zfill(3) + + def get_current(self): + if self._current_project is None: + self.new_project() + return self._current_project + + def get_project(self, name): + if name in self._projects.keys(): + self._current_project = self._projects[name] + return self.get_current() + return self.new_project(name) + + def get_projectnames(self): + return self._projects.keys() - def register_member(self, name, value): - setattr(name, value) diff --git a/hyppopy/resultviewer.py b/hyppopy/resultviewer.py deleted file mode 100644 index 1033328..0000000 --- a/hyppopy/resultviewer.py +++ /dev/null @@ -1,178 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. 
-# -# Author: Sven Wanner (s.wanner@dkfz.de) - -import os -import copy -import pandas as pd -import seaborn as sns -import matplotlib.pyplot as plt - -import logging -from hyppopy.globals import DEBUGLEVEL -LOG = logging.getLogger(os.path.basename(__file__)) -LOG.setLevel(DEBUGLEVEL) - -sns.set(style="darkgrid") - - -class ResultViewer(object): - - def __init__(self, fname=None, save_only=False): - self.close_all() - self.df = None - self.has_duration = False - self.hyperparameter = None - self.save_only = save_only - self.path = None - self.appendix = None - if fname is not None: - self.read(fname) - - def close_all(self): - plt.close('all') - - def read(self, fname): - self.path = os.path.dirname(fname) - split = os.path.basename(fname).split("_") - self.appendix = split[-1] - self.appendix = self.appendix[:-4] - self.df = pd.read_csv(fname, index_col=0) - const_data = ["duration", "losses"] - hyperparameter_columns = [item for item in self.df.columns if item not in const_data] - self.hyperparameter = pd.DataFrame() - for key in hyperparameter_columns: - self.hyperparameter[key] = self.df[key] - self.has_duration = "duration" in self.df.columns - - def plot_XYGrid(self, df, x, y, name="", save=None, show=True): - argmin = df["losses"].idxmin() - grid = [len(x), len(y)] - if grid[0] == 1 and grid[1] == 1: - fig = plt.figure(figsize=(10.0, 8)) - plt.plot(df[x[0]].values, df[y[0]].values, '.') - plt.plot(df[x[0]].values[argmin], df[y[0]].values[argmin], 'ro') - plt.grid(True) - plt.ylabel(y[0]) - plt.xlabel(x[0]) - plt.title(name, fontsize=16) - else: - if grid[0] > 1 and grid[1] == 1: - fig, axs = plt.subplots(ncols=grid[0], figsize=(10.0, grid[1] * 3.5)) - elif grid[0] == 1 and grid[1] > 1: - fig, axs = plt.subplots(nrows=grid[1], figsize=(10.0, grid[1] * 3.5)) - else: - fig, axs = plt.subplots(nrows=grid[1], ncols=grid[0], figsize=(10.0, grid[1] * 3.5)) - fig.subplots_adjust(left=0.08, right=0.98, wspace=0.3) - - for nx, _x in enumerate(x): - for ny, _y in enumerate(y): - if grid[0] > 1 and grid[1] == 1: - ax = axs[nx] - elif grid[0] == 1 and grid[1] > 1: - ax = axs[ny] - else: - ax = axs[ny, nx] - ax.plot(df[_x].values, df[_y].values, '.') - ax.plot(df[_x].values[argmin], df[_y].values[argmin], 'ro') - ax.grid(True) - if nx == 0: - ax.set_ylabel(_y) - if ny == len(y)-1: - ax.set_xlabel(_x) - fig.suptitle(name, fontsize=16) - if save is not None: - if not os.path.isdir(os.path.dirname(save)): - os.makedirs(os.path.dirname(save)) - plt.savefig(save) - if show: - plt.show() - - def plot_performance_and_feature_grids(self, save=True): - x_axis = [] - if 'losses' in self.df.columns: - x_axis.append('losses') - if 'iterations' in self.df.columns: - x_axis.append('iterations') - y_axis_performance = [] - if 'accuracy' in self.df.columns: - y_axis_performance.append('accuracy') - if 'duration' in self.df.columns: - y_axis_performance.append('duration') - features = [] - for cit in self.df.columns: - if cit not in x_axis and cit not in y_axis_performance: - features.append(cit) - - save_name = None - if save: - save_name = os.path.join(self.path, "performance" + self.appendix + ".png") - self.plot_XYGrid(self.df, x=x_axis, - y=y_axis_performance, - name="Performance", - save=save_name, - show=not self.save_only) - - chunks = [features[x:x + 3] for x in range(0, len(features), 3)] - for n, chunk in enumerate(chunks): - save_name = None - if save: - save_name = os.path.join(self.path, "features_{}_".format(str(n).zfill(3)) + self.appendix + ".png") - self.plot_XYGrid(self.df, x=x_axis, - y=chunk, 
- name="Feature set {}".format(n+1), - save=save_name, - show=not self.save_only) - - def plot_feature_matrix(self, save=True): - sns_plot = sns.pairplot(self.df, height=1.8, aspect=1.8, - plot_kws=dict(edgecolor="k", linewidth=0.5), - diag_kind="kde", diag_kws=dict(shade=True)) - - fig = sns_plot.fig - fig.subplots_adjust(top=0.93, wspace=0.3) - t = fig.suptitle('Pairwise Plots', fontsize=14) - if not self.save_only: - plt.show() - if save: - save_name = os.path.join(self.path, "matrixview_"+self.appendix+".png") - try: - sns_plot.savefig(save_name) - except Exception as e: - msg = "failed to save file {}, reason {}".format(save_name, e) - LOG.error(msg) - raise IOError(msg) - - def plot_duration(self, save=True): - try: - if "duration" in self.df.columns: - sns_plot = sns.jointplot(y="duration", x="losses", data=self.df, kind="kde") - if not self.save_only: - plt.show() - if save: - save_name = os.path.join(self.path, "t_vs_loss_" + self.appendix + ".png") - try: - sns_plot.savefig(save_name) - except Exception as e: - msg = "failed to save file {}, reason {}".format(save_name, e) - LOG.error(msg) - raise IOError(msg) - except Exception as e: - print(e) - - def show(self, save=True): - self.plot_duration(save) - self.plot_feature_matrix(save) - self.plot_performance_and_feature_grids(save) - diff --git a/hyppopy/settingsparticle.py b/hyppopy/settingsparticle.py deleted file mode 100644 index 6d82b48..0000000 --- a/hyppopy/settingsparticle.py +++ /dev/null @@ -1,104 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. 
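# ---------------------------------------------------------------------------
# A minimal usage sketch for the ResultViewer class above (an illustration,
# not part of the patch): it expects a results CSV as written by
# Solver.save_results, i.e. a table with a 'losses' column plus one column
# per hyperparameter; the file path here is purely illustrative.

from hyppopy.resultviewer import ResultViewer  # pre-patch module path

viewer = ResultViewer("results/hyppopy_all.csv", save_only=True)
viewer.show(save=True)  # writes the duration, pairplot and grid PNGs next to the CSV
# ---------------------------------------------------------------------------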
-#
-# Author: Sven Wanner (s.wanner@dkfz.de)
-
-import os
-import abc
-import logging
-from hyppopy.globals import DEBUGLEVEL
-LOG = logging.getLogger(os.path.basename(__file__))
-LOG.setLevel(DEBUGLEVEL)
-
-
-# helper function splitting an input dict
-# into categorical and non-categorical parameter sets
-def split_categorical(pdict):
-    categorical = {}
-    uniform = {}
-    for name, pset in pdict.items():
-        for key, value in pset.items():
-            if key == 'domain' and value == 'categorical':
-                categorical[name] = pset
-            elif key == 'domain':
-                uniform[name] = pset
-    return categorical, uniform
-
-
-class SettingsParticle(object):
-    domains = ["uniform", "loguniform", "normal", "categorical"]
-    _name = None
-    _domain = None
-    _dtype = None
-    _data = None
-
-    def __init__(self, name=None, domain=None, dtype=None, data=None):
-        if name is not None:
-            self.name = name
-        if domain is not None:
-            self.domain = domain
-        if dtype is not None:
-            self.dtype = dtype
-        if data is not None:
-            self.data = data
-
-    @abc.abstractmethod
-    def convert(self):
-        raise NotImplementedError("subclasses must implement convert()")
-
-    def get(self):
-        msg = None
-        if self.name is None: msg = "cannot convert unnamed parameter"
-        if self.domain is None: msg = "cannot convert parameter of empty domain"
-        if self.dtype is None: msg = "cannot convert parameter with unknown dtype"
-        if self.data is None: msg = "cannot convert parameter having no data"
-        if msg is not None:
-            LOG.error(msg)
-            raise LookupError(msg)
-        return self.convert()
-
-    @property
-    def name(self):
-        return self._name
-
-    @name.setter
-    def name(self, value):
-        self._name = value
-
-    @property
-    def domain(self):
-        return self._domain
-
-    @domain.setter
-    def domain(self, value):
-        if value not in self.domains:
-            msg = "domain named {} not available, check your domain name or implement a new domain!".format(value)
-            LOG.error(msg)
-            raise LookupError(msg)
-        self._domain = value
-
-    @property
-    def dtype(self):
-        return self._dtype
-
-    @dtype.setter
-    def dtype(self, value):
-        self._dtype = value
-
-    @property
-    def data(self):
-        return self._data
-
-    @data.setter
-    def data(self, value):
-        self._data = value
diff --git a/hyppopy/settingspluginbase.py b/hyppopy/settingspluginbase.py
deleted file mode 100644
index 6d5d995..0000000
--- a/hyppopy/settingspluginbase.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical and Biological Informatics.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
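# ---------------------------------------------------------------------------
# A quick sketch of the split_categorical helper above on a two-parameter
# space (the parameter dicts follow the config convention used throughout
# this repo; the module path is the pre-patch one):

from hyppopy.settingsparticle import split_categorical

pdict = {
    "kernel": {"domain": "categorical", "data": ["linear", "rbf"], "type": "str"},
    "C": {"domain": "uniform", "data": [0, 20], "type": "float"},
}
categorical, uniform = split_categorical(pdict)
assert list(categorical) == ["kernel"] and list(uniform) == ["C"]
# ---------------------------------------------------------------------------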
-#
-# Author: Sven Wanner (s.wanner@dkfz.de)
-
-import abc
-
-import os
-import copy
-import logging
-from hyppopy.globals import DEBUGLEVEL
-LOG = logging.getLogger(os.path.basename(__file__))
-LOG.setLevel(DEBUGLEVEL)
-
-from hyppopy.deepdict import DeepDict
-
-
-class SettingsPluginBase(object):
-    _data = None
-    _name = None
-
-    def __init__(self):
-        self._data = {}
-
-    @abc.abstractmethod
-    def convert_parameter(self, input_dict):
-        raise NotImplementedError('users must define convert_parameter to use this base class')
-
-    def get_hyperparameter(self):
-        return self.convert_parameter(self.data)
-
-    def set_hyperparameter(self, input_data):
-        self.data.clear()
-        self.data = copy.deepcopy(input_data)
-
-    def get_type_of(self, name):
-        if name not in self.data:
-            msg = "hyperparameter named {} not found!".format(name)
-            LOG.error(msg)
-            raise LookupError(msg)
-        return self.data[name]["type"]
-
-    def get_domain_of(self, name):
-        if name not in self.data:
-            msg = "hyperparameter named {} not found!".format(name)
-            LOG.error(msg)
-            raise LookupError(msg)
-        return self.data[name]["domain"]
-
-    def get_data_of(self, name):
-        if name not in self.data:
-            msg = "hyperparameter named {} not found!".format(name)
-            LOG.error(msg)
-            raise LookupError(msg)
-        return self.data[name]["data"]
-
-    def read(self, fname):
-        # a plain dict has no file I/O; load via DeepDict and let the data
-        # setter below unwrap it
-        self.data = DeepDict(fname)
-
-    def write(self, fname):
-        DeepDict(self.data).to_file(fname)
-
-    @property
-    def data(self):
-        return self._data
-
-    @data.setter
-    def data(self, value):
-        if isinstance(value, dict):
-            self._data = value
-        elif isinstance(value, DeepDict):
-            self._data = value.data
-        else:
-            raise IOError("unexpected input type({}) for data, needs to be of type dict or DeepDict!".format(type(value)))
-
-    @property
-    def name(self):
-        return self._name
-
-    @name.setter
-    def name(self, value):
-        if not isinstance(value, str):
-            msg = "Invalid input, str type expected for value, got {} instead".format(type(value))
-            LOG.error(msg)
-            raise IOError(msg)
-        self._name = value
diff --git a/hyppopy/solver.py b/hyppopy/solver.py
deleted file mode 100644
index 73fba64..0000000
--- a/hyppopy/solver.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical and Biological Informatics.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
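# ---------------------------------------------------------------------------
# A minimal sketch of a concrete settings plugin built on SettingsPluginBase
# above; identity_Settings is a made-up name (real plugins follow the
# libname_Settings convention) and its convert_parameter simply passes the
# hyppopy-style dict through instead of building library-specific objects:

from hyppopy.settingspluginbase import SettingsPluginBase

class identity_Settings(SettingsPluginBase):
    def convert_parameter(self, input_dict):
        # pass-through conversion, just to show the hook that subclasses fill in
        return input_dict

settings = identity_Settings()
settings.set_hyperparameter({"C": {"domain": "uniform", "data": [0, 20], "type": "float"}})
assert settings.get_type_of("C") == "float"
# ---------------------------------------------------------------------------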
-# -# Author: Sven Wanner (s.wanner@dkfz.de) - - -from hyppopy.projectmanager import ProjectManager -#from hyppopy.resultviewer import ResultViewer - -import os -import logging -import pandas as pd -from hyppopy.globals import LIBNAME -from hyppopy.globals import DEBUGLEVEL -LOG = logging.getLogger(os.path.basename(__file__)) -LOG.setLevel(DEBUGLEVEL) - - -class Solver(object): - _name = None - _solver_plugin = None - _settings_plugin = None - - def __init__(self): - pass - - def set_data(self, data): - self._solver_plugin.set_data(data) - - def set_hyperparameters(self, params): - self.settings_plugin.set_hyperparameter(params) - - def set_loss_function(self, func): - self._solver_plugin.set_blackbox_function(func) - - def run(self): - if not ProjectManager.is_ready(): - LOG.error("No config data found to initialize PluginSetting object") - raise IOError("No config data found to initialize PluginSetting object") - self.settings_plugin.set_hyperparameter(ProjectManager.get_hyperparameter()) - self._solver_plugin.settings = self.settings_plugin - self._solver_plugin.run() - - def save_results(self, savedir=None, savename=None, overwrite=True):#, show=False): - df, best = self.get_results() - dir = None - if savename is None: - savename = LIBNAME - if savedir is None: - if 'output_dir' in ProjectManager.__dict__.keys(): - if not os.path.isdir(ProjectManager.output_dir): - os.mkdir(ProjectManager.output_dir) - dir = ProjectManager.output_dir - else: - print("WARNING: No solver option output_dir found, cannot save results!") - LOG.warning("WARNING: No solver option output_dir found, cannot save results!") - else: - dir = savedir - if not os.path.isdir(savedir): - os.mkdir(savedir) - - appendix = "" - if not overwrite: - appendix = "_" + ProjectManager.identifier(True) - name = savename + "_all" + appendix + ".csv" - fname_all = os.path.join(dir, name) - df.to_csv(fname_all) - name = savename + "_best" + appendix + ".txt" - fname_best = os.path.join(dir, name) - with open(fname_best, "w") as text_file: - for item in best.items(): - text_file.write("{}\t:\t{}\n".format(item[0], item[1])) - - # if show: - # viewer = ResultViewer(fname_all) - # viewer.show() - # else: - # viewer = ResultViewer(fname_all, save_only=True) - # viewer.show() - - def get_results(self): - results, best = self._solver_plugin.get_results() - df = pd.DataFrame.from_dict(results) - return df, best - - @property - def is_ready(self): - return self._solver_plugin is not None and self.settings_plugin is not None - - @property - def solver_plugin(self): - return self._solver_plugin - - @solver_plugin.setter - def solver_plugin(self, value): - self._solver_plugin = value - - @property - def settings_plugin(self): - return self._settings_plugin - - @settings_plugin.setter - def settings_plugin(self, value): - self._settings_plugin = value - - @property - def name(self): - return self._name - - @name.setter - def name(self, value): - if not isinstance(value, str): - msg = "Invalid input, str type expected for value, got {} instead".format(type(value)) - LOG.error(msg) - raise IOError(msg) - self._name = value - diff --git a/hyppopy/solverfactory.py b/hyppopy/solverfactory.py deleted file mode 100644 index 80ce478..0000000 --- a/hyppopy/solverfactory.py +++ /dev/null @@ -1,166 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. 
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
-#
-# Author: Sven Wanner (s.wanner@dkfz.de)
-
-from yapsy.PluginManager import PluginManager
-
-from hyppopy.projectmanager import ProjectManager
-from hyppopy.globals import PLUGIN_DEFAULT_DIR
-from hyppopy.deepdict import DeepDict
-from hyppopy.solver import Solver
-from hyppopy.singleton import *
-
-import os
-import logging
-from hyppopy.globals import DEBUGLEVEL
-LOG = logging.getLogger(os.path.basename(__file__))
-LOG.setLevel(DEBUGLEVEL)
-
-@singleton_object
-class SolverFactory(metaclass=Singleton):
-    """
-    This class is responsible for grabbing all plugins from the plugin folder and arranging them
-    into Solver class instances. These Solver instances can be requested from the factory via the
-    get_solver method. The SolverFactory class is a singleton, so do not instantiate it via
-    SolverFactory(); instead treat it as a class offering static functions only, SolverFactory.method().
-    """
-    _plugin_dirs = []
-    _plugins = {}
-
-    def __init__(self):
-        self.reset()
-        self.load_plugins()
-        LOG.debug("SolverFactory initialized")
-
-    def load_plugins(self):
-        """
-        Load plugin modules from the plugin paths
-        """
-        LOG.debug("load_plugins()")
-        manager = PluginManager()
-        LOG.debug("setPluginPlaces(" + " ".join(map(str, self._plugin_dirs)) + ")")
-        manager.setPluginPlaces(self._plugin_dirs)
-        manager.collectPlugins()
-        print("")
-        for plugin in manager.getAllPlugins():
-            name_elements = plugin.plugin_object.__class__.__name__.split("_")
-            LOG.debug("found plugin " + " ".join(map(str, name_elements)))
-            print("Hyppopy: found plugin " + " ".join(map(str, name_elements)))
-            if len(name_elements) != 2 or ("Solver" not in name_elements and "Settings" not in name_elements):
-                msg = "invalid plugin class naming for class {}, the convention is libname_Solver or libname_Settings.".format(plugin.plugin_object.__class__.__name__)
-                LOG.error(msg)
-                raise NameError(msg)
-            if name_elements[0] not in self._plugins.keys():
-                self._plugins[name_elements[0]] = Solver()
-                self._plugins[name_elements[0]].name = name_elements[0]
-            if name_elements[1] == "Solver":
-                try:
-                    obj = plugin.plugin_object.__class__()
-                    obj.name = name_elements[0]
-                    self._plugins[name_elements[0]].solver_plugin = obj
-                    LOG.info("plugin: {} Solver loaded".format(name_elements[0]))
-                except Exception as e:
-                    msg = "failed to instantiate class {}".format(plugin.plugin_object.__class__.__name__)
-                    LOG.error(msg)
-                    raise ImportError(msg)
-            elif name_elements[1] == "Settings":
-                try:
-                    obj = plugin.plugin_object.__class__()
-                    obj.name = name_elements[0]
-                    self._plugins[name_elements[0]].settings_plugin = obj
-                    LOG.info("plugin: {} ParameterSpace loaded".format(name_elements[0]))
-                except Exception as e:
-                    msg = "failed to instantiate class {}".format(plugin.plugin_object.__class__.__name__)
-                    LOG.error(msg)
-                    raise ImportError(msg)
-            else:
-                msg = "failed loading plugin {}, please check if naming conventions are kept!".format(name_elements[0])
-                LOG.error(msg)
-                raise IOError(msg)
-        if len(self._plugins) == 0:
-            msg = "no plugins found, please check your plugin folder names or your plugin scripts for errors!"
- LOG.error(msg) - raise IOError(msg) - - def reset(self): - """ - Reset solver factory - """ - LOG.debug("reset()") - self._plugins = {} - self._plugin_dirs = [] - self.add_plugin_dir(os.path.abspath(PLUGIN_DEFAULT_DIR)) - - def add_plugin_dir(self, dir): - """ - Add plugin directory - """ - LOG.debug("add_plugin_dir({})".format(dir)) - self._plugin_dirs.append(dir) - - def list_solver(self): - """ - list all solvers available - :return: [list(str)] - """ - return list(self._plugins.keys()) - - def from_settings(self, settings): - if isinstance(settings, str): - if not os.path.isfile(settings): - LOG.error("input error, file {} not found!".format(settings)) - if not ProjectManager.read_config(settings): - LOG.error("failed to read config in ProjectManager!") - return None - else: - if not ProjectManager.set_config(settings): - LOG.error("failed to set config in ProjectManager!") - return None - - if not ProjectManager.is_ready(): - LOG.error("failed to set config in ProjectManager!") - return None - - try: - solver = self.get_solver(ProjectManager.use_plugin) - except Exception as e: - msg = "failed to create solver, reason {}".format(e) - LOG.error(msg) - return None - return solver - - def get_solver(self, name=None): - """ - returns a solver by name tag - :param name: [str] solver name - :return: [Solver] instance - """ - if name is None: - try: - name = ProjectManager.use_plugin - except Exception as e: - msg = "failed to setup solver, no solver specified, check your ProjectManager for the use_plugin value! Error {}".format(e) - LOG.error(msg) - raise LookupError(msg) - if not isinstance(name, str): - msg = "Invalid input, str type expected for name, got {} instead".format(type(name)) - LOG.error(msg) - raise IOError(msg) - if name not in self.list_solver(): - msg = "failed solver request, a solver called {} is not available, check for typo or if your plugin failed while loading!".format(name) - LOG.error(msg) - raise LookupError(msg) - LOG.debug("get_solver({})".format(name)) - return self._plugins[name] diff --git a/hyppopy/solverpluginbase.py b/hyppopy/solverpluginbase.py deleted file mode 100644 index e67f52e..0000000 --- a/hyppopy/solverpluginbase.py +++ /dev/null @@ -1,95 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. 
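# ---------------------------------------------------------------------------
# The SolverFactory docstring above describes the intended use; a compact
# sketch under the assumption that the default plugins are importable and
# that 'my_config.json' follows the config layout used in this repo
# (my_loss is a toy placeholder):

from hyppopy.solverfactory import SolverFactory

def my_loss(data, params):
    return sum(params.values())        # toy loss, ignores the data

print(SolverFactory.list_solver())     # e.g. ['hyperopt', 'optunity']
solver = SolverFactory.from_settings("my_config.json")
if solver is not None:                 # from_settings returns None on failure
    solver.set_data(None)              # the toy loss does not use data
    solver.set_loss_function(my_loss)
    solver.run()
    df, best = solver.get_results()    # DataFrame of trials, dict of best params
# ---------------------------------------------------------------------------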
-#
-# Author: Sven Wanner (s.wanner@dkfz.de)
-
-import abc
-
-import os
-import logging
-from hyppopy.globals import DEBUGLEVEL
-from hyppopy.settingspluginbase import SettingsPluginBase
-
-LOG = logging.getLogger(os.path.basename(__file__))
-LOG.setLevel(DEBUGLEVEL)
-
-
-class SolverPluginBase(object):
-    _data = None
-    _blackbox_function_template = None
-    _settings = None
-    _name = None
-
-    def __init__(self):
-        pass
-
-    @abc.abstractmethod
-    def blackbox_function(self, params):
-        raise NotImplementedError('users must define blackbox_function to use this base class')
-
-    @abc.abstractmethod
-    def execute_solver(self, parameter):
-        raise NotImplementedError('users must define execute_solver to use this base class')
-
-    @abc.abstractmethod
-    def convert_results(self):
-        raise NotImplementedError('users must define convert_results to use this base class')
-
-    def set_data(self, data):
-        self._data = data
-
-    def set_blackbox_function(self, func):
-        self._blackbox_function_template = func
-
-    def get_results(self):
-        return self.convert_results()
-
-    def run(self):
-        self.execute_solver(self.settings.get_hyperparameter())
-
-    @property
-    def data(self):
-        return self._data
-
-    @property
-    def blackbox_function_template(self):
-        return self._blackbox_function_template
-
-    @property
-    def name(self):
-        return self._name
-
-    @name.setter
-    def name(self, value):
-        if not isinstance(value, str):
-            msg = "Invalid input, str type expected for value, got {} instead".format(type(value))
-            LOG.error(msg)
-            raise IOError(msg)
-        self._name = value
-
-    @property
-    def settings(self):
-        return self._settings
-
-    @settings.setter
-    def settings(self, value):
-        if not isinstance(value, SettingsPluginBase):
-            msg = "Invalid input, SettingsPluginBase type expected for value, got {} instead".format(type(value))
-            LOG.error(msg)
-            raise IOError(msg)
-        self._settings = value
-
-
diff --git a/hyppopy/tests/test_deepdict.py b/hyppopy/tests/test_deepdict.py
deleted file mode 100644
index 29e5de7..0000000
--- a/hyppopy/tests/test_deepdict.py
+++ /dev/null
@@ -1,163 +0,0 @@
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical and Biological Informatics.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
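# ---------------------------------------------------------------------------
# A minimal sketch of a solver plugin derived from SolverPluginBase above.
# demo_Solver is illustrative only: it evaluates a single hard-coded candidate
# where a real plugin would run its optimizer, and actual discovery would
# additionally require the yapsy plugin machinery (IPlugin subclassing and a
# plugin info file):

from hyppopy.solverpluginbase import SolverPluginBase

class demo_Solver(SolverPluginBase):
    def blackbox_function(self, params):
        # forward to the user-supplied template, as the shipped plugins do
        return self.blackbox_function_template(self.data, params)

    def execute_solver(self, parameter):
        # 'parameter' is the converted space handed over by run();
        # evaluate one fixed candidate instead of optimizing
        self._best = {"C": 1.0}
        self._losses = [self.blackbox_function(self._best)]

    def convert_results(self):
        return {"losses": self._losses}, self._best
# ---------------------------------------------------------------------------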
-# -# Author: Sven Wanner (s.wanner@dkfz.de) - -import os -import unittest - -from hyppopy.deepdict import DeepDict - - -DATA_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") - - -class DeepDictTestSuite(unittest.TestCase): - - def setUp(self): - self.test_data = { - 'widget': { - 'debug': 'on', - 'image': {'alignment': 'center', - 'hOffset': 250, - 'name': 'sun1', - 'src': 'Images/Sun.png', - 'vOffset': 250}, - 'text': {'alignment': 'center', - 'data': 'Click Here', - 'hOffset': 250, - 'name': 'text1', - 'onMouseUp': 'sun1.opacity = (sun1.opacity / 100) * 90;', - 'size': 36, - 'style': 'bold', - 'vOffset': 100}, - 'window': {'height': 500, - 'name': 'main_window', - 'title': 'Sample Konfabulator Widget', - 'width': 500} - } - } - - self.test_data2 = {"test": { - "section": { - "var1": 100, - "var2": 200 - } - }} - - def test_fileIO(self): - dd_json = DeepDict(os.path.join(DATA_PATH, 'test_json.json')) - dd_xml = DeepDict(os.path.join(DATA_PATH, 'test_xml.xml')) - dd_dict = DeepDict(self.test_data) - - self.assertTrue(list(self.test_data.keys())[0] == list(dd_json.data.keys())[0]) - self.assertTrue(list(self.test_data.keys())[0] == list(dd_xml.data.keys())[0]) - self.assertTrue(list(self.test_data.keys())[0] == list(dd_dict.data.keys())[0]) - for key in self.test_data['widget'].keys(): - self.assertTrue(self.test_data['widget'][key] == dd_json.data['widget'][key]) - self.assertTrue(self.test_data['widget'][key] == dd_xml.data['widget'][key]) - self.assertTrue(self.test_data['widget'][key] == dd_dict.data['widget'][key]) - for key in self.test_data['widget'].keys(): - if key == 'debug': - self.assertTrue(dd_json.data['widget']["debug"] == "on") - self.assertTrue(dd_xml.data['widget']["debug"] == "on") - self.assertTrue(dd_dict.data['widget']["debug"] == "on") - else: - for key2, value2 in self.test_data['widget'][key].items(): - self.assertTrue(value2 == dd_json.data['widget'][key][key2]) - self.assertTrue(value2 == dd_xml.data['widget'][key][key2]) - self.assertTrue(value2 == dd_dict.data['widget'][key][key2]) - - dd_dict.to_file(os.path.join(DATA_PATH, 'write_to_json_test.json')) - dd_dict.to_file(os.path.join(DATA_PATH, 'write_to_xml_test.xml')) - self.assertTrue(os.path.isfile(os.path.join(DATA_PATH, 'write_to_json_test.json'))) - self.assertTrue(os.path.isfile(os.path.join(DATA_PATH, 'write_to_xml_test.xml'))) - dd_json = DeepDict(os.path.join(DATA_PATH, 'write_to_json_test.json')) - dd_xml = DeepDict(os.path.join(DATA_PATH, 'write_to_xml_test.xml')) - self.assertTrue(dd_json == dd_dict) - self.assertTrue(dd_xml == dd_dict) - try: - os.remove(os.path.join(DATA_PATH, 'write_to_json_test.json')) - os.remove(os.path.join(DATA_PATH, 'write_to_xml_test.xml')) - except Exception as e: - print(e) - print("Warning: Failed to delete temporary data during tests!") - - def test_has_section(self): - dd = DeepDict(self.test_data) - self.assertTrue(dd.has_section('hOffset')) - self.assertTrue(dd.has_section('window')) - self.assertTrue(dd.has_section('widget')) - self.assertTrue(dd.has_section('style')) - self.assertTrue(dd.has_section('window')) - self.assertTrue(dd.has_section('title')) - self.assertFalse(dd.has_section('notasection')) - - def test_data_access(self): - dd = DeepDict(self.test_data) - self.assertEqual(dd['widget/window/height'], 500) - self.assertEqual(dd['widget/image/name'], 'sun1') - self.assertTrue(isinstance(dd['widget/window'], dict)) - self.assertEqual(len(dd['widget/window']), 4) - - dd = DeepDict(path_sep=".") - dd.data = self.test_data - 
self.assertEqual(dd['widget.window.height'], 500) - self.assertEqual(dd['widget.image.name'], 'sun1') - self.assertTrue(isinstance(dd['widget.window'], dict)) - self.assertEqual(len(dd['widget.window']), 4) - - def test_data_adding(self): - dd = DeepDict() - dd["test/section/var1"] = 100 - dd["test/section/var2"] = 200 - self.assertTrue(dd.data == self.test_data2) - - dd = DeepDict() - dd["test"] = {} - dd["test/section"] = {} - dd["test/section/var1"] = 100 - dd["test/section/var2"] = 200 - self.assertTrue(dd.data == self.test_data2) - - def test_sample_space(self): - dd = DeepDict(os.path.join(DATA_PATH, 'test_paramset.json')) - self.assertEqual(len(dd[['parameter', 'activation', 'data']]), 4) - self.assertEqual(dd['parameter/activation/data'], ['ReLU', 'tanh', 'sigm', 'ELU']) - self.assertTrue(isinstance(dd['parameter/activation/data'], list)) - self.assertTrue(isinstance(dd['parameter/activation/data'][0], str)) - self.assertEqual(dd['parameter/layerdepth/data'], [3, 20]) - self.assertTrue(isinstance(dd['parameter/layerdepth/data'], list)) - self.assertTrue(isinstance(dd['parameter/layerdepth/data'][0], int)) - self.assertTrue(isinstance(dd['parameter/learningrate/data'][0], float)) - self.assertEqual(dd['parameter/learningrate/data'][0], 1e-5) - self.assertEqual(dd['parameter/learningrate/data'][1], 10.0) - - def test_len(self): - dd = DeepDict(os.path.join(DATA_PATH, 'test_paramset.json')) - self.assertEqual(len(dd), 1) - - def test_setattr(self): - dd = DeepDict(os.path.join(DATA_PATH, 'iris_svc_parameter.xml')) - - class Foo(object): - def __init__(self): - pass - foo = Foo - dd.transfer_attrs(foo, 'solver_plugin') - self.assertEqual(foo.max_iterations, 50) - self.assertEqual(foo.use_plugin, 'optunity') - - -if __name__ == '__main__': - unittest.main() diff --git a/hyppopy/tests/test_gridsearchsolver.py b/hyppopy/tests/test_gridsearchsolver.py new file mode 100644 index 0000000..91c1f53 --- /dev/null +++ b/hyppopy/tests/test_gridsearchsolver.py @@ -0,0 +1,127 @@ +# DKFZ +# +# +# Copyright (c) German Cancer Research Center, +# Division of Medical and Biological Informatics. +# All rights reserved. +# +# This software is distributed WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# See LICENSE.txt or http://www.mitk.org for details. 
+# +# Author: Sven Wanner (s.wanner@dkfz.de) + +import os +import unittest +import numpy as np + +from ..solver.GridsearchSolver import * +from ..globals import TESTDATA_DIR + + +class GridsearchTestSuite(unittest.TestCase): + + def setUp(self): + pass + + def test_get_uniform_axis_sample(self): + drange = [0, 10] + N = 11 + data = get_uniform_axis_sample(drange[0], drange[1], N, "float") + for i in range(11): + self.assertEqual(float(i), data[i]) + + drange = [-10, 10] + N = 21 + data = get_uniform_axis_sample(drange[0], drange[1], N, "int") + self.assertEqual(data[0], -10) + self.assertEqual(data[20], 10) + self.assertEqual(data[10], 0) + + def test_get_norm_cdf(self): + res = [0, 0.27337265, 0.4331928, 0.48777553, 0.4986501, 0.5013499, 0.51222447, 0.5668072, 0.72662735, 1] + f = get_norm_cdf(10) + for n, v in enumerate(res): + self.assertAlmostEqual(v, f[n]) + + res = [0.0, 0.27337264762313174, 0.4331927987311419, 0.48777552734495533, 0.4986501019683699, 0.5, + 0.5013498980316301, 0.5122244726550447, 0.5668072012688581, 0.7266273523768683, 1.0] + f = get_norm_cdf(11) + for n, v in enumerate(res): + self.assertAlmostEqual(v, f[n]) + + def test_get_gaussian_axis_sampling(self): + res = [-5.0, + -2.2662735237686826, + -0.6680720126885813, + -0.12224472655044671, + -0.013498980316301257, + 0.013498980316301257, + 0.12224472655044671, + 0.6680720126885813, + 2.2662735237686826, + 5.0] + + bounds = (-5, 5) + N = 10 + data = get_gaussian_axis_sample(bounds[0], bounds[1], N, "float") + for n in range(N): + self.assertAlmostEqual(res[n], data[n]) + + res = [-5.0, + -2.2662735237686826, + -0.6680720126885813, + -0.12224472655044671, + -0.013498980316301257, + 0.0, + 0.013498980316301257, + 0.12224472655044671, + 0.6680720126885813, + 2.2662735237686826, + 5.0] + + bounds = (-5, 5) + N = 11 + data = get_gaussian_axis_sample(bounds[0], bounds[1], N, "float") + for n in range(N): + self.assertAlmostEqual(res[n], data[n]) + + def test_get_logarithmic_axis_sample(self): + res = [0.0010000000000000002, + 0.0035938136638046297, + 0.012915496650148841, + 0.046415888336127795, + 0.1668100537200059, + 0.5994842503189414, + 2.154434690031884, + 7.7426368268112675, + 27.825594022071247, + 100.00000000000004] + bounds = (0.001, 1e2) + N = 10 + data = get_logarithmic_axis_sample(bounds[0], bounds[1], N, "float") + for n in range(N): + self.assertAlmostEqual(res[n], data[n]) + + res = [0.0010000000000000002, + 0.003162277660168382, + 0.010000000000000004, + 0.03162277660168381, + 0.10000000000000006, + 0.31622776601683833, + 1.0000000000000009, + 3.1622776601683813, + 10.00000000000001, + 31.622776601683846, + 100.00000000000004] + bounds = (0.001, 1e2) + N = 11 + data = get_logarithmic_axis_sample(bounds[0], bounds[1], N, "float") + for n in range(N): + self.assertAlmostEqual(res[n], data[n]) + + +if __name__ == '__main__': + unittest.main() diff --git a/hyppopy/tests/test_helpers.py b/hyppopy/tests/test_helpers.py deleted file mode 100644 index e1071b3..0000000 --- a/hyppopy/tests/test_helpers.py +++ /dev/null @@ -1,87 +0,0 @@ -# -*- coding: utf-8 -*- -# -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. 
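# ---------------------------------------------------------------------------
# The expected values in test_get_logarithmic_axis_sample above are consistent
# with a plain log10-spaced grid; a sketch under that assumption (it matches
# the test data, but is not necessarily the shipped implementation):

import numpy as np

def log_axis_sample(lo, hi, n, dtype="float"):
    # n samples whose exponents are evenly spaced between log10(lo) and log10(hi)
    samples = np.logspace(np.log10(lo), np.log10(hi), num=n)
    return list(samples.astype(int)) if dtype == "int" else list(samples)

assert np.allclose(log_axis_sample(0.001, 1e2, 11),
                   np.array([1e-3, 10**-2.5, 1e-2, 10**-1.5, 1e-1, 10**-0.5,
                             1e0, 10**0.5, 1e1, 10**1.5, 1e2]))
# ---------------------------------------------------------------------------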
-# -# Author: Sven Wanner (s.wanner@dkfz.de) - -import unittest - -from hyppopy.helpers import NestedDictUnfolder - - -class SolverFactoryTestSuite(unittest.TestCase): - - def setUp(self): - self.p1 = {"uni1": [1, 2], "uni2": [11, 12]} - self.p2 = {"cat": {"a": {"uni1": [1, 2], "uni2": [11, 12]}, "b": {"uni1": [1, 2], "uni2": [11, 12]}}} - self.p3 = {"cat1": { - "a1": {"cat2": {"a2": {"uni1": [1, 2], "uni2": [11, 12]}, "b2": {"uni1": [1, 2], "uni2": [11, 12]}}}, - "b1": {"cat2": {"a2": {"uni1": [1, 2], "uni2": [11, 12]}, "b2": {"uni1": [1, 2], "uni2": [11, 12]}}}}} - - self.output_p3 = [{'cat1': 'a1', 'cat2': 'a2', 'uni1': 1, 'uni2': 11}, - {'cat1': 'a1', 'cat2': 'a2', 'uni1': 1, 'uni2': 12}, - {'cat1': 'a1', 'cat2': 'a2', 'uni1': 2, 'uni2': 11}, - {'cat1': 'a1', 'cat2': 'a2', 'uni1': 2, 'uni2': 12}, - {'cat1': 'a1', 'cat2': 'b2', 'uni1': 1, 'uni2': 11}, - {'cat1': 'a1', 'cat2': 'b2', 'uni1': 1, 'uni2': 12}, - {'cat1': 'a1', 'cat2': 'b2', 'uni1': 2, 'uni2': 11}, - {'cat1': 'a1', 'cat2': 'b2', 'uni1': 2, 'uni2': 12}, - {'cat1': 'b1', 'cat2': 'a2', 'uni1': 1, 'uni2': 11}, - {'cat1': 'b1', 'cat2': 'a2', 'uni1': 1, 'uni2': 12}, - {'cat1': 'b1', 'cat2': 'a2', 'uni1': 2, 'uni2': 11}, - {'cat1': 'b1', 'cat2': 'a2', 'uni1': 2, 'uni2': 12}, - {'cat1': 'b1', 'cat2': 'b2', 'uni1': 1, 'uni2': 11}, - {'cat1': 'b1', 'cat2': 'b2', 'uni1': 1, 'uni2': 12}, - {'cat1': 'b1', 'cat2': 'b2', 'uni1': 2, 'uni2': 11}, - {'cat1': 'b1', 'cat2': 'b2', 'uni1': 2, 'uni2': 12}] - - self.output_p2 = [{'cat': 'a', 'uni1': 1, 'uni2': 11}, - {'cat': 'a', 'uni1': 1, 'uni2': 12}, - {'cat': 'a', 'uni1': 2, 'uni2': 11}, - {'cat': 'a', 'uni1': 2, 'uni2': 12}, - {'cat': 'b', 'uni1': 1, 'uni2': 11}, - {'cat': 'b', 'uni1': 1, 'uni2': 12}, - {'cat': 'b', 'uni1': 2, 'uni2': 11}, - {'cat': 'b', 'uni1': 2, 'uni2': 12}] - - self.output_p1 = [{'uni1': 1, 'uni2': 11}, - {'uni1': 1, 'uni2': 12}, - {'uni1': 2, 'uni2': 11}, - {'uni1': 2, 'uni2': 12}] - - def test_nested_dict_unfolder_p1(self): - unfolder = NestedDictUnfolder(self.p1) - unfolded = unfolder.unfold() - - for it1, it2 in zip(unfolded, self.output_p1): - self.assertEqual(it1, it2) - - def test_nested_dict_unfolder_p2(self): - unfolder = NestedDictUnfolder(self.p2) - unfolded = unfolder.unfold() - - for it1, it2 in zip(unfolded, self.output_p2): - self.assertEqual(it1, it2) - - def test_nested_dict_unfolder_p3(self): - unfolder = NestedDictUnfolder(self.p3) - unfolded = unfolder.unfold() - for it1, it2 in zip(unfolded, self.output_p3): - self.assertEqual(it1, it2) - - - -if __name__ == '__main__': - unittest.main() - diff --git a/hyppopy/tests/test_projectmanager.py b/hyppopy/tests/test_projectmanager.py deleted file mode 100644 index 61412e9..0000000 --- a/hyppopy/tests/test_projectmanager.py +++ /dev/null @@ -1,135 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. 
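# ---------------------------------------------------------------------------
# The fixtures above amount to a cartesian product over the leaf lists; for
# the flat (category-free) case this sketch with itertools reproduces
# output_p1 exactly (the nested categorical cases need the extra tree walk
# that NestedDictUnfolder implements):

import itertools

def unfold(space):
    keys = sorted(space)  # deterministic order: 'uni1' before 'uni2'
    for combo in itertools.product(*(space[k] for k in keys)):
        yield dict(zip(keys, combo))

assert list(unfold({"uni1": [1, 2], "uni2": [11, 12]})) == [
    {"uni1": 1, "uni2": 11}, {"uni1": 1, "uni2": 12},
    {"uni1": 2, "uni2": 11}, {"uni1": 2, "uni2": 12},
]
# ---------------------------------------------------------------------------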
-# -# Author: Sven Wanner (s.wanner@dkfz.de) - -import os -import tempfile -import unittest -from hyppopy.projectmanager import ProjectManager - - -DATA_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") - - -class ProjectManagerTestSuite(unittest.TestCase): - - def setUp(self): - self.config = { - "hyperparameter": { - "C": { - "domain": "uniform", - "data": [0, 20], - "type": "float" - }, - "gamma": { - "domain": "uniform", - "data": [0.0001, 20.0], - "type": "float" - }, - "kernel": { - "domain": "categorical", - "data": ["linear", "sigmoid", "poly", "rbf"], - "type": "str" - }, - "decision_function_shape": { - "domain": "categorical", - "data": ["ovo", "ovr"], - "type": "str" - } - }, - "settings": { - "solver_plugin": { - "max_iterations": 300, - "use_plugin": "hyperopt", - "output_dir": os.path.join(tempfile.gettempdir(), 'results') - }, - "custom": { - "the_answer": 42 - } - }} - - def test_read_attrs(self): - ProjectManager.read_config(os.path.join(DATA_PATH, *('Titanic', 'rf_config.xml'))) - self.assertEqual(ProjectManager.data_name, 'train_cleaned.csv') - self.assertEqual(ProjectManager.labels_name, 'Survived') - self.assertEqual(ProjectManager.max_iterations, 3) - self.assertEqual(ProjectManager.use_plugin, 'optunity') - - hp = ProjectManager.get_hyperparameter() - self.assertTrue("n_estimators" in hp.keys()) - self.assertTrue("domain" in hp["n_estimators"].keys()) - self.assertTrue("data" in hp["n_estimators"].keys()) - self.assertTrue("type" in hp["n_estimators"].keys()) - self.assertEqual(hp["n_estimators"]["domain"], "uniform") - self.assertEqual(hp["n_estimators"]["type"], "int") - self.assertEqual(hp["n_estimators"]["data"], [3, 200]) - - self.assertTrue("max_depth" in hp.keys()) - self.assertTrue("domain" in hp["max_depth"].keys()) - self.assertTrue("data" in hp["max_depth"].keys()) - self.assertTrue("type" in hp["max_depth"].keys()) - self.assertEqual(hp["max_depth"]["domain"], "uniform") - self.assertEqual(hp["max_depth"]["type"], "int") - self.assertEqual(hp["max_depth"]["data"], [3, 50]) - - self.assertTrue("criterion" in hp.keys()) - self.assertTrue("domain" in hp["criterion"].keys()) - self.assertTrue("data" in hp["criterion"].keys()) - self.assertTrue("type" in hp["criterion"].keys()) - self.assertEqual(hp["criterion"]["domain"], "categorical") - self.assertEqual(hp["criterion"]["type"], "str") - self.assertEqual(hp["criterion"]["data"], ["gini", "entropy"]) - - def test_set_attrs(self): - self.assertTrue(ProjectManager.set_config(self.config)) - self.assertEqual(ProjectManager.max_iterations, 300) - self.assertEqual(ProjectManager.use_plugin, 'hyperopt') - self.assertEqual(ProjectManager.the_answer, 42) - - hp = ProjectManager.get_hyperparameter() - self.assertTrue("C" in hp.keys()) - self.assertTrue("domain" in hp["C"].keys()) - self.assertTrue("data" in hp["C"].keys()) - self.assertTrue("type" in hp["C"].keys()) - self.assertEqual(hp["C"]["domain"], "uniform") - self.assertEqual(hp["C"]["type"], "float") - self.assertEqual(hp["C"]["data"], [0, 20]) - - self.assertTrue("gamma" in hp.keys()) - self.assertTrue("domain" in hp["gamma"].keys()) - self.assertTrue("data" in hp["gamma"].keys()) - self.assertTrue("type" in hp["gamma"].keys()) - self.assertEqual(hp["gamma"]["domain"], "uniform") - self.assertEqual(hp["gamma"]["type"], "float") - self.assertEqual(hp["gamma"]["data"], [0.0001, 20.0]) - - self.assertTrue("kernel" in hp.keys()) - self.assertTrue("domain" in hp["kernel"].keys()) - self.assertTrue("data" in hp["kernel"].keys()) - 
self.assertTrue("type" in hp["kernel"].keys())
-        self.assertEqual(hp["kernel"]["domain"], "categorical")
-        self.assertEqual(hp["kernel"]["type"], "str")
-        self.assertEqual(hp["kernel"]["data"], ["linear", "sigmoid", "poly", "rbf"])
-
-        self.assertTrue("decision_function_shape" in hp.keys())
-        self.assertTrue("domain" in hp["decision_function_shape"].keys())
-        self.assertTrue("data" in hp["decision_function_shape"].keys())
-        self.assertTrue("type" in hp["decision_function_shape"].keys())
-        self.assertEqual(hp["decision_function_shape"]["domain"], "categorical")
-        self.assertEqual(hp["decision_function_shape"]["type"], "str")
-        self.assertEqual(hp["decision_function_shape"]["data"], ["ovo", "ovr"])
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/hyppopy/tests/test_settings_plugins.py b/hyppopy/tests/test_settings_plugins.py
deleted file mode 100644
index 8aec1c7..0000000
--- a/hyppopy/tests/test_settings_plugins.py
+++ /dev/null
@@ -1,136 +0,0 @@
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical and Biological Informatics.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
-#
-# Author: Sven Wanner (s.wanner@dkfz.de)
-
-import unittest
-import numpy as np
-
-from hyppopy.plugins.gridsearch_settings_plugin import gridsearch_SettingsParticle
-from hyppopy.plugins.gridsearch_settings_plugin import gridsearch_Settings
-
-
-class GridsearchSettingsTestSuite(unittest.TestCase):
-
-    def setUp(self):
-        self.hp = {
-            'UniformFloat': {
-                'domain': 'uniform',
-                'data': [0, 1, 10],
-                'type': 'float',
-            },
-            'UniformInt': {
-                'domain': 'uniform',
-                'data': [0, 7, 10],
-                'type': 'int',
-            },
-            'NormalFloat': {
-                'domain': 'normal',
-                'data': [0, 1, 10],
-                'type': 'float',
-            },
-            'NormalInt': {
-                'domain': 'normal',
-                'data': [0, 10, 10],
-                'type': 'int',
-            },
-            'LogFloat': {
-                'domain': 'loguniform',
-                'data': [0.01, np.e, 10],
-                'type': 'float',
-            },
-            'LogInt': {
-                'domain': 'loguniform',
-                'data': [0, 1000000, 10],
-                'type': 'int',
-            },
-            'CategoricalStr': {
-                'domain': 'categorical',
-                'data': ['a', 'b'],
-                'type': 'str',
-            },
-            'CategoricalInt': {
-                'domain': 'categorical',
-                'data': [0, 1],
-                'type': 'int',
-            }
-        }
-
-        self.truth = {
-            'UniformFloat': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
-            'UniformInt': [0, 1, 2, 3, 4, 5, 6, 7, 8],
-            'NormalFloat': [0.0, 0.2592443381276233, 0.3673134565097225, 0.4251586871937128, 0.4649150940720099, 0.5,
-                            0.5350849059279901, 0.5748413128062873, 0.6326865434902775, 0.7407556618723767, 1.0],
-            'NormalInt': [0, 3, 4, 5, 6, 7, 10],
-            'LogFloat': [0.010000000000000004, 0.017515778645640943, 0.030680250156309114, 0.053738847053080116,
-                         0.0941277749653705, 0.16487212707001322, 0.28878636825943366, 0.5058318102310787,
-                         0.8860038019931427, 1.551904647490817, 2.7182818284590575],
-            'LogInt': [0, 2, 1259, 1000000],
-            'CategoricalStr': ['a', 'b'],
-            'CategoricalInt': [0, 1]
-        }
-
-    def test_gridsearch_settings(self):
-        gss = gridsearch_Settings()
-        gss.set_hyperparameter(self.hp)
-        res = gss.get_hyperparameter()
-        self.assertTrue('CategoricalInt' in res.keys())
-        self.assertTrue(len(res) == 1)
-        self.assertTrue(0 in res['CategoricalInt'].keys())
-        self.assertTrue(1 in res['CategoricalInt'].keys())
-        self.assertTrue(len(res['CategoricalInt']) == 2)
-        self.assertTrue('a' in res['CategoricalInt'][0]['CategoricalStr'].keys())
-        self.assertTrue('b' in res['CategoricalInt'][0]['CategoricalStr'].keys())
-        self.assertTrue(len(res['CategoricalInt'][0]['CategoricalStr']) == 2)
-        self.assertTrue('a' in res['CategoricalInt'][1]['CategoricalStr'].keys())
-        self.assertTrue('b' in res['CategoricalInt'][1]['CategoricalStr'].keys())
-        self.assertTrue(len(res['CategoricalInt'][1]['CategoricalStr']) == 2)
-
-        def check_truth(input_dict):
-            for key, value in self.truth.items():
-                if not key.startswith('Categorical'):
-                    self.assertTrue(key in input_dict.keys())
-                    for n, v in enumerate(self.truth[key]):
-                        self.assertAlmostEqual(v, input_dict[key][n])
-
-        check_truth(res['CategoricalInt'][0]['CategoricalStr']['a'])
-        check_truth(res['CategoricalInt'][1]['CategoricalStr']['a'])
-        check_truth(res['CategoricalInt'][0]['CategoricalStr']['b'])
-        check_truth(res['CategoricalInt'][1]['CategoricalStr']['b'])
-
-    def test_gridsearch_particle(self):
-        for name, data in self.hp.items():
-            gsp = gridsearch_SettingsParticle(name=name,
-                                              domain=data['domain'],
-                                              dtype=data['type'],
-                                              data=data['data'])
-            data = gsp.get()
-            for n in range(len(self.truth[name])):
-                self.assertAlmostEqual(data[n], self.truth[name][n])
-
-    def tearDown(self):
-        pass
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/hyppopy/tests/test_solver_factory.py b/hyppopy/tests/test_solver_factory.py
deleted file mode 100644
index b605055..0000000
--- a/hyppopy/tests/test_solver_factory.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical and Biological Informatics.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
-# -# Author: Sven Wanner (s.wanner@dkfz.de) - -import os -import unittest - -from sklearn.svm import SVC -from sklearn import datasets -from sklearn.model_selection import cross_val_score -from sklearn.model_selection import train_test_split - -from hyppopy.solverfactory import SolverFactory -from hyppopy.projectmanager import ProjectManager - -from hyppopy.globals import TESTDATA_DIR -TESTPARAMFILE = os.path.join(TESTDATA_DIR, 'iris_svc_parameter') - -from hyppopy.deepdict import DeepDict - - -class SolverFactoryTestSuite(unittest.TestCase): - - def setUp(self): - iris = datasets.load_iris() - X, X_test, y, y_test = train_test_split(iris.data, iris.target, test_size=0.1, random_state=42) - self.my_IRIS_dta = [X, y] - - def test_solver_loading(self): - names = SolverFactory.list_solver() - self.assertTrue("hyperopt" in names) - self.assertTrue("optunity" in names) - - def test_iris_solver_execution(self): - - def my_SVC_loss_func(data, params): - clf = SVC(**params) - return -cross_val_score(clf, data[0], data[1], cv=3).mean() - - ProjectManager.read_config(TESTPARAMFILE + '.xml') - solver = SolverFactory.get_solver('optunity') - solver.set_data(self.my_IRIS_dta) - solver.set_loss_function(my_SVC_loss_func) - solver.run() - - ProjectManager.read_config(TESTPARAMFILE + '.json') - solver = SolverFactory.get_solver('hyperopt') - solver.set_data(self.my_IRIS_dta) - solver.set_loss_function(my_SVC_loss_func) - solver.run() - - def test_create_solver_from_settings_directly(self): - - def my_SVC_loss_func(data, params): - clf = SVC(**params) - return -cross_val_score(clf, data[0], data[1], cv=3).mean() - - solver = SolverFactory.from_settings(TESTPARAMFILE + '.xml') - self.assertEqual(solver.name, "optunity") - solver.set_data(self.my_IRIS_dta) - solver.set_loss_function(my_SVC_loss_func) - solver.run() - - solver = SolverFactory.from_settings(TESTPARAMFILE + '.json') - self.assertEqual(solver.name, "hyperopt") - solver.set_data(self.my_IRIS_dta) - solver.set_loss_function(my_SVC_loss_func) - solver.run() - - dd = DeepDict(TESTPARAMFILE + '.json') - solver = SolverFactory.from_settings(dd) - self.assertEqual(solver.name, "hyperopt") - solver.set_data(self.my_IRIS_dta) - solver.set_loss_function(my_SVC_loss_func) - solver.run() - - solver = SolverFactory.from_settings(dd.data) - self.assertEqual(solver.name, "hyperopt") - solver.set_data(self.my_IRIS_dta) - solver.set_loss_function(my_SVC_loss_func) - solver.run() - - -if __name__ == '__main__': - unittest.main() - diff --git a/hyppopy/tests/test_usecases.py b/hyppopy/tests/test_usecases.py deleted file mode 100644 index cc727a4..0000000 --- a/hyppopy/tests/test_usecases.py +++ /dev/null @@ -1,217 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. 
-# -# Author: Sven Wanner (s.wanner@dkfz.de) - -import os -import shutil -import unittest -import tempfile -import numpy as np -from sklearn.svm import SVC -from sklearn.metrics import accuracy_score -from sklearn.datasets import load_breast_cancer -from sklearn.ensemble import AdaBoostClassifier -from sklearn.neighbors import KNeighborsClassifier -from sklearn.ensemble import RandomForestClassifier -from sklearn.model_selection import train_test_split - -from hyppopy.projectmanager import ProjectManager -from hyppopy.workflows.svc_usecase.svc_usecase import svc_usecase -from hyppopy.workflows.knc_usecase.knc_usecase import knc_usecase -from hyppopy.workflows.adaboost_usecase.adaboost_usecase import adaboost_usecase -from hyppopy.workflows.randomforest_usecase.randomforest_usecase import randomforest_usecase - - -DATA_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") - - -class ProjectManagerTestSuite(unittest.TestCase): - - def setUp(self): - breast_cancer_data = load_breast_cancer() - x = breast_cancer_data.data - y = breast_cancer_data.target - x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15, random_state=23) - - self.root = os.path.join(tempfile.gettempdir(), 'test_data') - #if os.path.isdir(self.root): - #shutil.rmtree(self.root) - if not os.path.isdir(self.root): - os.makedirs(self.root) - - x_train_fname = os.path.join(self.root, 'x_train.npy') - y_train_fname = os.path.join(self.root, 'y_train.npy') - np.save(x_train_fname, x_train) - np.save(y_train_fname, y_train) - - self.train = [x_train, y_train] - self.test = [x_test, y_test] - self.config = { - "hyperparameter": {}, - "settings": { - "solver_plugin": { - "max_iterations": 3, - "use_plugin": "hyperopt", - "output_dir": os.path.join(self.root, 'test_results') - }, - "custom": { - "data_path": self.root, - "data_name": "x_train.npy", - "labels_name": "y_train.npy" - } - }} - - # def test_svc_usecase(self): - # hyperparameter = { - # "C": { - # "domain": "uniform", - # "data": [0.0001, 300.0], - # "type": "float" - # }, - # "kernel": { - # "domain": "categorical", - # "data": ["linear", "poly", "rbf"], - # "type": "str" - # } - # } - # - # self.config["hyperparameter"] = hyperparameter - # ProjectManager.set_config(self.config) - # uc = svc_usecase() - # uc.run(save=True) - # res, best = uc.get_results() - # print("="*30) - # print(best) - # print("=" * 30) - # clf = SVC(C=best['C'], kernel=hyperparameter['kernel']['data'][best['kernel']]) - # clf.fit(self.train[0], self.train[1]) - # train_predictions = clf.predict(self.test[0]) - # acc = accuracy_score(self.test[1], train_predictions) - # print("Accuracy: {:.4%}".format(acc)) - # print("=" * 30) - - def test_randomforest_usecase(self): - hyperparameter = { - "n_estimators": { - "domain": "uniform", - "data": [1, 500], - "type": "int" - }, - "criterion": { - "domain": "categorical", - "data": ["gini", "entropy"], - "type": "str" - }, - "max_depth": { - "domain": "uniform", - "data": [1, 50], - "type": "int" - }, - "max_features": { - "domain": "categorical", - "data": ["auto", "sqrt", "log2"], - "type": "str" - } - } - - self.config["hyperparameter"] = hyperparameter - ProjectManager.set_config(self.config) - uc = randomforest_usecase() - uc.run(save=False) - res, best = uc.get_results() - print("=" * 30) - print(best) - print("=" * 30) - clf = RandomForestClassifier(n_estimators=best['n_estimators'], - criterion=hyperparameter['criterion']['data'][best['criterion']], - max_depth=best['max_depth'], - 
max_features=best['max_features']) - clf.fit(self.train[0], self.train[1]) - print("feature importance:\n", clf.feature_importances_) - train_predictions = clf.predict(self.test[0]) - acc = accuracy_score(self.test[1], train_predictions) - print("Accuracy: {:.4%}".format(acc)) - print("=" * 30) - - def test_adaboost_usecase(self): - hyperparameter = { - "n_estimators": { - "domain": "uniform", - "data": [1, 300], - "type": "int" - }, - "learning_rate": { - "domain": "loguniform", - "data": [0.01, 100], - "type": "float" - } - } - - self.config["hyperparameter"] = hyperparameter - ProjectManager.set_config(self.config) - uc = adaboost_usecase() - uc.run(save=True) - res, best = uc.get_results() - print("=" * 30) - print(best) - print("=" * 30) - clf = AdaBoostClassifier(n_estimators=best['n_estimators'], learning_rate=best['learning_rate']) - clf.fit(self.train[0], self.train[1]) - train_predictions = clf.predict(self.test[0]) - acc = accuracy_score(self.test[1], train_predictions) - print("Accuracy: {:.4%}".format(acc)) - print("=" * 30) - - def test_knc_usecase(self): - hyperparameter = { - "n_neighbors": { - "domain": "uniform", - "data": [1, 100], - "type": "int" - }, - "weights": { - "domain": "categorical", - "data": ["uniform", "distance"], - "type": "str" - }, - "algorithm": { - "domain": "categorical", - "data": ["auto", "ball_tree", "kd_tree", "brute"], - "type": "str" - } - } - - self.config["hyperparameter"] = hyperparameter - ProjectManager.set_config(self.config) - uc = knc_usecase() - uc.run(save=True) - res, best = uc.get_results() - print("=" * 30) - print(best) - print("=" * 30) - clf = KNeighborsClassifier(n_neighbors=best['n_neighbors'], - weights=hyperparameter['weights']['data'][best['weights']], - algorithm=hyperparameter['algorithm']['data'][best['algorithm']]) - clf.fit(self.train[0], self.train[1]) - train_predictions = clf.predict(self.test[0]) - acc = accuracy_score(self.test[1], train_predictions) - print("Accuracy: {:.4%}".format(acc)) - print("=" * 30) - - def tearDown(self): - pass - - -if __name__ == '__main__': - unittest.main() diff --git a/hyppopy/tests/test_virtualfunction.py b/hyppopy/tests/test_virtualfunction.py index ece1d39..acc2bad 100644 --- a/hyppopy/tests/test_virtualfunction.py +++ b/hyppopy/tests/test_virtualfunction.py @@ -1,117 +1,117 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. 
# # Author: Sven Wanner (s.wanner@dkfz.de) import os import unittest import numpy as np -from hyppopy.virtualfunction import VirtualFunction -from hyppopy.globals import TESTDATA_DIR +from ..VirtualFunction import VirtualFunction +from ..globals import TESTDATA_DIR class VirtualFunctionTestSuite(unittest.TestCase): def setUp(self): pass def test_imagereading(self): vfunc = VirtualFunction() vfunc.load_images(os.path.join(TESTDATA_DIR, 'functionsimulator')) self.assertTrue(isinstance(vfunc.data, np.ndarray)) self.assertEqual(vfunc.data.shape[0], 5) self.assertEqual(vfunc.data.shape[1], 512) gt = [0.83984375*5, 0.44140625*20-10, 0.25390625*20, 0.81640625*8-10, 0.67578125*2+2] for i in range(5): self.assertAlmostEqual(vfunc.data[i][0], gt[i]) gt = [[0, 1], [-10, 10], [0, 20], [-30, 5], [5, 10]] for i in range(5): self.assertEqual(vfunc.axis[i][0], gt[i][0]) self.assertEqual(vfunc.axis[i][1], gt[i][1]) def test_data_adding(self): gt = [[-10, 10], [-30, 5]] vfunc = VirtualFunction() dim0 = np.arange(0, 1.1, 0.1) dim1 = np.arange(1.0, -0.1, -0.1) vfunc.add_dimension(dim0, gt[0]) self.assertEqual(len(vfunc.data.shape), 2) self.assertEqual(vfunc.data.shape[0], 1) self.assertEqual(vfunc.data.shape[1], 11) vfunc.add_dimension(dim1, gt[1]) self.assertEqual(vfunc.data.shape[0], 2) self.assertEqual(vfunc.data.shape[1], 11) for n in range(11): self.assertAlmostEqual(dim0[n], vfunc.data[0, n]) self.assertAlmostEqual(dim1[n], vfunc.data[1, n]) for i in range(2): self.assertEqual(vfunc.axis[i][0], gt[i][0]) self.assertEqual(vfunc.axis[i][1], gt[i][1]) def test_sampling(self): vfunc = VirtualFunction() vfunc.load_images(os.path.join(TESTDATA_DIR, 'functionsimulator')) ranges = [[0, 1], [-10, 10], [0, 20], [-30, 5], [5, 10]] x_ranges = [] for r in ranges: dr = (r[1]-r[0])/512.0 x_ranges.append(np.arange(r[0], r[1], dr)) data = [[], [], [], [], []] for n in range(x_ranges[0].shape[0]): x = [x_ranges[0][n], x_ranges[1][n], x_ranges[2][n], x_ranges[3][n], x_ranges[4][n]] f = vfunc(*x) for i in range(5): data[i].append(f[i]) sum = 0 for i in range(512): for n in range(5): sum += vfunc.data[n][i]-data[n][i] self.assertTrue(sum < 18) def test_minima(self): vfunc = VirtualFunction() vfunc.load_images(os.path.join(TESTDATA_DIR, 'functionsimulator')) minima = vfunc.minima() gt = [[[0.7265625], 0.48828125], [[-4.0234375], -7.890625], [[2.265625], 0.859375], [ [-17.421875, -17.353515625, -17.28515625, -17.216796875, -17.1484375, -17.080078125, -17.01171875, -16.943359375, -16.875, -16.806640625, -16.73828125, -16.669921875, -16.6015625, -16.533203125, -16.46484375, -16.396484375, -16.328125, -16.259765625, -16.19140625, -16.123046875, -16.0546875, -15.986328125, -15.91796875, -15.849609375, -15.78125, -15.712890625, -15.64453125, -15.576171875, -15.5078125, -15.439453125, -15.37109375, -15.302734375, -15.234375, -15.166015625, -15.09765625, -15.029296875, -14.9609375, -14.892578125, -14.82421875, -14.755859375, -14.6875, -14.619140625, -14.55078125, -14.482421875, -14.4140625, -14.345703125, -14.27734375, -14.208984375, -14.140625, -14.072265625, -14.00390625, -13.935546875, -13.8671875, -13.798828125, -13.73046875, -13.662109375, -13.59375, -13.525390625, -13.45703125, -13.388671875, -13.3203125, -13.251953125, -13.18359375, -13.115234375, -13.046875, -12.978515625, -12.91015625, -12.841796875, -12.7734375, -12.705078125, -12.63671875, -12.568359375, -12.5, -12.431640625, -12.36328125, -12.294921875, -12.2265625, -12.158203125, -12.08984375, -12.021484375, -11.953125, -11.884765625, -11.81640625, -11.748046875, 
-11.6796875, -11.611328125, -11.54296875, -11.474609375, -11.40625, -11.337890625, -11.26953125, -11.201171875, -11.1328125, -11.064453125, -10.99609375, -10.927734375, -10.859375, -10.791015625, -10.72265625, -10.654296875, -10.5859375, -10.517578125, -10.44921875, -10.380859375, -10.3125, -10.244140625, -10.17578125, -10.107421875, -10.0390625, -9.970703125, -9.90234375, -9.833984375, -9.765625, -9.697265625, -9.62890625, -9.560546875, -9.4921875, -9.423828125, -9.35546875, -9.287109375, -9.21875, -9.150390625, -9.08203125, -9.013671875, -8.9453125, -8.876953125, -8.80859375, -8.740234375, -8.671875, -8.603515625, -8.53515625, -8.466796875, -8.3984375, -8.330078125, -8.26171875, -8.193359375, -8.125, -8.056640625, -7.98828125, -7.919921875, -7.8515625, -7.783203125, -7.71484375, -7.646484375, -7.578125, -7.509765625, -7.44140625, -7.373046875, -7.3046875, -7.236328125, -7.16796875, -7.099609375, -7.03125], -9.125], [[5.44921875, 5.458984375, 5.46875, 5.478515625, 5.48828125, 5.498046875, 5.5078125, 5.517578125, 5.52734375], 2.09375]] self.assertAlmostEqual(minima, gt) if __name__ == '__main__': unittest.main() diff --git a/hyppopy/tests/test_workflows.py b/hyppopy/tests/test_workflows.py deleted file mode 100644 index 2866495..0000000 --- a/hyppopy/tests/test_workflows.py +++ /dev/null @@ -1,151 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. -# -# Author: Sven Wanner (s.wanner@dkfz.de) - -import os -import unittest -from hyppopy.globals import TESTDATA_DIR -IRIS_DATA = os.path.join(TESTDATA_DIR, 'Iris') -TITANIC_DATA = os.path.join(TESTDATA_DIR, 'Titanic') - -from hyppopy.projectmanager import ProjectManager -from hyppopy.workflows.svc_usecase.svc_usecase import svc_usecase -from hyppopy.workflows.knc_usecase.knc_usecase import knc_usecase -from hyppopy.workflows.adaboost_usecase.adaboost_usecase import adaboost_usecase -from hyppopy.workflows.randomforest_usecase.randomforest_usecase import randomforest_usecase - - -class WorkflowTestSuite(unittest.TestCase): - - def setUp(self): - self.results = [] - - def test_workflow_svc_on_iris_from_xml(self): - ProjectManager.read_config(os.path.join(IRIS_DATA, 'svc_config.xml')) - uc = svc_usecase() - uc.run(False) - res, best = uc.get_results() - self.assertTrue('C' in res.columns) - self.assertTrue('gamma' in res.columns) - self.assertTrue('kernel' in res.columns) - self.assertEqual(len(best.keys()), 3) - - def test_workflow_svc_on_iris_from_json(self): - ProjectManager.read_config(os.path.join(IRIS_DATA, 'svc_config.json')) - uc = svc_usecase() - uc.run(False) - res, best = uc.get_results() - self.assertTrue('C' in res.columns) - self.assertTrue('gamma' in res.columns) - self.assertTrue('kernel' in res.columns) - self.assertEqual(len(best.keys()), 3) - - def test_workflow_rf_on_iris_from_xml(self): - ProjectManager.read_config(os.path.join(IRIS_DATA, 'rf_config.xml')) - uc = randomforest_usecase() - uc.run(False) - res, best = uc.get_results() - self.assertTrue('n_estimators' in res.columns) - self.assertTrue('criterion' in res.columns) - self.assertTrue('max_depth' in res.columns) - self.assertEqual(len(best.keys()), 3) - - def test_workflow_rf_on_iris_from_json(self): - ProjectManager.read_config(os.path.join(IRIS_DATA, 'rf_config.json')) 
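# ---------------------------------------------------------------------------
# The workflow tests that follow all share one pattern; as a stand-alone
# sketch (the config path is illustrative, any *_usecase class fits):

from hyppopy.projectmanager import ProjectManager
from hyppopy.workflows.randomforest_usecase.randomforest_usecase import randomforest_usecase

ProjectManager.read_config("Iris/rf_config.json")  # hypothetical config file
uc = randomforest_usecase()
uc.run(False)                 # False: do not write result files
res, best = uc.get_results()  # res: DataFrame of trials, best: dict of optima
# ---------------------------------------------------------------------------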
- uc = randomforest_usecase() - uc.run(False) - res, best = uc.get_results() - self.assertTrue('n_estimators' in res.columns) - self.assertTrue('criterion' in res.columns) - self.assertTrue('max_depth' in res.columns) - self.assertEqual(len(best.keys()), 3) - - def test_workflow_rf_on_iris_from_grid_xml(self): - ProjectManager.read_config(os.path.join(IRIS_DATA, 'rf_grid_config.xml')) - uc = randomforest_usecase() - uc.run(False) - res, best = uc.get_results() - self.assertTrue('n_estimators' in res.columns) - self.assertTrue('criterion' in res.columns) - self.assertTrue('max_depth' in res.columns) - self.assertEqual(len(best.keys()), 3) - - # def test_workflow_svc_on_titanic_from_xml(self): - # ProjectManager.read_config(os.path.join(TITANIC_DATA, 'svc_config.xml')) - # uc = svc_usecase() - # uc.run(False) - # res, best = uc.get_results() - # self.assertTrue('C' in res.columns) - # self.assertTrue('gamma' in res.columns) - # self.assertTrue('kernel' in res.columns) - # self.assertEqual(len(best.keys()), 3) - # - # def test_workflow_svc_on_titanic_from_json(self): - # ProjectManager.read_config(os.path.join(TITANIC_DATA, 'svc_config.json')) - # uc = svc_usecase() - # uc.run(False) - # res, best = uc.get_results() - # self.assertTrue('C' in res.columns) - # self.assertTrue('gamma' in res.columns) - # self.assertTrue('kernel' in res.columns) - # self.assertEqual(len(best.keys()), 3) - - def test_workflow_rf_on_titanic_from_xml(self): - ProjectManager.read_config(os.path.join(TITANIC_DATA, 'rf_config.xml')) - uc = randomforest_usecase() - uc.run(False) - res, best = uc.get_results() - self.assertTrue('n_estimators' in res.columns) - self.assertTrue('criterion' in res.columns) - self.assertTrue('max_depth' in res.columns) - self.assertEqual(len(best.keys()), 3) - - def test_workflow_rf_on_titanic_from_json(self): - ProjectManager.read_config(os.path.join(TITANIC_DATA, 'rf_config.json')) - uc = randomforest_usecase() - uc.run(False) - res, best = uc.get_results() - self.assertTrue('n_estimators' in res.columns) - self.assertTrue('criterion' in res.columns) - self.assertTrue('max_depth' in res.columns) - self.assertEqual(len(best.keys()), 3) - - def test_workflow_adaboost_on_titanic_from_xml(self): - ProjectManager.read_config(os.path.join(TITANIC_DATA, 'adaboost_config.xml')) - uc = adaboost_usecase() - uc.run(False) - res, best = uc.get_results() - self.assertTrue('n_estimators' in res.columns) - self.assertTrue('learning_rate' in res.columns) - self.assertEqual(len(best.keys()), 2) - - def test_workflow_knc_on_titanic_from_xml(self): - ProjectManager.read_config(os.path.join(TITANIC_DATA, 'knc_config.xml')) - uc = knc_usecase() - uc.run(False) - res, best = uc.get_results() - self.assertTrue('n_neighbors' in res.columns) - self.assertTrue('leaf_size' in res.columns) - self.assertTrue('weights' in res.columns) - self.assertEqual(len(best.keys()), 4) - - def tearDown(self): - print("") - for r in self.results: - print(r) - - -if __name__ == '__main__': - unittest.main() - diff --git a/hyppopy/workflows/__init__.py b/hyppopy/workflows/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/hyppopy/workflows/adaboost_usecase/__init__.py b/hyppopy/workflows/adaboost_usecase/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/hyppopy/workflows/adaboost_usecase/adaboost_usecase.py b/hyppopy/workflows/adaboost_usecase/adaboost_usecase.py deleted file mode 100644 index 5b904ca..0000000 --- a/hyppopy/workflows/adaboost_usecase/adaboost_usecase.py +++ /dev/null @@ 
-1,36 +0,0 @@
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical and Biological Informatics.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
-#
-# Author: Sven Wanner (s.wanner@dkfz.de)
-
-from sklearn.ensemble import AdaBoostClassifier
-from sklearn.model_selection import cross_val_score
-
-from hyppopy.projectmanager import ProjectManager
-from hyppopy.workflows.workflowbase import WorkflowBase
-from hyppopy.workflows.dataloader.simpleloader import SimpleDataLoader
-
-
-class adaboost_usecase(WorkflowBase):
-
-    def setup(self, **kwargs):
-        dl = SimpleDataLoader()
-        dl.start(path=ProjectManager.data_path,
-                 data_name=ProjectManager.data_name,
-                 labels_name=ProjectManager.labels_name)
-        self.solver.set_data(dl.data)
-
-    def blackbox_function(self, data, params):
-        clf = AdaBoostClassifier(n_estimators=params['n_estimators'],
-                                 learning_rate=params['learning_rate'])
-        return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean()
diff --git a/hyppopy/workflows/dataloader/__init__.py b/hyppopy/workflows/dataloader/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/hyppopy/workflows/dataloader/dataloaderbase.py b/hyppopy/workflows/dataloader/dataloaderbase.py
deleted file mode 100644
index 83cd117..0000000
--- a/hyppopy/workflows/dataloader/dataloaderbase.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical and Biological Informatics.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
-#
-# Author: Sven Wanner (s.wanner@dkfz.de)
-
-import abc
-
-
-class DataLoaderBase(object):
-
-    def __init__(self):
-        self.data = None
-
-    def start(self, **kwargs):
-        self.read(**kwargs)
-        if self.data is None:
-            raise AttributeError("data is empty, did you miss assigning it while implementing read()?")
-        self.preprocess(**kwargs)
-
-    @abc.abstractmethod
-    def read(self, **kwargs):
-        raise NotImplementedError("the read method has to be implemented in classes derived from DataLoaderBase")
-
-    @abc.abstractmethod
-    def preprocess(self, **kwargs):
-        pass
diff --git a/hyppopy/workflows/dataloader/simpleloader.py b/hyppopy/workflows/dataloader/simpleloader.py
deleted file mode 100644
index c2fab98..0000000
--- a/hyppopy/workflows/dataloader/simpleloader.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical and Biological Informatics.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
-#
-# Author: Sven Wanner (s.wanner@dkfz.de)
-
-import os
-import numpy as np
-import pandas as pd
-
-from hyppopy.workflows.dataloader.dataloaderbase import DataLoaderBase
-
-
-class SimpleDataLoader(DataLoaderBase):
-
-    def read(self, **kwargs):
-        if kwargs['data_name'].endswith(".npy"):
-            if not kwargs['labels_name'].endswith(".npy"):
-                raise IOError("Expected both data_name and labels_name to be of type .npy!")
-            self.data = [np.load(os.path.join(kwargs['path'], kwargs['data_name'])), np.load(os.path.join(kwargs['path'], kwargs['labels_name']))]
-        elif kwargs['data_name'].endswith(".csv"):
-            try:
-                dataset = pd.read_csv(os.path.join(kwargs['path'], kwargs['data_name']))
-                y = dataset[kwargs['labels_name']].values
-                X = dataset.drop([kwargs['labels_name']], axis=1).values
-                self.data = [X, y]
-            except Exception as e:
-                print("Precondition violation, this use case expects data_name to be a "
-                      "csv file and labels_name the name of a column in that csv table! ({})".format(e))
-        else:
-            raise NotImplementedError("This combination of data_name and labels_name "
-                                      "is not yet supported, feel free to add it")
diff --git a/hyppopy/workflows/dataloader/unetloader.py b/hyppopy/workflows/dataloader/unetloader.py
deleted file mode 100644
index d103726..0000000
--- a/hyppopy/workflows/dataloader/unetloader.py
+++ /dev/null
@@ -1,159 +0,0 @@
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical and Biological Informatics.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
-#
-# Author: Sven Wanner (s.wanner@dkfz.de)
-
-import os
-import pickle
-import numpy as np
-from medpy.io import load
-from collections import defaultdict
-from .dataloaderbase import DataLoaderBase
-
-
-class UnetDataLoader(DataLoaderBase):
-
-    def read(self, **kwargs):
-        # preprocess data if not already done
-        root_dir = os.path.join(kwargs['data_path'], kwargs['data_name'])
-        split_dir = os.path.join(kwargs['data_path'], kwargs['split_dir'])
-        preproc_dir = os.path.join(root_dir, 'preprocessed')
-        if not os.path.isdir(preproc_dir):
-            self.preprocess_data(root=root_dir,
-                                 image_dir=kwargs['image_dir'],
-                                 labels_dir=kwargs['labels_dir'],
-                                 output_dir=preproc_dir,
-                                 classes=kwargs['num_classes'])
-            self.data = self.create_splits(output_dir=split_dir, image_dir=preproc_dir)
-        else:
-            with open(os.path.join(split_dir, "splits.pkl"), 'rb') as f:
-                self.data = pickle.load(f)
-
-    def subfiles(self, folder, join=True, prefix=None, suffix=None, sort=True):
-        if join:
-            l = os.path.join
-        else:
-            l = lambda x, y: y
-        res = [l(folder, i) for i in os.listdir(folder) if os.path.isfile(os.path.join(folder, i))
-               and (prefix is None or i.startswith(prefix))
-               and (suffix is None or i.endswith(suffix))]
-        if sort:
-            res.sort()
-        return res
-
-    def reshape(self, orig_img, append_value=-1024, new_shape=(512, 512, 512)):
-        reshaped_image = np.zeros(new_shape)
-        reshaped_image[...] = append_value
-        x_offset = 0
-        y_offset = 0  # (new_shape[1] - orig_img.shape[1]) // 2
-        z_offset = 0  # (new_shape[2] - orig_img.shape[2]) // 2
-
-        reshaped_image[x_offset:orig_img.shape[0] + x_offset, y_offset:orig_img.shape[1] + y_offset,
-                       z_offset:orig_img.shape[2] + z_offset] = orig_img
-
-        return reshaped_image
-
-    def preprocess_data(self, root, image_dir, labels_dir, output_dir, classes):
-        image_dir = os.path.join(root, image_dir)
-        label_dir = os.path.join(root, labels_dir)
-        output_dir = os.path.join(root, output_dir)
-
-        if not os.path.exists(output_dir):
-            os.makedirs(output_dir)
-            print('Created ' + output_dir + ' ...')
-
-        class_stats = defaultdict(int)
-        total = 0
-
-        nii_files = self.subfiles(image_dir, suffix=".nii.gz", join=False)
-
-        for i in range(0, len(nii_files)):
-            if nii_files[i].startswith("._"):
-                nii_files[i] = nii_files[i][2:]
-
-        for f in nii_files:
-            image, _ = load(os.path.join(image_dir, f))
-            label, _ = load(os.path.join(label_dir, f.replace('_0000', '')))
-            print(f)
-
-            for i in range(classes):
-                class_stats[i] += np.sum(label == i)
-                total += np.sum(label == i)
-
-            image = (image - image.min()) / (image.max() - image.min())
-
-            image = self.reshape(image, append_value=0, new_shape=(64, 64, 64))
-            label = self.reshape(label, append_value=0, new_shape=(64, 64, 64))
-
-            result = np.stack((image, label))
-
-            np.save(os.path.join(output_dir, f.split('.')[0] + '.npy'), result)
-            print(f)
-
-        print(total)
-        for i in range(classes):
-            print(class_stats[i], class_stats[i] / total)
-
-    def create_splits(self, output_dir, image_dir):
-        npy_files = self.subfiles(image_dir, suffix=".npy", join=False)
-
-        trainset_size = len(npy_files)*50//100
-        valset_size = len(npy_files)*25//100
-        testset_size = len(npy_files)*25//100
-
-        splits = []
-        for split in range(0, 5):
-            image_list = npy_files.copy()
-            trainset = []
-            valset = []
-            testset = []
-            for i in range(0, trainset_size):
-                patient = np.random.choice(image_list)
-                image_list.remove(patient)
-                trainset.append(patient[:-4])
-            for i in range(0, valset_size):
-                patient = np.random.choice(image_list)
-                image_list.remove(patient)
-                valset.append(patient[:-4])
-            for i in range(0, testset_size):
-                patient = np.random.choice(image_list)
-                image_list.remove(patient)
-                testset.append(patient[:-4])
-            split_dict = dict()
-            split_dict['train'] = trainset
-            split_dict['val'] = valset
-            split_dict['test'] = testset
-
-            splits.append(split_dict)
-
-        with open(os.path.join(output_dir, 'splits.pkl'), 'wb') as f:
-            pickle.dump(splits, f)
-        return splits
-
-
-
-
diff --git a/hyppopy/workflows/gradientboost_usecase/__init__.py b/hyppopy/workflows/gradientboost_usecase/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/hyppopy/workflows/gradientboost_usecase/gradientboost_usecase.py b/hyppopy/workflows/gradientboost_usecase/gradientboost_usecase.py
deleted file mode 100644
index 65d8a7c..0000000
--- a/hyppopy/workflows/gradientboost_usecase/gradientboost_usecase.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical and Biological Informatics.
-# All rights reserved.
-# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. -# -# Author: Sven Wanner (s.wanner@dkfz.de) - - -from sklearn.ensemble import GradientBoostingClassifier -from sklearn.model_selection import cross_val_score - -from hyppopy.projectmanager import ProjectManager -from hyppopy.workflows.workflowbase import WorkflowBase -from hyppopy.workflows.dataloader.simpleloader import SimpleDataLoader - - -class gradientboost_usecase(WorkflowBase): - - def setup(self, **kwargs): - dl = SimpleDataLoader() - dl.start(path=ProjectManager.data_path, - data_name=ProjectManager.data_name, - labels_name=ProjectManager.labels_name) - self.solver.set_data(dl.data) - - def blackbox_function(self, data, params): - clf = GradientBoostingClassifier(**params) - return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean() diff --git a/hyppopy/workflows/imageregistration_usecase/__init__.py b/hyppopy/workflows/imageregistration_usecase/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/hyppopy/workflows/imageregistration_usecase/imageregistration_usecase.py b/hyppopy/workflows/imageregistration_usecase/imageregistration_usecase.py deleted file mode 100644 index 29d811a..0000000 --- a/hyppopy/workflows/imageregistration_usecase/imageregistration_usecase.py +++ /dev/null @@ -1,52 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. 
-#
-# Author:
-
-#------------------------------------------------------
-# these imports are required, don't remove them
-from hyppopy.projectmanager import ProjectManager
-from hyppopy.workflows.workflowbase import WorkflowBase
-#------------------------------------------------------
-
-# import your external packages
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.model_selection import cross_val_score
-
-# import your custom DataLoader
-from hyppopy.workflows.dataloader.simpleloader import SimpleDataLoader  # This is a DataLoader class; create your own as needed
-
-
-class imageregistration_usecase(WorkflowBase):
-
-    def setup(self, **kwargs):
-        # here you create your own DataLoader instance
-        dl = SimpleDataLoader()
-        # call the start function of your DataLoader
-        dl.start(path=ProjectManager.data_path,
-                 data_name=ProjectManager.data_name,
-                 labels_name=ProjectManager.labels_name)
-        # pass the data to the solver
-        self.solver.set_data(dl.data)
-
-    def blackbox_function(self, data, params):
-        # converting numbers back to integers is an ugly hack that will be removed in the future
-        if "n_estimators" in params.keys():
-            params["n_estimators"] = int(round(params["n_estimators"]))
-
-        # do your training
-        clf = RandomForestClassifier(**params)
-        # compute your loss
-        loss = -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean()
-        # return the loss
-        return loss
diff --git a/hyppopy/workflows/knc_usecase/__init__.py b/hyppopy/workflows/knc_usecase/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/hyppopy/workflows/knc_usecase/knc_usecase.py b/hyppopy/workflows/knc_usecase/knc_usecase.py
deleted file mode 100644
index d59db98..0000000
--- a/hyppopy/workflows/knc_usecase/knc_usecase.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical and Biological Informatics.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
-#
-# Author: Sven Wanner (s.wanner@dkfz.de)
-
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.model_selection import cross_val_score
-
-from hyppopy.projectmanager import ProjectManager
-from hyppopy.workflows.workflowbase import WorkflowBase
-from hyppopy.workflows.dataloader.simpleloader import SimpleDataLoader
-
-
-class knc_usecase(WorkflowBase):
-
-    def setup(self, **kwargs):
-        dl = SimpleDataLoader()
-        dl.start(path=ProjectManager.data_path,
-                 data_name=ProjectManager.data_name,
-                 labels_name=ProjectManager.labels_name)
-        self.solver.set_data(dl.data)
-
-    def blackbox_function(self, data, params):
-        clf = KNeighborsClassifier(**params)
-        return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean()
diff --git a/hyppopy/workflows/randomforest_usecase/__init__.py b/hyppopy/workflows/randomforest_usecase/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/hyppopy/workflows/randomforest_usecase/randomforest_usecase.py b/hyppopy/workflows/randomforest_usecase/randomforest_usecase.py
deleted file mode 100644
index 392b9b7..0000000
--- a/hyppopy/workflows/randomforest_usecase/randomforest_usecase.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical and Biological Informatics.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
-#
-# Author: Sven Wanner (s.wanner@dkfz.de)
-
-
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.model_selection import cross_val_score
-
-from hyppopy.projectmanager import ProjectManager
-from hyppopy.workflows.workflowbase import WorkflowBase
-from hyppopy.workflows.dataloader.simpleloader import SimpleDataLoader
-
-
-class randomforest_usecase(WorkflowBase):
-
-    def setup(self, **kwargs):
-        dl = SimpleDataLoader()
-        dl.start(path=ProjectManager.data_path,
-                 data_name=ProjectManager.data_name,
-                 labels_name=ProjectManager.labels_name)
-        self.solver.set_data(dl.data)
-
-    def blackbox_function(self, data, params):
-        clf = RandomForestClassifier(**params)
-        return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean()
diff --git a/hyppopy/workflows/svc_usecase/__init__.py b/hyppopy/workflows/svc_usecase/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/hyppopy/workflows/svc_usecase/svc_usecase.py b/hyppopy/workflows/svc_usecase/svc_usecase.py
deleted file mode 100644
index f1ba78a..0000000
--- a/hyppopy/workflows/svc_usecase/svc_usecase.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical and Biological Informatics.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
-#
-# Author: Sven Wanner (s.wanner@dkfz.de)
-
-from sklearn.svm import SVC
-from sklearn.model_selection import cross_val_score
-
-from hyppopy.projectmanager import ProjectManager
-from hyppopy.workflows.workflowbase import WorkflowBase
-from hyppopy.workflows.dataloader.simpleloader import SimpleDataLoader
-
-
-class svc_usecase(WorkflowBase):
-
-    def setup(self, **kwargs):
-        dl = SimpleDataLoader()
-        dl.start(path=ProjectManager.data_path,
-                 data_name=ProjectManager.data_name,
-                 labels_name=ProjectManager.labels_name)
-        self.solver.set_data(dl.data)
-
-    def blackbox_function(self, data, params):
-        if 'C' not in params.keys():
-            print("Warning: missing parameter C, using default value 1.0!")
-            params['C'] = 1.0
-        if 'kernel' not in params.keys():
-            print("Warning: missing parameter kernel, using default value linear!")
-            params['kernel'] = 'linear'
-
-        if params['kernel'] == 'linear':
-            clf = SVC(kernel='linear', C=params['C'])
-        elif params['kernel'] == 'poly':
-            if 'degree' not in params.keys():
-                print("Warning: missing parameter degree, using default value 3!")
-                params['degree'] = 3
-            if 'coef0' not in params.keys():
-                print("Warning: missing parameter coef0, using default value 0.0!")
-                params['coef0'] = 0.0
-            clf = SVC(kernel='poly', C=params['C'], degree=params['degree'], coef0=params['coef0'])
-        elif params['kernel'] == 'rbf':
-            if 'gamma' not in params.keys():
-                print("Warning: missing parameter gamma, using default value 'scale'!")
-                params['gamma'] = 'scale'
-            clf = SVC(kernel='rbf', C=params['C'], gamma=params['gamma'])
-        else:
-            raise IOError("Unknown kernel function: {}".format(params['kernel']))
-
-        return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean()
-
diff --git a/hyppopy/workflows/unet_usecase/__init__.py b/hyppopy/workflows/unet_usecase/__init__.py
deleted file mode 100644
index
e69de29..0000000 diff --git a/hyppopy/workflows/unet_usecase/datasets/__init__.py b/hyppopy/workflows/unet_usecase/datasets/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/hyppopy/workflows/unet_usecase/datasets/data_loader.py b/hyppopy/workflows/unet_usecase/datasets/data_loader.py deleted file mode 100644 index 3666ad8..0000000 --- a/hyppopy/workflows/unet_usecase/datasets/data_loader.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright 2017 Division of Medical Image Computing, German Cancer Research Center (DKFZ) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from torch.utils.data import DataLoader, Dataset -from trixi.util.pytorchutils import set_seed - - -class WrappedDataset(Dataset): - def __init__(self, dataset, transform): - self.transform = transform - self.dataset = dataset - - self.is_indexable = False - if hasattr(self.dataset, "__getitem__") and not (hasattr(self.dataset, "use_next") and self.dataset.use_next is True): - self.is_indexable = True - - def __getitem__(self, index): - - if not self.is_indexable: - item = next(self.dataset) - else: - item = self.dataset[index] - item = self.transform(**item) - return item - - def __len__(self): - return int(self.dataset.num_batches) - - -class MultiThreadedDataLoader(object): - def __init__(self, data_loader, transform, num_processes, **kwargs): - - self.cntr = 1 - self.ds_wrapper = WrappedDataset(data_loader, transform) - - self.generator = DataLoader(self.ds_wrapper, batch_size=1, shuffle=False, sampler=None, batch_sampler=None, - num_workers=num_processes, pin_memory=True, drop_last=False, - worker_init_fn=self.get_worker_init_fn()) - - self.num_processes = num_processes - self.iter = None - - def get_worker_init_fn(self): - def init_fn(worker_id): - set_seed(worker_id + self.cntr) - - return init_fn - - def __iter__(self): - self.kill_iterator() - self.iter = iter(self.generator) - return self.iter - - def __next__(self): - if self.iter is None: - self.iter = iter(self.generator) - return next(self.iter) - - def renew(self): - self.cntr += 1 - self.kill_iterator() - self.generator.worker_init_fn = self.get_worker_init_fn() - self.iter = iter(self.generator) - - def restart(self): - pass - # self.iter = iter(self.generator) - - def kill_iterator(self): - try: - if self.iter is not None: - self.iter._shutdown_workers() - for p in self.iter.workers: - p.terminate() - except: - print("Could not kill Dataloader Iterator") diff --git a/hyppopy/workflows/unet_usecase/datasets/example_dataset/__init__.py b/hyppopy/workflows/unet_usecase/datasets/example_dataset/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/hyppopy/workflows/unet_usecase/datasets/example_dataset/create_splits.py b/hyppopy/workflows/unet_usecase/datasets/example_dataset/create_splits.py deleted file mode 100644 index 41ee520..0000000 --- a/hyppopy/workflows/unet_usecase/datasets/example_dataset/create_splits.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python -# -*- 
coding: utf-8 -*- -# -# Copyright 2017 Division of Medical Image Computing, German Cancer Research Center (DKFZ) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pickle -from utilities.file_and_folder_operations import subfiles - -import os -import numpy as np - - -def create_splits(output_dir, image_dir): - npy_files = subfiles(image_dir, suffix=".npy", join=False) - - trainset_size = len(npy_files)*50//100 - valset_size = len(npy_files)*25//100 - testset_size = len(npy_files)*25//100 - - splits = [] - for split in range(0, 5): - image_list = npy_files.copy() - trainset = [] - valset = [] - testset = [] - for i in range(0, trainset_size): - patient = np.random.choice(image_list) - image_list.remove(patient) - trainset.append(patient[:-4]) - for i in range(0, valset_size): - patient = np.random.choice(image_list) - image_list.remove(patient) - valset.append(patient[:-4]) - for i in range(0, testset_size): - patient = np.random.choice(image_list) - image_list.remove(patient) - testset.append(patient[:-4]) - split_dict = dict() - split_dict['train'] = trainset - split_dict['val'] = valset - split_dict['test'] = testset - - splits.append(split_dict) - - with open(os.path.join(output_dir, 'splits.pkl'), 'wb') as f: - pickle.dump(splits, f) diff --git a/hyppopy/workflows/unet_usecase/datasets/example_dataset/download_dataset.py b/hyppopy/workflows/unet_usecase/datasets/example_dataset/download_dataset.py deleted file mode 100644 index 77a6a5b..0000000 --- a/hyppopy/workflows/unet_usecase/datasets/example_dataset/download_dataset.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright 2017 Division of Medical Image Computing, German Cancer Research Center (DKFZ) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from os.path import exists -import tarfile - -from google_drive_downloader import GoogleDriveDownloader as gdd - -def download_dataset(dest_path, dataset, id='1RzPB1_bqzQhlWvU-YGvZzhx2omcDh38C'): - tar_path = os.path.join(dest_path, dataset) + '.tar' - gdd.download_file_from_google_drive(file_id=id, - dest_path=tar_path, overwrite=False, - unzip=False) - - if not exists(os.path.join(dest_path, dataset)): - print('Extracting data [STARTED]') - tar = tarfile.open(tar_path) - tar.extractall(dest_path) - print('Extracting data [DONE]') - else: - print('Data already downloaded. 
Files are not extracted again.') - - return diff --git a/hyppopy/workflows/unet_usecase/datasets/example_dataset/preprocessing.py b/hyppopy/workflows/unet_usecase/datasets/example_dataset/preprocessing.py deleted file mode 100644 index ebe132c..0000000 --- a/hyppopy/workflows/unet_usecase/datasets/example_dataset/preprocessing.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright 2017 Division of Medical Image Computing, German Cancer Research Center (DKFZ) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from collections import defaultdict - -from medpy.io import load -import os -import numpy as np - -from datasets.utils import reshape -from utilities.file_and_folder_operations import subfiles - - -def preprocess_data(root_dir): - image_dir = os.path.join(root_dir, 'imagesTr') - label_dir = os.path.join(root_dir, 'labelsTr') - output_dir = os.path.join(root_dir, 'preprocessed') - classes = 3 - - if not os.path.exists(output_dir): - os.makedirs(output_dir) - print('Created' + output_dir + '...') - - class_stats = defaultdict(int) - total = 0 - - nii_files = subfiles(image_dir, suffix=".nii.gz", join=False) - - for i in range(0, len(nii_files)): - if nii_files[i].startswith("._"): - nii_files[i] = nii_files[i][2:] - - for f in nii_files: - image, _ = load(os.path.join(image_dir, f)) - label, _ = load(os.path.join(label_dir, f.replace('_0000', ''))) - - print(f) - - for i in range(classes): - class_stats[i] += np.sum(label == i) - total += np.sum(label == i) - - image = (image - image.min())/(image.max()-image.min()) - - image = reshape(image, append_value=0, new_shape=(64, 64, 64)) - label = reshape(label, append_value=0, new_shape=(64, 64, 64)) - - result = np.stack((image, label)) - - np.save(os.path.join(output_dir, f.split('.')[0]+'.npy'), result) - print(f) - - print(total) - for i in range(classes): - print(class_stats[i], class_stats[i]/total) diff --git a/hyppopy/workflows/unet_usecase/datasets/three_dim/NumpyDataLoader.py b/hyppopy/workflows/unet_usecase/datasets/three_dim/NumpyDataLoader.py deleted file mode 100644 index 6b42934..0000000 --- a/hyppopy/workflows/unet_usecase/datasets/three_dim/NumpyDataLoader.py +++ /dev/null @@ -1,173 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright 2017 Division of Medical Image Computing, German Cancer Research Center (DKFZ) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import fnmatch -import random - -import numpy as np - -from batchgenerators.dataloading import SlimDataLoaderBase -from datasets.data_loader import MultiThreadedDataLoader -from .data_augmentation import get_transforms - - -def load_dataset(base_dir, pattern='*.npy', keys=None): - fls = [] - files_len = [] - dataset = [] - - for root, dirs, files in os.walk(base_dir): - i = 0 - for filename in sorted(fnmatch.filter(files, pattern)): - - if keys is not None and filename[:-4] in keys: - npy_file = os.path.join(root, filename) - numpy_array = np.load(npy_file, mmap_mode="r") - - fls.append(npy_file) - files_len.append(numpy_array.shape[1]) - - dataset.extend([i]) - - i += 1 - - return fls, files_len, dataset - - -class NumpyDataSet(object): - """ - TODO - """ - def __init__(self, base_dir, mode="train", batch_size=16, num_batches=10000000, seed=None, num_processes=8, num_cached_per_queue=8 * 4, target_size=128, - file_pattern='*.npy', label=1, input=(0,), do_reshuffle=True, keys=None): - - data_loader = NumpyDataLoader(base_dir=base_dir, mode=mode, batch_size=batch_size, num_batches=num_batches, seed=seed, file_pattern=file_pattern, - input=input, label=label, keys=keys) - - self.data_loader = data_loader - self.batch_size = batch_size - self.do_reshuffle = do_reshuffle - self.number_of_slices = 1 - - self.transforms = get_transforms(mode=mode, target_size=target_size) - self.augmenter = MultiThreadedDataLoader(data_loader, self.transforms, num_processes=num_processes, - num_cached_per_queue=num_cached_per_queue, seeds=seed, - shuffle=do_reshuffle) - self.augmenter.restart() - - def __len__(self): - return len(self.data_loader) - - def __iter__(self): - if self.do_reshuffle: - self.data_loader.reshuffle() - self.augmenter.renew() - return self.augmenter - - def __next__(self): - return next(self.augmenter) - - -class NumpyDataLoader(SlimDataLoaderBase): - def __init__(self, base_dir, mode="train", batch_size=16, num_batches=10000000, - seed=None, file_pattern='*.npy', label=1, input=(0,), keys=None): - - self.files, self.file_len, self.dataset = load_dataset(base_dir=base_dir, pattern=file_pattern, keys=keys, ) - super(NumpyDataLoader, self).__init__(self.dataset, batch_size, num_batches) - - self.batch_size = batch_size - - self.use_next = False - if mode == "train": - self.use_next = False - - self.idxs = list(range(0, len(self.dataset))) - - self.data_len = len(self.dataset) - - self.num_batches = min((self.data_len // self.batch_size)+10, num_batches) - - if isinstance(label, int): - label = (label,) - self.input = input - self.label = label - - self.np_data = np.asarray(self.dataset) - - def reshuffle(self): - print("Reshuffle...") - random.shuffle(self.idxs) - print("Initializing... 
this might take a while...") - - def generate_train_batch(self): - open_arr = random.sample(self._data, self.batch_size) - return self.get_data_from_array(open_arr) - - def __len__(self): - n_items = min(self.data_len // self.batch_size, self.num_batches) - return n_items - - def __getitem__(self, item): - idxs = self.idxs - data_len = len(self.dataset) - np_data = self.np_data - - if item > len(self): - raise StopIteration() - if (item * self.batch_size) == data_len: - raise StopIteration() - - start_idx = (item * self.batch_size) % data_len - stop_idx = ((item + 1) * self.batch_size) % data_len - - if ((item + 1) * self.batch_size) == data_len: - stop_idx = data_len - - if stop_idx > start_idx: - idxs = idxs[start_idx:stop_idx] - else: - raise StopIteration() - - open_arr = np_data[idxs] - - return self.get_data_from_array(open_arr) - - def get_data_from_array(self, open_array): - data = [] - fnames = [] - idxs = [] - labels = [] - - for idx in open_array: - fn_name = self.files[idx] - - numpy_array = np.load(fn_name, mmap_mode="r") - - data.append(numpy_array[None, self.input[0]]) # 'None' keeps the dimension - - if self.label is not None: - labels.append(numpy_array[None, self.label[0]]) # 'None' keeps the dimension - - fnames.append(self.files[idx]) - idxs.append(idx) - - ret_dict = {'data': data, 'fnames': fnames, 'idxs': idxs} - if self.label is not None: - ret_dict['seg'] = labels - - return ret_dict diff --git a/hyppopy/workflows/unet_usecase/datasets/three_dim/__init__.py b/hyppopy/workflows/unet_usecase/datasets/three_dim/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/hyppopy/workflows/unet_usecase/datasets/three_dim/data_augmentation.py b/hyppopy/workflows/unet_usecase/datasets/three_dim/data_augmentation.py deleted file mode 100644 index c91a6a9..0000000 --- a/hyppopy/workflows/unet_usecase/datasets/three_dim/data_augmentation.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright 2017 Division of Medical Image Computing, German Cancer Research Center (DKFZ) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-from batchgenerators.transforms import Compose, MirrorTransform
-from batchgenerators.transforms.crop_and_pad_transforms import CenterCropTransform, RandomCropTransform
-from batchgenerators.transforms.spatial_transforms import ResizeTransform, SpatialTransform
-from batchgenerators.transforms.utility_transforms import NumpyToTensor
-
-
-def get_transforms(mode="train", target_size=128):
-    transform_list = []
-
-    if mode == "train":
-        transform_list = [CenterCropTransform(crop_size=target_size),
-                          ResizeTransform(target_size=target_size, order=1),
-                          MirrorTransform(axes=(2,)),
-                          SpatialTransform(patch_size=(target_size, target_size, target_size), random_crop=False,
-                                           patch_center_dist_from_border=target_size // 2,
-                                           do_elastic_deform=True, alpha=(0., 1000.), sigma=(40., 60.),
-                                           do_rotation=True,
-                                           angle_x=(-0.1, 0.1), angle_y=(0, 1e-8), angle_z=(0, 1e-8),
-                                           scale=(0.9, 1.4),
-                                           border_mode_data="nearest", border_mode_seg="nearest"),
-                          ]
-
-    elif mode == "val":
-        transform_list = [CenterCropTransform(crop_size=target_size),
-                          ResizeTransform(target_size=target_size, order=1),
-                          ]
-
-    elif mode == "test":
-        transform_list = [CenterCropTransform(crop_size=target_size),
-                          ResizeTransform(target_size=target_size, order=1),
-                          ]
-
-    transform_list.append(NumpyToTensor())
-
-    return Compose(transform_list)
diff --git a/hyppopy/workflows/unet_usecase/datasets/two_dim/NumpyDataLoader.py b/hyppopy/workflows/unet_usecase/datasets/two_dim/NumpyDataLoader.py
deleted file mode 100644
index 1ae0bb4..0000000
--- a/hyppopy/workflows/unet_usecase/datasets/two_dim/NumpyDataLoader.py
+++ /dev/null
@@ -1,174 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright 2017 Division of Medical Image Computing, German Cancer Research Center (DKFZ)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -import os -import fnmatch -import random - -import numpy as np - -from batchgenerators.dataloading import SlimDataLoaderBase -from ..data_loader import MultiThreadedDataLoader -from .data_augmentation import get_transforms - - -def load_dataset(base_dir, pattern='*.npy', slice_offset=5, keys=None): - fls = [] - files_len = [] - slices_ax = [] - - for root, dirs, files in os.walk(base_dir): - i = 0 - for filename in sorted(fnmatch.filter(files, pattern)): - - if keys is not None and filename[:-4] in keys: - npy_file = os.path.join(root, filename) - numpy_array = np.load(npy_file, mmap_mode="r") - - fls.append(npy_file) - files_len.append(numpy_array.shape[1]) - - slices_ax.extend([(i, j) for j in range(slice_offset, files_len[-1] - slice_offset)]) - - i += 1 - - return fls, files_len, slices_ax, - - -class NumpyDataSet(object): - """ - TODO - """ - def __init__(self, base_dir, mode="train", batch_size=16, num_batches=10000000, seed=None, num_processes=8, num_cached_per_queue=8 * 4, target_size=128, - file_pattern='*.npy', label_slice=1, input_slice=(0,), do_reshuffle=True, keys=None): - - data_loader = NumpyDataLoader(base_dir=base_dir, mode=mode, batch_size=batch_size, num_batches=num_batches, seed=seed, file_pattern=file_pattern, - input_slice=input_slice, label_slice=label_slice, keys=keys) - - self.data_loader = data_loader - self.batch_size = batch_size - self.do_reshuffle = do_reshuffle - self.number_of_slices = 1 - - self.transforms = get_transforms(mode=mode, target_size=target_size) - self.augmenter = MultiThreadedDataLoader(data_loader, self.transforms, num_processes=1, - num_cached_per_queue=num_cached_per_queue, seeds=seed, - shuffle=do_reshuffle) - self.augmenter.restart() - - def __len__(self): - return len(self.data_loader) - - def __iter__(self): - if self.do_reshuffle: - self.data_loader.reshuffle() - self.augmenter.renew() - return self.augmenter - - def __next__(self): - return next(self.augmenter) - - -class NumpyDataLoader(SlimDataLoaderBase): - def __init__(self, base_dir, mode="train", batch_size=16, num_batches=10000000, - seed=None, file_pattern='*.npy', label_slice=1, input_slice=(0,), keys=None): - - self.files, self.file_len, self.slices = load_dataset(base_dir=base_dir, pattern=file_pattern, slice_offset=0, keys=keys, ) - super(NumpyDataLoader, self).__init__(self.slices, batch_size, num_batches) - - self.batch_size = batch_size - - self.use_next = False - if mode == "train": - self.use_next = False - - self.slice_idxs = list(range(0, len(self.slices))) - - self.data_len = len(self.slices) - - self.num_batches = min((self.data_len // self.batch_size)+10, num_batches) - - if isinstance(label_slice, int): - label_slice = (label_slice,) - self.input_slice = input_slice - self.label_slice = label_slice - - self.np_data = np.asarray(self.slices) - - def reshuffle(self): - print("Reshuffle...") - random.shuffle(self.slice_idxs) - print("Initializing... 
this might take a while...") - - def generate_train_batch(self): - open_arr = random.sample(self._data, self.batch_size) - return self.get_data_from_array(open_arr) - - def __len__(self): - n_items = min(self.data_len // self.batch_size, self.num_batches) - return n_items - - def __getitem__(self, item): - slice_idxs = self.slice_idxs - data_len = len(self.slices) - np_data = self.np_data - - if item > len(self): - raise StopIteration() - if (item * self.batch_size) == data_len: - raise StopIteration() - - start_idx = (item * self.batch_size) % data_len - stop_idx = ((item + 1) * self.batch_size) % data_len - - if ((item + 1) * self.batch_size) == data_len: - stop_idx = data_len - - if stop_idx > start_idx: - idxs = slice_idxs[start_idx:stop_idx] - else: - raise StopIteration() - - open_arr = np_data[idxs] - - return self.get_data_from_array(open_arr) - - def get_data_from_array(self, open_array): - data = [] - fnames = [] - slice_idxs = [] - labels = [] - - for slice in open_array: - fn_name = self.files[slice[0]] - - numpy_array = np.load(fn_name, mmap_mode="r") - - numpy_slice = numpy_array[ :, slice[1], ] - data.append(numpy_slice[None, self.input_slice[0]]) # 'None' keeps the dimension - - if self.label_slice is not None: - labels.append(numpy_slice[None, self.label_slice[0]]) # 'None' keeps the dimension - - fnames.append(self.files[slice[0]]) - slice_idxs.append(slice[1]) - - ret_dict = {'data': np.asarray(data), 'fnames': fnames, 'slice_idxs': slice_idxs} - if self.label_slice is not None: - ret_dict['seg'] = np.asarray(labels) - - return ret_dict diff --git a/hyppopy/workflows/unet_usecase/datasets/two_dim/__init__.py b/hyppopy/workflows/unet_usecase/datasets/two_dim/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/hyppopy/workflows/unet_usecase/datasets/two_dim/data_augmentation.py b/hyppopy/workflows/unet_usecase/datasets/two_dim/data_augmentation.py deleted file mode 100644 index ba7b7dd..0000000 --- a/hyppopy/workflows/unet_usecase/datasets/two_dim/data_augmentation.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright 2017 Division of Medical Image Computing, German Cancer Research Center (DKFZ) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-from batchgenerators.transforms import Compose, MirrorTransform
-from batchgenerators.transforms.crop_and_pad_transforms import CenterCropTransform, RandomCropTransform
-from batchgenerators.transforms.spatial_transforms import ResizeTransform, SpatialTransform
-from batchgenerators.transforms.utility_transforms import NumpyToTensor
-
-
-def get_transforms(mode="train", target_size=128):
-    transform_list = []
-
-    if mode == "train":
-        transform_list = [# CenterCropTransform(crop_size=target_size),
-                          ResizeTransform(target_size=(target_size, target_size), order=1),
-                          MirrorTransform(axes=(1,)),
-                          SpatialTransform(patch_size=(target_size, target_size), random_crop=False,
-                                           patch_center_dist_from_border=target_size // 2,
-                                           do_elastic_deform=True, alpha=(0., 1000.), sigma=(40., 60.),
-                                           do_rotation=True, p_rot_per_sample=0.5,
-                                           angle_x=(-0.1, 0.1), angle_y=(0, 1e-8), angle_z=(0, 1e-8),
-                                           scale=(0.5, 1.9), p_scale_per_sample=0.5,
-                                           border_mode_data="nearest", border_mode_seg="nearest"),
-                          ]
-
-    elif mode == "val":
-        transform_list = [CenterCropTransform(crop_size=target_size),
-                          ResizeTransform(target_size=target_size, order=1),
-                          ]
-
-    elif mode == "test":
-        transform_list = [CenterCropTransform(crop_size=target_size),
-                          ResizeTransform(target_size=target_size, order=1),
-                          ]
-
-    transform_list.append(NumpyToTensor())
-
-    return Compose(transform_list)
diff --git a/hyppopy/workflows/unet_usecase/datasets/utils.py b/hyppopy/workflows/unet_usecase/datasets/utils.py
deleted file mode 100644
index 755b088..0000000
--- a/hyppopy/workflows/unet_usecase/datasets/utils.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright 2017 Division of Medical Image Computing, German Cancer Research Center (DKFZ)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-
-
-def reshape(orig_img, append_value=-1024, new_shape=(512, 512, 512)):
-    reshaped_image = np.zeros(new_shape)
-    reshaped_image[...]
= append_value - x_offset = 0 - y_offset = 0 # (new_shape[1] - orig_img.shape[1]) // 2 - z_offset = 0 # (new_shape[2] - orig_img.shape[2]) // 2 - - reshaped_image[x_offset:orig_img.shape[0]+x_offset, y_offset:orig_img.shape[1]+y_offset, z_offset:orig_img.shape[2]+z_offset] = orig_img - # insert temp_img.min() as background value - - return reshaped_image diff --git a/hyppopy/workflows/unet_usecase/loss_functions/__init__.py b/hyppopy/workflows/unet_usecase/loss_functions/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/hyppopy/workflows/unet_usecase/loss_functions/dice_loss.py b/hyppopy/workflows/unet_usecase/loss_functions/dice_loss.py deleted file mode 100644 index 48c7acc..0000000 --- a/hyppopy/workflows/unet_usecase/loss_functions/dice_loss.py +++ /dev/null @@ -1,162 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright 2017 Division of Medical Image Computing, German Cancer Research Center (DKFZ) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import numpy as np -from torch import nn - - -def sum_tensor(input, axes, keepdim=False): - axes = np.unique(axes).astype(int) - if keepdim: - for ax in axes: - input = input.sum(int(ax), keepdim=True) - else: - for ax in sorted(axes, reverse=True): - input = input.sum(int(ax)) - return input - - -def mean_tensor(input, axes, keepdim=False): - axes = np.unique(axes).astype(int) - if keepdim: - for ax in axes: - input = input.mean(int(ax), keepdim=True) - else: - for ax in sorted(axes, reverse=True): - input = input.mean(int(ax)) - return input - - -class SoftDiceLoss(nn.Module): - def __init__(self, smooth=1., apply_nonlin=None, batch_dice=False, do_bg=True, smooth_in_nom=True, background_weight=1, rebalance_weights=None): - """ - hahaa no documentation for you today - :param smooth: - :param apply_nonlin: - :param batch_dice: - :param do_bg: - :param smooth_in_nom: - :param background_weight: - :param rebalance_weights: - """ - super(SoftDiceLoss, self).__init__() - if not do_bg: - assert background_weight == 1, "if there is no bg, then set background weight to 1 you dummy" - self.rebalance_weights = rebalance_weights - self.background_weight = background_weight - self.smooth_in_nom = smooth_in_nom - self.do_bg = do_bg - self.batch_dice = batch_dice - self.apply_nonlin = apply_nonlin - self.smooth = smooth - self.y_onehot = None - if not smooth_in_nom: - self.nom_smooth = 0 - else: - self.nom_smooth = smooth - - def forward(self, x, y): - with torch.no_grad(): - y = y.long() - shp_x = x.shape - shp_y = y.shape - if self.apply_nonlin is not None: - x = self.apply_nonlin(x) - if len(shp_x) != len(shp_y): - y = y.view((shp_y[0], 1, *shp_y[1:])) - # now x and y should have shape (B, C, X, Y(, Z))) and (B, 1, X, Y(, Z))), respectively - y_onehot = torch.zeros(shp_x) - if x.device.type == "cuda": - y_onehot = y_onehot.cuda(x.device.index) - y_onehot.scatter_(1, y, 1) - if not self.do_bg: - x = x[:, 1:] - y_onehot = y_onehot[:, 1:] - if not self.batch_dice: - if self.background_weight != 1 or 
(self.rebalance_weights is not None):
-                raise NotImplementedError("background_weight and rebalance_weights are not supported when batch_dice is False")
-            l = soft_dice(x, y_onehot, self.smooth, self.smooth_in_nom)
-        else:
-            l = soft_dice_per_batch_2(x, y_onehot, self.smooth, self.smooth_in_nom,
-                                      background_weight=self.background_weight,
-                                      rebalance_weights=self.rebalance_weights)
-        return l
-
-
-def soft_dice_per_batch(net_output, gt, smooth=1., smooth_in_nom=1., background_weight=1):
-    axes = tuple([0] + list(range(2, len(net_output.size()))))
-    intersect = sum_tensor(net_output * gt, axes, keepdim=False)
-    denom = sum_tensor(net_output + gt, axes, keepdim=False)
-    weights = torch.ones(intersect.shape)
-    weights[0] = background_weight
-    if net_output.device.type == "cuda":
-        weights = weights.cuda(net_output.device.index)
-    result = (- ((2 * intersect + smooth_in_nom) / (denom + smooth)) * weights).mean()
-    return result
-
-
-def soft_dice_per_batch_2(net_output, gt, smooth=1., smooth_in_nom=1., background_weight=1, rebalance_weights=None):
-    if rebalance_weights is not None and len(rebalance_weights) != gt.shape[1]:
-        rebalance_weights = rebalance_weights[1:]  # this is the case when use_bg=False
-    axes = tuple([0] + list(range(2, len(net_output.size()))))
-    tp = sum_tensor(net_output * gt, axes, keepdim=False)
-    fn = sum_tensor((1 - net_output) * gt, axes, keepdim=False)
-    fp = sum_tensor(net_output * (1 - gt), axes, keepdim=False)
-    weights = torch.ones(tp.shape)
-    weights[0] = background_weight
-    if net_output.device.type == "cuda":
-        weights = weights.cuda(net_output.device.index)
-    if rebalance_weights is not None:
-        rebalance_weights = torch.from_numpy(rebalance_weights).float()
-        if net_output.device.type == "cuda":
-            rebalance_weights = rebalance_weights.cuda(net_output.device.index)
-        tp = tp * rebalance_weights
-        fn = fn * rebalance_weights
-    result = (- ((2 * tp + smooth_in_nom) / (2 * tp + fp + fn + smooth)) * weights).mean()
-    return result
-
-
-def soft_dice(net_output, gt, smooth=1., smooth_in_nom=1.):
-    axes = tuple(range(2, len(net_output.size())))
-    intersect = sum_tensor(net_output * gt, axes, keepdim=False)
-    denom = sum_tensor(net_output + gt, axes, keepdim=False)
-    result = (- ((2 * intersect + smooth_in_nom) / (denom + smooth))).mean()
-    return result
-
-
-class MultipleOutputLoss(nn.Module):
-    def __init__(self, loss, weight_factors=None):
-        """
-        use this if you have several outputs that should predict the same y
-        :param loss:
-        :param weight_factors:
-        """
-        super(MultipleOutputLoss, self).__init__()
-        self.weight_factors = weight_factors
-        self.loss = loss
-
-    def forward(self, x, y):
-        assert isinstance(x, (tuple, list)), "x must be either tuple or list"
-        if self.weight_factors is None:
-            weights = [1] * len(x)
-        else:
-            weights = self.weight_factors
-        l = weights[0] * self.loss(x[0], y)
-        for i in range(1, len(x)):
-            l += weights[i] * self.loss(x[i], y)
-        return l
\ No newline at end of file
diff --git a/hyppopy/workflows/unet_usecase/networks/RecursiveUNet.py b/hyppopy/workflows/unet_usecase/networks/RecursiveUNet.py
deleted file mode 100644
index 8ca7017..0000000
--- a/hyppopy/workflows/unet_usecase/networks/RecursiveUNet.py
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright 2017 Division of Medical Image Computing, German Cancer Research Center (DKFZ)
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Defines the Unet. -# |num_downs|: number of downsamplings in UNet. For example, -# if |num_downs| == 7, image of size 128x128 will become of size 1x1 at the bottleneck - -# recursive implementation of Unet -import torch - -from torch import nn - - -class UNet(nn.Module): - def __init__(self, num_classes=3, in_channels=1, initial_filter_size=64, kernel_size=3, num_downs=4, norm_layer=nn.InstanceNorm2d): - # norm_layer=nn.BatchNorm2d, use_dropout=False): - super(UNet, self).__init__() - - # construct unet structure - unet_block = UnetSkipConnectionBlock(in_channels=initial_filter_size * 2 ** (num_downs-1), out_channels=initial_filter_size * 2 ** num_downs, - num_classes=num_classes, kernel_size=kernel_size, norm_layer=norm_layer, innermost=True) - for i in range(1, num_downs): - unet_block = UnetSkipConnectionBlock(in_channels=initial_filter_size * 2 ** (num_downs-(i+1)), - out_channels=initial_filter_size * 2 ** (num_downs-i), - num_classes=num_classes, kernel_size=kernel_size, submodule=unet_block, norm_layer=norm_layer) - unet_block = UnetSkipConnectionBlock(in_channels=in_channels, out_channels=initial_filter_size, - num_classes=num_classes, kernel_size=kernel_size, submodule=unet_block, norm_layer=norm_layer, - outermost=True) - - self.model = unet_block - - def forward(self, x): - return self.model(x) - - -# Defines the submodule with skip connection. 
-# X -------------------identity---------------------- X -# |-- downsampling -- |submodule| -- upsampling --| -class UnetSkipConnectionBlock(nn.Module): - def __init__(self, in_channels=None, out_channels=None, num_classes=1, kernel_size=3, - submodule=None, outermost=False, innermost=False, norm_layer=nn.InstanceNorm2d, use_dropout=False): - super(UnetSkipConnectionBlock, self).__init__() - self.outermost = outermost - # downconv - pool = nn.MaxPool2d(2, stride=2) - conv1 = self.contract(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, norm_layer=norm_layer) - conv2 = self.contract(in_channels=out_channels, out_channels=out_channels, kernel_size=kernel_size, norm_layer=norm_layer) - - # upconv - conv3 = self.expand(in_channels=out_channels*2, out_channels=out_channels, kernel_size=kernel_size) - conv4 = self.expand(in_channels=out_channels, out_channels=out_channels, kernel_size=kernel_size) - - if outermost: - final = nn.Conv2d(out_channels, num_classes, kernel_size=1) - down = [conv1, conv2] - up = [conv3, conv4, final] - model = down + [submodule] + up - elif innermost: - upconv = nn.ConvTranspose2d(in_channels*2, in_channels, - kernel_size=2, stride=2) - model = [pool, conv1, conv2, upconv] - else: - upconv = nn.ConvTranspose2d(in_channels*2, in_channels, kernel_size=2, stride=2) - - down = [pool, conv1, conv2] - up = [conv3, conv4, upconv] - - if use_dropout: - model = down + [submodule] + up + [nn.Dropout(0.5)] - else: - model = down + [submodule] + up - - self.model = nn.Sequential(*model) - - @staticmethod - def contract(in_channels, out_channels, kernel_size=3, norm_layer=nn.InstanceNorm2d): - layer = nn.Sequential( - nn.Conv2d(in_channels, out_channels, kernel_size, padding=1), - norm_layer(out_channels), - nn.LeakyReLU(inplace=True)) - return layer - - @staticmethod - def expand(in_channels, out_channels, kernel_size=3): - layer = nn.Sequential( - nn.Conv2d(in_channels, out_channels, kernel_size, padding=1), - nn.LeakyReLU(inplace=True), - ) - return layer - - @staticmethod - def center_crop(layer, target_width, target_height): - batch_size, n_channels, layer_width, layer_height = layer.size() - xy1 = (layer_width - target_width) // 2 - xy2 = (layer_height - target_height) // 2 - return layer[:, :, xy1:(xy1 + target_width), xy2:(xy2 + target_height)] - - def forward(self, x): - if self.outermost: - return self.model(x) - else: - crop = self.center_crop(self.model(x), x.size()[2], x.size()[3]) - return torch.cat([x, crop], 1) diff --git a/hyppopy/workflows/unet_usecase/networks/RecursiveUNet3D.py b/hyppopy/workflows/unet_usecase/networks/RecursiveUNet3D.py deleted file mode 100644 index 0801adf..0000000 --- a/hyppopy/workflows/unet_usecase/networks/RecursiveUNet3D.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright 2017 Division of Medical Image Computing, German Cancer Research Center (DKFZ) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Defines the Unet. 
-# |num_downs|: number of downsamplings in UNet. For example, -# if |num_downs| == 7, image of size 128x128 will become of size 1x1 at the bottleneck - -# recursive implementation of Unet -import torch - -from torch import nn - - -class UNet3D(nn.Module): - def __init__(self, num_classes=3, in_channels=1, initial_filter_size=64, kernel_size=3, num_downs=3, norm_layer=nn.InstanceNorm3d): - # norm_layer=nn.BatchNorm2d, use_dropout=False): - super(UNet3D, self).__init__() - - # construct unet structure - unet_block = UnetSkipConnectionBlock(in_channels=initial_filter_size * 2 ** (num_downs-1), out_channels=initial_filter_size * 2 ** num_downs, - num_classes=num_classes, kernel_size=kernel_size, norm_layer=norm_layer, innermost=True) - for i in range(1, num_downs): - unet_block = UnetSkipConnectionBlock(in_channels=initial_filter_size * 2 ** (num_downs-(i+1)), - out_channels=initial_filter_size * 2 ** (num_downs-i), - num_classes=num_classes, kernel_size=kernel_size, submodule=unet_block, norm_layer=norm_layer) - unet_block = UnetSkipConnectionBlock(in_channels=in_channels, out_channels=initial_filter_size, - num_classes=num_classes, kernel_size=kernel_size, submodule=unet_block, norm_layer=norm_layer, - outermost=True) - - self.model = unet_block - - def forward(self, x): - return self.model(x) - - -# Defines the submodule with skip connection. -# X -------------------identity---------------------- X -# |-- downsampling -- |submodule| -- upsampling --| -class UnetSkipConnectionBlock(nn.Module): - def __init__(self, in_channels=None, out_channels=None, num_classes=1, kernel_size=3, - submodule=None, outermost=False, innermost=False, norm_layer=nn.InstanceNorm3d, use_dropout=False): - super(UnetSkipConnectionBlock, self).__init__() - self.outermost = outermost - # downconv - pool = nn.MaxPool3d(2, stride=2) - conv1 = self.contract(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, norm_layer=norm_layer) - conv2 = self.contract(in_channels=out_channels, out_channels=out_channels, kernel_size=kernel_size, norm_layer=norm_layer) - - # upconv - conv3 = self.expand(in_channels=out_channels*2, out_channels=out_channels, kernel_size=kernel_size) - conv4 = self.expand(in_channels=out_channels, out_channels=out_channels, kernel_size=kernel_size) - - if outermost: - final = nn.Conv3d(out_channels, num_classes, kernel_size=1) - down = [conv1, conv2] - up = [conv3, conv4, final] - model = down + [submodule] + up - elif innermost: - upconv = nn.ConvTranspose3d(in_channels*2, in_channels, - kernel_size=2, stride=2) - model = [pool, conv1, conv2, upconv] - else: - upconv = nn.ConvTranspose3d(in_channels*2, in_channels, kernel_size=2, stride=2) - - down = [pool, conv1, conv2] - up = [conv3, conv4, upconv] - - if use_dropout: - model = down + [submodule] + up + [nn.Dropout(0.5)] - else: - model = down + [submodule] + up - - self.model = nn.Sequential(*model) - - @staticmethod - def contract(in_channels, out_channels, kernel_size=3, norm_layer=nn.InstanceNorm3d): - layer = nn.Sequential( - nn.Conv3d(in_channels, out_channels, kernel_size, padding=1), - norm_layer(out_channels), - nn.LeakyReLU(inplace=True)) - return layer - - @staticmethod - def expand(in_channels, out_channels, kernel_size=3): - layer = nn.Sequential( - nn.Conv3d(in_channels, out_channels, kernel_size, padding=1), - nn.LeakyReLU(inplace=True), - ) - return layer - - @staticmethod - def center_crop(layer, target_depth, target_width, target_height): - batch_size, n_channels, layer_depth, layer_width, layer_height = 
layer.size() - xy0 = (layer_depth - target_depth) // 2 - xy1 = (layer_width - target_width) // 2 - xy2 = (layer_height - target_height) // 2 - return layer[:, :, xy0:(xy0 + target_depth), xy1:(xy1 + target_width), xy2:(xy2 + target_height)] - - def forward(self, x): - if self.outermost: - return self.model(x) - else: - crop = self.center_crop(self.model(x), x.size()[2], x.size()[3], x.size()[4]) - return torch.cat([x, crop], 1) diff --git a/hyppopy/workflows/unet_usecase/networks/UNET.py b/hyppopy/workflows/unet_usecase/networks/UNET.py deleted file mode 100644 index 11bd2ee..0000000 --- a/hyppopy/workflows/unet_usecase/networks/UNET.py +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright 2017 Division of Medical Image Computing, German Cancer Research Center (DKFZ) -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torch.nn as nn - - -class UNet(nn.Module): - - def __init__(self, num_classes, in_channels=1, initial_filter_size=64, kernel_size=3, do_instancenorm=True): - super().__init__() - - self.contr_1_1 = self.contract(in_channels, initial_filter_size, kernel_size, instancenorm=do_instancenorm) - self.contr_1_2 = self.contract(initial_filter_size, initial_filter_size, kernel_size, instancenorm=do_instancenorm) - self.pool = nn.MaxPool2d(2, stride=2) - - self.contr_2_1 = self.contract(initial_filter_size, initial_filter_size*2, kernel_size, instancenorm=do_instancenorm) - self.contr_2_2 = self.contract(initial_filter_size*2, initial_filter_size*2, kernel_size, instancenorm=do_instancenorm) - # self.pool2 = nn.MaxPool2d(2, stride=2) - - self.contr_3_1 = self.contract(initial_filter_size*2, initial_filter_size*2**2, kernel_size, instancenorm=do_instancenorm) - self.contr_3_2 = self.contract(initial_filter_size*2**2, initial_filter_size*2**2, kernel_size, instancenorm=do_instancenorm) - # self.pool3 = nn.MaxPool2d(2, stride=2) - - self.contr_4_1 = self.contract(initial_filter_size*2**2, initial_filter_size*2**3, kernel_size, instancenorm=do_instancenorm) - self.contr_4_2 = self.contract(initial_filter_size*2**3, initial_filter_size*2**3, kernel_size, instancenorm=do_instancenorm) - # self.pool4 = nn.MaxPool2d(2, stride=2) - - self.center = nn.Sequential( - nn.Conv2d(initial_filter_size*2**3, initial_filter_size*2**4, 3, padding=1), - nn.ReLU(inplace=True), - nn.Conv2d(initial_filter_size*2**4, initial_filter_size*2**4, 3, padding=1), - nn.ReLU(inplace=True), - nn.ConvTranspose2d(initial_filter_size*2**4, initial_filter_size*2**3, 2, stride=2), - nn.ReLU(inplace=True), - ) - - self.expand_4_1 = self.expand(initial_filter_size*2**4, initial_filter_size*2**3) - self.expand_4_2 = self.expand(initial_filter_size*2**3, initial_filter_size*2**3) - self.upscale4 = nn.ConvTranspose2d(initial_filter_size*2**3, initial_filter_size*2**2, kernel_size=2, stride=2) - - self.expand_3_1 = self.expand(initial_filter_size*2**3, initial_filter_size*2**2) - self.expand_3_2 = self.expand(initial_filter_size*2**2, initial_filter_size*2**2) - 
self.upscale3 = nn.ConvTranspose2d(initial_filter_size*2**2, initial_filter_size*2, 2, stride=2) - - self.expand_2_1 = self.expand(initial_filter_size*2**2, initial_filter_size*2) - self.expand_2_2 = self.expand(initial_filter_size*2, initial_filter_size*2) - self.upscale2 = nn.ConvTranspose2d(initial_filter_size*2, initial_filter_size, 2, stride=2) - - self.expand_1_1 = self.expand(initial_filter_size*2, initial_filter_size) - self.expand_1_2 = self.expand(initial_filter_size, initial_filter_size) - # Output layer for segmentation - self.final = nn.Conv2d(initial_filter_size, num_classes, kernel_size=1) # kernel size for final layer = 1, see paper - - self.softmax = torch.nn.Softmax2d() - - # Output layer for "autoencoder-mode" - self.output_reconstruction_map = nn.Conv2d(initial_filter_size, out_channels=1, kernel_size=1) - - @staticmethod - def contract(in_channels, out_channels, kernel_size=3, instancenorm=True): - if instancenorm: - layer = nn.Sequential( - nn.Conv2d(in_channels, out_channels, kernel_size, padding=1), - nn.InstanceNorm2d(out_channels), - nn.LeakyReLU(inplace=True)) - else: - layer = nn.Sequential( - nn.Conv2d(in_channels, out_channels, kernel_size, padding=1), - nn.LeakyReLU(inplace=True)) - return layer - - @staticmethod - def expand(in_channels, out_channels, kernel_size=3): - layer = nn.Sequential( - nn.Conv2d(in_channels, out_channels, kernel_size, padding=1), - nn.LeakyReLU(inplace=True), - ) - return layer - - @staticmethod - def center_crop(layer, target_width, target_height): - batch_size, n_channels, layer_width, layer_height = layer.size() - xy1 = (layer_width - target_width) // 2 - xy2 = (layer_height - target_height) // 2 - return layer[:, :, xy1:(xy1 + target_width), xy2:(xy2 + target_height)] - - def forward(self, x, enable_concat=True, print_layer_shapes=False): - concat_weight = 1 - if not enable_concat: - concat_weight = 0 - - contr_1 = self.contr_1_2(self.contr_1_1(x)) - pool = self.pool(contr_1) - - contr_2 = self.contr_2_2(self.contr_2_1(pool)) - pool = self.pool(contr_2) - - contr_3 = self.contr_3_2(self.contr_3_1(pool)) - pool = self.pool(contr_3) - - contr_4 = self.contr_4_2(self.contr_4_1(pool)) - pool = self.pool(contr_4) - - center = self.center(pool) - - crop = self.center_crop(contr_4, center.size()[2], center.size()[3]) - concat = torch.cat([center, crop*concat_weight], 1) - - expand = self.expand_4_2(self.expand_4_1(concat)) - upscale = self.upscale4(expand) - - crop = self.center_crop(contr_3, upscale.size()[2], upscale.size()[3]) - concat = torch.cat([upscale, crop*concat_weight], 1) - - expand = self.expand_3_2(self.expand_3_1(concat)) - upscale = self.upscale3(expand) - - crop = self.center_crop(contr_2, upscale.size()[2], upscale.size()[3]) - concat = torch.cat([upscale, crop*concat_weight], 1) - - expand = self.expand_2_2(self.expand_2_1(concat)) - upscale = self.upscale2(expand) - - crop = self.center_crop(contr_1, upscale.size()[2], upscale.size()[3]) - concat = torch.cat([upscale, crop*concat_weight], 1) - - expand = self.expand_1_2(self.expand_1_1(concat)) - - if enable_concat: - output = self.final(expand) - if not enable_concat: - output = self.output_reconstruction_map(expand) - - return output diff --git a/hyppopy/workflows/unet_usecase/unet_uscase_utils.py b/hyppopy/workflows/unet_usecase/unet_uscase_utils.py deleted file mode 100644 index a5028f1..0000000 --- a/hyppopy/workflows/unet_usecase/unet_uscase_utils.py +++ /dev/null @@ -1,417 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of 
Medical and Biological Informatics.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
-#
-# Author: Sven Wanner (s.wanner@dkfz.de)
-
-import os
-import torch
-import pickle
-import random
-import fnmatch
-import numpy as np
-from torch import nn
-from medpy.io import load
-from collections import defaultdict
-from abc import ABCMeta, abstractmethod
-
-
-def sum_tensor(input, axes, keepdim=False):
-    axes = np.unique(axes).astype(int)
-    if keepdim:
-        for ax in axes:
-            input = input.sum(int(ax), keepdim=True)
-    else:
-        for ax in sorted(axes, reverse=True):
-            input = input.sum(int(ax))
-    return input
-
-
-def soft_dice_per_batch_2(net_output, gt, smooth=1., smooth_in_nom=1., background_weight=1, rebalance_weights=None):
-    if rebalance_weights is not None and len(rebalance_weights) != gt.shape[1]:
-        rebalance_weights = rebalance_weights[1:]  # this is the case when use_bg=False
-    axes = tuple([0] + list(range(2, len(net_output.size()))))
-    tp = sum_tensor(net_output * gt, axes, keepdim=False)
-    fn = sum_tensor((1 - net_output) * gt, axes, keepdim=False)
-    fp = sum_tensor(net_output * (1 - gt), axes, keepdim=False)
-    weights = torch.ones(tp.shape)
-    weights[0] = background_weight
-    if net_output.device.type == "cuda":
-        weights = weights.cuda(net_output.device.index)
-    if rebalance_weights is not None:
-        rebalance_weights = torch.from_numpy(rebalance_weights).float()
-        if net_output.device.type == "cuda":
-            rebalance_weights = rebalance_weights.cuda(net_output.device.index)
-        tp = tp * rebalance_weights
-        fn = fn * rebalance_weights
-    result = (- ((2 * tp + smooth_in_nom) / (2 * tp + fp + fn + smooth)) * weights).mean()
-    return result
-
-
-def soft_dice(net_output, gt, smooth=1., smooth_in_nom=1.):
-    axes = tuple(range(2, len(net_output.size())))
-    intersect = sum_tensor(net_output * gt, axes, keepdim=False)
-    denom = sum_tensor(net_output + gt, axes, keepdim=False)
-    # plain soft Dice over the spatial axes; class weighting is handled by soft_dice_per_batch_2 above
-    result = (- ((2 * intersect + smooth_in_nom) / (denom + smooth))).mean()
-    return result
-
-
-class SoftDiceLoss(nn.Module):
-    def __init__(self, smooth=1., apply_nonlin=None, batch_dice=False, do_bg=True, smooth_in_nom=True, background_weight=1, rebalance_weights=None):
-        """
-        Soft Dice loss with optional nonlinearity, batch dice, background handling and class rebalancing.
-        :param smooth: additive smoothing for the denominator
-        :param apply_nonlin: optional nonlinearity applied to the network output (e.g. softmax)
-        :param batch_dice: compute the Dice score over the whole batch instead of per sample
-        :param do_bg: include the background channel in the loss
-        :param smooth_in_nom: also apply smoothing in the numerator
-        :param background_weight: weight of the background channel (batch dice only)
-        :param rebalance_weights: per-class weights applied to tp/fn (batch dice only)
-        """
-        super(SoftDiceLoss, self).__init__()
-        if not do_bg:
-            assert background_weight == 1, "background_weight must be 1 if the background channel is excluded (do_bg=False)"
-        self.rebalance_weights = rebalance_weights
-        self.background_weight = background_weight
-        self.smooth_in_nom = smooth_in_nom
-        self.do_bg = do_bg
-        self.batch_dice = batch_dice
-        self.apply_nonlin = apply_nonlin
-        self.smooth = smooth
-        self.y_onehot = None
-        if not smooth_in_nom:
-            self.nom_smooth = 0
-        else:
-            self.nom_smooth = smooth
-
-    def forward(self, x, y):
-        with torch.no_grad():
-            y = y.long()
-        shp_x = x.shape
-        shp_y = y.shape
-        if self.apply_nonlin is not None:
-            x = self.apply_nonlin(x)
-        if len(shp_x) != len(shp_y):
-            y = y.view((shp_y[0], 1, *shp_y[1:]))
-        # now x and y should have shape (B, C, X, Y(, Z))) and (B, 1, X, Y(, Z))), respectively
-        y_onehot = torch.zeros(shp_x)
-        if x.device.type == "cuda":
-            y_onehot = y_onehot.cuda(x.device.index)
-        y_onehot.scatter_(1, y, 1)
-        if not self.do_bg:
-            x = x[:, 1:]
-            y_onehot = y_onehot[:, 1:]
-        if not self.batch_dice:
-            if self.background_weight != 1 or (self.rebalance_weights is not None):
-                raise NotImplementedError("background_weight != 1 and rebalance_weights are only implemented for batch_dice=True")
-            l = soft_dice(x, y_onehot, self.smooth, self.smooth_in_nom)
-        else:
-            l = soft_dice_per_batch_2(x, y_onehot, self.smooth, self.smooth_in_nom,
-                                      background_weight=self.background_weight,
-                                      rebalance_weights=self.rebalance_weights)
-        return l
-
-
-def load_dataset(base_dir, pattern='*.npy', slice_offset=5, keys=None):
-    fls = []
-    files_len = []
-    slices_ax = []
-
-    for root, dirs, files in os.walk(base_dir):
-        i = 0
-        for filename in sorted(fnmatch.filter(files, pattern)):
-
-            if keys is not None and filename[:-4] in keys:
-                npy_file = os.path.join(root, filename)
-                numpy_array = np.load(npy_file, mmap_mode="r")
-
-                fls.append(npy_file)
-                files_len.append(numpy_array.shape[1])
-
-                slices_ax.extend([(i, j) for j in range(slice_offset, files_len[-1] - slice_offset)])
-
-                i += 1
-
-    return fls, files_len, slices_ax,
-
-
-class SlimDataLoaderBase(object):
-    def __init__(self, data, batch_size, number_of_threads_in_multithreaded=None):
-        """
-        Slim version of DataLoaderBase (which is now deprecated). Only provides very simple functionality.
-        You must derive from this class to implement your own DataLoader. You must override self.generate_train_batch().
-        If you use our MultiThreadedAugmenter you will need to also set and use number_of_threads_in_multithreaded. See
-        multithreaded_dataloading in examples!
-        :param data: will be stored in self._data. You can use it to generate your batches in self.generate_train_batch()
-        :param batch_size: will be stored in self.batch_size for use in self.generate_train_batch()
-        :param number_of_threads_in_multithreaded: will be stored in self.number_of_threads_in_multithreaded.
-        None per default. If you wish to iterate over all your training data only once per epoch, you must coordinate
-        your Dataloaders and you will need this information
-        """
-        __metaclass__ = ABCMeta
-        self.number_of_threads_in_multithreaded = number_of_threads_in_multithreaded
-        self._data = data
-        self.batch_size = batch_size
-        self.thread_id = 0
-
-    def set_thread_id(self, thread_id):
-        self.thread_id = thread_id
-
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        return self.generate_train_batch()
-
-    @abstractmethod
-    def generate_train_batch(self):
-        '''override this
-        Generate your batch from self._data. Make sure you generate the correct batch size (self.batch_size)
-        '''
-        pass
-
-
-class NumpyDataLoader(SlimDataLoaderBase):
-    def __init__(self, base_dir, mode="train", batch_size=16, num_batches=10000000,
-                 seed=None, file_pattern='*.npy', label_slice=1, input_slice=(0,), keys=None):
-
-        self.files, self.file_len, self.slices = load_dataset(base_dir=base_dir, pattern=file_pattern, slice_offset=0, keys=keys, )
-        super(NumpyDataLoader, self).__init__(self.slices, batch_size, num_batches)
-
-        self.batch_size = batch_size
-
-        self.use_next = False
-        if mode == "train":
-            self.use_next = False
-
-        self.slice_idxs = list(range(0, len(self.slices)))
-
-        self.data_len = len(self.slices)
-
-        self.num_batches = min((self.data_len // self.batch_size)+10, num_batches)
-
-        if isinstance(label_slice, int):
-            label_slice = (label_slice,)
-        self.input_slice = input_slice
-        self.label_slice = label_slice
-
-        self.np_data = np.asarray(self.slices)
-
-    def reshuffle(self):
-        print("Reshuffle...")
-        random.shuffle(self.slice_idxs)
-        print("Initializing... this might take a while...")
-
-    def generate_train_batch(self):
-        open_arr = random.sample(self._data, self.batch_size)
-        return self.get_data_from_array(open_arr)
-
-    def __len__(self):
-        n_items = min(self.data_len // self.batch_size, self.num_batches)
-        return n_items
-
-    def __getitem__(self, item):
-        slice_idxs = self.slice_idxs
-        data_len = len(self.slices)
-        np_data = self.np_data
-
-        if item > len(self):
-            raise StopIteration()
-        if (item * self.batch_size) == data_len:
-            raise StopIteration()
-
-        start_idx = (item * self.batch_size) % data_len
-        stop_idx = ((item + 1) * self.batch_size) % data_len
-
-        if ((item + 1) * self.batch_size) == data_len:
-            stop_idx = data_len
-
-        if stop_idx > start_idx:
-            idxs = slice_idxs[start_idx:stop_idx]
-        else:
-            raise StopIteration()
-
-        open_arr = np_data[idxs]
-
-        return self.get_data_from_array(open_arr)
-
-    def get_data_from_array(self, open_array):
-        data = []
-        fnames = []
-        slice_idxs = []
-        labels = []
-
-        for slice in open_array:
-            fn_name = self.files[slice[0]]
-
-            numpy_array = np.load(fn_name, mmap_mode="r")
-
-            numpy_slice = numpy_array[ :, slice[1], ]
-            data.append(numpy_slice[None, self.input_slice[0]])  # 'None' keeps the dimension
-
-            if self.label_slice is not None:
-                labels.append(numpy_slice[None, self.label_slice[0]])  # 'None' keeps the dimension
-
-            fnames.append(self.files[slice[0]])
-            slice_idxs.append(slice[1])
-
-        ret_dict = {'data': np.asarray(data), 'fnames': fnames, 'slice_idxs': slice_idxs}
-        if self.label_slice is not None:
-            ret_dict['seg'] = np.asarray(labels)
-
-        return ret_dict
-
-
-class NumpyDataSet(object):
-    """
-    Wraps a NumpyDataLoader with the transform pipeline and multithreaded augmentation.
-    """
-    def __init__(self, base_dir, mode="train", batch_size=16, num_batches=10000000, seed=None, num_processes=8, num_cached_per_queue=8 * 4, target_size=128,
-                 file_pattern='*.npy', label_slice=1, input_slice=(0,), do_reshuffle=True, keys=None):
-
-        data_loader = NumpyDataLoader(base_dir=base_dir, mode=mode, batch_size=batch_size, num_batches=num_batches, seed=seed, file_pattern=file_pattern,
-                                      input_slice=input_slice, label_slice=label_slice, keys=keys)
-
-        self.data_loader = data_loader
-        self.batch_size = batch_size
-        self.do_reshuffle = do_reshuffle
-        self.number_of_slices = 1
-
-        self.transforms = get_transforms(mode=mode, target_size=target_size)
-        self.augmenter = MultiThreadedDataLoader(data_loader, self.transforms, num_processes=num_processes,
-                                                 num_cached_per_queue=num_cached_per_queue, seeds=seed,
-                                                 shuffle=do_reshuffle)
-        self.augmenter.restart()
-
-    def __len__(self):
-        return len(self.data_loader)
-
-    def __iter__(self):
-        if self.do_reshuffle:
-            self.data_loader.reshuffle()
-        self.augmenter.renew()
-        return self.augmenter
-
-    def __next__(self):
-        return next(self.augmenter)
-
-
-def reshape(orig_img, append_value=-1024, new_shape=(512, 512, 512)):
-    reshaped_image = np.zeros(new_shape)
-    reshaped_image[...] = append_value
-    x_offset = 0
-    y_offset = 0  # (new_shape[1] - orig_img.shape[1]) // 2
-    z_offset = 0  # (new_shape[2] - orig_img.shape[2]) // 2
-
-    reshaped_image[x_offset:orig_img.shape[0] + x_offset, y_offset:orig_img.shape[1] + y_offset,
-                   z_offset:orig_img.shape[2] + z_offset] = orig_img
-    # insert temp_img.min() as background value
-
-    return reshaped_image
-
-
-def subfiles(folder, join=True, prefix=None, suffix=None, sort=True):
-    if join:
-        l = os.path.join
-    else:
-        l = lambda x, y: y
-    res = [l(folder, i) for i in os.listdir(folder) if os.path.isfile(os.path.join(folder, i))
-           and (prefix is None or i.startswith(prefix))
-           and (suffix is None or i.endswith(suffix))]
-    if sort:
-        res.sort()
-    return res
-
-
-def preprocess_data(root_dir):
-    print("preprocess data...")
-    image_dir = os.path.join(root_dir, 'imagesTr')
-    print("image_dir: {}".format(image_dir))
-    label_dir = os.path.join(root_dir, 'labelsTr')
-    print("label_dir: {}".format(label_dir))
-    output_dir = os.path.join(root_dir, 'preprocessed')
-    print("output_dir: {} ... ".format(output_dir), end="")
-    classes = 3
-
-    if not os.path.exists(output_dir):
-        os.makedirs(output_dir)
-        print("created!")
-    else:
-        print("found!\npreprocessed data already available, aborted preprocessing!")
-        return False
-
-    print("start preprocessing ... ", end="")
-    class_stats = defaultdict(int)
-    total = 0
-
-    nii_files = subfiles(image_dir, suffix=".nii.gz", join=False)
-
-    # strip macOS AppleDouble prefixes from filenames
-    for i in range(0, len(nii_files)):
-        if nii_files[i].startswith("._"):
-            nii_files[i] = nii_files[i][2:]
-
-    for f in nii_files:
-        image, _ = load(os.path.join(image_dir, f))
-        label, _ = load(os.path.join(label_dir, f.replace('_0000', '')))
-
-        # accumulate per-class voxel counts
-        for c in range(classes):
-            class_stats[c] += np.sum(label == c)
-            total += np.sum(label == c)
-
-        image = (image - image.min()) / (image.max() - image.min())
-
-        image = reshape(image, append_value=0, new_shape=(64, 64, 64))
-        label = reshape(label, append_value=0, new_shape=(64, 64, 64))
-
-        result = np.stack((image, label))
-
-        np.save(os.path.join(output_dir, f.split('.')[0] + '.npy'), result)
-    print("finished!")
-    return True
", end="") - npy_files = subfiles(image_dir, suffix=".npy", join=False) - - trainset_size = len(npy_files) * 50 // 100 - valset_size = len(npy_files) * 25 // 100 - testset_size = len(npy_files) * 25 // 100 - - splits = [] - for split in range(0, 5): - image_list = npy_files.copy() - trainset = [] - valset = [] - testset = [] - for i in range(0, trainset_size): - patient = np.random.choice(image_list) - image_list.remove(patient) - trainset.append(patient[:-4]) - for i in range(0, valset_size): - patient = np.random.choice(image_list) - image_list.remove(patient) - valset.append(patient[:-4]) - for i in range(0, testset_size): - patient = np.random.choice(image_list) - image_list.remove(patient) - testset.append(patient[:-4]) - split_dict = dict() - split_dict['train'] = trainset - split_dict['val'] = valset - split_dict['test'] = testset - - splits.append(split_dict) - - with open(os.path.join(output_dir, 'splits.pkl'), 'wb') as f: - pickle.dump(splits, f) - print("finished!") diff --git a/hyppopy/workflows/unet_usecase/unet_usecase.py b/hyppopy/workflows/unet_usecase/unet_usecase.py deleted file mode 100644 index 3e6046d..0000000 --- a/hyppopy/workflows/unet_usecase/unet_usecase.py +++ /dev/null @@ -1,132 +0,0 @@ -# DKFZ -# -# -# Copyright (c) German Cancer Research Center, -# Division of Medical and Biological Informatics. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE.txt or http://www.mitk.org for details. -# -# Author: Sven Wanner (s.wanner@dkfz.de) - -import os -import torch -import numpy as np -import pandas as pd -from sklearn.svm import SVC -import torch.optim as optim -import torch.nn.functional as F -from .networks.RecursiveUNet import UNet -from .loss_functions.dice_loss import SoftDiceLoss -from sklearn.model_selection import cross_val_score -from torch.optim.lr_scheduler import ReduceLROnPlateau -from .datasets.two_dim.NumpyDataLoader import NumpyDataSet - -from hyppopy.projectmanager import ProjectManager -from hyppopy.workflows.workflowbase import WorkflowBase -from hyppopy.workflows.dataloader.unetloader import UnetDataLoader - - -class unet_usecase(WorkflowBase): - - def setup(self): - dl = UnetDataLoader() - dl.start(data_path=ProjectManager.data_path, - data_name=ProjectManager.data_name, - image_dir=ProjectManager.image_dir, - labels_dir=ProjectManager.labels_dir, - split_dir=ProjectManager.split_dir, - output_dir=ProjectManager.data_path, - num_classes=ProjectManager.num_classes) - self.solver.set_data(dl.data) - - def blackbox_function(self, data, params): - if "batch_size" in params.keys(): - params["batch_size"] = int(round(params["batch_size"])) - if "batch_size" in params.keys(): - params["batch_size"] = int(round(params["batch_size"])) - if "n_epochs" in params.keys(): - params["n_epochs"] = int(round(params["n_epochs"])) - - batch_size = 8 - patch_size = 64 - - tr_keys = data[ProjectManager.fold]['train'] - val_keys = data[ProjectManager.fold]['val'] - - data_dir = os.path.join(ProjectManager.data_path, *(ProjectManager.data_name, ProjectManager.preprocessed_dir)) - - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - train_data_loader = NumpyDataSet(data_dir, - target_size=patch_size, - batch_size=batch_size, - keys=tr_keys) - val_data_loader = NumpyDataSet(data_dir, - target_size=patch_size, - batch_size=batch_size, - keys=val_keys, - mode="val", - do_reshuffle=False) - - model = 
diff --git a/hyppopy/workflows/unet_usecase/unet_usecase.py b/hyppopy/workflows/unet_usecase/unet_usecase.py
deleted file mode 100644
index 3e6046d..0000000
--- a/hyppopy/workflows/unet_usecase/unet_usecase.py
+++ /dev/null
@@ -1,132 +0,0 @@
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical and Biological Informatics.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
-#
-# Author: Sven Wanner (s.wanner@dkfz.de)
-
-import os
-import torch
-import numpy as np
-import pandas as pd
-from sklearn.svm import SVC
-import torch.optim as optim
-import torch.nn.functional as F
-from .networks.RecursiveUNet import UNet
-from .loss_functions.dice_loss import SoftDiceLoss
-from sklearn.model_selection import cross_val_score
-from torch.optim.lr_scheduler import ReduceLROnPlateau
-from .datasets.two_dim.NumpyDataLoader import NumpyDataSet
-
-from hyppopy.projectmanager import ProjectManager
-from hyppopy.workflows.workflowbase import WorkflowBase
-from hyppopy.workflows.dataloader.unetloader import UnetDataLoader
-
-
-class unet_usecase(WorkflowBase):
-
-    def setup(self):
-        dl = UnetDataLoader()
-        dl.start(data_path=ProjectManager.data_path,
-                 data_name=ProjectManager.data_name,
-                 image_dir=ProjectManager.image_dir,
-                 labels_dir=ProjectManager.labels_dir,
-                 split_dir=ProjectManager.split_dir,
-                 output_dir=ProjectManager.data_path,
-                 num_classes=ProjectManager.num_classes)
-        self.solver.set_data(dl.data)
-
-    def blackbox_function(self, data, params):
-        if "batch_size" in params.keys():
-            params["batch_size"] = int(round(params["batch_size"]))
-        if "n_epochs" in params.keys():
-            params["n_epochs"] = int(round(params["n_epochs"]))
-
-        batch_size = 8
-        patch_size = 64
-
-        tr_keys = data[ProjectManager.fold]['train']
-        val_keys = data[ProjectManager.fold]['val']
-
-        data_dir = os.path.join(ProjectManager.data_path, *(ProjectManager.data_name, ProjectManager.preprocessed_dir))
-
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        train_data_loader = NumpyDataSet(data_dir,
-                                         target_size=patch_size,
-                                         batch_size=batch_size,
-                                         keys=tr_keys)
-        val_data_loader = NumpyDataSet(data_dir,
-                                       target_size=patch_size,
-                                       batch_size=batch_size,
-                                       keys=val_keys,
-                                       mode="val",
-                                       do_reshuffle=False)
-
-        model = UNet(num_classes=ProjectManager.num_classes,
-                     in_channels=ProjectManager.in_channels)
-        model.to(device)
-
-        # We use a combination of DICE-loss and CE-Loss in this example.
-        # This proved good in the medical segmentation decathlon.
-        dice_loss = SoftDiceLoss(batch_dice=True)  # the Dice loss expects softmax probabilities, applied explicitly below
-        ce_loss = torch.nn.CrossEntropyLoss()  # no softmax for the CE loss -> torch already applies it internally
-
-        optimizer = optim.Adam(model.parameters(), lr=params["learning_rate"])
-        scheduler = ReduceLROnPlateau(optimizer, 'min')
-
-        losses = []
-        print("n_epochs {}".format(params['n_epochs']))
-        for epoch in range(params["n_epochs"]):
-            #### Train ####
-            model.train()
-            data = None
-            batch_counter = 0
-            for data_batch in train_data_loader:
-                optimizer.zero_grad()
-
-                # Shape of data_batch = [1, b, c, w, h]
-                # Desired shape = [b, c, w, h]
-                # Move data and target to the GPU
-                data = data_batch['data'][0].float().to(device)
-                target = data_batch['seg'][0].long().to(device)
-
-                pred = model(data)
-                pred_softmax = F.softmax(pred, dim=1)  # We calculate a softmax, because our SoftDiceLoss expects that as an input. The CE-Loss does the softmax internally.
-
-                loss = dice_loss(pred_softmax, target.squeeze()) + ce_loss(pred, target.squeeze())
-                loss.backward()
-                optimizer.step()
-                batch_counter += 1
-            ###############
-
-            #### Validate ####
-            model.eval()
-            data = None
-            loss_list = []
-            with torch.no_grad():
-                for data_batch in val_data_loader:
-                    data = data_batch['data'][0].float().to(device)
-                    target = data_batch['seg'][0].long().to(device)
-
-                    pred = model(data)
-                    pred_softmax = F.softmax(pred, dim=1)  # We calculate a softmax, because our SoftDiceLoss expects that as an input. The CE-Loss does the softmax internally.
-
-                    loss = dice_loss(pred_softmax, target.squeeze()) + ce_loss(pred, target.squeeze())
-                    loss_list.append(loss.item())
-
-            assert data is not None, 'data is None. Please check if your dataloader works properly'
-            scheduler.step(np.mean(loss_list))
-            losses.append(np.mean(loss_list))
-            ##################
-
-        return np.mean(losses)
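The deleted blackbox_function scores each hyperparameter trial with the sum of a soft Dice loss and cross-entropy. A self-contained sketch of that combination on dummy tensors (shapes are illustrative; the Dice term is re-implemented inline since SoftDiceLoss is removed in this diff):

import torch
import torch.nn.functional as F

pred = torch.randn(4, 3, 64, 64)                  # logits (batch, classes, h, w)
target = torch.randint(0, 3, (4, 64, 64))         # integer class labels

ce = F.cross_entropy(pred, target)                # CE applies log-softmax internally

probs = F.softmax(pred, dim=1)                    # the Dice term wants probabilities
onehot = F.one_hot(target, num_classes=3).permute(0, 3, 1, 2).float()
axes = (0, 2, 3)                                  # batch dice: pool batch and spatial axes
tp = (probs * onehot).sum(axes)
fp = (probs * (1 - onehot)).sum(axes)
fn = ((1 - probs) * onehot).sum(axes)
dice = (-(2 * tp + 1.) / (2 * tp + fp + fn + 1.)).mean()

loss = (dice + ce).item()                         # scalar value the solver would minimize
print(loss)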
diff --git a/hyppopy/workflows/workflowbase.py b/hyppopy/workflows/workflowbase.py
deleted file mode 100644
index f991036..0000000
--- a/hyppopy/workflows/workflowbase.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical and Biological Informatics.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
-#
-# Author: Sven Wanner (s.wanner@dkfz.de)
-
-from hyppopy.deepdict import DeepDict
-from hyppopy.solverfactory import SolverFactory
-from hyppopy.projectmanager import ProjectManager
-from hyppopy.globals import SETTINGSCUSTOMPATH, SETTINGSSOLVERPATH
-
-import os
-import abc
-import logging
-from hyppopy.globals import DEBUGLEVEL
-LOG = logging.getLogger(os.path.basename(__file__))
-LOG.setLevel(DEBUGLEVEL)
-
-
-class WorkflowBase(object):
-
-    def __init__(self):
-        self._solver = SolverFactory.get_solver()
-
-    def run(self, save=True):
-        self.setup()
-        self.solver.set_loss_function(self.blackbox_function)
-        self.solver.run()
-        if save:
-            self.solver.save_results()
-        self.test()
-
-    def get_results(self):
-        return self.solver.get_results()
-
-    def save_results(self, savedir=None, show=False):
-        if savedir is None:
-            savedir = ProjectManager.output_dir
-        return self.solver.save_results(savedir=savedir, show=show)
-
-    @abc.abstractmethod
-    def setup(self, **kwargs):
-        raise NotImplementedError('the user has to implement this function')
-
-    @abc.abstractmethod
-    def blackbox_function(self, data, params):
-        raise NotImplementedError('the user has to implement this function')
-
-    @abc.abstractmethod
-    def test(self):
-        pass
-
-    @property
-    def solver(self):
-        return self._solver
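With workflowbase.py removed, downstream code can no longer subclass WorkflowBase. For reference, a custom workflow against the removed API looked roughly like the sketch below (hedged: the config file and the hyperparameter 'x' are hypothetical, and the setup/blackbox bodies are illustrative):

from hyppopy.projectmanager import ProjectManager
from hyppopy.workflows.workflowbase import WorkflowBase


class quadratic_usecase(WorkflowBase):

    def setup(self):
        self.solver.set_data(None)              # no external data needed for this toy loss

    def blackbox_function(self, data, params):
        return (params["x"] - 2.0) ** 2         # scalar loss, minimized by the solver


ProjectManager.read_config("quadratic_config.json")  # hypothetical config defining hyperparameter 'x'
uc = quadratic_usecase()
uc.run(save=True)
res, best = uc.get_results()
print(best)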