diff --git a/examples/solver_tutorial.py b/examples/solver_tutorial.py index 2a2300f..cac378d 100644 --- a/examples/solver_tutorial.py +++ b/examples/solver_tutorial.py @@ -1,128 +1,124 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os import sys -import tempfile from hyppopy.HyppopyProject import HyppopyProject from hyppopy.solver.HyperoptSolver import HyperoptSolver from hyppopy.solver.OptunitySolver import OptunitySolver from hyppopy.solver.RandomsearchSolver import RandomsearchSolver from hyppopy.solver.GridsearchSolver import GridsearchSolver from hyppopy.BlackboxFunction import BlackboxFunction sys.path.append(os.path.dirname(os.path.abspath(__file__))) from sklearn.svm import SVC from sklearn.datasets import load_iris from sklearn.model_selection import cross_val_score config = { "hyperparameter": { "C": { "domain": "uniform", - "data": [0.0001, 20, 20], + "data": [0.0001, 20], "type": "float" }, "gamma": { "domain": "uniform", - "data": [0.0001, 20.0, 20], + "data": [0.0001, 20.0], "type": "float" }, "kernel": { "domain": "categorical", "data": ["linear", "sigmoid", "poly", "rbf"], "type": "str" }, "decision_function_shape": { "domain": "categorical", "data": ["ovo", "ovr"], "type": "str" } }, "settings": { "solver": { - "max_iterations": 300, - "plugin": "gridsearch", - "output_dir": os.path.join(tempfile.gettempdir(), 'results') + "max_iterations": 300 }, "custom": { - "the_answer": 42 + "use_solver": "hyperopt" } }} project = HyppopyProject(config=config) print("--------------------------------------------------------------") print("max_iterations:\t{}".format(project.solver_max_iterations)) -print("plugin:\t{}".format(project.solver_plugin)) -print("output_dir:\t{}".format(project.solver_output_dir)) -print("the_answer:\t{}".format(project.custom_the_answer)) +print("plugin:\t{}".format(project.custom_use_solver)) def my_loss_function(data, params): clf = SVC(**params) return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean() def my_dataloader_function(**kwargs): print("Dataloading...") # kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input. print("my loading argument: {}".format(kwargs['params']['my_dataloader_input'])) iris_data = load_iris() return [iris_data.data, iris_data.target] def my_preprocess_function(**kwargs): print("Preprocessing...") # kwargs['data'] allows accessing the input data print("data:", kwargs['data'][0].shape, kwargs['data'][1].shape) # kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input. - print("kwargs['params']['my_preproc_param']={}".format(kwargs['params']['my_preproc_param'])) + print("kwargs['params']['my_preproc_param']={}".format(kwargs['params']['my_preproc_param']), "\n") # if the preprocessing function returns something, # the input data will be replaced with the data returned by this function. 
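# as a stand-in for a real preprocessing step we simply shift every feature value by my_preproc_param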
x = kwargs['data'][0] y = kwargs['data'][1] for i in range(x.shape[0]): x[i, :] += kwargs['params']['my_preproc_param'] return [x, y] def my_callback_function(**kwargs): print("\r{}".format(kwargs), end="") blackbox = BlackboxFunction(blackbox_func=my_loss_function, dataloader_func=my_dataloader_function, preprocess_func=my_preprocess_function, callback_func=my_callback_function, #data=input_data, # data can be set directly or via a dataloader function my_preproc_param=1, my_dataloader_input='could/be/a/path') -if project.solver_plugin == "hyperopt": +if project.custom_use_solver == "hyperopt": solver = HyperoptSolver(project) -elif project.solver_plugin == "optunity": +elif project.custom_use_solver == "optunity": solver = OptunitySolver(project) -elif project.solver_plugin == "randomsearch": +elif project.custom_use_solver == "randomsearch": solver = RandomsearchSolver(project) -elif project.solver_plugin == "gridsearch": +elif project.custom_use_solver == "gridsearch": solver = GridsearchSolver(project) if solver is not None: solver.blackbox = blackbox solver.run() +df, best = solver.get_results() \ No newline at end of file diff --git a/examples/solver_tutorial_II_hyppopy.py b/examples/solver_tutorial_II_hyppopy.py new file mode 100644 index 0000000..49bfdc5 --- /dev/null +++ b/examples/solver_tutorial_II_hyppopy.py @@ -0,0 +1,184 @@ +# In this tutorial we solve an optimization problem using the Hyperopt Solver (http://hyperopt.github.io/hyperopt/). +# Hyperopt uses a Bayesian (Tree Parzen Estimator) optimization approach, which means that each iteration computes a +# new function value of the blackbox, interpolates a guess for the whole energy function and predicts a point to +# compute the next function value at. This next point is not necessarily a "better" value, it is only the point with +# the highest uncertainty in the function interpolation. +# +# See a visual explanation e.g. here (http://philipperemy.github.io/visualization/) + + +# import the HyppopyProject class keeping track of inputs +from hyppopy.HyppopyProject import HyppopyProject + +# import the solver classes +from hyppopy.solver.HyperoptSolver import HyperoptSolver +from hyppopy.solver.OptunitySolver import OptunitySolver +from hyppopy.solver.RandomsearchSolver import RandomsearchSolver +from hyppopy.solver.GridsearchSolver import GridsearchSolver + +# import the BlackboxFunction class wrapping your problem for Hyppopy +from hyppopy.BlackboxFunction import BlackboxFunction + + +# The next step is defining the problem space and all settings Hyppopy needs to optimize your problem. +# The config is a simple nested dictionary with two obligatory main sections, hyperparameter and settings. +# The hyperparameter section defines your searchspace. Each hyperparameter is again a dictionary with: +# +# - a domain ['categorical', 'uniform', 'normal', 'loguniform'] +# - the domain data [left bound, right bound] and +# - a type of your domain ['str', 'int', 'float'] +# +# The settings section has two subcategories, solver and custom. The first contains settings for the solver, +# here 'max_iterations' is the maximum number of iterations. +# +# The custom section allows defining custom parameters. An entry here is transformed to a member variable of the +# HyppopyProject class. These can be useful when implementing new solver classes or for controlling your hyppopy script. +# Here we use it as a solver switch to control the usage of our solver via the config.
This means with the script # below you can try out every solver by changing use_solver to 'optunity', 'randomsearch', 'gridsearch',... # It can be accessed like so: project.custom_use_solver (see below). If using the gridsearch solver, max_iterations is # ignored, instead each hyperparameter must specify a number of samples in addition to the range like so: # 'data': [0, 1, 100] which means sampling the space from 0 to 1 in 100 intervals. + +config = { +"hyperparameter": { + "C": { + "domain": "uniform", + "data": [0.0001, 20], + "type": "float" + }, + "gamma": { + "domain": "uniform", + "data": [0.0001, 20.0], + "type": "float" + }, + "kernel": { + "domain": "categorical", + "data": ["linear", "sigmoid", "poly", "rbf"], + "type": "str" + }, + "decision_function_shape": { + "domain": "categorical", + "data": ["ovo", "ovr"], + "type": "str" + } +}, +"settings": { + "solver": { + "max_iterations": 300 + }, + "custom": { + "use_solver": "hyperopt" + } +}} + + +# When creating a HyppopyProject instance we pass the config dictionary to the constructor. +project = HyppopyProject(config=config) + +# demonstration of the custom parameter access +print("-"*30) +print("max_iterations:\t{}".format(project.solver_max_iterations)) +print("solver chosen -> {}".format(project.custom_use_solver)) +print("-"*30) + + +# Hyppopy offers a class called BlackboxFunction to wrap your problem for Hyppopy. +# The function signature is as follows: +# BlackboxFunction(blackbox_func=None, +# dataloader_func=None, +# preprocess_func=None, +# callback_func=None, +# data=None, +# **kwargs) +# +# This means we can set a couple of function pointers, a data object and an arbitrary number of custom parameters via kwargs. +# +# - blackbox_func: a function pointer to the actual, user defined, blackbox function that is computing our loss +# - dataloader_func: a function pointer to a function handling the dataloading +# - preprocess_func: a function pointer to a function automatically executed before starting the optimization process +# - callback_func: a function pointer to a function that is called after each iteration with the trial object as input +# - data: setting data can be done via dataloader_func or directly (see the sketch below) +# - kwargs are passed to all functions above and thus can be used for parameter sharing between the functions +# +# (see the documentation for more details) +# +# Below we demonstrate the usage of all the above by defining a my_dataloader_function which in fact only grabs the +# iris dataset from sklearn and returns it. A my_preprocess_function which also does nothing useful here but +# demonstrates that a custom parameter can be set via kwargs and used in all of our functions when called within +# Hyppopy. The my_callback_function gets as input the dictionary containing the state of the iteration and thus can be +# used to access the current state of each solver iteration. Finally we define the actual loss function +# my_loss_function, which gets as input a data object and params. Both parameters are fixed: the first is defined by +# the user depending on what the dataloader returns or the data object set in the constructor, the second is a dictionary +# with a sample of your hyperparameter space whose content is chosen by the solver.
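# As noted in the parameter list above, data can also be passed to BlackboxFunction directly instead of via a
# dataloader_func. A minimal sketch of that variant follows, assuming the data object uses the same
# [inputs, targets] layout the dataloader below returns (simple_loss and simple_blackbox are illustrative names only):

from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from hyppopy.BlackboxFunction import BlackboxFunction

def simple_loss(data, params):
    # same loss as in the full example below: negative mean cross validation score of an SVC
    clf = SVC(**params)
    return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean()

iris = load_iris()
# passing data directly makes dataloader_func, preprocess_func and callback_func optional
simple_blackbox = BlackboxFunction(blackbox_func=simple_loss,
                                   data=[iris.data, iris.target])
# such a blackbox can be assigned to any solver exactly like in the full example below, e.g. solver.blackbox = simple_blackbox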
+ +from sklearn.svm import SVC +from sklearn.datasets import load_iris +from sklearn.model_selection import cross_val_score + + +def my_dataloader_function(**kwargs): + print("Dataloading...") + # kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input. + print("my loading argument: {}".format(kwargs['params']['my_dataloader_input'])) + iris_data = load_iris() + return [iris_data.data, iris_data.target] + + +def my_preprocess_function(**kwargs): + print("Preprocessing...") + # kwargs['data'] allows accessing the input data + print("data:", kwargs['data'][0].shape, kwargs['data'][1].shape) + # kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input. + print("kwargs['params']['my_preproc_param']={}".format(kwargs['params']['my_preproc_param']), "\n") + # if the preprocessing function returns something, + # the input data will be replaced with the data returned by this function. + x = kwargs['data'][0] + y = kwargs['data'][1] + for i in range(x.shape[0]): + x[i, :] += kwargs['params']['my_preproc_param'] + return [x, y] + + +def my_callback_function(**kwargs): + print("\r{}".format(kwargs), end="") + + +def my_loss_function(data, params): + clf = SVC(**params) + return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean() + + +# We now create the BlackboxFunction object and pass all function pointers defined above, +# as well as 2 dummy parameter (my_preproc_param, my_dataloader_input) for demonstration purposes. +blackbox = BlackboxFunction(blackbox_func=my_loss_function, + dataloader_func=my_dataloader_function, + preprocess_func=my_preprocess_function, + callback_func=my_callback_function, + my_preproc_param=1, + my_dataloader_input='could/be/a/path') + + +# Last step, is we use our use_solver config parameter defined in the custom section to decide which solver +# should be used, create the solver instance respectively, give it the blackbox and run it. After execution +# we can get the result via get_result() which returns a pandas dataframe containing the complete history and +# a dict best containing the best parameter set. + +if project.custom_use_solver == "hyperopt": + solver = HyperoptSolver(project) +elif project.custom_use_solver == "optunity": + solver = OptunitySolver(project) +elif project.custom_use_solver == "randomsearch": + solver = RandomsearchSolver(project) +elif project.custom_use_solver == "gridsearch": + solver = GridsearchSolver(project) + +if solver is not None: + solver.blackbox = blackbox +solver.run() +df, best = solver.get_results() + +print("\n") +print("*"*100) +print("Best Parameter Set:\n{}".format(best)) +print("*"*100) + diff --git a/examples/solver_tutorial_I_hyppopy.py b/examples/solver_tutorial_I_hyppopy.py new file mode 100644 index 0000000..0ddcf02 --- /dev/null +++ b/examples/solver_tutorial_I_hyppopy.py @@ -0,0 +1,73 @@ +# A hyppopy minimal example optimizing a simple demo function f(x,y) = x**2+y**2 + +# import the HyppopyProject class keeping track of inputs +from hyppopy.HyppopyProject import HyppopyProject + +# import the HyperoptSolver class +from hyppopy.solver.HyperoptSolver import HyperoptSolver + +# To configure the Hyppopy solver we use a simple nested dictionary with two obligatory main sections, +# hyperparameter and settings. The hyperparameter section defines your searchspace. 
Each hyperparameter +# is again a dictionary with: +# +# - a domain ['categorical', 'uniform', 'normal', 'loguniform'] +# - the domain data [left bound, right bound] and +# - a type of your domain ['str', 'int', 'float'] +# +# The settings section has two subcategories, solver and custom. The first contains settings for the solver, +# here 'max_iterations' - is the maximum number of iteration. +# +# The custom section allows defining custom parameter. An entry here is transformed to a member variable of the +# HyppopyProject class. These can be useful when implementing new solver classes or for control your hyppopy script. +# Here we use it as a solver switch to control the usage of our solver via the config. This means with the script +# below your can try out every solver by changing use_solver to 'optunity', 'randomsearch', 'gridsearch',... +# It can be used like so: project.custom_use_plugin (see below) If using the gridsearch solver, max_iterations is +# ignored, instead each hyperparameter must specifiy a number of samples additionally to the range like so: +# 'data': [0, 1, 100] which means sampling the space from 0 to 1 in 100 intervals. + +config = { +"hyperparameter": { + "x": { + "domain": "normal", + "data": [-10.0, 10.0], + "type": "float" + }, + "y": { + "domain": "uniform", + "data": [-10.0, 10.0], + "type": "float" + } +}, +"settings": { + "solver": { + "max_iterations": 500 + }, + "custom": {} +}} + + +# When creating a HyppopyProject instance we +# pass the config dictionary to the constructor. +project = HyppopyProject(config=config) + + +# The user defined loss function +def my_loss_function(x, y): + return x**2+y**2 + +# create a solver instance +solver = HyperoptSolver(project) +# pass the loss function to the solver +solver.blackbox = my_loss_function +# run the solver +solver.run() +# get the result via get_result() which returns a pandas dataframe +# containing the complete history and a dict best containing the +# best parameter set. +df, best = solver.get_results() + +print("\n") +print("*"*100) +print("Best Parameter Set:\n{}".format(best)) +print("*"*100) + diff --git a/hyppopy/Solver/GridsearchSolver.py b/hyppopy/Solver/GridsearchSolver.py index 382be65..0681213 100644 --- a/hyppopy/Solver/GridsearchSolver.py +++ b/hyppopy/Solver/GridsearchSolver.py @@ -1,466 +1,468 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os import copy import logging import datetime import numpy as np from hyperopt import Trials from scipy.stats import norm from itertools import product from hyppopy.globals import DEBUGLEVEL from .HyppopySolver import HyppopySolver +from ..BlackboxFunction import BlackboxFunction LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) def get_uniform_axis_sample(a, b, N, dtype): """ returns a uniform sample x(n) in the range [a,b] sampled at N pojnts :param a: left value range bound :param b: right value range bound :param N: discretization of intervall [a,b] :param dtype: data type :return: [list] axis range """ assert a < b, "condition a < b violated!" assert isinstance(N, int), "condition N of type int violated!" assert isinstance(dtype, str), "condition type of type str violated!" 
if dtype == "int": return list(np.linspace(a, b, N).astype(int)) elif dtype == "float" or dtype == "double": return list(np.linspace(a, b, N)) else: raise AssertionError("dtype {} not supported for uniform sampling!".format(dtype)) def get_norm_cdf(N): """ returns a normed gaussian cdf (range [0,1]) with N sampling points :param N: sampling points :return: [ndarray] gaussian cdf function values """ assert isinstance(N, int), "condition N of type int violated!" even = True if N % 2 != 0: N -= 1 even = False N = int(N/2) sigma = 1/3 x = np.linspace(0, 1, N) y1 = norm.cdf(x, loc=0, scale=sigma)-0.5 if not even: y1 = np.append(y1, [0.5]) y2 = 1-(norm.cdf(x, loc=0, scale=sigma)-0.5) y2 = np.flip(y2, axis=0) y = np.concatenate((y1, y2), axis=0) return y def get_gaussian_axis_sample(a, b, N, dtype): """ returns a function value f(n) where f is a gaussian cdf in range [a, b] and N sampling points :param a: left value range bound :param b: right value range bound :param N: discretization of intervall [a,b] :param dtype: data type :return: [list] axis range """ assert a < b, "condition a < b violated!" assert isinstance(N, int), "condition N of type int violated!" assert isinstance(dtype, str), "condition type of type str violated!" data = [] for n in range(N): x = a + get_norm_cdf(N)[n]*(b-a) if dtype == "int": data.append(int(x)) elif dtype == "float" or dtype == "double": data.append(x) else: raise AssertionError("dtype {} not supported for uniform sampling!".format(dtype)) return data def get_logarithmic_axis_sample(a, b, N, dtype): """ returns a function value f(n) where f is logarithmic function e^x sampling the exponent range [log(a), log(b)] linear at N sampling points. The function values returned are in the range [a, b]. :param a: left value range bound :param b: right value range bound :param N: discretization of intervall [a,b] :param dtype: data type :return: [list] axis range """ assert a < b, "condition a < b violated!" + assert a > 0, "condition a > 0 violated!" assert isinstance(N, int), "condition N of type int violated!" assert isinstance(dtype, str), "condition type of type str violated!" 
lexp = np.log(a) rexp = np.log(b) exp_range = np.linspace(lexp, rexp, N) data = [] for n in range(exp_range.shape[0]): x = np.exp(exp_range[n]) if dtype == "int": data.append(int(x)) elif dtype == "float" or dtype == "double": data.append(x) else: raise AssertionError("dtype {} not supported for uniform sampling!".format(dtype)) return data class GridsearchSolver(HyppopySolver): def __init__(self, project=None): HyppopySolver.__init__(self, project) self._tid = None def loss_function(self, params): loss = None vals = {} idx = {} for key, value in params.items(): vals[key] = [value] idx[key] = [self._tid] trial = {'tid': self._tid, 'result': {'loss': None, 'status': 'ok'}, 'misc': { 'tid': self._tid, 'idxs': idx, 'vals': vals }, 'book_time': datetime.datetime.now(), 'refresh_time': None } try: loss = self.blackbox(**params) if loss is None: trial['result']['loss'] = np.nan trial['result']['status'] = 'failed' else: trial['result']['loss'] = loss except Exception as e: LOG.error("execution of self.blackbox(**params) failed due to:\n {}".format(e)) trial['result']['loss'] = np.nan trial['result']['status'] = 'failed' trial['refresh_time'] = datetime.datetime.now() self._trials.trials.append(trial) - if self.blackbox.callback_func is not None: + if isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None: cbd = copy.deepcopy(params) cbd['iterations'] = self._tid + 1 cbd['loss'] = loss cbd['status'] = trial['result']['status'] self.blackbox.callback_func(**cbd) return def execute_solver(self, searchspace): self._tid = 0 self._trials = Trials() for x in product(*searchspace[1]): params = {} for name, value in zip(searchspace[0], x): params[name] = value try: self.loss_function(params) self._tid += 1 except Exception as e: msg = "internal error in randomsearch execute_solver occured. {}".format(e) LOG.error(msg) raise BrokenPipeError(msg) self.best = self._trials.argmin def convert_searchspace(self, hyperparameter): searchspace = [[], []] for name, param in hyperparameter.items(): if param["domain"] == "categorical": searchspace[0].append(name) searchspace[1].append(param["data"]) elif param["domain"] == "uniform": searchspace[0].append(name) searchspace[1].append(get_uniform_axis_sample(param["data"][0], param["data"][1], param["data"][2], param["type"])) elif param["domain"] == "normal": searchspace[0].append(name) searchspace[1].append(get_gaussian_axis_sample(param["data"][0], param["data"][1], param["data"][2], param["type"])) elif param["domain"] == "loguniform": searchspace[0].append(name) searchspace[1].append(get_logarithmic_axis_sample(param["data"][0], param["data"][1], param["data"][2], param["type"])) return searchspace # def get_uniform_axis_sample(n, a, b, N): # """ # returns a uniform sample x(n) in the range [a,b] sampled at N pojnts # :param n: input position within range [0,N] # :param a: left value range bound # :param b: right value range bound # :param N: discretization of intervall [a,b] # :return: [float] x(n) # """ # assert a < b, "condition a < b violated!" # assert n >= 0, "condition n >= 0 violated!" # assert n < N, "condition n < N violated!" # assert isinstance(n, int), "condition n of type int violated!" # assert isinstance(N, int), "condition N of type int violated!" 
# return np.linspace(a, b, N)[n] # # # def get_norm_cdf(N): # """ # returns a normed gaussian cdf (range [0,1]) with N sampling points # :param N: sampling points # :return: [ndarray] gaussian cdf function values # """ # assert isinstance(N, int), "condition N of type int violated!" # even = True # if N % 2 != 0: # N -= 1 # even = False # N = int(N/2) # sigma = 1/3 # x = np.linspace(0, 1, N) # y1 = norm.cdf(x, loc=0, scale=sigma)-0.5 # if not even: # y1 = np.append(y1, [0.5]) # y2 = 1-(norm.cdf(x, loc=0, scale=sigma)-0.5) # y2 = np.flip(y2, axis=0) # y = np.concatenate((y1, y2), axis=0) # return y # # # def get_gaussian_axis_sample(n, a, b, N): # """ # returns a function value f(n) where f is a gaussian cdf in range [a, b] and N sampling points # :param n: input position within range [0,N] # :param a: left value range bound # :param b: right value range bound # :param N: discretization of intervall [a,b] # :return: [float] f(n) # """ # assert a < b, "condition a < b violated!" # assert n >= 0, "condition n >= 0 violated!" # assert n < N, "condition n < N violated!" # assert isinstance(n, int), "condition n of type int violated!" # assert isinstance(N, int), "condition N of type int violated!" # return a + get_norm_cdf(N)[n]*(b-a) # # # def get_logarithmic_axis_sample(n, a, b, N): # """ # returns a function value f(n) where f is logarithmic function e^x sampling # the exponent range [log(a), log(b)] linear at N sampling points. # The function values returned are in the range [a, b]. # :param n: sampling point [0, N-1] # :param a: left range bound # :param b: right range bound # :param N: discretization of intervall [log(a),log(b)] # :return: [float] f(x) # """ # assert a < b, "condition a < b violated!" # assert n >= 0, "condition n >= 0 violated!" # assert n < N, "condition n < N violated!" # assert isinstance(n, int), "condition n of type int violated!" # assert isinstance(N, int), "condition N of type int violated!" # lexp = np.log(a) # rexp = np.log(b) # exp_range = np.linspace(lexp, rexp, N) # return np.exp(exp_range[n]) # # # class GridAxis(object): # _data = None # _name = None # _type = None # _domain = None # _sampling = None # _is_categorical = False # _current_pos = 0 # # def __init__(self, name, param): # self._name = name # self._domain = param["domain"] # self.data = param["data"] # self.type = param["type"] # if param["domain"] == "categorical": # self._is_categorical = True # # def elems_left(self): # return self._sampling - self._current_pos - 1 # # def increment(self): # self._current_pos += 1 # if self._current_pos > self._sampling - 1: # self._current_pos = 0 # # def get_value(self): # if self._domain == "categorical": # return self.data[self._current_pos] # elif self._domain == "uniform": # return get_uniform_axis_sample(self._current_pos, self.data[0], self.data[1], self._sampling) # elif self._domain == "normal": # return get_gaussian_axis_sample(self._current_pos, self.data[0], self.data[1], self._sampling) # elif self._domain == "loguniform": # return get_logarithmic_axis_sample(self._current_pos, self.data[0], self.data[1], self._sampling) # # @property # def name(self): # return self._name # # @property # def data(self): # return self._data # # @data.setter # def data(self, value): # if self._domain == "categorical": # assert len(value) > 0, "Precondition violation, empty data cannot be handled!" # self._data = value # self._sampling = len(value) # else: # assert len(value) == 3, "precondition violation, gridsearch axis needs low, high and sampling value!" 
# self._data = value[0:2] # self._sampling = value[2] # # @property # def sampling(self): # return self._sampling # # @property # def type(self): # return self._type # # @type.setter # def type(self, value): # assert isinstance(value, str), "precondition violation, value expects a str!" # if value == "str": # self._type = str # elif value == "int": # self._type = int # if value == "float" or value == "double": # self._type = float # # # class GridSampler(object): # # def __init__(self): # self._axis = [] # self._loops = [] # # def get_gridsize(self): # n = 1 # for ax in self._axis: # n *= ax.sampling # return n # # def add_axis(self, axis): # self._axis.append(axis) # self.update_loops() # # def update_loops(self): # if len(self._axis) == 1: # self._loops.append(1) # else: # lens = [] # for ax in self._axis: # lens.append(ax.sampling) # self._loops.append(np.cumprod(lens)) # # def get_sample(self): # sample = [] # for ax in self._axis: # sample.append(ax.get_value()) # return sample # # # class GridsearchSolver(HyppopySolver): # # def __init__(self, project=None): # HyppopySolver.__init__(self, project) # self._tid = None # # def loss_function(self, params): # loss = None # vals = {} # idx = {} # for key, value in params.items(): # vals[key] = [value] # idx[key] = [self._tid] # trial = {'tid': self._tid, # 'result': {'loss': None, 'status': 'ok'}, # 'misc': { # 'tid': self._tid, # 'idxs': idx, # 'vals': vals # }, # 'book_time': datetime.datetime.now(), # 'refresh_time': None # } # try: # loss = self.blackbox(**params) # if loss is None: # trial['result']['loss'] = np.nan # trial['result']['status'] = 'failed' # else: # trial['result']['loss'] = loss # except Exception as e: # LOG.error("execution of self.blackbox(**params) failed due to:\n {}".format(e)) # trial['result']['loss'] = np.nan # trial['result']['status'] = 'failed' # trial['refresh_time'] = datetime.datetime.now() # self._trials.trials.append(trial) # if self.blackbox.callback_func is not None: # cbd = copy.deepcopy(params) # cbd['iterations'] = self._tid + 1 # cbd['loss'] = loss # cbd['status'] = trial['result']['status'] # self.blackbox.callback_func(**cbd) # return # # def execute_solver(self, searchspace): # self._tid = 0 # self._trials = Trials() # # while True: # params = {} # for axis in searchspace: # params[axis.name] = axis.next() # if params[axis.name] is None: # break # try: # self.loss_function(params) # self._tid += 1 # except Exception as e: # msg = "internal error in randomsearch execute_solver occured. {}".format(e) # LOG.error(msg) # raise BrokenPipeError(msg) # self.best = self._trials.argmin # # def convert_searchspace(self, hyperparameter): # searchspace = [] # for name, param in hyperparameter.items(): # if param["domain"] != "categorical": # searchspace.append(GridAxis(name, param)) # for name, param in hyperparameter.items(): # if param["domain"] == "categorical": # searchspace.append(GridAxis(name, param)) # searchspace[-1].is_looping = False # return searchspace diff --git a/hyppopy/Solver/HyperoptSolver.py b/hyppopy/Solver/HyperoptSolver.py index f49d623..ca27cb0 100644 --- a/hyppopy/Solver/HyperoptSolver.py +++ b/hyppopy/Solver/HyperoptSolver.py @@ -1,141 +1,142 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. 
# # Author: Sven Wanner (s.wanner@dkfz.de) import os import copy import logging import numpy as np from pprint import pformat from hyperopt import fmin, tpe, hp, STATUS_OK, STATUS_FAIL, Trials from hyppopy.globals import DEBUGLEVEL from .HyppopySolver import HyppopySolver +from ..BlackboxFunction import BlackboxFunction LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class HyperoptSolver(HyppopySolver): def __init__(self, project=None): HyppopySolver.__init__(self, project) def loss_function(self, params): status = STATUS_FAIL try: loss = self.blackbox(**params) if loss is not None: status = STATUS_OK else: loss = 1e9 except Exception as e: LOG.error("execution of self.blackbox(**params) failed due to:\n {}".format(e)) status = STATUS_FAIL loss = 1e9 - if self.blackbox.callback_func is not None: + if isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None: cbd = copy.deepcopy(params) cbd['iterations'] = self._trials.trials[-1]['tid'] + 1 cbd['loss'] = loss cbd['status'] = status self.blackbox.callback_func(**cbd) return {'loss': loss, 'status': status} def execute_solver(self, searchspace): LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace))) self.trials = Trials() try: self.best = fmin(fn=self.loss_function, space=searchspace, algo=tpe.suggest, max_evals=self.max_iterations, trials=self.trials) except Exception as e: msg = "internal error in hyperopt.fmin occured. {}".format(e) LOG.error(msg) raise BrokenPipeError(msg) def convert_searchspace(self, hyperparameter): solution_space = {} for name, content in hyperparameter.items(): param_settings = {'name': name} for key, value in content.items(): if key == 'domain': param_settings['domain'] = value elif key == 'data': param_settings['data'] = value elif key == 'type': param_settings['dtype'] = value solution_space[name] = self.convert(param_settings) return solution_space def convert(self, param_settings): name = param_settings["name"] domain = param_settings["domain"] dtype = param_settings["dtype"] data = param_settings["data"] if domain == "uniform": if dtype == "float" or dtype == "double": return hp.uniform(name, data[0], data[1]) elif dtype == "int": data = list(np.arange(int(data[0]), int(data[1] + 1))) return hp.choice(name, data) else: msg = "cannot convert the type {} in domain {}".format(dtype, domain) LOG.error(msg) raise LookupError(msg) elif domain == "loguniform": if dtype == "float" or dtype == "double": if data[0] == 0: data[0] += 1e-23 assert data[0] > 0, "Precondition Violation, a < 0!" assert data[0] < data[1], "Precondition Violation, a > b!" assert data[1] > 0, "Precondition Violation, b < 0!" lexp = np.log(data[0]) rexp = np.log(data[1]) assert lexp is not np.nan, "Precondition violation, left bound input error, results in nan!" assert rexp is not np.nan, "Precondition violation, right bound input error, results in nan!" 
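# hp.loguniform(label, low, high) draws values distributed as exp(uniform(low, high)), so passing the
# log-transformed bounds lexp and rexp yields samples spread log-uniformly over the original range [data[0], data[1]].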
return hp.loguniform(name, lexp, rexp) else: msg = "cannot convert the type {} in domain {}".format(dtype, domain) LOG.error(msg) raise LookupError(msg) elif domain == "normal": if dtype == "float" or dtype == "double": mu = (data[1] - data[0]) / 2.0 sigma = mu / 3 return hp.normal(name, data[0] + mu, sigma) else: msg = "cannot convert the type {} in domain {}".format(dtype, domain) LOG.error(msg) raise LookupError(msg) elif domain == "categorical": if dtype == 'str': return hp.choice(name, data) elif dtype == 'bool': data = [] for elem in data: if elem == "true" or elem == "True" or elem == 1 or elem == "1": data.append(True) elif elem == "false" or elem == "False" or elem == 0 or elem == "0": data.append(False) else: msg = "cannot convert the type {} in domain {}, unknown bool type value".format(dtype, domain) LOG.error(msg) raise LookupError(msg) return hp.choice(name, data) diff --git a/hyppopy/Solver/HyppopySolver.py b/hyppopy/Solver/HyppopySolver.py index ac95f00..fb027dd 100644 --- a/hyppopy/Solver/HyppopySolver.py +++ b/hyppopy/Solver/HyppopySolver.py @@ -1,195 +1,221 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import abc import os import types import logging import datetime import numpy as np import pandas as pd from ..globals import DEBUGLEVEL from ..HyppopyProject import HyppopyProject from ..BlackboxFunction import BlackboxFunction +from ..VirtualFunction import VirtualFunction from hyppopy.globals import DEBUGLEVEL, DEFAULTITERATIONS LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class HyppopySolver(object): def __init__(self, project=None): self._best = None self._trials = None self._blackbox = None self._max_iterations = None self._project = project self._total_duration = None + self._solver_overhead = None + self._time_per_iteration = None + self._accumulated_blackbox_time = None @abc.abstractmethod def execute_solver(self, searchspace): raise NotImplementedError('users must define execute_solver to use this class') - # @abc.abstractmethod - # def convert_results(self): - # raise NotImplementedError('users must define convert_results to use this class') - @abc.abstractmethod def convert_searchspace(self, hyperparameter): raise NotImplementedError('users must define convert_searchspace to use this class') def run(self, print_stats=True): if 'solver_max_iterations' not in self.project.__dict__: msg = "Missing max_iteration entry in project, use default {}!".format(DEFAULTITERATIONS) LOG.warning(msg) print("WARNING: {}".format(msg)) setattr(self.project, 'solver_max_iterations', DEFAULTITERATIONS) self._max_iterations = self.project.solver_max_iterations start_time = datetime.datetime.now() try: self.execute_solver(self.convert_searchspace(self.project.hyperparameter)) except Exception as e: raise e end_time = datetime.datetime.now() dt = end_time - start_time days = divmod(dt.total_seconds(), 86400) hours = divmod(days[1], 3600) minutes = divmod(hours[1], 60) seconds = divmod(minutes[1], 1) milliseconds = divmod(seconds[1], 0.001) self._total_duration = [int(days[0]), int(hours[0]), int(minutes[0]), int(seconds[0]), int(milliseconds[0])] if print_stats: self.print_best() self.print_timestats() - def 
convert_results(self): + def get_results(self): results = {'duration': [], 'losses': []} pset = self.trials.trials[0]['misc']['vals'] for p in pset.keys(): results[p] = [] for n, trial in enumerate(self.trials.trials): t1 = trial['book_time'] t2 = trial['refresh_time'] results['duration'].append((t2 - t1).microseconds / 1000.0) results['losses'].append(trial['result']['loss']) losses = np.array(results['losses']) results['losses'] = list(losses) pset = trial['misc']['vals'] for p in pset.items(): results[p[0]].append(p[1][0]) return pd.DataFrame.from_dict(results), self.best def print_best(self): print("\n") print("#" * 40) print("### Best Parameter Choice ###") print("#" * 40) for name, value in self.best.items(): print(" - {}\t:\t{}".format(name, value)) + print("\n - number of iterations\t:\t{}".format(self.trials.trials[-1]['tid']+1)) + print(" - total time\t:\t{}d:{}h:{}m:{}s:{}ms".format(self._total_duration[0], + self._total_duration[1], + self._total_duration[2], + self._total_duration[3], + self._total_duration[4])) print("#" * 40) - def print_timestats(self): + def compute_time_statistics(self): dts = [] - tot = self._total_duration[0]*86400 + \ - self._total_duration[1]*3600 + \ - self._total_duration[2]*60 + \ - self._total_duration[3]*1000 + \ - self._total_duration[4] - overhead = tot for trial in self._trials.trials: if 'book_time' in trial.keys() and 'refresh_time' in trial.keys(): dt = trial['refresh_time'] - trial['book_time'] dts.append(dt.total_seconds()) - overhead -= dt.total_seconds()*1000.0 + self._time_per_iteration = np.mean(dts) * 1e3 + self._accumulated_blackbox_time = np.sum(dts) * 1e3 + tmp = self.total_duration - self._accumulated_blackbox_time + self._solver_overhead = int(np.round(100.0 / self.total_duration * tmp)) + + def print_timestats(self): print("\n") print("#" * 40) print("### Timing Statistics ###") print("#" * 40) - per_iter = int(np.mean(dts)*1e6)/1000.0 - print(" - per iteration: {}ms".format(per_iter)) + print(" - per iteration: {}ms".format(int(self.time_per_iteration*1e4)/10000)) print(" - total time: {}d:{}h:{}m:{}s:{}ms".format(self._total_duration[0], self._total_duration[1], self._total_duration[2], self._total_duration[3], self._total_duration[4])) - print(" - overhead: {}%".format(int(np.round(100.0/tot*overhead)))) + print(" - solver overhead: {}%".format(self.solver_overhead)) print("#" * 40) @property def project(self): return self._project @project.setter def project(self, value): if not isinstance(value, HyppopyProject): msg = "Input error, project_manager of type: {} not allowed!".format(type(value)) LOG.error(msg) raise IOError(msg) self._project = value @property def blackbox(self): return self._blackbox @blackbox.setter def blackbox(self, value): - if isinstance(value, types.FunctionType) or isinstance(value, BlackboxFunction): + if isinstance(value, types.FunctionType) or isinstance(value, BlackboxFunction) or isinstance(value, VirtualFunction): self._blackbox = value else: self._blackbox = None msg = "Input error, blackbox of type: {} not allowed!".format(type(value)) LOG.error(msg) raise IOError(msg) @property def best(self): return self._best @best.setter def best(self, value): if not isinstance(value, dict): msg = "Input error, best of type: {} not allowed!".format(type(value)) LOG.error(msg) raise IOError(msg) self._best = value @property def trials(self): return self._trials @trials.setter def trials(self, value): self._trials = value @property def max_iterations(self): return self._max_iterations @max_iterations.setter 
def max_iterations(self, value): if not isinstance(value, int): msg = "Input error, max_iterations of type: {} not allowed!".format(type(value)) LOG.error(msg) raise IOError(msg) if value < 1: msg = "Precondition violation, max_iterations < 1!" LOG.error(msg) raise IOError(msg) self._max_iterations = value + + @property + def total_duration(self): + return (self._total_duration[0] * 86400 + self._total_duration[1] * 3600 + self._total_duration[2] * 60 + self._total_duration[3]) * 1000 + self._total_duration[4] + + @property + def solver_overhead(self): + if self._solver_overhead is None: + self.compute_time_statistics() + return self._solver_overhead + + @property + def time_per_iteration(self): + if self._time_per_iteration is None: + self.compute_time_statistics() + return self._time_per_iteration + + @property + def accumulated_blackbox_time(self): + if self._accumulated_blackbox_time is None: + self.compute_time_statistics() + return self._accumulated_blackbox_time diff --git a/hyppopy/Solver/OptunitySolver.py b/hyppopy/Solver/OptunitySolver.py index b838e62..359e930 100644 --- a/hyppopy/Solver/OptunitySolver.py +++ b/hyppopy/Solver/OptunitySolver.py @@ -1,117 +1,118 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os import copy import logging import optunity import datetime import numpy as np from pprint import pformat from hyperopt import Trials from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) from .HyppopySolver import HyppopySolver from ..helpers import split_categorical +from ..BlackboxFunction import BlackboxFunction class OptunitySolver(HyppopySolver): def __init__(self, project=None): HyppopySolver.__init__(self, project) self._solver_info = None self.opt_trials = None self._idx = None def loss_function(self, **params): self._idx += 1 vals = {} idx = {} for key, value in params.items(): vals[key] = [value] idx[key] = [self._idx] trial = {'tid': self._idx, 'result': {'loss': None, 'status': 'ok'}, 'misc': { 'tid': self._idx, 'idxs': idx, 'vals': vals }, 'book_time': datetime.datetime.now(), 'refresh_time': None } try: for key in params.keys(): if self.project.get_typeof(key) is int: params[key] = int(round(params[key])) loss = self.blackbox(**params) trial['result']['loss'] = loss trial['result']['status'] = 'ok' except Exception as e: LOG.error("computing loss failed due to:\n {}".format(e)) loss = np.nan trial['result']['loss'] = np.nan trial['result']['status'] = 'failed' trial['refresh_time'] = datetime.datetime.now() self._trials.trials.append(trial) - if self.blackbox.callback_func is not None: + if isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None: cbd = copy.deepcopy(params) cbd['iterations'] = self._idx cbd['loss'] = loss cbd['status'] = trial['result']['status'] self.blackbox.callback_func(**cbd) return loss def execute_solver(self, searchspace): LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace))) self.trials = Trials() self._idx = 0 try: self.best, self.opt_trials, self._solver_info = optunity.minimize_structured(f=self.loss_function, num_evals=self.max_iterations, search_space=searchspace) 
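# Note on the searchspace argument: convert_searchspace (below) builds the nested dict format expected by
# optunity's structured search spaces. Non-categorical parameters map to their [low, high] bounds and each
# categorical parameter adds one nesting level, e.g. for a hypothetical config with a categorical 'kernel'
# ['linear', 'rbf'] and a uniform 'C' [0.0001, 20]:
# {'kernel': {'linear': {'C': [0.0001, 20]}, 'rbf': {'C': [0.0001, 20]}}}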
except Exception as e: LOG.error("internal error in optunity.minimize_structured occured. {}".format(e)) raise BrokenPipeError("internal error in optunity.minimize_structured occured. {}".format(e)) def convert_searchspace(self, hyperparameter): solution_space = {} # split input in categorical and non-categorical data cat, uni = split_categorical(hyperparameter) # build up dictionary keeping all non-categorical data uniforms = {} for key, value in uni.items(): for key2, value2 in value.items(): if key2 == 'data': uniforms[key] = value2 if len(cat) == 0: return uniforms # build nested categorical structure inner_level = uniforms for key, value in cat.items(): tmp = {} tmp2 = {} for key2, value2 in value.items(): if key2 == 'data': for elem in value2: tmp[elem] = inner_level tmp2[key] = tmp inner_level = tmp2 solution_space = tmp2 return solution_space diff --git a/hyppopy/Solver/RandomsearchSolver.py b/hyppopy/Solver/RandomsearchSolver.py index 2986b02..1d6a5f3 100644 --- a/hyppopy/Solver/RandomsearchSolver.py +++ b/hyppopy/Solver/RandomsearchSolver.py @@ -1,152 +1,153 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os import copy import random import logging import datetime import numpy as np from pprint import pformat from hyperopt import Trials from hyppopy.globals import DEBUGLEVEL from .HyppopySolver import HyppopySolver +from ..BlackboxFunction import BlackboxFunction LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) def draw_uniform_sample(param): assert param['type'] != 'str', "Cannot sample a string list uniformly!" assert param['data'][0] < param['data'][1], "Precondition violation: data[0] > data[1]!" s = random.random() s *= np.abs(param['data'][1] - param['data'][0]) s += param['data'][0] if param['type'] == 'int': s = int(np.round(s)) if s < param['data'][0]: s = int(param['data'][0]) if s > param['data'][1]: s = int(param['data'][1]) return s def draw_normal_sample(param): mu = (param['data'][1] - param['data'][0]) / 2 sigma = mu / 3 s = np.random.normal(loc=param['data'][0] + mu, scale=sigma) if s > param['data'][1]: s = param['data'][1] if s < param['data'][0]: s = param['data'][0] return s def draw_loguniform_sample(param): p = copy.deepcopy(param) p['data'][0] = np.log(param['data'][0]) p['data'][1] = np.log(param['data'][1]) assert p['data'][0] is not np.nan, "Precondition violation, left bound input error, results in nan!" assert p['data'][1] is not np.nan, "Precondition violation, right bound input error, results in nan!" 
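# draw uniformly in log space and map back with exp; the checks below clamp the sample into the original [a, b] range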
x = draw_uniform_sample(p) s = np.exp(x) if s > param['data'][1]: s = param['data'][1] if s < param['data'][0]: s = param['data'][0] return s def draw_categorical_sample(param): return random.sample(param['data'], 1)[0] def draw_sample(param): if param['domain'] == "uniform": return draw_uniform_sample(param) elif param['domain'] == "normal": return draw_normal_sample(param) elif param['domain'] == "loguniform": return draw_loguniform_sample(param) elif param['domain'] == "categorical": return draw_categorical_sample(param) else: raise LookupError("Unknown domain {}".format(param['domain'])) class RandomsearchSolver(HyppopySolver): def __init__(self, project=None): HyppopySolver.__init__(self, project) self._tid = None def loss_function(self, params): loss = None vals = {} idx = {} for key, value in params.items(): vals[key] = [value] idx[key] = [self._tid] trial = {'tid': self._tid, 'result': {'loss': None, 'status': 'ok'}, 'misc': { 'tid': self._tid, 'idxs': idx, 'vals': vals }, 'book_time': datetime.datetime.now(), 'refresh_time': None } try: loss = self.blackbox(**params) if loss is None: trial['result']['loss'] = np.nan trial['result']['status'] = 'failed' else: trial['result']['loss'] = loss except Exception as e: LOG.error("execution of self.blackbox(**params) failed due to:\n {}".format(e)) trial['result']['loss'] = np.nan trial['result']['status'] = 'failed' trial['refresh_time'] = datetime.datetime.now() self._trials.trials.append(trial) - if self.blackbox.callback_func is not None: + if isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None: cbd = copy.deepcopy(params) cbd['iterations'] = self._tid + 1 cbd['loss'] = loss cbd['status'] = trial['result']['status'] self.blackbox.callback_func(**cbd) return def execute_solver(self, searchspace): self._tid = 0 self._trials = Trials() N = self.max_iterations try: for n in range(N): params = {} for name, p in searchspace.items(): params[name] = draw_sample(p) self.loss_function(params) self._tid += 1 except Exception as e: msg = "internal error in randomsearch execute_solver occured. {}".format(e) LOG.error(msg) raise BrokenPipeError(msg) self.best = self._trials.argmin def convert_searchspace(self, hyperparameter): LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) return hyperparameter diff --git a/hyppopy/VirtualFunction.py b/hyppopy/VirtualFunction.py index a136e5e..42a428e 100644 --- a/hyppopy/VirtualFunction.py +++ b/hyppopy/VirtualFunction.py @@ -1,209 +1,222 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) ######################################################################################################################## # USAGE # # The class VirtualFunction is meant to be a virtual energy function with an arbitrary dimensionality. The user can # simply scribble functions as a binary image using e.g. Gimp, defining their ranges using .cfg file and loading them # into the VirtualFunction. An instance of the class can then be used like a normal function returning the sampling of # each dimension loaded. # # 1. 
create binary images (IMPORTANT same shape for each), background black the function signature white, ensure that # each column has a white pixel. If more than one pixel appears in a column, only the lowest will be used. # # 2. create a .cfg file, see an example in hyppopy/virtualparameterspace # # 3. vfunc = VirtualFunction() # vfunc.load_images(path/of/your/binaryfiles/and/the/configfile) # # 4. use vfunc like a normal function, if you loaded 4 dimension binary images use it like f = vfunc(a,b,c,d) ######################################################################################################################## import os import sys import numpy as np import configparser from glob import glob import matplotlib.pyplot as plt import matplotlib.image as mpimg +from .globals import VFUNCDATAPATH class VirtualFunction(object): def __init__(self): self.config = None self.data = None self.axis = [] - def __call__(self, *args): + def __call__(self, *args, **kwargs): + if len(kwargs) == self.dims(): + args = [0]*len(kwargs) + for key, value in kwargs.items(): + index = int(key.split("_")[1]) + args[index] = value assert len(args) == self.dims(), "wrong number of arguments!" for i in range(len(args)): assert self.axis[i][0] <= args[i] <= self.axis[i][1], "out of range access on axis {}!".format(i) lpos, rpos, fracs = self.pos_to_indices(args) fl = self.data[(list(range(self.dims())), lpos)] fr = self.data[(list(range(self.dims())), rpos)] - return fl*np.array(fracs) + fr*(1-np.array(fracs)) + return np.sum(fl*np.array(fracs) + fr*(1-np.array(fracs))) def clear(self): self.axis.clear() self.data = None self.config = None def dims(self): return self.data.shape[0] def size(self): return self.data.shape[1] def minima(self): glob_mins = [] for dim in range(self.dims()): x = [] fmin = np.min(self.data[dim, :]) for _x in range(self.size()): if self.data[dim, _x] <= fmin: x.append(_x/self.size()*(self.axis[dim][1]-self.axis[dim][0])+self.axis[dim][0]) glob_mins.append([x, fmin]) return glob_mins def pos_to_indices(self, positions): lpos = [] rpos = [] pfracs = [] for n in range(self.dims()): pos = positions[n] pos -= self.axis[n][0] pos /= np.abs(self.axis[n][1]-self.axis[n][0]) pos *= self.data.shape[1]-1 lp = int(np.floor(pos)) if lp < 0: lp = 0 rp = int(np.ceil(pos)) if rp > self.data.shape[1]-1: rp = self.data.shape[1]-1 pfracs.append(1.0-(pos-np.floor(pos))) lpos.append(lp) rpos.append(rp) return lpos, rpos, pfracs def plot(self, dim=None, title=""): if dim is None: dim = list(range(self.dims())) else: dim = [dim] fig = plt.figure(figsize=(10, 8)) for i in range(len(dim)): width = np.abs(self.axis[dim[i]][1]-self.axis[dim[i]][0]) ax = np.arange(self.axis[dim[i]][0], self.axis[dim[i]][1], width/self.size()) plt.plot(ax, self.data[dim[i], :], '.', label='axis_{}'.format(str(dim[i]).zfill(2))) plt.legend() plt.grid() plt.title(title) plt.show() def add_dimension(self, data, x_range): if self.data is None: self.data = data if len(self.data.shape) == 1: self.data = self.data.reshape((1, self.data.shape[0])) else: if len(data.shape) == 1: data = data.reshape((1, data.shape[0])) assert self.data.shape[1] == data.shape[1], "shape mismatch while adding dimension!" 
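# append the new sampling as an additional row of the data matrix and keep track of its value range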
dims = self.data.shape[0] size = self.data.shape[1] tmp = np.append(self.data, data) self.data = tmp.reshape((dims+1, size)) self.axis.append(x_range) + def load_default(self, dim=3): + path = os.path.join(VFUNCDATAPATH, "{}D".format(dim)) + if os.path.exists(path): + self.load_images(path) + else: + raise FileExistsError("No virtualfunction of dimension {} available".format(dim)) + def load_images(self, path): self.config = None self.data = None self.axis.clear() img_fnames = [] for f in glob(path + os.sep + "*"): if f.endswith(".png"): img_fnames.append(f) elif f.endswith(".cfg"): self.config = self.read_config(f) else: print("WARNING: files of type {} not supported, the file {} is ignored!".format(f.split(".")[-1], os.path.basename(f))) if self.config is None: print("Aborted, failed to read configfile!") sys.exit() sections = self.config.sections() if len(sections) != len(img_fnames): print("Aborted, inconsistent number of image tmplates and axis specifications!") sys.exit() img_fnames.sort() size_x = None size_y = None for n, fname in enumerate(img_fnames): img = mpimg.imread(fname) if len(img.shape) > 2: img = img[:, :, 0] if size_x is None: size_x = img.shape[1] if size_y is None: size_y = img.shape[0] self.data = np.zeros((len(img_fnames), size_x), dtype=np.float32) assert img.shape[0] == size_y, "Shape mismatch in dimension y {} is not {}".format(img.shape[0], size_y) assert img.shape[1] == size_x, "Shape mismatch in dimension x {} is not {}".format(img.shape[1], size_x) self.sample_image(img, n) def sample_image(self, img, dim): sec_name = "axis_{}".format(str(dim).zfill(2)) assert sec_name in self.config.sections(), "config section {} not found!".format(sec_name) settings = self.get_axis_settings(sec_name) self.axis.append([float(settings['min_x']), float(settings['max_x'])]) y_range = [float(settings['min_y']), float(settings['max_y'])] for x in range(img.shape[1]): candidates = np.where(img[:, x] > 0) assert len(candidates[0]) > 0, "non function value in image detected, ensure each column has at least one value > 0!" y_pos = candidates[0][0]/img.shape[0] self.data[dim, x] = 1-y_pos self.data[dim, :] *= np.abs(y_range[1] - y_range[0]) self.data[dim, :] += y_range[0] def read_config(self, fname): try: config = configparser.ConfigParser() config.read(fname) return config except Exception as e: print(e) return None def get_axis_settings(self, section): dict1 = {} options = self.config.options(section) for option in options: try: dict1[option] = self.config.get(section, option) if dict1[option] == -1: print("skip: %s" % option) except: print("exception on %s!" % option) dict1[option] = None return dict1 diff --git a/hyppopy/__init__.py b/hyppopy/__init__.py index 1c6ff54..651862e 100644 --- a/hyppopy/__init__.py +++ b/hyppopy/__init__.py @@ -1 +1 @@ -__version__ = '0.4' \ No newline at end of file +__version__ = '0.3.4.1' diff --git a/hyppopy/globals.py b/hyppopy/globals.py index cafe5b9..d7814d0 100644 --- a/hyppopy/globals.py +++ b/hyppopy/globals.py @@ -1,30 +1,31 @@ # DKFZ # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. 
import os import sys import logging ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) sys.path.insert(0, ROOT) LIBNAME = "hyppopy" TESTDATA_DIR = os.path.join(ROOT, *(LIBNAME, "tests", "data")) HYPERPARAMETERPATH = "hyperparameter" SETTINGSPATH = "settings" +VFUNCDATAPATH = os.path.join(os.path.join(ROOT, LIBNAME), "virtualparameterspace") DEFAULTITERATIONS = 500 LOGFILENAME = os.path.join(ROOT, '{}_log.log'.format(LIBNAME)) DEBUGLEVEL = logging.DEBUG logging.basicConfig(filename=LOGFILENAME, filemode='w', format='%(levelname)s: %(name)s - %(message)s') diff --git a/hyppopy/tests/test_gridsearchsolver.py b/hyppopy/tests/test_gridsearchsolver.py index 91c1f53..03bb0ba 100644 --- a/hyppopy/tests/test_gridsearchsolver.py +++ b/hyppopy/tests/test_gridsearchsolver.py @@ -1,127 +1,206 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) -import os import unittest -import numpy as np from ..solver.GridsearchSolver import * -from ..globals import TESTDATA_DIR +from ..VirtualFunction import VirtualFunction +from hyppopy.HyppopyProject import HyppopyProject class GridsearchTestSuite(unittest.TestCase): def setUp(self): pass def test_get_uniform_axis_sample(self): drange = [0, 10] N = 11 data = get_uniform_axis_sample(drange[0], drange[1], N, "float") for i in range(11): self.assertEqual(float(i), data[i]) drange = [-10, 10] N = 21 data = get_uniform_axis_sample(drange[0], drange[1], N, "int") self.assertEqual(data[0], -10) self.assertEqual(data[20], 10) self.assertEqual(data[10], 0) def test_get_norm_cdf(self): res = [0, 0.27337265, 0.4331928, 0.48777553, 0.4986501, 0.5013499, 0.51222447, 0.5668072, 0.72662735, 1] f = get_norm_cdf(10) for n, v in enumerate(res): self.assertAlmostEqual(v, f[n]) res = [0.0, 0.27337264762313174, 0.4331927987311419, 0.48777552734495533, 0.4986501019683699, 0.5, 0.5013498980316301, 0.5122244726550447, 0.5668072012688581, 0.7266273523768683, 1.0] f = get_norm_cdf(11) for n, v in enumerate(res): self.assertAlmostEqual(v, f[n]) def test_get_gaussian_axis_sampling(self): res = [-5.0, -2.2662735237686826, -0.6680720126885813, -0.12224472655044671, -0.013498980316301257, 0.013498980316301257, 0.12224472655044671, 0.6680720126885813, 2.2662735237686826, 5.0] bounds = (-5, 5) N = 10 data = get_gaussian_axis_sample(bounds[0], bounds[1], N, "float") for n in range(N): self.assertAlmostEqual(res[n], data[n]) res = [-5.0, -2.2662735237686826, -0.6680720126885813, -0.12224472655044671, -0.013498980316301257, 0.0, 0.013498980316301257, 0.12224472655044671, 0.6680720126885813, 2.2662735237686826, 5.0] bounds = (-5, 5) N = 11 data = get_gaussian_axis_sample(bounds[0], bounds[1], N, "float") for n in range(N): self.assertAlmostEqual(res[n], data[n]) def test_get_logarithmic_axis_sample(self): res = [0.0010000000000000002, 0.0035938136638046297, 0.012915496650148841, 0.046415888336127795, 0.1668100537200059, 0.5994842503189414, 2.154434690031884, 7.7426368268112675, 27.825594022071247, 100.00000000000004] bounds = (0.001, 1e2) N = 10 data = get_logarithmic_axis_sample(bounds[0], bounds[1], N, "float") for n in range(N): self.assertAlmostEqual(res[n], data[n]) res = [0.0010000000000000002, 0.003162277660168382, 0.010000000000000004, 
               0.03162277660168381, 0.10000000000000006, 0.31622776601683833, 1.0000000000000009,
               3.1622776601683813, 10.00000000000001, 31.622776601683846, 100.00000000000004]
        bounds = (0.001, 1e2)
        N = 11
        data = get_logarithmic_axis_sample(bounds[0], bounds[1], N, "float")
        for n in range(N):
            self.assertAlmostEqual(res[n], data[n])

+    def test_solver(self):
+        config = {
+            "hyperparameter": {
+                "value 1": {
+                    "domain": "uniform",
+                    "data": [0, 20, 11],
+                    "type": "int"
+                },
+                "value 2": {
+                    "domain": "normal",
+                    "data": [0, 20.0, 11],
+                    "type": "float"
+                },
+                "value 3": {
+                    "domain": "loguniform",
+                    "data": [1, 10000, 11],
+                    "type": "float"
+                },
+                "categorical": {
+                    "domain": "categorical",
+                    "data": ["a", "b"],
+                    "type": "str"
+                }
+            },
+            "settings": {
+                "solver": {},
+                "custom": {}
+            }}
+        res_labels = ['value 1', 'value 2', 'value 3', 'categorical']
+        res_values = [[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20],
+                      [0.0, 5.467452952462635, 8.663855974622837, 9.755510546899107, 9.973002039367397, 10.0,
+                       10.026997960632603, 10.244489453100893, 11.336144025377163, 14.532547047537365, 20.0],
+                      [1.0, 2.51188643150958, 6.309573444801933, 15.848931924611136, 39.810717055349734,
+                       100.00000000000004, 251.18864315095806, 630.9573444801938, 1584.8931924611143,
+                       3981.071705534977, 10000.00000000001],
+                      ['a', 'b']
+                      ]
+        solver = GridsearchSolver(config)
+        searchspace = solver.convert_searchspace(config["hyperparameter"])
+        for n in range(len(res_labels)):
+            self.assertEqual(res_labels[n], searchspace[0][n])
+        for i in range(3):
+            self.assertAlmostEqual(res_values[i], searchspace[1][i])
+        self.assertEqual(res_values[3], searchspace[1][3])
+
+    def test_solver_complete(self):
+        config = {
+            "hyperparameter": {
+                "axis_00": {
+                    "domain": "normal",
+                    "data": [300, 800, 11],
+                    "type": "float"
+                },
+                "axis_01": {
+                    "domain": "normal",
+                    "data": [-1, 1, 11],
+                    "type": "float"
+                },
+                "axis_02": {
+                    "domain": "uniform",
+                    "data": [0, 10, 11],
+                    "type": "float"
+                }
+            },
+            "settings": {
+                "solver": {},
+                "custom": {}
+            }}
+
+        project = HyppopyProject(config)
+        solver = GridsearchSolver(project)
+        vfunc = VirtualFunction()
+        vfunc.load_default()
+        solver.blackbox = vfunc
+        solver.run(print_stats=False)
+        df, best = solver.get_results()
+        self.assertAlmostEqual(best['axis_00'], 583.40, places=1)
+        self.assertAlmostEqual(best['axis_01'], 0.45, places=1)
+        self.assertAlmostEqual(best['axis_02'], 5.0, places=1)
+

if __name__ == '__main__':
    unittest.main()
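
# --- Not part of the patch: a numerical cross-check of the grid axes expected by the tests above. ---
# Based on those expected values, the "uniform" axis appears to coincide with numpy.linspace and the
# "loguniform" axis with numpy.logspace (evenly spaced in log10), while the "normal" domain clusters
# samples around the centre of the range (see get_norm_cdf). This is only an illustration of the test
# data, not the GridsearchSolver implementation itself.

import numpy as np

# uniform axis on [0, 10] with 11 samples -> 0.0, 1.0, ..., 10.0 (cf. test_get_uniform_axis_sample)
print(np.linspace(0, 10, 11))

# loguniform axis on [1e-3, 1e2] with 10 samples (cf. test_get_logarithmic_axis_sample)
print(np.logspace(np.log10(1e-3), np.log10(1e2), num=10))
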
diff --git a/hyppopy/tests/test_virtualfunction.py b/hyppopy/tests/test_virtualfunction.py
index acc2bad..7a05d1f 100644
--- a/hyppopy/tests/test_virtualfunction.py
+++ b/hyppopy/tests/test_virtualfunction.py
@@ -1,117 +1,96 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)

import os
import unittest
import numpy as np

from ..VirtualFunction import VirtualFunction
from ..globals import TESTDATA_DIR


class VirtualFunctionTestSuite(unittest.TestCase):

    def setUp(self):
        pass

    def test_imagereading(self):
        vfunc = VirtualFunction()
        vfunc.load_images(os.path.join(TESTDATA_DIR, 'functionsimulator'))
        self.assertTrue(isinstance(vfunc.data, np.ndarray))
        self.assertEqual(vfunc.data.shape[0], 5)
        self.assertEqual(vfunc.data.shape[1], 512)
        gt = [0.83984375*5, 0.44140625*20-10, 0.25390625*20, 0.81640625*8-10, 0.67578125*2+2]
        for i in range(5):
            self.assertAlmostEqual(vfunc.data[i][0], gt[i])
        gt = [[0, 1], [-10, 10], [0, 20], [-30, 5], [5, 10]]
        for i in range(5):
            self.assertEqual(vfunc.axis[i][0], gt[i][0])
            self.assertEqual(vfunc.axis[i][1], gt[i][1])

    def test_data_adding(self):
        gt = [[-10, 10], [-30, 5]]
        vfunc = VirtualFunction()
        dim0 = np.arange(0, 1.1, 0.1)
        dim1 = np.arange(1.0, -0.1, -0.1)
        vfunc.add_dimension(dim0, gt[0])
        self.assertEqual(len(vfunc.data.shape), 2)
        self.assertEqual(vfunc.data.shape[0], 1)
        self.assertEqual(vfunc.data.shape[1], 11)
        vfunc.add_dimension(dim1, gt[1])
        self.assertEqual(vfunc.data.shape[0], 2)
        self.assertEqual(vfunc.data.shape[1], 11)
        for n in range(11):
            self.assertAlmostEqual(dim0[n], vfunc.data[0, n])
            self.assertAlmostEqual(dim1[n], vfunc.data[1, n])
        for i in range(2):
            self.assertEqual(vfunc.axis[i][0], gt[i][0])
            self.assertEqual(vfunc.axis[i][1], gt[i][1])

-    def test_sampling(self):
-        vfunc = VirtualFunction()
-        vfunc.load_images(os.path.join(TESTDATA_DIR, 'functionsimulator'))
-        ranges = [[0, 1], [-10, 10], [0, 20], [-30, 5], [5, 10]]
-        x_ranges = []
-        for r in ranges:
-            dr = (r[1]-r[0])/512.0
-            x_ranges.append(np.arange(r[0], r[1], dr))
-        data = [[], [], [], [], []]
-        for n in range(x_ranges[0].shape[0]):
-            x = [x_ranges[0][n], x_ranges[1][n], x_ranges[2][n], x_ranges[3][n], x_ranges[4][n]]
-            f = vfunc(*x)
-            for i in range(5):
-                data[i].append(f[i])
-
-        sum = 0
-        for i in range(512):
-            for n in range(5):
-                sum += vfunc.data[n][i]-data[n][i]
-        self.assertTrue(sum < 18)
-
    def test_minima(self):
        vfunc = VirtualFunction()
        vfunc.load_images(os.path.join(TESTDATA_DIR, 'functionsimulator'))
        minima = vfunc.minima()
        gt = [[[0.7265625], 0.48828125], [[-4.0234375], -7.890625], [[2.265625], 0.859375],
              [[-17.421875, -17.353515625, -17.28515625, -17.216796875, -17.1484375, -17.080078125,
                -17.01171875, -16.943359375, -16.875, -16.806640625, -16.73828125, -16.669921875,
                -16.6015625, -16.533203125, -16.46484375, -16.396484375, -16.328125, -16.259765625,
                -16.19140625, -16.123046875, -16.0546875, -15.986328125, -15.91796875, -15.849609375,
                -15.78125, -15.712890625, -15.64453125, -15.576171875, -15.5078125, -15.439453125,
                -15.37109375, -15.302734375, -15.234375, -15.166015625, -15.09765625, -15.029296875,
                -14.9609375, -14.892578125, -14.82421875, -14.755859375, -14.6875, -14.619140625,
                -14.55078125, -14.482421875, -14.4140625, -14.345703125, -14.27734375, -14.208984375,
                -14.140625, -14.072265625, -14.00390625, -13.935546875, -13.8671875, -13.798828125,
                -13.73046875, -13.662109375, -13.59375, -13.525390625, -13.45703125, -13.388671875,
                -13.3203125, -13.251953125, -13.18359375, -13.115234375, -13.046875, -12.978515625,
                -12.91015625, -12.841796875, -12.7734375, -12.705078125, -12.63671875, -12.568359375,
                -12.5, -12.431640625, -12.36328125, -12.294921875, -12.2265625, -12.158203125,
                -12.08984375, -12.021484375, -11.953125, -11.884765625, -11.81640625, -11.748046875,
                -11.6796875, -11.611328125, -11.54296875, -11.474609375,
                -11.40625, -11.337890625, -11.26953125, -11.201171875, -11.1328125, -11.064453125,
                -10.99609375, -10.927734375, -10.859375, -10.791015625, -10.72265625, -10.654296875,
                -10.5859375, -10.517578125, -10.44921875, -10.380859375, -10.3125, -10.244140625,
                -10.17578125, -10.107421875, -10.0390625, -9.970703125, -9.90234375, -9.833984375,
                -9.765625, -9.697265625, -9.62890625, -9.560546875, -9.4921875, -9.423828125,
                -9.35546875, -9.287109375, -9.21875, -9.150390625, -9.08203125, -9.013671875,
                -8.9453125, -8.876953125, -8.80859375, -8.740234375, -8.671875, -8.603515625,
                -8.53515625, -8.466796875, -8.3984375, -8.330078125, -8.26171875, -8.193359375,
                -8.125, -8.056640625, -7.98828125, -7.919921875, -7.8515625, -7.783203125,
                -7.71484375, -7.646484375, -7.578125, -7.509765625, -7.44140625, -7.373046875,
                -7.3046875, -7.236328125, -7.16796875, -7.099609375, -7.03125], -9.125],
              [[5.44921875, 5.458984375, 5.46875, 5.478515625, 5.48828125, 5.498046875, 5.5078125,
                5.517578125, 5.52734375], 2.09375]]
        self.assertAlmostEqual(minima, gt)


if __name__ == '__main__':
    unittest.main()
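
# --- Not part of the patch: an end-to-end sketch assembled from test_solver_complete above. ---
# It shows how the new VirtualFunction default data can act as a ready-made blackbox for the
# GridsearchSolver; the axis ranges are copied from that test and are assumed to match the packaged
# 3D default function.

from hyppopy.HyppopyProject import HyppopyProject
from hyppopy.solver.GridsearchSolver import GridsearchSolver
from hyppopy.VirtualFunction import VirtualFunction

config = {
    "hyperparameter": {
        "axis_00": {"domain": "normal", "data": [300, 800, 11], "type": "float"},
        "axis_01": {"domain": "normal", "data": [-1, 1, 11], "type": "float"},
        "axis_02": {"domain": "uniform", "data": [0, 10, 11], "type": "float"}
    },
    "settings": {"solver": {}, "custom": {}}
}

project = HyppopyProject(config)
solver = GridsearchSolver(project)

vfunc = VirtualFunction()
vfunc.load_default()                 # the packaged 3D virtual parameter space
solver.blackbox = vfunc
solver.run(print_stats=False)

df, best = solver.get_results()
print(best)                          # expected near axis_00=583.4, axis_01=0.45, axis_02=5.0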