diff --git a/hyppopy/Solver/BayesOptSolver.py b/hyppopy/Solver/BayesOptSolver.py index 4179539..338cb8b 100644 --- a/hyppopy/Solver/BayesOptSolver.py +++ b/hyppopy/Solver/BayesOptSolver.py @@ -1,84 +1,84 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE # # Author: Sven Wanner (s.wanner@dkfz.de) import os import logging import warnings import numpy as np from pprint import pformat from hyperopt import Trials from bayes_opt import BayesianOptimization from hyppopy.globals import DEBUGLEVEL from hyppopy.solver.HyppopySolver import HyppopySolver LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class BayesOptSolver(HyppopySolver): def __init__(self, project=None): HyppopySolver.__init__(self, project) self._searchspace = None self._idx = None def reformat_parameter(self, params): out_params = {} for name, value in params.items(): if self._searchspace[name]["domain"] == "categorical": out_params[name] = self._searchspace[name]["data"][int(np.round(value))] else: if self._searchspace[name]["type"] == "int": out_params[name] = int(np.round(value)) else: out_params[name] = value return out_params - def loss_function_call(self, trial, params): + def loss_function_call(self, params): params = self.reformat_parameter(params) for key in params.keys(): if self.project.get_typeof(key) is int: params[key] = int(round(params[key])) return self.blackbox(**params) def execute_solver(self, searchspace): LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace))) self.trials = Trials() self._idx = 0 try: optimizer = BayesianOptimization(f=self.loss_function, pbounds=searchspace, verbose=0) optimizer.maximize(init_points=2, n_iter=self.max_iterations) self.best = self.reformat_parameter(optimizer.max["params"]) except Exception as e: LOG.error("internal error in bayes_opt maximize occured. {}".format(e)) raise BrokenPipeError("internal error in bayes_opt maximize occured. {}".format(e)) def convert_searchspace(self, hyperparameter): LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) self._searchspace = hyperparameter pbounds = {} for name, param in hyperparameter.items(): if param["domain"] != "categorical": if param["domain"] != "uniform": msg = "Warning: BayesOpt cannot handle {} domain. Only uniform and categorical domains are supported!".format( param["domain"]) warnings.warn(msg) LOG.warning(msg) pbounds[name] = (param["data"][0], param["data"][1]) else: pbounds[name] = (0, len(param["data"])-1) return pbounds diff --git a/hyppopy/Solver/GridsearchSolver.py b/hyppopy/Solver/GridsearchSolver.py index 920c33f..d0a6604 100644 --- a/hyppopy/Solver/GridsearchSolver.py +++ b/hyppopy/Solver/GridsearchSolver.py @@ -1,182 +1,182 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE # # Author: Sven Wanner (s.wanner@dkfz.de) import os import logging import numpy as np from pprint import pformat from scipy.stats import norm from itertools import product from hyppopy.globals import DEBUGLEVEL from hyppopy.solver.HyppopySolver import HyppopySolver LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) def get_uniform_axis_sample(a, b, N, dtype): """ returns a uniform sample x(n) in the range [a,b] sampled at N pojnts :param a: left value range bound :param b: right value range bound :param N: discretization of intervall [a,b] :param dtype: data type :return: [list] axis range """ assert a < b, "condition a < b violated!" assert isinstance(N, int), "condition N of type int violated!" assert isinstance(dtype, str), "condition type of type str violated!" if dtype == "int": return list(np.linspace(a, b, N).astype(int)) elif dtype == "float" or dtype == "double": return list(np.linspace(a, b, N)) else: raise AssertionError("dtype {} not supported for uniform sampling!".format(dtype)) def get_norm_cdf(N): """ returns a normed gaussian cdf (range [0,1]) with N sampling points :param N: sampling points :return: [ndarray] gaussian cdf function values """ assert isinstance(N, int), "condition N of type int violated!" even = True if N % 2 != 0: N -= 1 even = False N = int(N/2) sigma = 1/3 x = np.linspace(0, 1, N) y1 = norm.cdf(x, loc=0, scale=sigma)-0.5 if not even: y1 = np.append(y1, [0.5]) y2 = 1-(norm.cdf(x, loc=0, scale=sigma)-0.5) y2 = np.flip(y2, axis=0) y = np.concatenate((y1, y2), axis=0) return y def get_gaussian_axis_sample(a, b, N, dtype): """ returns a function value f(n) where f is a gaussian cdf in range [a, b] and N sampling points :param a: left value range bound :param b: right value range bound :param N: discretization of intervall [a,b] :param dtype: data type :return: [list] axis range """ assert a < b, "condition a < b violated!" assert isinstance(N, int), "condition N of type int violated!" assert isinstance(dtype, str), "condition type of type str violated!" data = [] for n in range(N): x = a + get_norm_cdf(N)[n]*(b-a) if dtype == "int": data.append(int(x)) elif dtype == "float" or dtype == "double": data.append(x) else: raise AssertionError("dtype {} not supported for uniform sampling!".format(dtype)) return data def get_logarithmic_axis_sample(a, b, N, dtype): """ returns a function value f(n) where f is logarithmic function e^x sampling the exponent range [log(a), log(b)] linear at N sampling points. The function values returned are in the range [a, b]. :param a: left value range bound :param b: right value range bound :param N: discretization of intervall [a,b] :param dtype: data type :return: [list] axis range """ assert a < b, "condition a < b violated!" assert a > 0, "condition a > 0 violated!" assert isinstance(N, int), "condition N of type int violated!" assert isinstance(dtype, str), "condition type of type str violated!" # convert input range into exponent range lexp = np.log(a) rexp = np.log(b) exp_range = np.linspace(lexp, rexp, N) data = [] for n in range(exp_range.shape[0]): x = np.exp(exp_range[n]) if dtype == "int": data.append(int(x)) elif dtype == "float" or dtype == "double": data.append(x) else: raise AssertionError("dtype {} not supported for uniform sampling!".format(dtype)) return data class GridsearchSolver(HyppopySolver): """ The GridsearchSolver class implements a gridsearch optimization. The gridsearch supports categorical, uniform, normal and loguniform sampling. To use the GridsearchSolver, besides a range, one must specifiy the number of samples in the domain, e.g. 'data': [0, 1, 100] """ def __init__(self, project=None): HyppopySolver.__init__(self, project) self._has_maxiteration_field = False - def loss_function_call(self, trial, params): + def loss_function_call(self, params): loss = self.blackbox(**params) if loss is None: return np.nan return loss def execute_solver(self, searchspace): for x in product(*searchspace[1]): params = {} for name, value in zip(searchspace[0], x): params[name] = value try: self.loss_function(**params) except Exception as e: msg = "internal error in randomsearch execute_solver occured. {}".format(e) LOG.error(msg) raise BrokenPipeError(msg) self.best = self._trials.argmin def convert_searchspace(self, hyperparameter): """ the function converts the standard parameter input into a range list depending on the domain. These rangelists are later used with itertools product to create a paramater space sample of each combination. :param hyperparameter: [dict] hyperparameter space :return: [list] name and range for each parameter space axis """ LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) searchspace = [[], []] for name, param in hyperparameter.items(): if param["domain"] == "categorical": searchspace[0].append(name) searchspace[1].append(param["data"]) elif param["domain"] == "uniform": searchspace[0].append(name) searchspace[1].append(get_uniform_axis_sample(param["data"][0], param["data"][1], param["data"][2], param["type"])) elif param["domain"] == "normal": searchspace[0].append(name) searchspace[1].append(get_gaussian_axis_sample(param["data"][0], param["data"][1], param["data"][2], param["type"])) elif param["domain"] == "loguniform": searchspace[0].append(name) searchspace[1].append(get_logarithmic_axis_sample(param["data"][0], param["data"][1], param["data"][2], param["type"])) return searchspace diff --git a/hyppopy/Solver/HyppopySolver.py b/hyppopy/Solver/HyppopySolver.py index 07d3808..7d59869 100644 --- a/hyppopy/Solver/HyppopySolver.py +++ b/hyppopy/Solver/HyppopySolver.py @@ -1,287 +1,329 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE # # Author: Sven Wanner (s.wanner@dkfz.de) import abc import os import copy import types import logging import datetime import numpy as np import pandas as pd from hyperopt import Trials from hyppopy.globals import DEBUGLEVEL from hyppopy.HyppopyProject import HyppopyProject from hyppopy.BlackboxFunction import BlackboxFunction from hyppopy.VirtualFunction import VirtualFunction from hyppopy.globals import DEBUGLEVEL, DEFAULTITERATIONS LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class HyppopySolver(object): """ The HyppopySolver class is the base class for all solver addons. It defines virtual functions a child class has to implement to deal with the front-end communication, orchestrating the optimization process and ensuring a proper process information storing. The key idea is that the HyppopySolver class defines an interface to configure and run an object instance of itself independently from the concrete solver lib used to optimize in the background. To achieve this goal an addon developer needs to implement the abstract methods 'convert_searchspace', 'execute_solver' and 'loss_function_call'. These methods abstract the peculiarities of the solver libs to offer, on the user side, a simple and consistent - parameter space configuration and optimization procedure. The method 'convert_searchspace' + parameter space configuration and optimization procedure. The method 'convert_searchspace' transforms the hyppopy + parameter space description into the solver lib specific description. The method loss_function_call is used to + handle solver lib specifics of calling the actual blackbox function and execute_solver is executed when the run + method is invoked und takes care of calling the solver lib solving routine. """ def __init__(self, project=None): self._idx = None self._best = None self._trials = None self._blackbox = None self._max_iterations = None self._project = project self._total_duration = None self._solver_overhead = None self._time_per_iteration = None self._accumulated_blackbox_time = None self._has_maxiteration_field = True - @abc.abstractmethod - def execute_solver(self, searchspace): - raise NotImplementedError('users must define execute_solver to use this class') - @abc.abstractmethod def convert_searchspace(self, hyperparameter): + """ + This function gets the unified hyppopy-like parameterspace description as input and, if necessary, should + convert it into a solver lib specific format. The function is invoked when run is called and what it returns + is passed as searchspace argument to the function execute_solver. + :param hyperparameter: [dict] nested parameter description dict e.g. {'name': {'domain':'uniform', 'data':[0,1], 'type':'float'}, ...} + :return: [object] converted hyperparameter space + """ raise NotImplementedError('users must define convert_searchspace to use this class') @abc.abstractmethod - def loss_function_call(self): + def execute_solver(self, searchspace): + """ + This function is called immediatly after convert_searchspace and get the output of the latter as input. It's + purpose is to call the solver libs main optimization function. + :param searchspace: converted hyperparameter space + """ + raise NotImplementedError('users must define execute_solver to use this class') + + @abc.abstractmethod + def loss_function_call(self, params): + """ + This function is called within the function loss_function and encapsulates the actual blackbox function call + in each iteration. The function loss_function takes care of the iteration driving and reporting, but each solver + lib might need some special treatment between the parameter set selection and the calling of the actual blackbox + function, e.g. parameter converting. + :param params: [dict] hyperparameter space sample e.g. {'p1': 0.123, 'p2': 3.87, ...} + :return: [float] loss + """ raise NotImplementedError('users must define convert_searchspace to use this class') def loss_function(self, **params): + """ + This function is called each iteration with a selected parameter set. The parameter set selection is driven by + the solver lib itself. The purpose of this function is to take care of the iteration reporting and the calling + of the callback_func is available. As a developer you might want to overwrite this function completely (e.g. + HyperoptSolver) but then you need to take care for iteration reporting for yourself. The alternative is to only + implement loss_function_call (e.g. OptunitySolver). + :param params: [dict] hyperparameter space sample e.g. {'p1': 0.123, 'p2': 3.87, ...} + :return: [float] loss + """ self._idx += 1 vals = {} idx = {} for key, value in params.items(): vals[key] = [value] idx[key] = [self._idx] trial = {'tid': self._idx, 'result': {'loss': None, 'status': 'ok'}, 'misc': { 'tid': self._idx, 'idxs': idx, 'vals': vals }, 'book_time': datetime.datetime.now(), 'refresh_time': None } try: - loss = self.loss_function_call(trial, params) + loss = self.loss_function_call(params) trial['result']['loss'] = loss trial['result']['status'] = 'ok' if loss == np.nan: trial['result']['status'] = 'failed' except Exception as e: LOG.error("computing loss failed due to:\n {}".format(e)) loss = np.nan trial['result']['loss'] = np.nan trial['result']['status'] = 'failed' trial['refresh_time'] = datetime.datetime.now() self._trials.trials.append(trial) if isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None: cbd = copy.deepcopy(params) cbd['iterations'] = self._idx cbd['loss'] = loss cbd['status'] = trial['result']['status'] self.blackbox.callback_func(**cbd) return loss def run(self, print_stats=True): + """ + This function starts the optimization process. + :param print_stats: [bool] en- or disable console output + """ self._idx = 0 self.trials = Trials() if self._has_maxiteration_field: if 'solver_max_iterations' not in self.project.__dict__: msg = "Missing max_iteration entry in project, use default {}!".format(DEFAULTITERATIONS) LOG.warning(msg) print("WARNING: {}".format(msg)) setattr(self.project, 'solver_max_iterations', DEFAULTITERATIONS) self._max_iterations = self.project.solver_max_iterations start_time = datetime.datetime.now() try: search_space = self.convert_searchspace(self.project.hyperparameter) except Exception as e: msg = "Failed to convert searchspace, error: {}".format(e) LOG.error(msg) raise AssertionError(msg) try: self.execute_solver(search_space) except Exception as e: msg = "Failed to execute solver, error: {}".format(e) LOG.error(msg) raise AssertionError(msg) end_time = datetime.datetime.now() dt = end_time - start_time days = divmod(dt.total_seconds(), 86400) hours = divmod(days[1], 3600) minutes = divmod(hours[1], 60) seconds = divmod(minutes[1], 1) milliseconds = divmod(seconds[1], 0.001) self._total_duration = [int(days[0]), int(hours[0]), int(minutes[0]), int(seconds[0]), int(milliseconds[0])] if print_stats: self.print_best() self.print_timestats() def get_results(self): - results = {'duration': [], 'losses': []} + """ + This function returns a complete optimization history as pandas DataFrame and a dict with the optimal parameter set. + :return: [DataFrame], [dict] history and optimal parameter set + """ + assert isinstance(self.trials, Trials), "precondition violation, wrong trials type! Maybe solver was not yet executed?" + results = {'duration': [], 'losses': [], 'status': []} pset = self.trials.trials[0]['misc']['vals'] for p in pset.keys(): results[p] = [] for n, trial in enumerate(self.trials.trials): t1 = trial['book_time'] t2 = trial['refresh_time'] results['duration'].append((t2 - t1).microseconds / 1000.0) results['losses'].append(trial['result']['loss']) + results['status'].append(trial['result']['status'] == 'ok') losses = np.array(results['losses']) results['losses'] = list(losses) pset = trial['misc']['vals'] for p in pset.items(): results[p[0]].append(p[1][0]) return pd.DataFrame.from_dict(results), self.best def print_best(self): print("\n") print("#" * 40) print("### Best Parameter Choice ###") print("#" * 40) for name, value in self.best.items(): print(" - {}\t:\t{}".format(name, value)) print("\n - number of iterations\t:\t{}".format(self.trials.trials[-1]['tid']+1)) print(" - total time\t:\t{}d:{}h:{}m:{}s:{}ms".format(self._total_duration[0], self._total_duration[1], self._total_duration[2], self._total_duration[3], self._total_duration[4])) print("#" * 40) def compute_time_statistics(self): dts = [] for trial in self._trials.trials: if 'book_time' in trial.keys() and 'refresh_time' in trial.keys(): dt = trial['refresh_time'] - trial['book_time'] dts.append(dt.total_seconds()) self._time_per_iteration = np.mean(dts) * 1e3 self._accumulated_blackbox_time = np.sum(dts) * 1e3 tmp = self.total_duration - self._accumulated_blackbox_time self._solver_overhead = int(np.round(100.0 / (self.total_duration+1e-12) * tmp)) def print_timestats(self): print("\n") print("#" * 40) print("### Timing Statistics ###") print("#" * 40) print(" - per iteration: {}ms".format(int(self.time_per_iteration*1e4)/10000)) print(" - total time: {}d:{}h:{}m:{}s:{}ms".format(self._total_duration[0], self._total_duration[1], self._total_duration[2], self._total_duration[3], self._total_duration[4])) print("#" * 40) print(" - solver overhead: {}%".format(self.solver_overhead)) @property def project(self): return self._project @project.setter def project(self, value): if not isinstance(value, HyppopyProject): msg = "Input error, project_manager of type: {} not allowed!".format(type(value)) LOG.error(msg) raise IOError(msg) self._project = value @property def blackbox(self): return self._blackbox @blackbox.setter def blackbox(self, value): if isinstance(value, types.FunctionType) or isinstance(value, BlackboxFunction) or isinstance(value, VirtualFunction): self._blackbox = value else: self._blackbox = None msg = "Input error, blackbox of type: {} not allowed!".format(type(value)) LOG.error(msg) raise IOError(msg) @property def best(self): return self._best @best.setter def best(self, value): if not isinstance(value, dict): msg = "Input error, best of type: {} not allowed!".format(type(value)) LOG.error(msg) raise IOError(msg) self._best = value @property def trials(self): return self._trials @trials.setter def trials(self, value): self._trials = value @property def max_iterations(self): return self._max_iterations @max_iterations.setter def max_iterations(self, value): if not isinstance(value, int): msg = "Input error, max_iterations of type: {} not allowed!".format(type(value)) LOG.error(msg) raise IOError(msg) if value < 1: msg = "Precondition violation, max_iterations < 1!" LOG.error(msg) raise IOError(msg) self._max_iterations = value @property def total_duration(self): return (self._total_duration[0]*86400 + self._total_duration[1] * 3600 + self._total_duration[2] * 60 + self._total_duration[3]) * 1000 + self._total_duration[4] @property def solver_overhead(self): if self._solver_overhead is None: self.compute_time_statistics() return self._solver_overhead @property def time_per_iteration(self): if self._time_per_iteration is None: self.compute_time_statistics() return self._time_per_iteration @property def accumulated_blackbox_time(self): if self._accumulated_blackbox_time is None: self.compute_time_statistics() return self._accumulated_blackbox_time diff --git a/hyppopy/Solver/OptunaSolver.py b/hyppopy/Solver/OptunaSolver.py index 6c848fc..9074a31 100644 --- a/hyppopy/Solver/OptunaSolver.py +++ b/hyppopy/Solver/OptunaSolver.py @@ -1,82 +1,82 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE # # Author: Sven Wanner (s.wanner@dkfz.de) import os import optuna import logging import warnings import numpy as np from pprint import pformat from hyppopy.globals import DEBUGLEVEL from hyppopy.solver.HyppopySolver import HyppopySolver LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class OptunaSolver(HyppopySolver): def __init__(self, project=None): HyppopySolver.__init__(self, project) self._searchspace = None def reformat_parameter(self, params): out_params = {} for name, value in params.items(): if self._searchspace[name]["domain"] == "categorical": out_params[name] = self._searchspace[name]["data"][int(np.round(value))] else: if self._searchspace[name]["type"] == "int": out_params[name] = int(np.round(value)) else: out_params[name] = value return out_params def trial_cache(self, trial): params = {} for name, param in self._searchspace.items(): if param["domain"] == "categorical": params[name] = trial.suggest_categorical(name, param["data"]) else: params[name] = trial.suggest_uniform(name, param["data"][0], param["data"][1]) return self.loss_function(**params) - def loss_function_call(self, trial, params): + def loss_function_call(self, params): for key in params.keys(): if self.project.get_typeof(key) is int: params[key] = int(round(params[key])) return self.blackbox(**params) def execute_solver(self, searchspace): LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace))) self._searchspace = searchspace try: study = optuna.create_study() study.optimize(self.trial_cache, n_trials=self.max_iterations) self.best = study.best_trial.params except Exception as e: LOG.error("internal error in bayes_opt maximize occured. {}".format(e)) raise BrokenPipeError("internal error in bayes_opt maximize occured. {}".format(e)) def convert_searchspace(self, hyperparameter): LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) for name, param in hyperparameter.items(): if param["domain"] != "categorical" and param["domain"] != "uniform": msg = "Warning: Optuna cannot handle {} domain. Only uniform and categorical domains are supported!".format(param["domain"]) warnings.warn(msg) LOG.warning(msg) return hyperparameter diff --git a/hyppopy/Solver/OptunitySolver.py b/hyppopy/Solver/OptunitySolver.py index b4611ce..7a40117 100644 --- a/hyppopy/Solver/OptunitySolver.py +++ b/hyppopy/Solver/OptunitySolver.py @@ -1,98 +1,98 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE # # Author: Sven Wanner (s.wanner@dkfz.de) import os import logging import optunity import warnings from pprint import pformat from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) from hyppopy.solver.HyppopySolver import HyppopySolver class OptunitySolver(HyppopySolver): def __init__(self, project=None): HyppopySolver.__init__(self, project) self._solver_info = None self.opt_trials = None - def loss_function_call(self, trial, params): + def loss_function_call(self, params): for key in params.keys(): if self.project.get_typeof(key) is int: params[key] = int(round(params[key])) return self.blackbox(**params) def execute_solver(self, searchspace): LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace))) try: self.best, _, _ = optunity.minimize_structured(f=self.loss_function, num_evals=self.max_iterations, search_space=searchspace) except Exception as e: LOG.error("internal error in optunity.minimize_structured occured. {}".format(e)) raise BrokenPipeError("internal error in optunity.minimize_structured occured. {}".format(e)) def split_categorical(self, pdict): categorical = {} uniform = {} for name, pset in pdict.items(): for key, value in pset.items(): if key == 'domain' and value == 'categorical': categorical[name] = pset elif key == 'domain': if value != 'uniform': msg = "Warning: Optunity cannot handle {} domain. Only uniform and categorical domains are supported!".format(value) warnings.warn(msg) LOG.warning(msg) uniform[name] = pset return categorical, uniform def convert_searchspace(self, hyperparameter): LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) solution_space = {} # split input in categorical and non-categorical data cat, uni = self.split_categorical(hyperparameter) # build up dictionary keeping all non-categorical data uniforms = {} for key, value in uni.items(): for key2, value2 in value.items(): if key2 == 'data': if len(value2) == 3: uniforms[key] = value2[0:2] elif len(value2) == 2: uniforms[key] = value2 else: raise AssertionError("precondition violation, optunity searchspace needs list with left and right range bounds!") if len(cat) == 0: return uniforms # build nested categorical structure inner_level = uniforms for key, value in cat.items(): tmp = {} tmp2 = {} for key2, value2 in value.items(): if key2 == 'data': for elem in value2: tmp[elem] = inner_level tmp2[key] = tmp inner_level = tmp2 solution_space = tmp2 return solution_space diff --git a/hyppopy/Solver/RandomsearchSolver.py b/hyppopy/Solver/RandomsearchSolver.py index fff9b18..aa297c3 100644 --- a/hyppopy/Solver/RandomsearchSolver.py +++ b/hyppopy/Solver/RandomsearchSolver.py @@ -1,159 +1,159 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE # # Author: Sven Wanner (s.wanner@dkfz.de) import os import copy import random import logging import numpy as np from pprint import pformat from hyppopy.globals import DEBUGLEVEL from hyppopy.solver.HyppopySolver import HyppopySolver LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) def draw_uniform_sample(param): """ function draws a random sample from a uniform range :param param: [dict] input hyperparameter discription :return: random sample value of type data['type'] """ assert param['type'] != 'str', "cannot sample a string list!" assert param['data'][0] < param['data'][1], "precondition violation: data[0] > data[1]!" s = random.random() s *= np.abs(param['data'][1] - param['data'][0]) s += param['data'][0] if param['type'] == 'int': s = int(np.round(s)) if s < param['data'][0]: s = int(param['data'][0]) if s > param['data'][1]: s = int(param['data'][1]) return s def draw_normal_sample(param): """ function draws a random sample from a normal distributed range :param param: [dict] input hyperparameter discription :return: random sample value of type data['type'] """ assert param['type'] != 'str', "cannot sample a string list!" assert param['data'][0] < param['data'][1], "precondition violation: data[0] > data[1]!" mu = (param['data'][1] - param['data'][0]) / 2 sigma = mu / 3 s = np.random.normal(loc=param['data'][0] + mu, scale=sigma) if s > param['data'][1]: s = param['data'][1] if s < param['data'][0]: s = param['data'][0] s = float(s) if param["type"] == "int": s = int(np.round(s)) return s def draw_loguniform_sample(param): """ function draws a random sample from a logarithmic distributed range :param param: [dict] input hyperparameter discription :return: random sample value of type data['type'] """ assert param['type'] != 'str', "cannot sample a string list!" assert param['data'][0] < param['data'][1], "precondition violation: data[0] > data[1]!" p = copy.deepcopy(param) p['data'][0] = np.log(param['data'][0]) p['data'][1] = np.log(param['data'][1]) assert p['data'][0] is not np.nan, "Precondition violation, left bound input error, results in nan!" assert p['data'][1] is not np.nan, "Precondition violation, right bound input error, results in nan!" x = draw_uniform_sample(p) s = np.exp(x) if s > param['data'][1]: s = param['data'][1] if s < param['data'][0]: s = param['data'][0] return s def draw_categorical_sample(param): """ function draws a random sample from a categorical list :param param: [dict] input hyperparameter discription :return: random sample value of type data['type'] """ return random.sample(param['data'], 1)[0] def draw_sample(param): """ function draws a sample from the input hyperparameter descriptor depending on it's domain :param param: [dict] input hyperparameter discription :return: random sample value of type data['type'] """ assert isinstance(param, dict), "input error, hyperparam descriptors of type {} not allowed!".format(type(param)) assert 'domain' in param.keys(), "input error, hyperparam descriptors need a domain key!" assert 'data' in param.keys(), "input error, hyperparam descriptors need a data key!" assert 'type' in param.keys(), "input error, hyperparam descriptors need a type key!" if param['domain'] == "uniform": return draw_uniform_sample(param) elif param['domain'] == "normal": return draw_normal_sample(param) elif param['domain'] == "loguniform": return draw_loguniform_sample(param) elif param['domain'] == "categorical": return draw_categorical_sample(param) else: raise LookupError("Unknown domain {}".format(param['domain'])) class RandomsearchSolver(HyppopySolver): """ The RandomsearchSolver class implements a randomsearch optimization. The randomsearch supports categorical, uniform, normal and loguniform sampling. The solver draws an independent sample from the parameter space each iteration.""" def __init__(self, project=None): HyppopySolver.__init__(self, project) - def loss_function_call(self, trial, params): + def loss_function_call(self, params): loss = self.blackbox(**params) if loss is None: return np.nan return loss def execute_solver(self, searchspace): N = self.max_iterations try: for n in range(N): params = {} for name, p in searchspace.items(): params[name] = draw_sample(p) self.loss_function(**params) except Exception as e: msg = "internal error in randomsearch execute_solver occured. {}".format(e) LOG.error(msg) raise BrokenPipeError(msg) self.best = self._trials.argmin def convert_searchspace(self, hyperparameter): """ this function simply pipes the input parameter through, the sample drawing functions are responsible for interpreting the parameter. :param hyperparameter: [dict] hyperparameter space :return: [dict] hyperparameter space """ LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) return hyperparameter diff --git a/hyppopy/tests/test_bayesoptsolver.py b/hyppopy/tests/test_bayesoptsolver.py index 218eeb6..4bae53d 100644 --- a/hyppopy/tests/test_bayesoptsolver.py +++ b/hyppopy/tests/test_bayesoptsolver.py @@ -1,66 +1,71 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE # # Author: Sven Wanner (s.wanner@dkfz.de) import unittest import matplotlib.pylab as plt from hyppopy.solver.BayesOptSolver import * from hyppopy.VirtualFunction import VirtualFunction from hyppopy.HyppopyProject import HyppopyProject class BayesOptSolverTestSuite(unittest.TestCase): def setUp(self): pass def test_solver_complete(self): config = { "hyperparameter": { "axis_00": { "domain": "normal", "data": [300, 800], "type": "float" }, "axis_01": { - "domain": "normal", + "domain": "uniform", "data": [-1, 1], "type": "float" }, "axis_02": { "domain": "uniform", "data": [0, 10], "type": "float" } }, "settings": { - "solver": {"max_iterations": 80}, + "solver": {"max_iterations": 10}, "custom": {} }} project = HyppopyProject(config) solver = BayesOptSolver(project) vfunc = VirtualFunction() vfunc.load_default() solver.blackbox = vfunc - solver.run(print_stats=True) + solver.run(print_stats=False) df, best = solver.get_results() - self.assertTrue(570 < best['axis_00'] < 590) - self.assertTrue(0.1 < best['axis_01'] < 0.8) - self.assertTrue(4.5 < best['axis_02'] < 6) + self.assertTrue(300 <= best['axis_00'] <= 800) + self.assertTrue(-1 <= best['axis_01'] <= 1) + self.assertTrue(0 <= best['axis_02'] <= 10) + + for status in df['status']: + self.assertTrue(status) + for loss in df['losses']: + self.assertTrue(isinstance(loss, float)) if __name__ == '__main__': unittest.main() diff --git a/hyppopy/tests/test_gridsearchsolver.py b/hyppopy/tests/test_gridsearchsolver.py index 9bea228..57b6b19 100644 --- a/hyppopy/tests/test_gridsearchsolver.py +++ b/hyppopy/tests/test_gridsearchsolver.py @@ -1,206 +1,211 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE # # Author: Sven Wanner (s.wanner@dkfz.de) import unittest from hyppopy.solver.GridsearchSolver import * from hyppopy.VirtualFunction import VirtualFunction from hyppopy.HyppopyProject import HyppopyProject class GridsearchTestSuite(unittest.TestCase): def setUp(self): pass def test_get_uniform_axis_sample(self): drange = [0, 10] N = 11 data = get_uniform_axis_sample(drange[0], drange[1], N, "float") for i in range(11): self.assertEqual(float(i), data[i]) drange = [-10, 10] N = 21 data = get_uniform_axis_sample(drange[0], drange[1], N, "int") self.assertEqual(data[0], -10) self.assertEqual(data[20], 10) self.assertEqual(data[10], 0) def test_get_norm_cdf(self): res = [0, 0.27337265, 0.4331928, 0.48777553, 0.4986501, 0.5013499, 0.51222447, 0.5668072, 0.72662735, 1] f = get_norm_cdf(10) for n, v in enumerate(res): self.assertAlmostEqual(v, f[n]) res = [0.0, 0.27337264762313174, 0.4331927987311419, 0.48777552734495533, 0.4986501019683699, 0.5, 0.5013498980316301, 0.5122244726550447, 0.5668072012688581, 0.7266273523768683, 1.0] f = get_norm_cdf(11) for n, v in enumerate(res): self.assertAlmostEqual(v, f[n]) def test_get_gaussian_axis_sampling(self): res = [-5.0, -2.2662735237686826, -0.6680720126885813, -0.12224472655044671, -0.013498980316301257, 0.013498980316301257, 0.12224472655044671, 0.6680720126885813, 2.2662735237686826, 5.0] bounds = (-5, 5) N = 10 data = get_gaussian_axis_sample(bounds[0], bounds[1], N, "float") for n in range(N): self.assertAlmostEqual(res[n], data[n]) res = [-5.0, -2.2662735237686826, -0.6680720126885813, -0.12224472655044671, -0.013498980316301257, 0.0, 0.013498980316301257, 0.12224472655044671, 0.6680720126885813, 2.2662735237686826, 5.0] bounds = (-5, 5) N = 11 data = get_gaussian_axis_sample(bounds[0], bounds[1], N, "float") for n in range(N): self.assertAlmostEqual(res[n], data[n]) def test_get_logarithmic_axis_sample(self): res = [0.0010000000000000002, 0.0035938136638046297, 0.012915496650148841, 0.046415888336127795, 0.1668100537200059, 0.5994842503189414, 2.154434690031884, 7.7426368268112675, 27.825594022071247, 100.00000000000004] bounds = (0.001, 1e2) N = 10 data = get_logarithmic_axis_sample(bounds[0], bounds[1], N, "float") for n in range(N): self.assertAlmostEqual(res[n], data[n]) res = [0.0010000000000000002, 0.003162277660168382, 0.010000000000000004, 0.03162277660168381, 0.10000000000000006, 0.31622776601683833, 1.0000000000000009, 3.1622776601683813, 10.00000000000001, 31.622776601683846, 100.00000000000004] bounds = (0.001, 1e2) N = 11 data = get_logarithmic_axis_sample(bounds[0], bounds[1], N, "float") for n in range(N): self.assertAlmostEqual(res[n], data[n]) def test_solver(self): config = { "hyperparameter": { "value 1": { "domain": "uniform", "data": [0, 20, 11], "type": "int" }, "value 2": { "domain": "normal", "data": [0, 20.0, 11], "type": "float" }, "value 3": { "domain": "loguniform", "data": [1, 10000, 11], "type": "float" }, "categorical": { "domain": "categorical", "data": ["a", "b"], "type": "str" } }, "settings": { "solver": {}, "custom": {} }} res_labels = ['value 1', 'value 2', 'value 3', 'categorical'] res_values = [[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20], [0.0, 5.467452952462635, 8.663855974622837, 9.755510546899107, 9.973002039367397, 10.0, 10.026997960632603, 10.244489453100893, 11.336144025377163, 14.532547047537365, 20.0], [1.0, 2.51188643150958, 6.309573444801933, 15.848931924611136, 39.810717055349734, 100.00000000000004, 251.18864315095806, 630.9573444801938, 1584.8931924611143, 3981.071705534977, 10000.00000000001], ['a', 'b'] ] solver = GridsearchSolver(config) searchspace = solver.convert_searchspace(config["hyperparameter"]) for n in range(len(res_labels)): self.assertEqual(res_labels[n], searchspace[0][n]) for i in range(3): self.assertAlmostEqual(res_values[i], searchspace[1][i]) self.assertEqual(res_values[3], searchspace[1][3]) def test_solver_complete(self): config = { "hyperparameter": { "axis_00": { "domain": "normal", "data": [300, 800, 11], "type": "float" }, "axis_01": { "domain": "normal", "data": [-1, 1, 11], "type": "float" }, "axis_02": { "domain": "uniform", "data": [0, 10, 11], "type": "float" } }, "settings": { "solver": {}, "custom": {} }} project = HyppopyProject(config) solver = GridsearchSolver(project) vfunc = VirtualFunction() vfunc.load_default() solver.blackbox = vfunc solver.run(print_stats=False) df, best = solver.get_results() self.assertAlmostEqual(best['axis_00'], 583.40, places=1) self.assertAlmostEqual(best['axis_01'], 0.45, places=1) self.assertAlmostEqual(best['axis_02'], 5.0, places=1) + for status in df['status']: + self.assertTrue(status) + for loss in df['losses']: + self.assertTrue(isinstance(loss, float)) + if __name__ == '__main__': unittest.main() diff --git a/hyppopy/tests/test_hyperoptsolver.py b/hyppopy/tests/test_hyperoptsolver.py index 3713d05..a2be31c 100644 --- a/hyppopy/tests/test_hyperoptsolver.py +++ b/hyppopy/tests/test_hyperoptsolver.py @@ -1,66 +1,71 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE # # Author: Sven Wanner (s.wanner@dkfz.de) import unittest import matplotlib.pylab as plt from hyppopy.solver.HyperoptSolver import * from hyppopy.VirtualFunction import VirtualFunction from hyppopy.HyppopyProject import HyppopyProject class HyperoptSolverTestSuite(unittest.TestCase): def setUp(self): pass def test_solver_complete(self): config = { "hyperparameter": { "axis_00": { "domain": "normal", "data": [300, 800], "type": "float" }, "axis_01": { "domain": "normal", "data": [-1, 1], "type": "float" }, "axis_02": { "domain": "uniform", "data": [0, 10], "type": "float" } }, "settings": { - "solver": {"max_iterations": 800}, + "solver": {"max_iterations": 100}, "custom": {} }} project = HyppopyProject(config) solver = HyperoptSolver(project) vfunc = VirtualFunction() vfunc.load_default() solver.blackbox = vfunc solver.run(print_stats=False) df, best = solver.get_results() - self.assertTrue(570 < best['axis_00'] < 590) - self.assertTrue(0.1 < best['axis_01'] < 0.8) - self.assertTrue(4.5 < best['axis_02'] < 6) + self.assertTrue(300 <= best['axis_00'] <= 800) + self.assertTrue(-1 <= best['axis_01'] <= 1) + self.assertTrue(0 <= best['axis_02'] <= 10) + + for status in df['status']: + self.assertTrue(status) + for loss in df['losses']: + self.assertTrue(isinstance(loss, float)) if __name__ == '__main__': unittest.main() diff --git a/hyppopy/tests/test_optunasolver.py b/hyppopy/tests/test_optunasolver.py index 4333544..523dba1 100644 --- a/hyppopy/tests/test_optunasolver.py +++ b/hyppopy/tests/test_optunasolver.py @@ -1,66 +1,71 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE # # Author: Sven Wanner (s.wanner@dkfz.de) import unittest import matplotlib.pylab as plt from hyppopy.solver.OptunaSolver import * from hyppopy.VirtualFunction import VirtualFunction from hyppopy.HyppopyProject import HyppopyProject class OptunaSolverTestSuite(unittest.TestCase): def setUp(self): pass def test_solver_complete(self): config = { "hyperparameter": { "axis_00": { "domain": "normal", "data": [300, 800], "type": "float" }, "axis_01": { "domain": "normal", "data": [-1, 1], "type": "float" }, "axis_02": { "domain": "uniform", "data": [0, 10], "type": "float" } }, "settings": { - "solver": {"max_iterations": 800}, + "solver": {"max_iterations": 100}, "custom": {} }} project = HyppopyProject(config) solver = OptunaSolver(project) vfunc = VirtualFunction() vfunc.load_default() solver.blackbox = vfunc solver.run(print_stats=False) df, best = solver.get_results() - self.assertTrue(570 < best['axis_00'] < 590) - self.assertTrue(0.1 < best['axis_01'] < 0.8) - self.assertTrue(4.5 < best['axis_02'] < 6) + self.assertTrue(300 <= best['axis_00'] <= 800) + self.assertTrue(-1 <= best['axis_01'] <= 1) + self.assertTrue(0 <= best['axis_02'] <= 10) + + for status in df['status']: + self.assertTrue(status) + for loss in df['losses']: + self.assertTrue(isinstance(loss, float)) if __name__ == '__main__': unittest.main() diff --git a/hyppopy/tests/test_optunitysolver.py b/hyppopy/tests/test_optunitysolver.py index 4d0bc9b..287df1e 100644 --- a/hyppopy/tests/test_optunitysolver.py +++ b/hyppopy/tests/test_optunitysolver.py @@ -1,66 +1,71 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE # # Author: Sven Wanner (s.wanner@dkfz.de) import unittest import matplotlib.pylab as plt from hyppopy.solver.OptunitySolver import * from hyppopy.VirtualFunction import VirtualFunction from hyppopy.HyppopyProject import HyppopyProject class OptunitySolverTestSuite(unittest.TestCase): def setUp(self): pass def test_solver_complete(self): config = { "hyperparameter": { "axis_00": { "domain": "normal", "data": [300, 800], "type": "float" }, "axis_01": { "domain": "normal", "data": [-1, 1], "type": "float" }, "axis_02": { "domain": "uniform", "data": [0, 10], "type": "float" } }, "settings": { - "solver": {"max_iterations": 800}, + "solver": {"max_iterations": 100}, "custom": {} }} project = HyppopyProject(config) solver = OptunitySolver(project) vfunc = VirtualFunction() vfunc.load_default() solver.blackbox = vfunc solver.run(print_stats=False) df, best = solver.get_results() - self.assertTrue(570 < best['axis_00'] < 590) - self.assertTrue(0.1 < best['axis_01'] < 0.8) - self.assertTrue(4.5 < best['axis_02'] < 6) + self.assertTrue(300 <= best['axis_00'] <= 800) + self.assertTrue(-1 <= best['axis_01'] <= 1) + self.assertTrue(0 <= best['axis_02'] <= 10) + + for status in df['status']: + self.assertTrue(status) + for loss in df['losses']: + self.assertTrue(isinstance(loss, float)) if __name__ == '__main__': unittest.main() diff --git a/hyppopy/tests/test_randomsearchsolver.py b/hyppopy/tests/test_randomsearchsolver.py index 6e18dd0..0958798 100644 --- a/hyppopy/tests/test_randomsearchsolver.py +++ b/hyppopy/tests/test_randomsearchsolver.py @@ -1,132 +1,137 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE # # Author: Sven Wanner (s.wanner@dkfz.de) import unittest import matplotlib.pylab as plt from hyppopy.solver.RandomsearchSolver import * from hyppopy.VirtualFunction import VirtualFunction from hyppopy.HyppopyProject import HyppopyProject class RandomsearchTestSuite(unittest.TestCase): def setUp(self): pass def test_draw_uniform_sample(self): param = {"data": [0, 1, 10], "type": "float"} values = [] for i in range(10000): values.append(draw_uniform_sample(param)) self.assertTrue(0 <= values[-1] <= 1) self.assertTrue(isinstance(values[-1], float)) hist = plt.hist(values, bins=10, normed=True) std = np.std(hist[0]) mean = np.mean(hist[0]) self.assertTrue(std < 0.05) self.assertTrue(0.9 < mean < 1.1) param = {"data": [0, 10, 11], "type": "int"} values = [] for i in range(10000): values.append(draw_uniform_sample(param)) self.assertTrue(0 <= values[-1] <= 10) self.assertTrue(isinstance(values[-1], int)) hist = plt.hist(values, bins=11, normed=True) std = np.std(hist[0]) mean = np.mean(hist[0]) self.assertTrue(std < 0.05) self.assertTrue(0.09 < mean < 0.11) def test_draw_normal_sample(self): param = {"data": [0, 10, 11], "type": "int"} values = [] for i in range(10000): values.append(draw_normal_sample(param)) self.assertTrue(0 <= values[-1] <= 10) self.assertTrue(isinstance(values[-1], int)) hist = plt.hist(values, bins=11, normed=True) for i in range(1, 5): self.assertTrue(hist[0][i-1]-hist[0][i] < 0) for i in range(5, 10): self.assertTrue(hist[0][i] - hist[0][i+1] > 0) def test_draw_loguniform_sample(self): param = {"data": [1, 1000, 11], "type": "float"} values = [] for i in range(10000): values.append(draw_loguniform_sample(param)) self.assertTrue(1 <= values[-1] <= 1000) self.assertTrue(isinstance(values[-1], float)) hist = plt.hist(values, bins=11, normed=True) for i in range(4): self.assertTrue(hist[0][i] > hist[0][i+1]) self.assertTrue((hist[0][i] - hist[0][i+1]) > 0) def test_draw_categorical_sample(self): param = {"data": [1, 2, 3], "type": int} values = [] for i in range(10000): values.append(draw_categorical_sample(param)) self.assertTrue(values[-1] == 1 or values[-1] == 2 or values[-1] == 3) self.assertTrue(isinstance(values[-1], int)) hist = plt.hist(values, bins=3, normed=True) for i in range(3): self.assertTrue(0.45 < hist[0][i] < 0.55) def test_solver_complete(self): config = { "hyperparameter": { "axis_00": { "domain": "normal", "data": [300, 800], "type": "float" }, "axis_01": { "domain": "uniform", "data": [-1, 1], "type": "float" }, "axis_02": { "domain": "uniform", "data": [0, 10], "type": "float" } }, "settings": { - "solver": {"max_iterations": 8000}, + "solver": {"max_iterations": 100}, "custom": {} }} project = HyppopyProject(config) solver = RandomsearchSolver(project) vfunc = VirtualFunction() vfunc.load_default() solver.blackbox = vfunc solver.run(print_stats=False) df, best = solver.get_results() - self.assertTrue(570 < best['axis_00'] < 590) - self.assertTrue(0.1 < best['axis_01'] < 0.8) - self.assertTrue(4.5 < best['axis_02'] < 6) + self.assertTrue(300 <= best['axis_00'] <= 800) + self.assertTrue(-1 <= best['axis_01'] <= 1) + self.assertTrue(0 <= best['axis_02'] <= 10) + + for status in df['status']: + self.assertTrue(status) + for loss in df['losses']: + self.assertTrue(isinstance(loss, float)) if __name__ == '__main__': unittest.main()