diff --git a/examples/tutorial_multisolver.py b/examples/tutorial_multisolver.py index dc1d666..afda236 100644 --- a/examples/tutorial_multisolver.py +++ b/examples/tutorial_multisolver.py @@ -1,180 +1,174 @@ # In this tutorial we solve an optimization problem using the Hyperopt Solver (http://hyperopt.github.io/hyperopt/). # Hyperopt uses a Baysian - Tree Parzen Estimator - Optimization approach, which means that each iteration computes a # new function value of the blackbox, interpolates a guess for the whole energy function and predicts a point to # compute the next function value at. This next point is not necessarily a "better" value, it's only the value with # the highest uncertainty for the function interpolation. # # See a visual explanation e.g. here (http://philipperemy.github.io/visualization/) # import the HyppopyProject class keeping track of inputs from hyppopy.HyppopyProject import HyppopyProject # import the SolverPool singleton class from hyppopy.SolverPool import SolverPool # import the Blackboxfunction class wrapping your problem for Hyppopy from hyppopy.BlackboxFunction import BlackboxFunction # Next step is defining the problem space and all settings Hyppopy needs to optimize your problem. # The config is a simple nested dictionary with two obligatory main sections, hyperparameter and settings. # The hyperparameter section defines your searchspace. Each hyperparameter is again a dictionary with: # # - a domain ['categorical', 'uniform', 'normal', 'loguniform'] # - the domain data [left bound, right bound] and # - a type of your domain ['str', 'int', 'float'] # # The settings section has two subcategories, solver and custom. The first contains settings for the solver, # here 'max_iterations' - is the maximum number of iteration. # # The custom section allows defining custom parameter. An entry here is transformed to a member variable of the # HyppopyProject class. 
These can be useful when implementing new solver classes or for control your hyppopy script. # Here we use it as a solver switch to control the usage of our solver via the config. This means with the script # below your can try out every solver by changing use_solver to 'optunity', 'randomsearch', 'gridsearch',... # It can be used like so: project.custom_use_plugin (see below) If using the gridsearch solver, max_iterations is # ignored, instead each hyperparameter must specifiy a number of samples additionally to the range like so: # 'data': [0, 1, 100] which means sampling the space from 0 to 1 in 100 intervals. config = { "hyperparameter": { "C": { "domain": "uniform", "data": [0.0001, 20], "type": "float" }, "gamma": { - "domain": "normal", + "domain": "uniform", "data": [0.0001, 20.0], "type": "float" }, "kernel": { "domain": "categorical", "data": ["linear", "sigmoid", "poly", "rbf"], "type": "str" }, "decision_function_shape": { "domain": "categorical", "data": ["ovo", "ovr"], "type": "str" } }, "settings": { "solver": { - "max_iterations": 500 + "max_iterations": 200 }, "custom": { "use_solver": "hyperopt" } }} # When creating a HyppopyProject instance we # pass the config dictionary to the constructor. project = HyppopyProject(config=config) # demonstration of the custom parameter access print("-"*30) print("max_iterations:\t{}".format(project.solver_max_iterations)) print("solver chosen -> {}".format(project.custom_use_solver)) print("-"*30) -# Hyppopy offers a class called BlackboxFunction to wrap your problem for Hyppopy. -# The function signature is as follows: +# The BlackboxFunction signature is as follows: # BlackboxFunction(blackbox_func=None, # dataloader_func=None, # preprocess_func=None, # callback_func=None, # data=None, # **kwargs) -# -# Means we can set a couple of function pointers, a data object and an arbitrary number of custom parameter via kwargs. 
-# -# - blackbox_func: a function pointer to the actual, user defined, blackbox function that is computing our loss -# - dataloader_func: a function pointer to a function handling the dataloading -# - preprocess_func: a function pointer to a function automatically executed before starting the optimization process -# - callback_func: a function pointer to a function that is called after each iteration with the trail object as input -# - data: setting data can be done via dataloader_func or directly -# - kwargs are passed to all functions above and thus can be used for parameter sharing between the functions -# -# (more details see in the documentation) -# -# Below we demonstrate the usage of all the above by defining a my_dataloader_function which in fact only grabs the -# iris dataset from sklearn and returns it. A my_preprocess_function which also does nothing useful here but -# demonstrating that a custom parameter can be set via kwargs and used in all of our functions when called within -# Hyppopy. The my_callback_function gets as input the dictionary containing the state of the iteration and thus can be -# used to access the current state of each solver iteration. Finally we define the actual loss_function -# my_loss_function, which gets as input a data object and params. Both parameter are fixed, the first is defined by -# the user depending on what is dataloader returns or the data object set in the constructor, the second is a dictionary -# with a sample of your hyperparameter space which content is in the choice of the solver. +# +# - blackbox_func: a function pointer to the user's loss function +# - dataloader_func: a function pointer for handling dataloading. The function is called once before +# optimizing. What it returns is passed as the first argument (data) of your +# loss function. +# - preprocess_func: a function pointer for data preprocessing. 
The function is called once before +# optimizing and gets via kwargs['data'] the raw data object set directly or returned +# from dataloader_func. What this function returns is then what is passed as the first +# argument to your loss function. +# - callback_func: a function pointer called after each iteration. The input kwargs is a dictionary +# keeping the parameters used in this iteration, the 'iterations' index, the 'loss' +# and the 'status'. The function in this example is used for realtime printing its +# input but can also be used for realtime visualization. +# - data: if not done via dataloader_func one can set a raw_data object directly +# - kwargs: dict whose content is passed to all functions above. from sklearn.svm import SVC from sklearn.datasets import load_iris from sklearn.model_selection import cross_val_score def my_dataloader_function(**kwargs): print("Dataloading...") # kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input. print("my loading argument: {}".format(kwargs['params']['my_dataloader_input'])) iris_data = load_iris() return [iris_data.data, iris_data.target] def my_preprocess_function(**kwargs): print("Preprocessing...") # kwargs['data'] allows accessing the input data print("data:", kwargs['data'][0].shape, kwargs['data'][1].shape) # kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input. print("kwargs['params']['my_preproc_param']={}".format(kwargs['params']['my_preproc_param']), "\n") # if the preprocessing function returns something, # the input data will be replaced with the data returned by this function. 
x = kwargs['data'][0] y = kwargs['data'][1] for i in range(x.shape[0]): x[i, :] += kwargs['params']['my_preproc_param'] return [x, y] def my_callback_function(**kwargs): print("\r{}".format(kwargs), end="") def my_loss_function(data, params): clf = SVC(**params) return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean() # We now create the BlackboxFunction object and pass all function pointers defined above, # as well as 2 dummy parameter (my_preproc_param, my_dataloader_input) for demonstration purposes. blackbox = BlackboxFunction(blackbox_func=my_loss_function, dataloader_func=my_dataloader_function, preprocess_func=my_preprocess_function, callback_func=my_callback_function, my_preproc_param=1, my_dataloader_input='could/be/a/path') # Last step, is we use our SolverPool which automatically returns the correct solver. # There are multiple ways to get the desired solver from the solver pool. # 1. solver = SolverPool.get('hyperopt') # solver.project = project # 2. solver = SolverPool.get('hyperopt', project) # 3. The SolverPool will look for the field 'use_solver' in the project instance, if # it is present it will be used to specify the solver so that in this case it is enough # to pass the project instance. solver = SolverPool.get(project=project) # Give the solver your blackbox and run it. After execution we can get the result # via get_result() which returns a pandas dataframe containing the complete history # The dict best contains the best parameter set. solver.blackbox = blackbox solver.run() df, best = solver.get_results() print("\n") print("*"*100) print("Best Parameter Set:\n{}".format(best)) print("*"*100) diff --git a/hyppopy/Solver/BayesOptSolver.py b/hyppopy/Solver/BayesOptSolver.py index b0c8173..bed6e7a 100644 --- a/hyppopy/Solver/BayesOptSolver.py +++ b/hyppopy/Solver/BayesOptSolver.py @@ -1,117 +1,84 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. 
# # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os -import copy import logging -import datetime import warnings import numpy as np from pprint import pformat from hyperopt import Trials from bayes_opt import BayesianOptimization from hyppopy.globals import DEBUGLEVEL -from hyppopy.BlackboxFunction import BlackboxFunction from hyppopy.solver.HyppopySolver import HyppopySolver LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class BayesOptSolver(HyppopySolver): def __init__(self, project=None): HyppopySolver.__init__(self, project) self._searchspace = None self._idx = None def reformat_parameter(self, params): out_params = {} for name, value in params.items(): if self._searchspace[name]["domain"] == "categorical": out_params[name] = self._searchspace[name]["data"][int(np.round(value))] else: if self._searchspace[name]["type"] == "int": out_params[name] = int(np.round(value)) else: out_params[name] = value return out_params - def loss_function(self, **params): - self._idx += 1 + def loss_function_call(self, trial, params): params = self.reformat_parameter(params) - vals = {} - idx = {} - for key, value in params.items(): - vals[key] = [value] - idx[key] = [self._idx] - trial = {'tid': self._idx, - 'result': {'loss': None, 'status': 'ok'}, - 'misc': { - 'tid': self._idx, - 'idxs': idx, - 'vals': vals - }, - 'book_time': datetime.datetime.now(), - 'refresh_time': None - } - try: - loss = self.blackbox(**params) - trial['result']['loss'] = loss - trial['result']['status'] = 'ok' - except Exception as e: - LOG.error("computing loss failed due to:\n {}".format(e)) - loss = np.nan - trial['result']['loss'] = np.nan - trial['result']['status'] = 'failed' - trial['refresh_time'] = datetime.datetime.now() - self._trials.trials.append(trial) - if 
isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None: - cbd = copy.deepcopy(params) - cbd['iterations'] = self._idx - cbd['loss'] = loss - cbd['status'] = trial['result']['status'] - self.blackbox.callback_func(**cbd) - return loss + for key in params.keys(): + if self.project.get_typeof(key) is int: + params[key] = int(round(params[key])) + return self.blackbox(**params) def execute_solver(self, searchspace): LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace))) self.trials = Trials() self._idx = 0 try: optimizer = BayesianOptimization(f=self.loss_function, pbounds=searchspace, verbose=0) optimizer.maximize(init_points=2, n_iter=self.max_iterations) self.best = self.reformat_parameter(optimizer.max["params"]) except Exception as e: LOG.error("internal error in bayes_opt maximize occured. {}".format(e)) raise BrokenPipeError("internal error in bayes_opt maximize occured. {}".format(e)) def convert_searchspace(self, hyperparameter): LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) self._searchspace = hyperparameter pbounds = {} for name, param in hyperparameter.items(): if param["domain"] != "categorical": if param["domain"] != "uniform": msg = "Warning: BayesOpt cannot handle {} domain. Only uniform and categorical domains are supported!".format( param["domain"]) warnings.warn(msg) LOG.warning(msg) pbounds[name] = (param["data"][0], param["data"][1]) else: pbounds[name] = (0, len(param["data"])-1) return pbounds diff --git a/hyppopy/Solver/GridsearchSolver.py b/hyppopy/Solver/GridsearchSolver.py index c5bb053..3946ca4 100644 --- a/hyppopy/Solver/GridsearchSolver.py +++ b/hyppopy/Solver/GridsearchSolver.py @@ -1,223 +1,182 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. 
# # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os -import copy import logging -import datetime import numpy as np from pprint import pformat -from hyperopt import Trials from scipy.stats import norm from itertools import product from hyppopy.globals import DEBUGLEVEL from hyppopy.solver.HyppopySolver import HyppopySolver -from hyppopy.BlackboxFunction import BlackboxFunction LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) def get_uniform_axis_sample(a, b, N, dtype): """ returns a uniform sample x(n) in the range [a,b] sampled at N pojnts :param a: left value range bound :param b: right value range bound :param N: discretization of intervall [a,b] :param dtype: data type :return: [list] axis range """ assert a < b, "condition a < b violated!" assert isinstance(N, int), "condition N of type int violated!" assert isinstance(dtype, str), "condition type of type str violated!" if dtype == "int": return list(np.linspace(a, b, N).astype(int)) elif dtype == "float" or dtype == "double": return list(np.linspace(a, b, N)) else: raise AssertionError("dtype {} not supported for uniform sampling!".format(dtype)) def get_norm_cdf(N): """ returns a normed gaussian cdf (range [0,1]) with N sampling points :param N: sampling points :return: [ndarray] gaussian cdf function values """ assert isinstance(N, int), "condition N of type int violated!" 
even = True if N % 2 != 0: N -= 1 even = False N = int(N/2) sigma = 1/3 x = np.linspace(0, 1, N) y1 = norm.cdf(x, loc=0, scale=sigma)-0.5 if not even: y1 = np.append(y1, [0.5]) y2 = 1-(norm.cdf(x, loc=0, scale=sigma)-0.5) y2 = np.flip(y2, axis=0) y = np.concatenate((y1, y2), axis=0) return y def get_gaussian_axis_sample(a, b, N, dtype): """ returns a function value f(n) where f is a gaussian cdf in range [a, b] and N sampling points :param a: left value range bound :param b: right value range bound :param N: discretization of intervall [a,b] :param dtype: data type :return: [list] axis range """ assert a < b, "condition a < b violated!" assert isinstance(N, int), "condition N of type int violated!" assert isinstance(dtype, str), "condition type of type str violated!" data = [] for n in range(N): x = a + get_norm_cdf(N)[n]*(b-a) if dtype == "int": data.append(int(x)) elif dtype == "float" or dtype == "double": data.append(x) else: raise AssertionError("dtype {} not supported for uniform sampling!".format(dtype)) return data def get_logarithmic_axis_sample(a, b, N, dtype): """ returns a function value f(n) where f is logarithmic function e^x sampling the exponent range [log(a), log(b)] linear at N sampling points. The function values returned are in the range [a, b]. :param a: left value range bound :param b: right value range bound :param N: discretization of intervall [a,b] :param dtype: data type :return: [list] axis range """ assert a < b, "condition a < b violated!" assert a > 0, "condition a > 0 violated!" assert isinstance(N, int), "condition N of type int violated!" assert isinstance(dtype, str), "condition type of type str violated!" 
# convert input range into exponent range lexp = np.log(a) rexp = np.log(b) exp_range = np.linspace(lexp, rexp, N) data = [] for n in range(exp_range.shape[0]): x = np.exp(exp_range[n]) if dtype == "int": data.append(int(x)) elif dtype == "float" or dtype == "double": data.append(x) else: raise AssertionError("dtype {} not supported for uniform sampling!".format(dtype)) return data class GridsearchSolver(HyppopySolver): """ The GridsearchSolver class implements a gridsearch optimization. The gridsearch supports categorical, uniform, normal and loguniform sampling. To use the GridsearchSolver, besides a range, one must specifiy the number of samples in the domain, e.g. 'data': [0, 1, 100] """ def __init__(self, project=None): HyppopySolver.__init__(self, project) - self._tid = None self._has_maxiteration_field = False - def loss_function(self, params): - loss = None - vals = {} - idx = {} - for key, value in params.items(): - vals[key] = [value] - idx[key] = [self._tid] - trial = {'tid': self._tid, - 'result': {'loss': None, 'status': 'ok'}, - 'misc': { - 'tid': self._tid, - 'idxs': idx, - 'vals': vals - }, - 'book_time': datetime.datetime.now(), - 'refresh_time': None - } - try: - loss = self.blackbox(**params) - if loss is None: - trial['result']['loss'] = np.nan - trial['result']['status'] = 'failed' - else: - trial['result']['loss'] = loss - except Exception as e: - LOG.error("execution of self.blackbox(**params) failed due to:\n {}".format(e)) - trial['result']['loss'] = np.nan - trial['result']['status'] = 'failed' - trial['refresh_time'] = datetime.datetime.now() - self._trials.trials.append(trial) - if isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None: - cbd = copy.deepcopy(params) - cbd['iterations'] = self._tid + 1 - cbd['loss'] = loss - cbd['status'] = trial['result']['status'] - self.blackbox.callback_func(**cbd) - return + def loss_function_call(self, trial, params): + loss = self.blackbox(**params) + if loss is 
None: + return np.nan + return loss def execute_solver(self, searchspace): - self._tid = 0 - self._trials = Trials() - for x in product(*searchspace[1]): params = {} for name, value in zip(searchspace[0], x): params[name] = value try: - self.loss_function(params) - self._tid += 1 + self.loss_function(**params) except Exception as e: msg = "internal error in randomsearch execute_solver occured. {}".format(e) LOG.error(msg) raise BrokenPipeError(msg) self.best = self._trials.argmin def convert_searchspace(self, hyperparameter): """ the function converts the standard parameter input into a range list depending on the domain. These rangelists are later used with itertools product to create a paramater space sample of each combination. :param hyperparameter: [dict] hyperparameter space :return: [list] name and range for each parameter space axis """ LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) searchspace = [[], []] for name, param in hyperparameter.items(): if param["domain"] == "categorical": searchspace[0].append(name) searchspace[1].append(param["data"]) elif param["domain"] == "uniform": searchspace[0].append(name) searchspace[1].append(get_uniform_axis_sample(param["data"][0], param["data"][1], param["data"][2], param["type"])) elif param["domain"] == "normal": searchspace[0].append(name) searchspace[1].append(get_gaussian_axis_sample(param["data"][0], param["data"][1], param["data"][2], param["type"])) elif param["domain"] == "loguniform": searchspace[0].append(name) searchspace[1].append(get_logarithmic_axis_sample(param["data"][0], param["data"][1], param["data"][2], param["type"])) return searchspace diff --git a/hyppopy/Solver/HyppopySolver.py b/hyppopy/Solver/HyppopySolver.py index ec4459f..bc08ddb 100644 --- a/hyppopy/Solver/HyppopySolver.py +++ b/hyppopy/Solver/HyppopySolver.py @@ -1,231 +1,278 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. 
# All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import abc import os +import copy import types import logging import datetime import numpy as np import pandas as pd +from hyperopt import Trials from hyppopy.globals import DEBUGLEVEL from hyppopy.HyppopyProject import HyppopyProject from hyppopy.BlackboxFunction import BlackboxFunction from hyppopy.VirtualFunction import VirtualFunction from hyppopy.globals import DEBUGLEVEL, DEFAULTITERATIONS LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class HyppopySolver(object): def __init__(self, project=None): + self._idx = None self._best = None self._trials = None self._blackbox = None self._max_iterations = None self._project = project self._total_duration = None self._solver_overhead = None self._time_per_iteration = None self._accumulated_blackbox_time = None self._has_maxiteration_field = True @abc.abstractmethod def execute_solver(self, searchspace): raise NotImplementedError('users must define execute_solver to use this class') @abc.abstractmethod def convert_searchspace(self, hyperparameter): raise NotImplementedError('users must define convert_searchspace to use this class') + @abc.abstractmethod + def loss_function_call(self, trial, params): + # solver specific hook called by loss_function with the trial record and the sampled parameters; must return the loss + raise NotImplementedError('users must define loss_function_call to use this class') + + def loss_function(self, **params): + self._idx += 1 + vals = {} + idx = {} + for key, value in params.items(): + vals[key] = [value] + idx[key] = [self._idx] + trial = {'tid': self._idx, + 'result': {'loss': None, 'status': 'ok'}, + 'misc': { + 'tid': self._idx, + 'idxs': idx, + 'vals': vals + }, + 'book_time': datetime.datetime.now(), + 'refresh_time': None + } + try: + loss = self.loss_function_call(trial, params) + trial['result']['loss'] = loss + 
trial['result']['status'] = 'ok' + # NaN never compares equal to itself, so test with np.isnan; a missing (None) loss counts as failed as well + if loss is None or np.isnan(loss): + trial['result']['status'] = 'failed' + except Exception as e: + LOG.error("computing loss failed due to:\n {}".format(e)) + loss = np.nan + trial['result']['loss'] = np.nan + trial['result']['status'] = 'failed' + trial['refresh_time'] = datetime.datetime.now() + self._trials.trials.append(trial) + if isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None: + cbd = copy.deepcopy(params) + cbd['iterations'] = self._idx + cbd['loss'] = loss + cbd['status'] = trial['result']['status'] + self.blackbox.callback_func(**cbd) + return loss + def run(self, print_stats=True): + self._idx = 0 + self.trials = Trials() if self._has_maxiteration_field: if 'solver_max_iterations' not in self.project.__dict__: msg = "Missing max_iteration entry in project, use default {}!".format(DEFAULTITERATIONS) LOG.warning(msg) print("WARNING: {}".format(msg)) setattr(self.project, 'solver_max_iterations', DEFAULTITERATIONS) self._max_iterations = self.project.solver_max_iterations start_time = datetime.datetime.now() try: search_space = self.convert_searchspace(self.project.hyperparameter) except Exception as e: msg = "Failed to convert searchspace, error: {}".format(e) LOG.error(msg) raise AssertionError(msg) try: self.execute_solver(search_space) except Exception as e: msg = "Failed to execute solver, error: {}".format(e) LOG.error(msg) raise AssertionError(msg) end_time = datetime.datetime.now() dt = end_time - start_time days = divmod(dt.total_seconds(), 86400) hours = divmod(days[1], 3600) minutes = divmod(hours[1], 60) seconds = divmod(minutes[1], 1) milliseconds = divmod(seconds[1], 0.001) self._total_duration = [int(days[0]), int(hours[0]), int(minutes[0]), int(seconds[0]), int(milliseconds[0])] if print_stats: self.print_best() self.print_timestats() def get_results(self): results = {'duration': [], 'losses': []} pset = self.trials.trials[0]['misc']['vals'] for p in pset.keys(): results[p] = [] 
for n, trial in enumerate(self.trials.trials): t1 = trial['book_time'] t2 = trial['refresh_time'] results['duration'].append((t2 - t1).microseconds / 1000.0) results['losses'].append(trial['result']['loss']) losses = np.array(results['losses']) results['losses'] = list(losses) pset = trial['misc']['vals'] for p in pset.items(): results[p[0]].append(p[1][0]) return pd.DataFrame.from_dict(results), self.best def print_best(self): print("\n") print("#" * 40) print("### Best Parameter Choice ###") print("#" * 40) for name, value in self.best.items(): print(" - {}\t:\t{}".format(name, value)) print("\n - number of iterations\t:\t{}".format(self.trials.trials[-1]['tid']+1)) print(" - total time\t:\t{}d:{}h:{}m:{}s:{}ms".format(self._total_duration[0], self._total_duration[1], self._total_duration[2], self._total_duration[3], self._total_duration[4])) print("#" * 40) def compute_time_statistics(self): dts = [] for trial in self._trials.trials: if 'book_time' in trial.keys() and 'refresh_time' in trial.keys(): dt = trial['refresh_time'] - trial['book_time'] dts.append(dt.total_seconds()) self._time_per_iteration = np.mean(dts) * 1e3 self._accumulated_blackbox_time = np.sum(dts) * 1e3 tmp = self.total_duration - self._accumulated_blackbox_time self._solver_overhead = int(np.round(100.0 / (self.total_duration+1e-12) * tmp)) def print_timestats(self): print("\n") print("#" * 40) print("### Timing Statistics ###") print("#" * 40) print(" - per iteration: {}ms".format(int(self.time_per_iteration*1e4)/10000)) print(" - total time: {}d:{}h:{}m:{}s:{}ms".format(self._total_duration[0], self._total_duration[1], self._total_duration[2], self._total_duration[3], self._total_duration[4])) print("#" * 40) print(" - solver overhead: {}%".format(self.solver_overhead)) @property def project(self): return self._project @project.setter def project(self, value): if not isinstance(value, HyppopyProject): msg = "Input error, project_manager of type: {} not allowed!".format(type(value)) 
LOG.error(msg) raise IOError(msg) self._project = value @property def blackbox(self): return self._blackbox @blackbox.setter def blackbox(self, value): if isinstance(value, types.FunctionType) or isinstance(value, BlackboxFunction) or isinstance(value, VirtualFunction): self._blackbox = value else: self._blackbox = None msg = "Input error, blackbox of type: {} not allowed!".format(type(value)) LOG.error(msg) raise IOError(msg) @property def best(self): return self._best @best.setter def best(self, value): if not isinstance(value, dict): msg = "Input error, best of type: {} not allowed!".format(type(value)) LOG.error(msg) raise IOError(msg) self._best = value @property def trials(self): return self._trials @trials.setter def trials(self, value): self._trials = value @property def max_iterations(self): return self._max_iterations @max_iterations.setter def max_iterations(self, value): if not isinstance(value, int): msg = "Input error, max_iterations of type: {} not allowed!".format(type(value)) LOG.error(msg) raise IOError(msg) if value < 1: msg = "Precondition violation, max_iterations < 1!" 
LOG.error(msg) raise IOError(msg) self._max_iterations = value @property def total_duration(self): return (self._total_duration[0]*86400 + self._total_duration[1] * 3600 + self._total_duration[2] * 60 + self._total_duration[3]) * 1000 + self._total_duration[4] @property def solver_overhead(self): if self._solver_overhead is None: self.compute_time_statistics() return self._solver_overhead @property def time_per_iteration(self): if self._time_per_iteration is None: self.compute_time_statistics() return self._time_per_iteration @property def accumulated_blackbox_time(self): if self._accumulated_blackbox_time is None: self.compute_time_statistics() return self._accumulated_blackbox_time diff --git a/hyppopy/Solver/OptunaSolver.py b/hyppopy/Solver/OptunaSolver.py index d27f4b9..dcbcaf8 100644 --- a/hyppopy/Solver/OptunaSolver.py +++ b/hyppopy/Solver/OptunaSolver.py @@ -1,120 +1,82 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. 
# # Author: Sven Wanner (s.wanner@dkfz.de) import os -import copy import optuna import logging -import datetime import warnings import numpy as np from pprint import pformat -from hyperopt import Trials from hyppopy.globals import DEBUGLEVEL -from hyppopy.BlackboxFunction import BlackboxFunction from hyppopy.solver.HyppopySolver import HyppopySolver LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class OptunaSolver(HyppopySolver): def __init__(self, project=None): HyppopySolver.__init__(self, project) self._searchspace = None - self._idx = None def reformat_parameter(self, params): out_params = {} for name, value in params.items(): if self._searchspace[name]["domain"] == "categorical": out_params[name] = self._searchspace[name]["data"][int(np.round(value))] else: if self._searchspace[name]["type"] == "int": out_params[name] = int(np.round(value)) else: out_params[name] = value return out_params def trial_cache(self, trial): - self._idx += 1 - params = {} for name, param in self._searchspace.items(): if param["domain"] == "categorical": params[name] = trial.suggest_categorical(name, param["data"]) else: params[name] = trial.suggest_uniform(name, param["data"][0], param["data"][1]) return self.loss_function(**params) - def loss_function(self, **params): - vals = {} - idx = {} - for key, value in params.items(): - vals[key] = [value] - idx[key] = [self._idx] - trial = {'tid': self._idx, - 'result': {'loss': None, 'status': 'ok'}, - 'misc': { - 'tid': self._idx, - 'idxs': idx, - 'vals': vals - }, - 'book_time': datetime.datetime.now(), - 'refresh_time': None - } - try: - loss = self.blackbox(**params) - trial['result']['loss'] = loss - trial['result']['status'] = 'ok' - except Exception as e: - LOG.error("computing loss failed due to:\n {}".format(e)) - loss = np.nan - trial['result']['loss'] = np.nan - trial['result']['status'] = 'failed' - trial['refresh_time'] = datetime.datetime.now() - self._trials.trials.append(trial) - if 
isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None: - cbd = copy.deepcopy(params) - cbd['iterations'] = self._idx - cbd['loss'] = loss - cbd['status'] = trial['result']['status'] - self.blackbox.callback_func(**cbd) - return loss + def loss_function_call(self, trial, params): + for key in params.keys(): + if self.project.get_typeof(key) is int: + params[key] = int(round(params[key])) + return self.blackbox(**params) def execute_solver(self, searchspace): LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace))) self._searchspace = searchspace - self.trials = Trials() - self._idx = 0 try: study = optuna.create_study() study.optimize(self.trial_cache, n_trials=self.max_iterations) self.best = study.best_trial.params except Exception as e: LOG.error("internal error in bayes_opt maximize occured. {}".format(e)) raise BrokenPipeError("internal error in bayes_opt maximize occured. {}".format(e)) def convert_searchspace(self, hyperparameter): LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) for name, param in hyperparameter.items(): if param["domain"] != "categorical" and param["domain"] != "uniform": msg = "Warning: Optuna cannot handle {} domain. Only uniform and categorical domains are supported!".format(param["domain"]) warnings.warn(msg) LOG.warning(msg) return hyperparameter diff --git a/hyppopy/Solver/OptunitySolver.py b/hyppopy/Solver/OptunitySolver.py index 6e018d5..379ea7d 100644 --- a/hyppopy/Solver/OptunitySolver.py +++ b/hyppopy/Solver/OptunitySolver.py @@ -1,139 +1,98 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. 
# # Author: Sven Wanner (s.wanner@dkfz.de) import os -import copy import logging import optunity -import datetime import warnings -import numpy as np from pprint import pformat -from hyperopt import Trials from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) from hyppopy.solver.HyppopySolver import HyppopySolver -from hyppopy.BlackboxFunction import BlackboxFunction class OptunitySolver(HyppopySolver): def __init__(self, project=None): HyppopySolver.__init__(self, project) self._solver_info = None self.opt_trials = None - self._idx = None - def loss_function(self, **params): - self._idx += 1 - vals = {} - idx = {} - for key, value in params.items(): - vals[key] = [value] - idx[key] = [self._idx] - trial = {'tid': self._idx, - 'result': {'loss': None, 'status': 'ok'}, - 'misc': { - 'tid': self._idx, - 'idxs': idx, - 'vals': vals - }, - 'book_time': datetime.datetime.now(), - 'refresh_time': None - } - try: - for key in params.keys(): - if self.project.get_typeof(key) is int: - params[key] = int(round(params[key])) - loss = self.blackbox(**params) - trial['result']['loss'] = loss - trial['result']['status'] = 'ok' - except Exception as e: - LOG.error("computing loss failed due to:\n {}".format(e)) - loss = np.nan - trial['result']['loss'] = np.nan - trial['result']['status'] = 'failed' - trial['refresh_time'] = datetime.datetime.now() - self._trials.trials.append(trial) - if isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None: - cbd = copy.deepcopy(params) - cbd['iterations'] = self._idx - cbd['loss'] = loss - cbd['status'] = trial['result']['status'] - self.blackbox.callback_func(**cbd) - return loss + def loss_function_call(self, trial, params): + for key in params.keys(): + if self.project.get_typeof(key) is int: + params[key] = int(round(params[key])) + return self.blackbox(**params) def execute_solver(self, searchspace): LOG.debug("execute_solver using solution 
space:\n\n\t{}\n".format(pformat(searchspace))) - self.trials = Trials() - self._idx = 0 try: - self.best, self.opt_trials, self._solver_info = optunity.minimize_structured(f=self.loss_function, - num_evals=self.max_iterations, - search_space=searchspace) + self.best, _, _ = optunity.minimize_structured(f=self.loss_function, + num_evals=self.max_iterations, + search_space=searchspace) except Exception as e: LOG.error("internal error in optunity.minimize_structured occured. {}".format(e)) raise BrokenPipeError("internal error in optunity.minimize_structured occured. {}".format(e)) def split_categorical(self, pdict): categorical = {} uniform = {} for name, pset in pdict.items(): for key, value in pset.items(): if key == 'domain' and value == 'categorical': categorical[name] = pset elif key == 'domain': if value != 'uniform': msg = "Warning: Optunity cannot handle {} domain. Only uniform and categorical domains are supported!".format(value) warnings.warn(msg) LOG.warning(msg) uniform[name] = pset return categorical, uniform def convert_searchspace(self, hyperparameter): LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) solution_space = {} # split input in categorical and non-categorical data cat, uni = self.split_categorical(hyperparameter) # build up dictionary keeping all non-categorical data uniforms = {} for key, value in uni.items(): for key2, value2 in value.items(): if key2 == 'data': if len(value2) == 3: uniforms[key] = value2[0:2] elif len(value2) == 2: uniforms[key] = value2 else: raise AssertionError("precondition violation, optunity searchspace needs list with left and right range bounds!") if len(cat) == 0: return uniforms # build nested categorical structure inner_level = uniforms for key, value in cat.items(): tmp = {} tmp2 = {} for key2, value2 in value.items(): if key2 == 'data': for elem in value2: tmp[elem] = inner_level tmp2[key] = tmp inner_level = tmp2 solution_space = tmp2 return solution_space diff --git 
a/hyppopy/Solver/RandomsearchSolver.py b/hyppopy/Solver/RandomsearchSolver.py index 09c1a75..5cd3aed 100644 --- a/hyppopy/Solver/RandomsearchSolver.py +++ b/hyppopy/Solver/RandomsearchSolver.py @@ -1,198 +1,159 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os import copy import random import logging -import datetime import numpy as np from pprint import pformat -from hyperopt import Trials from hyppopy.globals import DEBUGLEVEL from hyppopy.solver.HyppopySolver import HyppopySolver -from hyppopy.BlackboxFunction import BlackboxFunction LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) def draw_uniform_sample(param): """ function draws a random sample from a uniform range :param param: [dict] input hyperparameter discription :return: random sample value of type data['type'] """ assert param['type'] != 'str', "cannot sample a string list!" assert param['data'][0] < param['data'][1], "precondition violation: data[0] > data[1]!" s = random.random() s *= np.abs(param['data'][1] - param['data'][0]) s += param['data'][0] if param['type'] == 'int': s = int(np.round(s)) if s < param['data'][0]: s = int(param['data'][0]) if s > param['data'][1]: s = int(param['data'][1]) return s def draw_normal_sample(param): """ function draws a random sample from a normal distributed range :param param: [dict] input hyperparameter discription :return: random sample value of type data['type'] """ assert param['type'] != 'str', "cannot sample a string list!" assert param['data'][0] < param['data'][1], "precondition violation: data[0] > data[1]!" 
mu = (param['data'][1] - param['data'][0]) / 2 sigma = mu / 3 s = np.random.normal(loc=param['data'][0] + mu, scale=sigma) if s > param['data'][1]: s = param['data'][1] if s < param['data'][0]: s = param['data'][0] s = float(s) if param["type"] == "int": s = int(np.round(s)) return s def draw_loguniform_sample(param): """ function draws a random sample from a logarithmic distributed range :param param: [dict] input hyperparameter discription :return: random sample value of type data['type'] """ assert param['type'] != 'str', "cannot sample a string list!" assert param['data'][0] < param['data'][1], "precondition violation: data[0] > data[1]!" p = copy.deepcopy(param) p['data'][0] = np.log(param['data'][0]) p['data'][1] = np.log(param['data'][1]) assert p['data'][0] is not np.nan, "Precondition violation, left bound input error, results in nan!" assert p['data'][1] is not np.nan, "Precondition violation, right bound input error, results in nan!" x = draw_uniform_sample(p) s = np.exp(x) if s > param['data'][1]: s = param['data'][1] if s < param['data'][0]: s = param['data'][0] return s def draw_categorical_sample(param): """ function draws a random sample from a categorical list :param param: [dict] input hyperparameter discription :return: random sample value of type data['type'] """ return random.sample(param['data'], 1)[0] def draw_sample(param): """ function draws a sample from the input hyperparameter descriptor depending on it's domain :param param: [dict] input hyperparameter discription :return: random sample value of type data['type'] """ assert isinstance(param, dict), "input error, hyperparam descriptors of type {} not allowed!".format(type(param)) assert 'domain' in param.keys(), "input error, hyperparam descriptors need a domain key!" assert 'data' in param.keys(), "input error, hyperparam descriptors need a data key!" assert 'type' in param.keys(), "input error, hyperparam descriptors need a type key!" 
if param['domain'] == "uniform": return draw_uniform_sample(param) elif param['domain'] == "normal": return draw_normal_sample(param) elif param['domain'] == "loguniform": return draw_loguniform_sample(param) elif param['domain'] == "categorical": return draw_categorical_sample(param) else: raise LookupError("Unknown domain {}".format(param['domain'])) class RandomsearchSolver(HyppopySolver): """ The RandomsearchSolver class implements a randomsearch optimization. The randomsearch supports categorical, uniform, normal and loguniform sampling. The solver draws an independent sample from the parameter space each iteration.""" def __init__(self, project=None): HyppopySolver.__init__(self, project) - self._tid = None - - def loss_function(self, params): - loss = None - vals = {} - idx = {} - for key, value in params.items(): - vals[key] = [value] - idx[key] = [self._tid] - trial = {'tid': self._tid, - 'result': {'loss': None, 'status': 'ok'}, - 'misc': { - 'tid': self._tid, - 'idxs': idx, - 'vals': vals - }, - 'book_time': datetime.datetime.now(), - 'refresh_time': None - } - try: - loss = self.blackbox(**params) - if loss is None: - trial['result']['loss'] = np.nan - trial['result']['status'] = 'failed' - else: - trial['result']['loss'] = loss - except Exception as e: - LOG.error("execution of self.blackbox(**params) failed due to:\n {}".format(e)) - trial['result']['loss'] = np.nan - trial['result']['status'] = 'failed' - trial['refresh_time'] = datetime.datetime.now() - self._trials.trials.append(trial) - if isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None: - cbd = copy.deepcopy(params) - cbd['iterations'] = self._tid + 1 - cbd['loss'] = loss - cbd['status'] = trial['result']['status'] - self.blackbox.callback_func(**cbd) - return + + def loss_function_call(self, trial, params): + loss = self.blackbox(**params) + if loss is None: + return np.nan + return loss def execute_solver(self, searchspace): - self._tid = 0 - self._trials 
= Trials() N = self.max_iterations try: for n in range(N): params = {} for name, p in searchspace.items(): params[name] = draw_sample(p) - self.loss_function(params) - self._tid += 1 + self.loss_function(**params) except Exception as e: msg = "internal error in randomsearch execute_solver occured. {}".format(e) LOG.error(msg) raise BrokenPipeError(msg) self.best = self._trials.argmin def convert_searchspace(self, hyperparameter): """ this function simply pipes the input parameter through, the sample drawing functions are responsible for interpreting the parameter. :param hyperparameter: [dict] hyperparameter space :return: [dict] hyperparameter space """ LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) return hyperparameter