diff --git a/examples/solver_comparison.py b/examples/solver_comparison.py index 4085951..0fe92f6 100644 --- a/examples/solver_comparison.py +++ b/examples/solver_comparison.py @@ -1,136 +1,181 @@ import os import pickle import numpy as np from math import pi -import pandas as pd +from pprint import pprint import matplotlib.pyplot as plt from hyppopy.SolverPool import SolverPool from hyppopy.HyppopyProject import HyppopyProject from hyppopy.VirtualFunction import VirtualFunction from hyppopy.BlackboxFunction import BlackboxFunction -def make_spider(results, row, title, groundtruth): - categories = ["axis_00", "axis_01", "axis_02", "axis_03", "axis_04"] +OUTPUTDIR = "C:\\Users\\s635r\\Desktop\\solver_comparison" +SOLVER = ["hyperopt", "optunity", "randomsearch", "optuna"]#, "bayesopt"] +ITERATIONS = [25, 100, 250, 500] +STATREPEATS = 10 +VFUNC = "5D3" +OVERWRITE = False + +OUTPUTDIR = os.path.join(OUTPUTDIR, VFUNC) +if not os.path.isdir(OUTPUTDIR): + os.makedirs(OUTPUTDIR) + + +def compute_deviation(solver_name, vfunc_id, iterations, N, fname): + project = HyppopyProject() + project.add_hyperparameter(name="axis_00", domain="uniform", data=[0, 1], dtype="float") + project.add_hyperparameter(name="axis_01", domain="uniform", data=[0, 1], dtype="float") + project.add_hyperparameter(name="axis_02", domain="uniform", data=[0, 1], dtype="float") + project.add_hyperparameter(name="axis_03", domain="uniform", data=[0, 1], dtype="float") + project.add_hyperparameter(name="axis_04", domain="uniform", data=[0, 1], dtype="float") + + vfunc = VirtualFunction() + vfunc.load_default(vfunc_id) + minima = vfunc.minima() + + def my_loss_function(data, params): + return vfunc(**params) + + blackbox = BlackboxFunction(data=[], blackbox_func=my_loss_function) + + results = {} + results["gt"] = [] + for mini in minima: + results["gt"].append(np.median(mini[0])) + + for iter in iterations: + results[iter] = {"minima": {}, "loss": None} + for i in range(vfunc.dims()): + 
results[iter]["minima"]["axis_0{}".format(i)] = [] + + project.add_settings(section="solver", name="max_iterations", value=iter) + project.add_settings(section="custom", name="use_solver", value=solver_name) + + solver = SolverPool.get(project=project) + solver.blackbox = blackbox + + axis_minima = [] + best_losses = [] + for i in range(vfunc.dims()): + axis_minima.append([]) + for n in range(N): + print("\rSolver={} iteration={} round={}".format(solver, iter, n), end="") + + solver.run(print_stats=False) + df, best = solver.get_results() + best_row = df['losses'].idxmin() + best_losses.append(df['losses'][best_row]) + for i in range(vfunc.dims()): + tmp = df['axis_0{}'.format(i)][best_row] + axis_minima[i].append(tmp) + for i in range(vfunc.dims()): + results[iter]["minima"]["axis_0{}".format(i)] = [np.mean(axis_minima[i]), np.std(axis_minima[i])] + results[iter]["loss"] = [np.mean(best_losses), np.std(best_losses)] + + file = open(fname, 'wb') + pickle.dump(results, file) + file.close() + + +def make_radarplot(results, title, fname=None): + gt = results.pop("gt") + categories = list(results[list(results.keys())[0]]["minima"].keys()) N = len(categories) angles = [n / float(N) * 2 * pi for n in range(N)] angles += angles[:1] - ax = plt.subplot(2, 2, row+1, polar=True, ) + ax = plt.subplot(1, 1, 1, polar=True, ) ax.set_theta_offset(pi / 2) ax.set_theta_direction(-1) plt.xticks(angles[:-1], categories, color='grey', size=8) ax.set_rlabel_position(0) plt.yticks([0.2, 0.4, 0.6, 0.8, 1.0], ["0.2", "0.4", "0.6", "0.8", "1.0"], color="grey", size=7) plt.ylim(0, 1) - gt = [] - for i in range(5): - gt.append(groundtruth[i]) gt += gt[:1] ax.fill(angles, gt, color=(0.2, 0.8, 0.2), alpha=0.2) - colors = [(0.8, 0.8, 0.0, 0.8), (0.7, 0.2, 0.2, 0.8), (0.2, 0.2, 0.7, 0.8)] - for iter, data in results["iteration"].items(): + colors = [] + cm = plt.get_cmap('Set1') + if len(results) > 2: + indices = list(range(0, len(results) + 1)) + indices.pop(2) + else: + indices = list(range(0, 
len(results))) + for i in range(len(results)): + colors.append(cm(indices[i])) + + for iter, data in results.items(): values = [] - for i in range(5): - values.append(data["axis_0{}".format(i)][row]) + for i in range(len(categories)): + values.append(data["minima"]["axis_0{}".format(i)][0]) values += values[:1] - ax.plot(angles, values, color=colors.pop(0), linewidth=2, linestyle='solid', label="iterations {}".format(iter)) + color = colors.pop(0) + ax.plot(angles, values, color=color, linewidth=2, linestyle='solid', label="iterations {}".format(iter)) - ax.plot(angles, gt, color=(0.2, 0.8, 0.2, 0.8), linewidth=2, linestyle='solid', label="groundtruth") plt.title(title, size=11, color=(0.1, 0.1, 0.1), y=1.1) - plt.legend(bbox_to_anchor=(0.2, 1.2)) - - - + plt.legend(bbox_to_anchor=(0.08, 1.12)) + if fname is None: + plt.show() + else: + plt.savefig(fname + ".png") + plt.savefig(fname + ".svg") + plt.clf() -for vfunc_id in ["5D3"]: - OUTPUTDIR = "C:\\Users\\s635r\\Desktop\\solver_comparison" - EXPERIMENT = {"iterations": [50, 150, 300], - "solver": ["randomsearch", "hyperopt", "optunity", "bayesopt"], - "repeat": 1, - "output_dir": os.path.join(OUTPUTDIR, vfunc_id)} - if not os.path.isdir(EXPERIMENT["output_dir"]): - os.makedirs(EXPERIMENT["output_dir"]) +def make_deviationerrorplot(fnames): + results = {} + for fname in fnames: + file = open(fname, 'rb') + result = pickle.load(file) + file.close() + results[os.path.basename(fname)] = result + pprint(results) + + plt.figure() + for iter in results["hyperopt"].keys(): + y = [] + if iter == "gt": + x = list(range(len(results["hyperopt"][iter]))) + for i in range(len(results["hyperopt"][iter])): + y.append(results["hyperopt"][iter][i]) + plt.plot(x, y, "--g", label="groundtruth: {}".format(iter)) + continue + + x = list(range(len(results["hyperopt"][iter]["minima"]))) + for i in range(len(results["hyperopt"][iter]["minima"])): + y.append(results["hyperopt"][iter]["minima"]["axis_0{}".format(i)][0]) + plt.plot(x, y, 
label="iterations: {}".format(iter)) + plt.title("") + plt.legend() + plt.show() - project = HyppopyProject() - project.add_hyperparameter(name="axis_00", domain="uniform", data=[0, 1], dtype="float") - project.add_hyperparameter(name="axis_01", domain="uniform", data=[0, 1], dtype="float") - project.add_hyperparameter(name="axis_02", domain="uniform", data=[0, 1], dtype="float") - project.add_hyperparameter(name="axis_03", domain="uniform", data=[0, 1], dtype="float") - project.add_hyperparameter(name="axis_04", domain="uniform", data=[0, 1], dtype="float") - project.add_settings(section="solver", name="max_iterations", value=100) - project.add_settings(section="custom", name="use_solver", value="randomsearch") - if os.path.isfile(os.path.join(EXPERIMENT["output_dir"], "results")): - file = open(os.path.join(EXPERIMENT["output_dir"], "results"), 'rb') - results = pickle.load(file) - file.close() - else: - vfunc = VirtualFunction() - vfunc.load_default(vfunc_id) - # for i in range(5): - # vfunc.plot(i) - - - def my_loss_function(data, params): - return vfunc(**params) - - - results = {"group": EXPERIMENT["solver"], - "groundtruth": [], - 'iteration': {}} - - minima = vfunc.minima() - for mini in minima: - results["groundtruth"].append(np.median(mini[0])) - - - for iter in EXPERIMENT["iterations"]: - results["iteration"][iter] = {"axis_00": [], - "axis_01": [], - "axis_02": [], - "axis_03": [], - "axis_04": []} - for solver_name in EXPERIMENT["solver"]: - axis_minima = [0, 0, 0, 0, 0] - for n in range(EXPERIMENT["repeat"]): - print("\rSolver={} iteration={} round={}".format(solver_name, iter, n), end="") - project.add_settings(section="solver", name="max_iterations", value=iter) - project.add_settings(section="custom", name="use_solver", value=solver_name) - - blackbox = BlackboxFunction(data=[], blackbox_func=my_loss_function) - - solver = SolverPool.get(project=project) - solver.blackbox = blackbox - solver.run(print_stats=False) - df, best = solver.get_results() 
- - best_row = df['losses'].idxmin() - best_loss = df['losses'][best_row] - for i in range(5): - axis_minima[i] += df['axis_0{}'.format(i)][best_row]/EXPERIMENT["repeat"] - for i in range(5): - results["iteration"][iter]["axis_0{}".format(i)].append(axis_minima[i]) - print("") - print("\n\n") - - file = open(os.path.join(EXPERIMENT["output_dir"], "results"), 'wb') - pickle.dump(results, file) - file.close() - my_dpi = 96 - plt.figure(figsize=(1100/my_dpi, 1100/my_dpi), dpi=my_dpi) - for row in range(3): - make_spider(results, row=row, title=results['group'][row], groundtruth=results["groundtruth"]) - plt.show() - plt.savefig(os.path.join(EXPERIMENT["output_dir"], "radar_plots.svg")) - plt.savefig(os.path.join(EXPERIMENT["output_dir"], "radar_plots.png")) +################################################## +############### create datasets ################## +fnames = [] +for solver_name in SOLVER: + fname = os.path.join(OUTPUTDIR, solver_name) + fnames.append(fname) + if OVERWRITE or not os.path.isfile(fname): + compute_deviation(solver_name, VFUNC, ITERATIONS, N=STATREPEATS, fname=fname) +################################################## +################################################## + +################################################## +############## create radarplots ################# +for solver_name, fname in zip(SOLVER, fnames): + file = open(fname, 'rb') + results = pickle.load(file) + file.close() + make_radarplot(results, solver_name, fname + "_deviation") +################################################## +################################################## diff --git a/examples/tutorial_multisolver.py b/examples/tutorial_multisolver.py index 69acd57..dc1d666 100644 --- a/examples/tutorial_multisolver.py +++ b/examples/tutorial_multisolver.py @@ -1,180 +1,180 @@ # In this tutorial we solve an optimization problem using the Hyperopt Solver (http://hyperopt.github.io/hyperopt/). 
# Hyperopt uses a Bayesian - Tree Parzen Estimator - Optimization approach, which means that each iteration computes a # new function value of the blackbox, interpolates a guess for the whole energy function and predicts a point to # compute the next function value at. This next point is not necessarily a "better" value, it's only the value with # the highest uncertainty for the function interpolation. # # See a visual explanation e.g. here (http://philipperemy.github.io/visualization/) # import the HyppopyProject class keeping track of inputs from hyppopy.HyppopyProject import HyppopyProject # import the SolverPool singleton class from hyppopy.SolverPool import SolverPool # import the Blackboxfunction class wrapping your problem for Hyppopy from hyppopy.BlackboxFunction import BlackboxFunction # Next step is defining the problem space and all settings Hyppopy needs to optimize your problem. # The config is a simple nested dictionary with two obligatory main sections, hyperparameter and settings. # The hyperparameter section defines your searchspace. Each hyperparameter is again a dictionary with: # # - a domain ['categorical', 'uniform', 'normal', 'loguniform'] # - the domain data [left bound, right bound] and # - a type of your domain ['str', 'int', 'float'] # # The settings section has two subcategories, solver and custom. The first contains settings for the solver, # here 'max_iterations' - is the maximum number of iterations. # # The custom section allows defining custom parameters. An entry here is transformed to a member variable of the # HyppopyProject class. These can be useful when implementing new solver classes or for controlling your hyppopy script. # Here we use it as a solver switch to control the usage of our solver via the config. This means with the script # below you can try out every solver by changing use_solver to 'optunity', 'randomsearch', 'gridsearch',... 
# It can be used like so: project.custom_use_solver (see below). If using the gridsearch solver, max_iterations is # ignored, instead each hyperparameter must specify a number of samples in addition to the range like so: # 'data': [0, 1, 100] which means sampling the space from 0 to 1 in 100 intervals. config = { "hyperparameter": { "C": { "domain": "uniform", "data": [0.0001, 20], "type": "float" }, "gamma": { - "domain": "uniform", + "domain": "normal", "data": [0.0001, 20.0], "type": "float" }, "kernel": { "domain": "categorical", "data": ["linear", "sigmoid", "poly", "rbf"], "type": "str" }, "decision_function_shape": { "domain": "categorical", "data": ["ovo", "ovr"], "type": "str" } }, "settings": { "solver": { "max_iterations": 500 }, "custom": { - "use_solver": "optuna" + "use_solver": "hyperopt" } }} # When creating a HyppopyProject instance we # pass the config dictionary to the constructor. project = HyppopyProject(config=config) # demonstration of the custom parameter access print("-"*30) print("max_iterations:\t{}".format(project.solver_max_iterations)) print("solver chosen -> {}".format(project.custom_use_solver)) print("-"*30) # Hyppopy offers a class called BlackboxFunction to wrap your problem for Hyppopy. # The function signature is as follows: # BlackboxFunction(blackbox_func=None, # dataloader_func=None, # preprocess_func=None, # callback_func=None, # data=None, # **kwargs) # # This means we can set a couple of function pointers, a data object and an arbitrary number of custom parameters via kwargs. 
# # - blackbox_func: a function pointer to the actual, user defined, blackbox function that is computing our loss # - dataloader_func: a function pointer to a function handling the dataloading # - preprocess_func: a function pointer to a function automatically executed before starting the optimization process # - callback_func: a function pointer to a function that is called after each iteration with the trial object as input # - data: setting data can be done via dataloader_func or directly # - kwargs are passed to all functions above and thus can be used for parameter sharing between the functions # # (for more details see the documentation) # # Below we demonstrate the usage of all the above by defining a my_dataloader_function which in fact only grabs the # iris dataset from sklearn and returns it. The my_preprocess_function also does nothing useful here except # demonstrating that a custom parameter can be set via kwargs and used in all of our functions when called within # Hyppopy. The my_callback_function gets as input the dictionary containing the state of the iteration and thus can be # used to access the current state of each solver iteration. Finally we define the actual loss_function # my_loss_function, which gets as input a data object and params. Both parameters are fixed, the first is defined by # the user depending on what the dataloader returns or the data object set in the constructor, the second is a dictionary # with a sample of your hyperparameter space whose content is chosen by the solver. from sklearn.svm import SVC from sklearn.datasets import load_iris from sklearn.model_selection import cross_val_score def my_dataloader_function(**kwargs): print("Dataloading...") # kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input. 
print("my loading argument: {}".format(kwargs['params']['my_dataloader_input'])) iris_data = load_iris() return [iris_data.data, iris_data.target] def my_preprocess_function(**kwargs): print("Preprocessing...") # kwargs['data'] allows accessing the input data print("data:", kwargs['data'][0].shape, kwargs['data'][1].shape) # kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input. print("kwargs['params']['my_preproc_param']={}".format(kwargs['params']['my_preproc_param']), "\n") # if the preprocessing function returns something, # the input data will be replaced with the data returned by this function. x = kwargs['data'][0] y = kwargs['data'][1] for i in range(x.shape[0]): x[i, :] += kwargs['params']['my_preproc_param'] return [x, y] def my_callback_function(**kwargs): print("\r{}".format(kwargs), end="") def my_loss_function(data, params): clf = SVC(**params) return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean() # We now create the BlackboxFunction object and pass all function pointers defined above, # as well as 2 dummy parameter (my_preproc_param, my_dataloader_input) for demonstration purposes. blackbox = BlackboxFunction(blackbox_func=my_loss_function, dataloader_func=my_dataloader_function, preprocess_func=my_preprocess_function, callback_func=my_callback_function, my_preproc_param=1, my_dataloader_input='could/be/a/path') # Last step, is we use our SolverPool which automatically returns the correct solver. # There are multiple ways to get the desired solver from the solver pool. # 1. solver = SolverPool.get('hyperopt') # solver.project = project # 2. solver = SolverPool.get('hyperopt', project) # 3. The SolverPool will look for the field 'use_solver' in the project instance, if # it is present it will be used to specify the solver so that in this case it is enough # to pass the project instance. solver = SolverPool.get(project=project) # Give the solver your blackbox and run it. 
After execution we can get the result # via get_result() which returns a pandas dataframe containing the complete history # The dict best contains the best parameter set. solver.blackbox = blackbox solver.run() df, best = solver.get_results() print("\n") print("*"*100) print("Best Parameter Set:\n{}".format(best)) print("*"*100) diff --git a/hyppopy/Solver/BayesOptSolver.py b/hyppopy/Solver/BayesOptSolver.py index 1c03850..b0c8173 100644 --- a/hyppopy/Solver/BayesOptSolver.py +++ b/hyppopy/Solver/BayesOptSolver.py @@ -1,111 +1,117 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os import copy import logging import datetime +import warnings import numpy as np from pprint import pformat from hyperopt import Trials from bayes_opt import BayesianOptimization from hyppopy.globals import DEBUGLEVEL from hyppopy.BlackboxFunction import BlackboxFunction from hyppopy.solver.HyppopySolver import HyppopySolver LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class BayesOptSolver(HyppopySolver): def __init__(self, project=None): HyppopySolver.__init__(self, project) self._searchspace = None self._idx = None def reformat_parameter(self, params): out_params = {} for name, value in params.items(): if self._searchspace[name]["domain"] == "categorical": out_params[name] = self._searchspace[name]["data"][int(np.round(value))] else: if self._searchspace[name]["type"] == "int": out_params[name] = int(np.round(value)) else: out_params[name] = value return out_params def loss_function(self, **params): self._idx += 1 params = self.reformat_parameter(params) vals = {} idx = {} for key, value in params.items(): vals[key] = 
[value] idx[key] = [self._idx] trial = {'tid': self._idx, 'result': {'loss': None, 'status': 'ok'}, 'misc': { 'tid': self._idx, 'idxs': idx, 'vals': vals }, 'book_time': datetime.datetime.now(), 'refresh_time': None } try: loss = self.blackbox(**params) trial['result']['loss'] = loss trial['result']['status'] = 'ok' except Exception as e: LOG.error("computing loss failed due to:\n {}".format(e)) loss = np.nan trial['result']['loss'] = np.nan trial['result']['status'] = 'failed' trial['refresh_time'] = datetime.datetime.now() self._trials.trials.append(trial) if isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None: cbd = copy.deepcopy(params) cbd['iterations'] = self._idx cbd['loss'] = loss cbd['status'] = trial['result']['status'] self.blackbox.callback_func(**cbd) return loss def execute_solver(self, searchspace): LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace))) self.trials = Trials() self._idx = 0 try: optimizer = BayesianOptimization(f=self.loss_function, pbounds=searchspace, verbose=0) optimizer.maximize(init_points=2, n_iter=self.max_iterations) self.best = self.reformat_parameter(optimizer.max["params"]) except Exception as e: LOG.error("internal error in bayes_opt maximize occured. {}".format(e)) raise BrokenPipeError("internal error in bayes_opt maximize occured. {}".format(e)) def convert_searchspace(self, hyperparameter): LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) self._searchspace = hyperparameter pbounds = {} for name, param in hyperparameter.items(): if param["domain"] != "categorical": + if param["domain"] != "uniform": + msg = "Warning: BayesOpt cannot handle {} domain. 
Only uniform and categorical domains are supported!".format( + param["domain"]) + warnings.warn(msg) + LOG.warning(msg) pbounds[name] = (param["data"][0], param["data"][1]) else: pbounds[name] = (0, len(param["data"])-1) return pbounds diff --git a/hyppopy/Solver/HyperoptSolver.py b/hyppopy/Solver/HyperoptSolver.py index 76df29c..84f4403 100644 --- a/hyppopy/Solver/HyperoptSolver.py +++ b/hyppopy/Solver/HyperoptSolver.py @@ -1,142 +1,150 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os import copy import logging import numpy as np from pprint import pformat from hyperopt import fmin, tpe, hp, STATUS_OK, STATUS_FAIL, Trials from hyppopy.globals import DEBUGLEVEL from hyppopy.solver.HyppopySolver import HyppopySolver from hyppopy.BlackboxFunction import BlackboxFunction LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class HyperoptSolver(HyppopySolver): def __init__(self, project=None): HyppopySolver.__init__(self, project) def loss_function(self, params): status = STATUS_FAIL try: loss = self.blackbox(**params) if loss is not None: status = STATUS_OK else: loss = 1e9 except Exception as e: LOG.error("execution of self.blackbox(**params) failed due to:\n {}".format(e)) status = STATUS_FAIL loss = 1e9 if isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None: cbd = copy.deepcopy(params) cbd['iterations'] = self._trials.trials[-1]['tid'] + 1 cbd['loss'] = loss cbd['status'] = status self.blackbox.callback_func(**cbd) return {'loss': loss, 'status': status} def execute_solver(self, searchspace): LOG.debug("execute_solver using solution 
space:\n\n\t{}\n".format(pformat(searchspace))) self.trials = Trials() try: self.best = fmin(fn=self.loss_function, space=searchspace, algo=tpe.suggest, max_evals=self.max_iterations, trials=self.trials) except Exception as e: msg = "internal error in hyperopt.fmin occured. {}".format(e) LOG.error(msg) raise BrokenPipeError(msg) def convert_searchspace(self, hyperparameter): - solution_space = {} for name, content in hyperparameter.items(): param_settings = {'name': name} for key, value in content.items(): if key == 'domain': param_settings['domain'] = value elif key == 'data': param_settings['data'] = value elif key == 'type': param_settings['dtype'] = value solution_space[name] = self.convert(param_settings) return solution_space def convert(self, param_settings): name = param_settings["name"] domain = param_settings["domain"] dtype = param_settings["dtype"] data = param_settings["data"] + assert isinstance(data, list), "precondition violation. data of type {} not allowed!".format(type(data)) + assert len(data) >= 2, "precondition violation, data must be of length 2, [left_bound, right_bound]" + assert isinstance(domain, str), "precondition violation. domain of type {} not allowed!".format(type(domain)) + assert isinstance(dtype, str), "precondition violation. dtype of type {} not allowed!".format(type(dtype)) + if domain == "uniform": if dtype == "float" or dtype == "double": return hp.uniform(name, data[0], data[1]) elif dtype == "int": data = list(np.arange(int(data[0]), int(data[1] + 1))) return hp.choice(name, data) else: msg = "cannot convert the type {} in domain {}".format(dtype, domain) LOG.error(msg) raise LookupError(msg) elif domain == "loguniform": if dtype == "float" or dtype == "double": if data[0] == 0: data[0] += 1e-23 - assert data[0] > 0, "Precondition Violation, a < 0!" - assert data[0] < data[1], "Precondition Violation, a > b!" - assert data[1] > 0, "Precondition Violation, b < 0!" + assert data[0] > 0, "precondition Violation, a < 0!" 
+ assert data[0] < data[1], "precondition Violation, a > b!" + assert data[1] > 0, "precondition Violation, b < 0!" lexp = np.log(data[0]) rexp = np.log(data[1]) - assert lexp is not np.nan, "Precondition violation, left bound input error, results in nan!" - assert rexp is not np.nan, "Precondition violation, right bound input error, results in nan!" + assert lexp is not np.nan, "precondition violation, left bound input error, results in nan!" + assert rexp is not np.nan, "precondition violation, right bound input error, results in nan!" return hp.loguniform(name, lexp, rexp) else: msg = "cannot convert the type {} in domain {}".format(dtype, domain) LOG.error(msg) raise LookupError(msg) elif domain == "normal": if dtype == "float" or dtype == "double": mu = (data[1] - data[0]) / 2.0 sigma = mu / 3 return hp.normal(name, data[0] + mu, sigma) else: msg = "cannot convert the type {} in domain {}".format(dtype, domain) LOG.error(msg) raise LookupError(msg) elif domain == "categorical": if dtype == 'str': return hp.choice(name, data) elif dtype == 'bool': data = [] for elem in data: if elem == "true" or elem == "True" or elem == 1 or elem == "1": data.append(True) elif elem == "false" or elem == "False" or elem == 0 or elem == "0": data.append(False) else: msg = "cannot convert the type {} in domain {}, unknown bool type value".format(dtype, domain) LOG.error(msg) raise LookupError(msg) return hp.choice(name, data) + else: + msg = "Precondition violation, domain named {} not available!".format(domain) + LOG.error(msg) + raise IOError(msg) diff --git a/hyppopy/Solver/HyppopySolver.py b/hyppopy/Solver/HyppopySolver.py index 0d8b1e0..ec4459f 100644 --- a/hyppopy/Solver/HyppopySolver.py +++ b/hyppopy/Solver/HyppopySolver.py @@ -1,223 +1,231 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. 
# # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import abc import os import types import logging import datetime import numpy as np import pandas as pd -from ..globals import DEBUGLEVEL -from ..HyppopyProject import HyppopyProject -from ..BlackboxFunction import BlackboxFunction -from ..VirtualFunction import VirtualFunction +from hyppopy.globals import DEBUGLEVEL +from hyppopy.HyppopyProject import HyppopyProject +from hyppopy.BlackboxFunction import BlackboxFunction +from hyppopy.VirtualFunction import VirtualFunction from hyppopy.globals import DEBUGLEVEL, DEFAULTITERATIONS LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class HyppopySolver(object): def __init__(self, project=None): self._best = None self._trials = None self._blackbox = None self._max_iterations = None self._project = project self._total_duration = None self._solver_overhead = None self._time_per_iteration = None self._accumulated_blackbox_time = None self._has_maxiteration_field = True @abc.abstractmethod def execute_solver(self, searchspace): raise NotImplementedError('users must define execute_solver to use this class') @abc.abstractmethod def convert_searchspace(self, hyperparameter): raise NotImplementedError('users must define convert_searchspace to use this class') def run(self, print_stats=True): if self._has_maxiteration_field: if 'solver_max_iterations' not in self.project.__dict__: msg = "Missing max_iteration entry in project, use default {}!".format(DEFAULTITERATIONS) LOG.warning(msg) print("WARNING: {}".format(msg)) setattr(self.project, 'solver_max_iterations', DEFAULTITERATIONS) self._max_iterations = self.project.solver_max_iterations start_time = datetime.datetime.now() try: - self.execute_solver(self.convert_searchspace(self.project.hyperparameter)) + 
search_space = self.convert_searchspace(self.project.hyperparameter) except Exception as e: - raise e + msg = "Failed to convert searchspace, error: {}".format(e) + LOG.error(msg) + raise AssertionError(msg) + try: + self.execute_solver(search_space) + except Exception as e: + msg = "Failed to execute solver, error: {}".format(e) + LOG.error(msg) + raise AssertionError(msg) end_time = datetime.datetime.now() dt = end_time - start_time days = divmod(dt.total_seconds(), 86400) hours = divmod(days[1], 3600) minutes = divmod(hours[1], 60) seconds = divmod(minutes[1], 1) milliseconds = divmod(seconds[1], 0.001) self._total_duration = [int(days[0]), int(hours[0]), int(minutes[0]), int(seconds[0]), int(milliseconds[0])] if print_stats: self.print_best() self.print_timestats() def get_results(self): results = {'duration': [], 'losses': []} pset = self.trials.trials[0]['misc']['vals'] for p in pset.keys(): results[p] = [] for n, trial in enumerate(self.trials.trials): t1 = trial['book_time'] t2 = trial['refresh_time'] results['duration'].append((t2 - t1).microseconds / 1000.0) results['losses'].append(trial['result']['loss']) losses = np.array(results['losses']) results['losses'] = list(losses) pset = trial['misc']['vals'] for p in pset.items(): results[p[0]].append(p[1][0]) return pd.DataFrame.from_dict(results), self.best def print_best(self): print("\n") print("#" * 40) print("### Best Parameter Choice ###") print("#" * 40) for name, value in self.best.items(): print(" - {}\t:\t{}".format(name, value)) print("\n - number of iterations\t:\t{}".format(self.trials.trials[-1]['tid']+1)) print(" - total time\t:\t{}d:{}h:{}m:{}s:{}ms".format(self._total_duration[0], self._total_duration[1], self._total_duration[2], self._total_duration[3], self._total_duration[4])) print("#" * 40) def compute_time_statistics(self): dts = [] for trial in self._trials.trials: if 'book_time' in trial.keys() and 'refresh_time' in trial.keys(): dt = trial['refresh_time'] - trial['book_time'] 
dts.append(dt.total_seconds()) self._time_per_iteration = np.mean(dts) * 1e3 self._accumulated_blackbox_time = np.sum(dts) * 1e3 tmp = self.total_duration - self._accumulated_blackbox_time self._solver_overhead = int(np.round(100.0 / (self.total_duration+1e-12) * tmp)) def print_timestats(self): print("\n") print("#" * 40) print("### Timing Statistics ###") print("#" * 40) print(" - per iteration: {}ms".format(int(self.time_per_iteration*1e4)/10000)) print(" - total time: {}d:{}h:{}m:{}s:{}ms".format(self._total_duration[0], self._total_duration[1], self._total_duration[2], self._total_duration[3], self._total_duration[4])) - print(" - solver overhead: {}%".format(self.solver_overhead)) print("#" * 40) + print(" - solver overhead: {}%".format(self.solver_overhead)) @property def project(self): return self._project @project.setter def project(self, value): if not isinstance(value, HyppopyProject): msg = "Input error, project_manager of type: {} not allowed!".format(type(value)) LOG.error(msg) raise IOError(msg) self._project = value @property def blackbox(self): return self._blackbox @blackbox.setter def blackbox(self, value): if isinstance(value, types.FunctionType) or isinstance(value, BlackboxFunction) or isinstance(value, VirtualFunction): self._blackbox = value else: self._blackbox = None msg = "Input error, blackbox of type: {} not allowed!".format(type(value)) LOG.error(msg) raise IOError(msg) @property def best(self): return self._best @best.setter def best(self, value): if not isinstance(value, dict): msg = "Input error, best of type: {} not allowed!".format(type(value)) LOG.error(msg) raise IOError(msg) self._best = value @property def trials(self): return self._trials @trials.setter def trials(self, value): self._trials = value @property def max_iterations(self): return self._max_iterations @max_iterations.setter def max_iterations(self, value): if not isinstance(value, int): msg = "Input error, max_iterations of type: {} not allowed!".format(type(value)) 
LOG.error(msg) raise IOError(msg) if value < 1: msg = "Precondition violation, max_iterations < 1!" LOG.error(msg) raise IOError(msg) self._max_iterations = value @property def total_duration(self): - return (self._total_duration[0] * 86400 + self._total_duration[1] * 3600 + self._total_duration[2] * 60 + self._total_duration[3]) * 1000 + self._total_duration[4] + return (self._total_duration[0]*86400 + self._total_duration[1] * 3600 + self._total_duration[2] * 60 + self._total_duration[3]) * 1000 + self._total_duration[4] @property def solver_overhead(self): if self._solver_overhead is None: self.compute_time_statistics() return self._solver_overhead @property def time_per_iteration(self): if self._time_per_iteration is None: self.compute_time_statistics() return self._time_per_iteration @property def accumulated_blackbox_time(self): if self._accumulated_blackbox_time is None: self.compute_time_statistics() return self._accumulated_blackbox_time diff --git a/hyppopy/Solver/OptunaSolver.py b/hyppopy/Solver/OptunaSolver.py index 1b0e40a..d27f4b9 100644 --- a/hyppopy/Solver/OptunaSolver.py +++ b/hyppopy/Solver/OptunaSolver.py @@ -1,114 +1,120 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. 
#
# Author: Sven Wanner (s.wanner@dkfz.de)

import os
import copy
import optuna
import logging
import datetime
import warnings
import numpy as np
from pprint import pformat
from hyperopt import Trials
from hyppopy.globals import DEBUGLEVEL
from hyppopy.BlackboxFunction import BlackboxFunction
from hyppopy.solver.HyppopySolver import HyppopySolver

LOG = logging.getLogger(os.path.basename(__file__))
LOG.setLevel(DEBUGLEVEL)


class OptunaSolver(HyppopySolver):
    """HyppopySolver implementation that drives an Optuna study."""

    def __init__(self, project=None):
        HyppopySolver.__init__(self, project)
        # Search space handed to execute_solver; read by trial_cache and
        # reformat_parameter.
        self._searchspace = None
        # Running trial counter, reset by execute_solver.
        self._idx = None

    def reformat_parameter(self, params):
        """Map raw parameter values back onto the configured search space.

        Categorical values are treated as (rounded) indices into the
        parameter's "data" list, "int" typed values are rounded to int, and
        everything else is passed through unchanged.

        :param params: dict mapping parameter name to raw value
        :return: dict mapping parameter name to converted value
        """
        out_params = {}
        for name, value in params.items():
            spec = self._searchspace[name]
            if spec["domain"] == "categorical":
                out_params[name] = spec["data"][int(np.round(value))]
            elif spec["type"] == "int":
                out_params[name] = int(np.round(value))
            else:
                out_params[name] = value
        return out_params

    def trial_cache(self, trial):
        """Objective passed to Optuna: sample every parameter, return the loss.

        Categorical parameters are drawn with ``suggest_categorical``; all
        others are drawn with ``suggest_uniform`` between data[0] and data[1].

        :param trial: Optuna trial object used for sampling
        :return: loss reported by loss_function
        """
        self._idx += 1
        params = {}
        for name, param in self._searchspace.items():
            if param["domain"] == "categorical":
                params[name] = trial.suggest_categorical(name, param["data"])
            else:
                # NOTE(review): recent Optuna releases deprecate
                # suggest_uniform in favour of suggest_float -- confirm the
                # pinned Optuna version before switching.
                params[name] = trial.suggest_uniform(name, param["data"][0], param["data"][1])
        return self.loss_function(**params)

    def loss_function(self, **params):
        """Evaluate the blackbox, record the trial and fire the callback.

        A trial record is appended to ``self._trials.trials``. If the
        blackbox raises, the error is logged, the loss becomes NaN and the
        record's status becomes 'failed'.

        :param params: parameter set forwarded to the blackbox
        :return: the computed loss, or NaN on failure
        """
        vals = {key: [value] for key, value in params.items()}
        idx = {key: [self._idx] for key in params}
        trial = {'tid': self._idx,
                 'result': {'loss': None, 'status': 'ok'},
                 'misc': {
                     'tid': self._idx,
                     'idxs': idx,
                     'vals': vals
                 },
                 'book_time': datetime.datetime.now(),
                 'refresh_time': None
                 }
        try:
            loss = self.blackbox(**params)
            trial['result']['loss'] = loss
            trial['result']['status'] = 'ok'
        except Exception as e:
            # Deliberate best-effort: a failing evaluation must not abort
            # the whole optimisation run.
            LOG.error("computing loss failed due to:\n {}".format(e))
            loss = np.nan
            trial['result']['loss'] = np.nan
            trial['result']['status'] = 'failed'
        trial['refresh_time'] = datetime.datetime.now()
        self._trials.trials.append(trial)
        if isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None:
            # Copy so the callback cannot mutate the caller's parameters.
            cbd = copy.deepcopy(params)
            cbd['iterations'] = self._idx
            cbd['loss'] = loss
            cbd['status'] = trial['result']['status']
            self.blackbox.callback_func(**cbd)
        return loss

    def execute_solver(self, searchspace):
        """Run an Optuna study over *searchspace* for ``max_iterations`` trials.

        Resets the trial store and counter, then stores the best trial's
        parameters in ``self.best``.

        :param searchspace: dict as returned by convert_searchspace
        :raises BrokenPipeError: if study creation or optimisation fails; the
            original exception is attached as ``__cause__``
        """
        LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace)))
        self._searchspace = searchspace
        self.trials = Trials()
        self._idx = 0
        try:
            study = optuna.create_study()
            study.optimize(self.trial_cache, n_trials=self.max_iterations)
            self.best = study.best_trial.params
        except Exception as e:
            # The message used to blame "bayes_opt maximize" (copy-paste from
            # another solver); name the call that actually failed and keep
            # the original traceback via exception chaining.
            msg = "internal error in optuna study.optimize occurred. {}".format(e)
            LOG.error(msg)
            raise BrokenPipeError(msg) from e

    def convert_searchspace(self, hyperparameter):
        """Return *hyperparameter* unchanged, warning about unsupported domains.

        Every parameter whose domain is neither "categorical" nor "uniform"
        triggers both a ``warnings.warn`` and a log warning.

        :param hyperparameter: dict mapping parameter name to its description
        :return: the same dict object
        """
        LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter)))
        for param in hyperparameter.values():
            if param["domain"] not in ("categorical", "uniform"):
                msg = "Warning: Optuna cannot handle {} domain. Only uniform and categorical domains are supported!".format(param["domain"])
                warnings.warn(msg)
                LOG.warning(msg)
        return hyperparameter

# diff --git a/hyppopy/Solver/OptunitySolver.py b/hyppopy/Solver/OptunitySolver.py
# index fe6df3b..6e018d5 100644
# --- a/hyppopy/Solver/OptunitySolver.py
# +++ b/hyppopy/Solver/OptunitySolver.py
# @@ -1,134 +1,139 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)

import os
import copy
import logging
import optunity
import datetime
import warnings
import numpy as np
from pprint import pformat
from hyperopt import Trials
from hyppopy.globals import DEBUGLEVEL

LOG = logging.getLogger(os.path.basename(__file__))
LOG.setLevel(DEBUGLEVEL)

from hyppopy.solver.HyppopySolver import HyppopySolver
from hyppopy.BlackboxFunction import BlackboxFunction


class OptunitySolver(HyppopySolver):
    """HyppopySolver implementation built on optunity.minimize_structured."""

    def __init__(self, project=None):
        HyppopySolver.__init__(self, project)
        # Second and third return values of optunity.minimize_structured.
        self._solver_info = None
        self.opt_trials = None
        # Running trial counter, reset by execute_solver.
        self._idx = None

    def loss_function(self, **params):
        """Evaluate the blackbox, record the trial and fire the callback.

        Parameters the project declares as ``int`` are rounded to int before
        the blackbox is called. A trial record is appended to
        ``self._trials.trials``. If rounding or the blackbox raises, the
        error is logged, the loss becomes NaN and the status 'failed'.

        :param params: parameter set proposed by the solver
        :return: the computed loss, or NaN on failure
        """
        self._idx += 1
        # Recorded before the int rounding below, i.e. the raw proposals.
        vals = {key: [value] for key, value in params.items()}
        idx = {key: [self._idx] for key in params}
        trial = {'tid': self._idx,
                 'result': {'loss': None, 'status': 'ok'},
                 'misc': {
                     'tid': self._idx,
                     'idxs': idx,
                     'vals': vals
                 },
                 'book_time': datetime.datetime.now(),
                 'refresh_time': None
                 }
        try:
            for key in params.keys():
                if self.project.get_typeof(key) is int:
                    params[key] = int(round(params[key]))
            loss = self.blackbox(**params)
            trial['result']['loss'] = loss
            trial['result']['status'] = 'ok'
        except Exception as e:
            # Deliberate best-effort: a failing evaluation must not abort
            # the whole optimisation run.
            LOG.error("computing loss failed due to:\n {}".format(e))
            loss = np.nan
            trial['result']['loss'] = np.nan
            trial['result']['status'] = 'failed'
        trial['refresh_time'] = datetime.datetime.now()
        self._trials.trials.append(trial)
        if isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None:
            # Copy so the callback cannot mutate the caller's parameters.
            cbd = copy.deepcopy(params)
            cbd['iterations'] = self._idx
            cbd['loss'] = loss
            cbd['status'] = trial['result']['status']
            self.blackbox.callback_func(**cbd)
        return loss

    def execute_solver(self, searchspace):
        """Minimise loss_function over *searchspace* with optunity.

        Resets the trial store and counter, then stores the three return
        values of ``optunity.minimize_structured`` in ``self.best``,
        ``self.opt_trials`` and ``self._solver_info``.

        :param searchspace: structure as returned by convert_searchspace
        :raises BrokenPipeError: if optunity fails; the original exception is
            attached as ``__cause__``
        """
        LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace)))
        self.trials = Trials()
        self._idx = 0
        try:
            self.best, self.opt_trials, self._solver_info = optunity.minimize_structured(f=self.loss_function,
                                                                                         num_evals=self.max_iterations,
                                                                                         search_space=searchspace)
        except Exception as e:
            msg = "internal error in optunity.minimize_structured occured. {}".format(e)
            LOG.error(msg)
            # Chain the cause so the original traceback is not lost.
            raise BrokenPipeError(msg) from e

    def split_categorical(self, pdict):
        """Partition *pdict* into categorical and non-categorical parameters.

        Entries without a 'domain' key are dropped. Non-categorical entries
        whose domain is not 'uniform' are still returned in the second dict,
        but trigger a ``warnings.warn`` and a log warning.

        :param pdict: dict mapping parameter name to its description dict
        :return: tuple ``(categorical, uniform)`` of dicts
        """
        categorical = {}
        uniform = {}
        for name, pset in pdict.items():
            if 'domain' not in pset:
                continue
            domain = pset['domain']
            if domain == 'categorical':
                categorical[name] = pset
                continue
            if domain != 'uniform':
                msg = "Warning: Optunity cannot handle {} domain. Only uniform and categorical domains are supported!".format(domain)
                warnings.warn(msg)
                LOG.warning(msg)
            uniform[name] = pset
        return categorical, uniform

    def convert_searchspace(self, hyperparameter):
        """Build the optunity search-space structure from *hyperparameter*.

        Non-categorical parameters become ``name -> [left, right]``; a
        three-element 'data' list is cut to its first two entries. Without
        categorical parameters that flat dict is returned. Otherwise each
        categorical parameter wraps the structure built so far:
        ``{name: {option: previous_level, ...}}``.

        :param hyperparameter: dict mapping parameter name to its description
        :return: the (possibly nested) search-space dict
        :raises AssertionError: if a non-categorical 'data' list has neither
            two nor three entries
        """
        LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter)))
        # split input in categorical and non-categorical data
        cat, uni = self.split_categorical(hyperparameter)
        # build up dictionary keeping all non-categorical data
        uniforms = {}
        for name, pset in uni.items():
            if 'data' not in pset:
                continue
            bounds = pset['data']
            if len(bounds) == 3:
                uniforms[name] = bounds[0:2]
            elif len(bounds) == 2:
                uniforms[name] = bounds
            else:
                raise AssertionError("precondition violation, optunity searchspace needs list with left and right range bounds!")

        if not cat:
            return uniforms
        # build nested categorical structure
        inner_level = uniforms
        for name, pset in cat.items():
            # Every option of this parameter points at the same previously
            # built level (shared reference, not a copy).
            options = {elem: inner_level for elem in pset['data']} if 'data' in pset else {}
            inner_level = {name: options}
        return inner_level

# diff --git a/hyppopy/tests/test_randomsearchsolver.py b/hyppopy/tests/test_randomsearchsolver.py
# index b0a4781..946e352 100644
# --- a/hyppopy/tests/test_randomsearchsolver.py
# +++ b/hyppopy/tests/test_randomsearchsolver.py
# @@ -1,132 +1,132 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)

import unittest
import numpy as np
import matplotlib.pylab as plt

from hyppopy.solver.RandomsearchSolver import *
from hyppopy.VirtualFunction import VirtualFunction
from hyppopy.HyppopyProject import HyppopyProject


class RandomsearchTestSuite(unittest.TestCase):
    """Statistical checks for the random-search samplers and solver.

    Histograms use ``density=True``: the former ``normed`` keyword was
    removed from Matplotlib's ``hist`` and raises on current versions.
    """

    def setUp(self):
        pass

    def test_draw_uniform_sample(self):
        """Uniform samples stay in range, keep their type and look flat."""
        param = {"data": [0, 1, 10], "type": "float"}
        values = []
        for i in range(10000):
            values.append(draw_uniform_sample(param))
            self.assertTrue(0 <= values[-1] <= 1)
            self.assertIsInstance(values[-1], float)
        hist = plt.hist(values, bins=10, density=True)
        std = np.std(hist[0])
        mean = np.mean(hist[0])
        self.assertLess(std, 0.05)
        self.assertTrue(0.9 < mean < 1.1)

        param = {"data": [0, 10, 11], "type": "int"}
        values = []
        for i in range(10000):
            values.append(draw_uniform_sample(param))
            self.assertTrue(0 <= values[-1] <= 10)
            self.assertIsInstance(values[-1], int)
        hist = plt.hist(values, bins=11, density=True)
        std = np.std(hist[0])
        mean = np.mean(hist[0])
        self.assertLess(std, 0.05)
        self.assertTrue(0.09 < mean < 0.11)

    def test_draw_normal_sample(self):
        """Normal samples rise towards the centre bins and fall afterwards."""
        param = {"data": [0, 10, 11], "type": "int"}
        values = []
        for i in range(10000):
            values.append(draw_normal_sample(param))
            self.assertTrue(0 <= values[-1] <= 10)
            self.assertIsInstance(values[-1], int)
        hist = plt.hist(values, bins=11, density=True)
        for i in range(1, 5):
            self.assertLess(hist[0][i-1] - hist[0][i], 0)
        for i in range(5, 10):
            self.assertGreater(hist[0][i] - hist[0][i+1], 0)

    def test_draw_loguniform_sample(self):
        """Log-uniform samples stay in range and decay over the first bins."""
        param = {"data": [1, 1000, 11], "type": "float"}
        values = []
        for i in range(10000):
            values.append(draw_loguniform_sample(param))
            self.assertTrue(1 <= values[-1] <= 1000)
            self.assertIsInstance(values[-1], float)
        hist = plt.hist(values, bins=11, density=True)
        for i in range(4):
            # The original asserted this inequality twice in two spellings.
            self.assertGreater(hist[0][i], hist[0][i+1])

    def test_draw_categorical_sample(self):
        """Categorical samples come from the data list with even frequency."""
        # NOTE(review): "type" is the int class here while the other tests
        # pass the string "int" -- confirm which form the sampler expects.
        param = {"data": [1, 2, 3], "type": int}
        values = []
        for i in range(10000):
            values.append(draw_categorical_sample(param))
            self.assertIn(values[-1], (1, 2, 3))
            self.assertIsInstance(values[-1], int)
        hist = plt.hist(values, bins=3, density=True)
        for i in range(3):
            self.assertTrue(0.45 < hist[0][i] < 0.55)

    def test_solver_complete(self):
        """A full random-search run lands near the virtual function's optimum."""
        config = {
            "hyperparameter": {
                "axis_00": {
                    "domain": "normal",
                    "data": [300, 800],
                    "type": "float"
                },
                "axis_01": {
                    "domain": "uniform",
                    "data": [-1, 1],
                    "type": "float"
                },
                "axis_02": {
                    "domain": "uniform",
                    "data": [0, 10],
                    "type": "float"
                }
            },
            "settings": {
                "solver": {"max_iterations": 8000},
                "custom": {}
            }}

        project = HyppopyProject(config)
        solver = RandomsearchSolver(project)
        vfunc = VirtualFunction()
        vfunc.load_default()
        solver.blackbox = vfunc
        solver.run(print_stats=False)
        df, best = solver.get_results()
        self.assertTrue(570 < best['axis_00'] < 590)
        self.assertTrue(0.1 < best['axis_01'] < 0.8)
        self.assertTrue(4.5 < best['axis_02'] < 6)


if __name__ == '__main__':
    unittest.main()