def make_spider(results, row, title, groundtruth):
    """Draw one radar (spider) chart into cell ``row + 1`` of a 2x2 subplot grid.

    One closed polygon is plotted per entry of ``results["iteration"]``, plus a
    filled and outlined polygon for the ground truth.

    :param results: dict with an ``"iteration"`` mapping; each of its values maps
        the axis names ``"axis_00"`` .. ``"axis_04"`` to a sequence indexed by ``row``
    :param row: index of the solver to plot; also selects the subplot cell
    :param title: title printed above the chart
    :param groundtruth: sequence holding at least one value per axis
    """
    categories = ["axis_00", "axis_01", "axis_02", "axis_03", "axis_04"]
    n_axes = len(categories)

    # One spoke per category; the first angle is repeated to close the polygon.
    angles = [n / float(n_axes) * 2 * pi for n in range(n_axes)]
    angles += angles[:1]

    ax = plt.subplot(2, 2, row + 1, polar=True)

    # Put the first axis on top and walk clockwise.
    ax.set_theta_offset(pi / 2)
    ax.set_theta_direction(-1)

    plt.xticks(angles[:-1], categories, color='grey', size=8)

    ax.set_rlabel_position(0)
    plt.yticks([0.2, 0.4, 0.6, 0.8, 1.0], ["0.2", "0.4", "0.6", "0.8", "1.0"], color="grey", size=7)
    plt.ylim(0, 1)

    gt = [groundtruth[i] for i in range(n_axes)]
    gt += gt[:1]
    ax.fill(angles, gt, color=(0.2, 0.8, 0.2), alpha=0.2)

    colors = [(0.8, 0.8, 0.0, 0.8), (0.7, 0.2, 0.2, 0.8), (0.2, 0.2, 0.7, 0.8)]
    for idx, (n_iterations, data) in enumerate(results["iteration"].items()):
        values = [data[name][row] for name in categories]
        values += values[:1]
        # Cycle through the palette so that more than three iteration counts
        # reuse colors instead of running out of them.
        ax.plot(angles, values, color=colors[idx % len(colors)], linewidth=2, linestyle='solid',
                label="iterations {}".format(n_iterations))

    ax.plot(angles, gt, color=(0.2, 0.8, 0.2, 0.8), linewidth=2, linestyle='solid', label="groundtruth")
    plt.title(title, size=11, color=(0.1, 0.1, 0.1), y=1.1)
    plt.legend(bbox_to_anchor=(0.2, 1.2))
# Solver comparison experiment: for every virtual function, run each solver with
# each iteration budget, average the best parameter set over the repetitions and
# draw one radar chart per solver. Results are cached in a pickle file inside the
# output directory and reused when that file already exists.
for vfunc_id in ["5D3"]:
    OUTPUTDIR = "C:\\Users\\s635r\\Desktop\\solver_comparison"
    EXPERIMENT = {"iterations": [50, 150, 300],
                  "solver": ["randomsearch", "hyperopt", "optunity", "bayesopt"],
                  "repeat": 1,
                  "output_dir": os.path.join(OUTPUTDIR, vfunc_id)}

    if not os.path.isdir(EXPERIMENT["output_dir"]):
        os.makedirs(EXPERIMENT["output_dir"])

    project = HyppopyProject()
    project.add_hyperparameter(name="axis_00", domain="uniform", data=[0, 1], dtype="float")
    project.add_hyperparameter(name="axis_01", domain="uniform", data=[0, 1], dtype="float")
    project.add_hyperparameter(name="axis_02", domain="uniform", data=[0, 1], dtype="float")
    project.add_hyperparameter(name="axis_03", domain="uniform", data=[0, 1], dtype="float")
    project.add_hyperparameter(name="axis_04", domain="uniform", data=[0, 1], dtype="float")
    project.add_settings(section="solver", name="max_iterations", value=100)
    project.add_settings(section="custom", name="use_solver", value="randomsearch")

    results_path = os.path.join(EXPERIMENT["output_dir"], "results")
    if os.path.isfile(results_path):
        # NOTE: pickle must only be used on files this script wrote itself;
        # never point OUTPUTDIR at untrusted data.
        with open(results_path, 'rb') as file:
            results = pickle.load(file)
    else:
        vfunc = VirtualFunction()
        vfunc.load_default(vfunc_id)

        def my_loss_function(data, params):
            return vfunc(**params)

        results = {"group": EXPERIMENT["solver"], "groundtruth": [], 'iteration': {}}
        minima = vfunc.minima()
        for mini in minima:
            results["groundtruth"].append(np.median(mini[0]))

        for n_iterations in EXPERIMENT["iterations"]:
            results["iteration"][n_iterations] = {"axis_00": [], "axis_01": [], "axis_02": [],
                                                  "axis_03": [], "axis_04": []}
            for solver_name in EXPERIMENT["solver"]:
                # Mean of the best parameter value per axis over all repetitions.
                axis_minima = [0, 0, 0, 0, 0]
                for n in range(EXPERIMENT["repeat"]):
                    print("\rSolver={} iteration={} round={}".format(solver_name, n_iterations, n), end="")

                    project.add_settings(section="solver", name="max_iterations", value=n_iterations)
                    project.add_settings(section="custom", name="use_solver", value=solver_name)

                    blackbox = BlackboxFunction(data=[], blackbox_func=my_loss_function)

                    solver = SolverPool.get(project=project)
                    solver.blackbox = blackbox
                    solver.run(print_stats=False)
                    df, _ = solver.get_results()
                    best_row = df['losses'].idxmin()
                    for i in range(5):
                        axis_minima[i] += df['axis_0{}'.format(i)][best_row]/EXPERIMENT["repeat"]
                for i in range(5):
                    results["iteration"][n_iterations]["axis_0{}".format(i)].append(axis_minima[i])
                print("")
        print("\n\n")
        with open(results_path, 'wb') as file:
            pickle.dump(results, file)

    my_dpi = 96
    plt.figure(figsize=(1100/my_dpi, 1100/my_dpi), dpi=my_dpi)
    # Plot every solver stored in the results (make_spider uses a 2x2 grid, so up
    # to four solvers fit) instead of a hard-coded count of three.
    for row in range(len(results['group'])):
        make_spider(results, row=row, title=results['group'][row], groundtruth=results["groundtruth"])
    # Save before showing: once the interactive window opened by plt.show() is
    # closed, the current figure is gone and savefig would write empty images.
    plt.savefig(os.path.join(EXPERIMENT["output_dir"], "radar_plots.svg"))
    plt.savefig(os.path.join(EXPERIMENT["output_dir"], "radar_plots.png"))
    plt.show()
# The config is a simple nested dictionary with two obligatory main sections, hyperparameter and settings. # The hyperparameter section defines your searchspace. Each hyperparameter is again a dictionary with: # # - a domain ['categorical', 'uniform', 'normal', 'loguniform'] # - the domain data [left bound, right bound] and # - a type of your domain ['str', 'int', 'float'] # # The settings section has two subcategories, solver and custom. The first contains settings for the solver, # here 'max_iterations' - the maximum number of iterations. # # The custom section allows defining custom parameters. An entry here is transformed to a member variable of the # HyppopyProject class. These can be useful when implementing new solver classes or for controlling your hyppopy script. # Here we use it as a solver switch to control the usage of our solver via the config. This means with the script # below you can try out every solver by changing use_solver to 'optunity', 'randomsearch', 'gridsearch',... # It can be used like so: project.custom_use_solver (see below). If using the gridsearch solver, max_iterations is # ignored, instead each hyperparameter must specify a number of samples in addition to the range like so: # 'data': [0, 1, 100] which means sampling the space from 0 to 1 in 100 intervals. config = { "hyperparameter": { "C": { "domain": "uniform", "data": [0.0001, 20], "type": "float" }, "gamma": { "domain": "uniform", "data": [0.0001, 20.0], "type": "float" }, "kernel": { "domain": "categorical", "data": ["linear", "sigmoid", "poly", "rbf"], "type": "str" }, "decision_function_shape": { "domain": "categorical", "data": ["ovo", "ovr"], "type": "str" } }, "settings": { "solver": { - "max_iterations": 10 + "max_iterations": 500 }, "custom": { - "use_solver": "optunity" + "use_solver": "optuna" } }} # When creating a HyppopyProject instance we # pass the config dictionary to the constructor. 
project = HyppopyProject(config=config) # demonstration of the custom parameter access print("-"*30) print("max_iterations:\t{}".format(project.solver_max_iterations)) print("solver chosen -> {}".format(project.custom_use_solver)) print("-"*30) # Hyppopy offers a class called BlackboxFunction to wrap your problem for Hyppopy. # The function signature is as follows: # BlackboxFunction(blackbox_func=None, # dataloader_func=None, # preprocess_func=None, # callback_func=None, # data=None, # **kwargs) # # This means we can set a couple of function pointers, a data object and an arbitrary number of custom parameters via kwargs. # # - blackbox_func: a function pointer to the actual, user defined, blackbox function that is computing our loss # - dataloader_func: a function pointer to a function handling the dataloading # - preprocess_func: a function pointer to a function automatically executed before starting the optimization process # - callback_func: a function pointer to a function that is called after each iteration with the trial object as input # - data: setting data can be done via dataloader_func or directly # - kwargs are passed to all functions above and thus can be used for parameter sharing between the functions # # (for more details see the documentation) # # Below we demonstrate the usage of all the above by defining a my_dataloader_function which in fact only grabs the # iris dataset from sklearn and returns it. A my_preprocess_function, which also does nothing useful here except # demonstrate that a custom parameter can be set via kwargs and used in all of our functions when called within # Hyppopy. The my_callback_function gets as input the dictionary containing the state of the iteration and thus can be # used to access the current state of each solver iteration. Finally we define the actual loss_function # my_loss_function, which gets as input a data object and params. 
def my_dataloader_function(**kwargs):
    """Load the iris dataset and return it as ``[features, labels]``.

    ``kwargs['params']`` gives access to the additional parameters passed to
    the BlackboxFunction (here ``my_dataloader_input``).
    """
    print("Dataloading...")
    loader_argument = kwargs['params']['my_dataloader_input']
    print("my loading argument: {}".format(loader_argument))
    iris = load_iris()
    return [iris.data, iris.target]


def my_preprocess_function(**kwargs):
    """Add ``my_preproc_param`` to every feature row and return ``[features, labels]``.

    ``kwargs['data']`` holds the input data and ``kwargs['params']`` the
    additional parameters. The feature array is modified in place; because this
    function returns something, the input data is replaced by the returned list.
    """
    print("Preprocessing...")
    data = kwargs['data']
    print("data:", data[0].shape, data[1].shape)
    shift = kwargs['params']['my_preproc_param']
    print("kwargs['params']['my_preproc_param']={}".format(shift), "\n")
    features, labels = data[0], data[1]
    # Shift all rows at once, in place.
    features[:, :] += shift
    return [features, labels]


def my_callback_function(**kwargs):
    """Print the keyword arguments of the current iteration on a single, overwritten line."""
    print("\r", kwargs, sep="", end="")


def my_loss_function(data, params):
    """Return the negated mean 3-fold cross-validation score of an SVC built from ``params``.

    ``data`` is indexed as ``data[0]`` (features) and ``data[1]`` (labels).
    """
    classifier = SVC(**params)
    scores = cross_val_score(estimator=classifier, X=data[0], y=data[1], cv=3)
    return -scores.mean()
# Wrap the loss, dataloader, preprocess and callback functions into a
# BlackboxFunction; the two extra keyword arguments are dummy parameters.
blackbox = BlackboxFunction(
    blackbox_func=my_loss_function,
    dataloader_func=my_dataloader_function,
    preprocess_func=my_preprocess_function,
    callback_func=my_callback_function,
    my_preproc_param=1,
    my_dataloader_input='could/be/a/path',
)

# The SolverPool returns the matching solver. There are several ways to ask for it:
# 1. solver = SolverPool.get('hyperopt')
#    solver.project = project
# 2. solver = SolverPool.get('hyperopt', project)
# 3. Pass only the project instance: if it has the field 'use_solver',
#    the SolverPool uses that field to pick the solver.
solver = SolverPool.get(project=project)

# Hand the blackbox to the solver and run it. get_results() returns a pandas
# dataframe with the complete history and a dict with the best parameter set.
solver.blackbox = blackbox
solver.run()
df, best = solver.get_results()

separator = "*"*100
print("\n")
print(separator)
print("Best Parameter Set:\n{}".format(best))
print(separator)
class OptunaSolver(HyppopySolver):
    """Hyppopy solver that samples the search space with an Optuna study.

    Each Optuna trial is turned into a parameter dict, evaluated through the
    blackbox and recorded as a hyperopt-style trial entry.
    """

    def __init__(self, project=None):
        HyppopySolver.__init__(self, project)
        # Search space as handed to execute_solver; maps name -> parameter description.
        self._searchspace = None
        # Running trial counter, reset in execute_solver.
        self._idx = None

    def reformat_parameter(self, params):
        """Map raw numeric values back to the declared parameter types.

        Categorical values are treated as (rounded) indices into the parameter's
        ``data`` list, ``int`` typed values are rounded, all others pass through.
        This method is not called from within this class.

        :param params: dict mapping parameter name to a numeric value
        :return: dict with the converted values
        """
        out_params = {}
        for name, value in params.items():
            if self._searchspace[name]["domain"] == "categorical":
                out_params[name] = self._searchspace[name]["data"][int(np.round(value))]
            else:
                if self._searchspace[name]["type"] == "int":
                    out_params[name] = int(np.round(value))
                else:
                    out_params[name] = value
        return out_params

    def trial_cache(self, trial):
        """Objective passed to Optuna: sample one parameter set and return its loss.

        :param trial: the Optuna trial object used to suggest values
        :return: the loss computed by loss_function for the sampled parameters
        """
        self._idx += 1

        params = {}
        for name, param in self._searchspace.items():
            if param["domain"] == "categorical":
                params[name] = trial.suggest_categorical(name, param["data"])
            elif param.get("type") == "int":
                # Integer parameters must reach the blackbox as ints, not as
                # floats drawn from a continuous range.
                params[name] = trial.suggest_int(name, int(param["data"][0]), int(param["data"][1]))
            else:
                # NOTE(review): every remaining domain is sampled uniformly between
                # data[0] and data[1] - confirm this is intended for non-uniform domains.
                params[name] = trial.suggest_uniform(name, param["data"][0], param["data"][1])
        return self.loss_function(**params)

    def loss_function(self, **params):
        """Evaluate the blackbox for ``params`` and record the trial.

        A failing blackbox call is logged and recorded with a NaN loss and status
        'failed' instead of aborting the run. If the blackbox is a
        BlackboxFunction with a callback, the callback receives the parameters
        plus 'iterations', 'loss' and 'status'.

        :param params: the sampled parameter set
        :return: the loss, or NaN if the blackbox raised
        """
        vals = {}
        idx = {}
        for key, value in params.items():
            vals[key] = [value]
            idx[key] = [self._idx]
        trial = {'tid': self._idx,
                 'result': {'loss': None, 'status': 'ok'},
                 'misc': {
                     'tid': self._idx,
                     'idxs': idx,
                     'vals': vals
                 },
                 'book_time': datetime.datetime.now(),
                 'refresh_time': None
                 }
        try:
            loss = self.blackbox(**params)
            trial['result']['loss'] = loss
            trial['result']['status'] = 'ok'
        except Exception as e:
            LOG.error("computing loss failed due to:\n {}".format(e))
            loss = np.nan
            trial['result']['loss'] = np.nan
            trial['result']['status'] = 'failed'
        trial['refresh_time'] = datetime.datetime.now()
        # presumably the `trials` property assigned in execute_solver stores into
        # `_trials` - verify against HyppopySolver
        self._trials.trials.append(trial)
        if isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None:
            cbd = copy.deepcopy(params)
            cbd['iterations'] = self._idx
            cbd['loss'] = loss
            cbd['status'] = trial['result']['status']
            self.blackbox.callback_func(**cbd)
        return loss

    def execute_solver(self, searchspace):
        """Run an Optuna study over ``searchspace`` for ``max_iterations`` trials.

        Stores the parameters of the best trial in ``self.best``.

        :param searchspace: dict mapping parameter name to its description
        :raises BrokenPipeError: if creating or optimizing the study fails
        """
        LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace)))
        self._searchspace = searchspace
        self.trials = Trials()
        self._idx = 0

        try:
            study = optuna.create_study()
            study.optimize(self.trial_cache, n_trials=self.max_iterations)
            self.best = study.best_trial.params
        except Exception as e:
            # Name the backend that actually failed and keep the original traceback.
            msg = "internal error in optuna study.optimize occurred. {}".format(e)
            LOG.error(msg)
            raise BrokenPipeError(msg) from e

    def convert_searchspace(self, hyperparameter):
        """Return ``hyperparameter`` unchanged; the search space is used as given."""
        LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter)))
        return hyperparameter
@singleton_object
class SolverPool(metaclass=Singleton):
    """Factory handing out solver instances by name."""

    def __init__(self):
        self._solver_list = ["hyperopt",
                             "optunity",
                             "bayesopt",
                             "optuna",
                             "randomsearch",
                             "gridsearch"]

    def get_solver_names(self):
        """Return the list of known solver names."""
        return self._solver_list

    def get(self, solver_name=None, project=None):
        """Create the solver selected by ``solver_name`` or by the project.

        If ``project`` carries the attribute ``custom_use_solver``, that value
        replaces ``solver_name``.

        :param solver_name: name of the solver, one of get_solver_names()
        :param project: optional HyppopyProject passed to the solver constructor
        :return: a new solver instance
        :raises AssertionError: on a wrong argument type or an unknown solver name
        """
        if solver_name is not None:
            assert isinstance(solver_name, str), "precondition violation, solver_name type str expected, got {} instead!".format(type(solver_name))
        if project is not None:
            assert isinstance(project, HyppopyProject), "precondition violation, project type HyppopyProject expected, got {} instead!".format(type(project))
            if "custom_use_solver" in project.__dict__:
                solver_name = project.custom_use_solver

        if solver_name not in self._solver_list:
            raise AssertionError("Solver named [{}] not implemented!".format(solver_name))

        solver_classes = {
            "hyperopt": HyperoptSolver,
            "optunity": OptunitySolver,
            "bayesopt": BayesOptSolver,
            "optuna": OptunaSolver,
            "gridsearch": GridsearchSolver,
            "randomsearch": RandomsearchSolver,
        }
        solver_class = solver_classes.get(solver_name)
        if solver_class is None:
            # A listed name without a matching class yields None.
            return None
        if project is None:
            return solver_class()
        return solver_class(project)