diff --git a/doc/_static/class_diagram.png b/doc/_static/class_diagram.png index 6fa2ab3..a9d762a 100644 Binary files a/doc/_static/class_diagram.png and b/doc/_static/class_diagram.png differ diff --git a/doc/developer_guide.rst b/doc/developer_guide.rst index b45b3e6..af19150 100644 --- a/doc/developer_guide.rst +++ b/doc/developer_guide.rst @@ -1,5 +1,169 @@ **************** Developers Guide **************** -.. image:: _static/class_diagram.png \ No newline at end of file +The main classes and their connections +************************************** + +The picture below depicts the relationships between the most important classes of Hyppopy. + +.. image:: _static/class_diagram.png + +To understand the concept behind Hyppopy the following classes are important: + - :py:mod:`hyppopy.solvers.HyppopySolver` + - :py:mod:`hyppopy.HyppopyProject` + - :py:mod:`hyppopy.BlackboxFunction` + + +The :py:mod:`hyppopy.solvers.HyppopySolver` class is the parent class of all solvers in Hyppopy. It defines +an abstract interface that needs to be implemented by each custom solver class. The main idea is to +define a common interface for the different approaches the solver libraries are based on. When designing +Hyppopy there were three main challenges that drove the design. Each solver library has a different +approach to defining or describing the hyperparameter space, a different approach to tracking the solver +information and a different way of setting the blackbox function and running the optimization process. To +deal with those differences the :py:mod:`hyppopy.solvers.HyppopySolver` class defines the abstract interface +functions `convert_searchspace`, `execute_solver`, `loss_function_call` and `define_interface`. Those serve as an +abstraction layer to handle the individual needs of each solver library. 
+ +Each solver needs a :py:mod:`hyppopy.HyppopyProject` instance keeping the user configuration input and a +:py:mod:`hyppopy.BlackboxFunction` instance, implementing the loss function. + +Implementing a custom solver +**************************** + +Adding a new solver only requires deriving a new class from :py:mod:`hyppopy.solvers.HyppopySolver` as well as +telling the :py:mod:`hyppopy.SolverPool` that it exists. We go through the whole process using the example of the +solver :py:mod:`hyppopy.solvers.OptunitySolver`: + +.. code-block:: python + + import os + import optunity + from pprint import pformat + + + from hyppopy.solvers.HyppopySolver import HyppopySolver + + + class OptunitySolver(HyppopySolver): + + def __init__(self, project=None): + HyppopySolver.__init__(self, project) + +The first step is to derive from the HyppopySolver class. Good practice is to allow the project to be set via __init__ +and, if given, to pass it through to HyppopySolver.__init__. The next step is implementing the abstract interface methods. +We start with define_interface. This function's purpose is to define the relevant input parameters and the signature +of the hyperparameter space. Our solver needs a parameter called max_iterations of type int. The hyperparameter +space has a domain that allows the values 'uniform' and 'categorical', a field data of type list and a field type of type +type. This guarantees that exceptions are thrown if the user violates this signature or forgets to set max_iterations. + +.. code-block:: python + + def define_interface(self): + self._add_member("max_iterations", int) + self._add_hyperparameter_signature(name="domain", dtype=str, + options=["uniform", "categorical"]) + self._add_hyperparameter_signature(name="data", dtype=list) + self._add_hyperparameter_signature(name="type", dtype=type) + + +The next abstract method to implement is convert_searchspace. 
This method is responsible for interpreting the user's hyperparameter +input and converting it to a form the solver framework needs. An input can, for example, be: + +.. code-block:: python + + hyperparameter = { + 'C': {'domain': 'uniform', 'data': [0.0001, 20], 'type': float}, + 'gamma': {'domain': 'uniform', 'data': [0.0001, 20.0], 'type': float}, + 'kernel': {'domain': 'categorical', 'data': ['linear', 'sigmoid', 'poly', 'rbf'], 'type': str}, + 'decision_function_shape': {'domain': 'categorical', 'data': ['ovo', 'ovr'], 'type': str} + } + + +Optunity instead expects a hyperparameter space formulation as follows: + +.. code-block:: python + + optunity_space = {'decision_function_shape': + {'ovo': { + 'kernel': { + 'linear': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}, + 'sigmoid': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}, + 'poly': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}, + 'rbf': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}} + }, + 'ovr': { + 'kernel': { + 'linear': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}, + 'sigmoid': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}, + 'poly': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}, + 'rbf': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}} + } + }} + +This conversion is what convert_searchspace is meant for. + +.. 
code-block:: python + + def convert_searchspace(self, hyperparameter): + LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) + # split input in categorical and non-categorical data + cat, uni = self.split_categorical(hyperparameter) + # build up dictionary keeping all non-categorical data + uniforms = {} + for key, value in uni.items(): + for key2, value2 in value.items(): + if key2 == 'data': + if len(value2) == 3: + uniforms[key] = value2[0:2] + elif len(value2) == 2: + uniforms[key] = value2 + else: + raise AssertionError("precondition violation, optunity searchspace needs list with left and right range bounds!") + + if len(cat) == 0: + return uniforms + # build nested categorical structure + inner_level = uniforms + for key, value in cat.items(): + tmp = {} + optunity_space = {} + for key2, value2 in value.items(): + if key2 == 'data': + for elem in value2: + tmp[elem] = inner_level + optunity_space[key] = tmp + inner_level = optunity_space + return optunity_space + + +Now that we have defined how the solver looks from outside and how to convert the incoming parameter space, we can define how the blackbox function +is called. The abstract method loss_function_call is a wrapper function that allows customizing the call of the blackbox function. In the case of Optunity +we only check if a parameter is of type int and convert it to ensure that no exceptions are thrown in case integers are expected in the blackbox. + +.. code-block:: python + + def loss_function_call(self, params): + for key in params.keys(): + if self.project.get_typeof(key) is int: + params[key] = int(round(params[key])) + return self.blackbox(**params) + + +In execute_solver the actual wrapping of the solver framework call is done. Here we call the Optunity optimizing function. A dictionary keeping the optimal +parameter set must be assigned to self.best. + + +.. 
code-block:: python + + def execute_solver(self, searchspace): + LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace))) + try: + self.best, _, _ = optunity.minimize_structured(f=self.loss_function, + num_evals=self.max_iterations, + search_space=searchspace) + except Exception as e: + LOG.error("internal error in optunity.minimize_structured occured. {}".format(e)) + raise BrokenPipeError("internal error in optunity.minimize_structured occured. {}".format(e)) + + diff --git a/examples/tutorial_multisolver.py b/examples/tutorial_multisolver.py index c1e0d96..9a7fe52 100644 --- a/examples/tutorial_multisolver.py +++ b/examples/tutorial_multisolver.py @@ -1,183 +1,183 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE # In this tutorial we solve an optimization problem using the Hyperopt Solver (http://hyperopt.github.io/hyperopt/). # Hyperopt uses a Baysian - Tree Parzen Estimator - Optimization approach, which means that each iteration computes a # new function value of the blackbox, interpolates a guess for the whole energy function and predicts a point to # compute the next function value at. This next point is not necessarily a "better" value, it's only the value with # the highest uncertainty for the function interpolation. # # See a visual explanation e.g. 
here (http://philipperemy.github.io/visualization/) # import the HyppopyProject class keeping track of inputs from hyppopy.HyppopyProject import HyppopyProject # import the SolverPool singleton class from hyppopy.SolverPool import SolverPool # import the Blackboxfunction class wrapping your problem for Hyppopy from hyppopy.BlackboxFunction import BlackboxFunction # Next step is defining the problem space and all settings Hyppopy needs to optimize your problem. # The config is a simple nested dictionary with two obligatory main sections, hyperparameter and settings. # The hyperparameter section defines your searchspace. Each hyperparameter is again a dictionary with: # # - a domain ['categorical', 'uniform', 'normal', 'loguniform'] # - the domain data [left bound, right bound] and # - a type of your domain ['str', 'int', 'float'] # # The settings section has two subcategories, solver and custom. The first contains settings for the solver, # here 'max_iterations' - is the maximum number of iteration. # # The custom section allows defining custom parameter. An entry here is transformed to a member variable of the # HyppopyProject class. These can be useful when implementing new solver classes or for control your hyppopy script. # Here we use it as a solver switch to control the usage of our solver via the config. This means with the script # below your can try out every solver by changing use_solver to 'optunity', 'randomsearch', 'gridsearch',... # It can be used like so: project.custom_use_plugin (see below) If using the gridsearch solver, max_iterations is # ignored, instead each hyperparameter must specifiy a number of samples additionally to the range like so: # 'data': [0, 1, 100] which means sampling the space from 0 to 1 in 100 intervals. 
config = { "hyperparameter": { "C": { "domain": "uniform", "data": [0.0001, 20], "type": float }, "gamma": { "domain": "uniform", "data": [0.0001, 20.0], "type": float }, "kernel": { "domain": "categorical", "data": ["linear", "sigmoid", "poly", "rbf"], "type": str }, "decision_function_shape": { "domain": "categorical", "data": ["ovo", "ovr"], "type": str } }, "max_iterations": 300, -"solver": "quasirandomsearch" +"solver": "hyperopt" } # When creating a HyppopyProject instance we # pass the config dictionary to the constructor. project = HyppopyProject(config=config) # demonstration of the custom parameter access print("-"*30) print("max_iterations:\t{}".format(project.max_iterations)) print("solver chosen -> {}".format(project.solver)) print("-"*30) # The BlackboxFunction signature is as follows: # BlackboxFunction(blackbox_func=None, # dataloader_func=None, # preprocess_func=None, # callback_func=None, # data=None, # **kwargs) # # - blackbox_func: a function pointer to the users loss function # - dataloader_func: a function pointer for handling dataloading. The function is called once before # optimizing. What it returns is passed as first argument to your loss functions # data argument. # - preprocess_func: a function pointer for data preprocessing. The function is called once before # optimizing and gets via kwargs['data'] the raw data object set directly or returned # from dataloader_func. What this function returns is then what is passed as first # argument to your loss function. # - callback_func: a function pointer called after each iteration. The input kwargs is a dictionary # keeping the parameters used in this iteration, the 'iteration' index, the 'loss' # and the 'status'. The function in this example is used for realtime printing it's # input but can also be used for realtime visualization. # - data: if not done via dataloader_func one can set a raw_data object directly # - kwargs: dict that whose content is passed to all functions above. 
from sklearn.svm import SVC from sklearn.datasets import load_iris from sklearn.model_selection import cross_val_score def my_dataloader_function(**kwargs): print("Dataloading...") # kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input. print("my loading argument: {}".format(kwargs['params']['my_dataloader_input'])) iris_data = load_iris() return [iris_data.data, iris_data.target] def my_preprocess_function(**kwargs): print("Preprocessing...") # kwargs['data'] allows accessing the input data print("data:", kwargs['data'][0].shape, kwargs['data'][1].shape) # kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input. print("kwargs['params']['my_preproc_param']={}".format(kwargs['params']['my_preproc_param']), "\n") # if the preprocessing function returns something, # the input data will be replaced with the data returned by this function. x = kwargs['data'][0] y = kwargs['data'][1] for i in range(x.shape[0]): x[i, :] += kwargs['params']['my_preproc_param'] return [x, y] def my_callback_function(**kwargs): print("\r{}".format(kwargs), end="") def my_loss_function(data, params): clf = SVC(**params) return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean() # We now create the BlackboxFunction object and pass all function pointers defined above, # as well as 2 dummy parameter (my_preproc_param, my_dataloader_input) for demonstration purposes. blackbox = BlackboxFunction(blackbox_func=my_loss_function, dataloader_func=my_dataloader_function, preprocess_func=my_preprocess_function, callback_func=my_callback_function, my_preproc_param=1, my_dataloader_input='could/be/a/path') # Last step, is we use our SolverPool which automatically returns the correct solver. # There are multiple ways to get the desired solver from the solver pool. # 1. solver = SolverPool.get('hyperopt') # solver.project = project # 2. 
solver = SolverPool.get('hyperopt', project) # 3. The SolverPool will look for the field 'use_solver' in the project instance, if # it is present it will be used to specify the solver so that in this case it is enough # to pass the project instance. solver = SolverPool.get(project=project) # Give the solver your blackbox and run it. After execution we can get the result # via get_result() which returns a pandas dataframe containing the complete history # The dict best contains the best parameter set. solver.blackbox = blackbox #solver.start_viewer() solver.run() df, best = solver.get_results() print("\n") print("*"*100) print("Best Parameter Set:\n{}".format(best)) print("*"*100) diff --git a/hyppopy/solvers/OptunitySolver.py b/hyppopy/solvers/OptunitySolver.py index ff73b53..2c894af 100644 --- a/hyppopy/solvers/OptunitySolver.py +++ b/hyppopy/solvers/OptunitySolver.py @@ -1,98 +1,93 @@ # Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. 
# # See LICENSE import os import logging import optunity -import warnings from pprint import pformat from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) from hyppopy.solvers.HyppopySolver import HyppopySolver class OptunitySolver(HyppopySolver): def __init__(self, project=None): HyppopySolver.__init__(self, project) - self._solver_info = None - self.opt_trials = None def define_interface(self): self._add_member("max_iterations", int) self._add_hyperparameter_signature(name="domain", dtype=str, options=["uniform", "categorical"]) self._add_hyperparameter_signature(name="data", dtype=list) self._add_hyperparameter_signature(name="type", dtype=type) def loss_function_call(self, params): for key in params.keys(): if self.project.get_typeof(key) is int: params[key] = int(round(params[key])) return self.blackbox(**params) def execute_solver(self, searchspace): LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace))) try: self.best, _, _ = optunity.minimize_structured(f=self.loss_function, num_evals=self.max_iterations, search_space=searchspace) except Exception as e: LOG.error("internal error in optunity.minimize_structured occured. {}".format(e)) raise BrokenPipeError("internal error in optunity.minimize_structured occured. 
{}".format(e)) def split_categorical(self, pdict): categorical = {} uniform = {} for name, pset in pdict.items(): for key, value in pset.items(): if key == 'domain' and value == 'categorical': categorical[name] = pset elif key == 'domain': uniform[name] = pset return categorical, uniform def convert_searchspace(self, hyperparameter): LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) - solution_space = {} # split input in categorical and non-categorical data cat, uni = self.split_categorical(hyperparameter) # build up dictionary keeping all non-categorical data uniforms = {} for key, value in uni.items(): for key2, value2 in value.items(): if key2 == 'data': if len(value2) == 3: uniforms[key] = value2[0:2] elif len(value2) == 2: uniforms[key] = value2 else: raise AssertionError("precondition violation, optunity searchspace needs list with left and right range bounds!") if len(cat) == 0: return uniforms # build nested categorical structure inner_level = uniforms for key, value in cat.items(): tmp = {} - tmp2 = {} + optunity_space = {} for key2, value2 in value.items(): if key2 == 'data': for elem in value2: tmp[elem] = inner_level - tmp2[key] = tmp - inner_level = tmp2 - solution_space = tmp2 - return solution_space + optunity_space[key] = tmp + inner_level = optunity_space + return optunity_space