diff --git a/hyppopy/solvers/HyppopySolver.py b/hyppopy/solvers/HyppopySolver.py index aa80d61..30ba528 100644 --- a/hyppopy/solvers/HyppopySolver.py +++ b/hyppopy/solvers/HyppopySolver.py @@ -1,612 +1,594 @@ # Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE from _pytest import deprecated from hyppopy import CandidateDescriptor __all__ = ['HyppopySolver'] import abc import copy import types import datetime import numpy as np import pandas as pd from hyperopt import Trials from hyppopy.globals import * from hyppopy.CandidateDescriptor import CandidateDescriptor from hyppopy.VisdomViewer import VisdomViewer from hyppopy.HyppopyProject import HyppopyProject from hyppopy.BlackboxFunction import BlackboxFunction from hyppopy.MPIBlackboxFunction import MPIBlackboxFunction from hyppopy.FunctionSimulator import FunctionSimulator from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class HyppopySolver(object): """ The HyppopySolver class is the base class for all solver addons. It defines virtual functions a child class has to implement to deal with the front-end communication, orchestrating the optimization process and ensuring a proper process information storing. The key idea is that the HyppopySolver class defines an interface to configure and run an object instance of itself independently from the concrete solver lib used to optimize in the background. To achieve this goal an addon - developer needs to implement the abstract methods 'convert_searchspace', 'execute_solver' and 'loss_function_call'. + developer needs to implement the abstract methods 'convert_searchspace', 'execute_solver'. 
These methods abstract the peculiarities of the solver libs to offer, on the user side, a simple and consistent parameter space configuration and optimization procedure. The method 'convert_searchspace' transforms the hyppopy - parameter space description into the solver lib specific description. The method loss_function_call is used to - handle solver lib specifics of calling the actual blackbox function and execute_solver is executed when the run - method is invoked und takes care of calling the solver lib solving routine. + parameter space description into the solver lib specific description. The methods loss_func_cand_preprocess and + loss_func_postprocess are used to handle solver lib specifics of calling the actual blackbox function and + execute_solver is executed when the run method is invoked und takes care of calling the solver lib solving routine. The class HyppopySolver defines an interface to be implemented when writing a custom solver. Each solver derivative needs to implement the abstract methods: - convert_searchspace - execute_solver - - loss_function_call - TODO - define_interface The dev-user interface consists of the methods: - _add_member - _add_hyperparameter_signature - _check_project The end-user interface consists of the methods: - run - get_results - print_best - print_timestats - start_viewer """ def __init__(self, project=None): """ The constructor accepts a HyppopyProject. 
:param project: [HyppopyProject] project instance, default=None """ self._idx = 0 # current iteration counter self._best = None # best parameter set self._trials = None # trials object, hyppopy uses the Trials object from hyperopt self._blackbox = None # blackbox function, eiter a function or a BlackboxFunction instance self._total_duration = None # keeps track of the solvers running time self._solver_overhead = None # stores the time overhead of the solver, means total time minus time in blackbox self._time_per_iteration = None # mean time per iterration self._accumulated_blackbox_time = None # summed time the solver was in the blackbox function self._visdom_viewer = None # visdom viewer instance self._child_members = {} # this dict keeps track of the settings the child solver defines self._hopt_signatures = {} # this dict keeps track of the hyperparameter signatures the child solver defines self.define_interface() # the child define interface function is called which defines settings and hyperparameter signatures if project is not None: self.project = project @abc.abstractmethod def convert_searchspace(self, hyperparameter): """ This function gets the unified hyppopy-like parameterspace description as input and, if necessary, should convert it into a solver lib specific format. The function is invoked when run is called and what it returns is passed as searchspace argument to the function execute_solver. :param hyperparameter: [dict] nested parameter description dict e.g. {'name': {'domain':'uniform', 'data':[0,1], 'type':'float'}, ...} :return: [object] converted hyperparameter space """ raise NotImplementedError('users must define convert_searchspace to use this class') @abc.abstractmethod def execute_solver(self, searchspace): """ This function is called immediately after convert_searchspace and get the output of the latter as input. It's purpose is to call the solver libs main optimization function. 
:param searchspace: converted hyperparameter space """ raise NotImplementedError('users must define execute_solver to use this class') - @abc.abstractmethod - def loss_function_call(self, params): # TODO: Delete me... - """ - This function is called within the function loss_function and encapsulates the actual blackbox function call - in each iteration. The function loss_function takes care of the iteration driving and reporting, but each solver - lib might need some special treatment between the parameter set selection and the calling of the actual blackbox - function, e.g. parameter converting. - - :param params: [dict] hyperparameter space sample e.g. {'p1': 0.123, 'p2': 3.87, ...} - - :return: [float] loss - """ - - # TODO This is deprecated! Mark or remove... - raise NotImplementedError('users must define loss_function_call to use this class') - @abc.abstractmethod def loss_function_batch_call(self, candidates): # TODO: Delete me... """ TODO :param candidates: :return: """ # TODO This is deprecated! Mark or remove... raise NotImplementedError('users must define loss_function_batch_call to use this class') def loss_func_cand_preprocess(self, candidates): # TODO: Delete me... """ TODO :param candidates: :return: """ # User may implement this function to preprocess candidates before calling the actual loss_function # raise NotImplementedError('users must define loss_function_batch_call to use this class') return candidates def loss_func_postprocess(self, results): # TODO: Delete me... """ TODO :param candidates: :return: """ # User may implement this function to postprocess results after calling the actual loss_function # raise NotImplementedError('users must define loss_function_batch_call to use this class') return results @abc.abstractmethod def define_interface(self): """ This function is called when HyppopySolver.__init__ function finished. 
Child classes need to define their individual parameter here by calling the _add_member function for each class member variable need to be defined. Using _add_hyperparameter_signature the structure of a hyperparameter the solver expects must be defined. Both, members and hyperparameter signatures are later get checked, before executing the solver, ensuring settings passed fullfill solver needs. """ raise NotImplementedError('users must define define_interface to use this class') def _add_member(self, name, dtype, value=None, default=None): """ When designing your child solver class you need to implement the define_interface abstract method where you can call _add_member to define custom solver options that are automatically converted to class attributes. :param name: [str] option name :param dtype: [type] option data type :param value: [object] option value :param default: [object] option default value """ assert isinstance(name, str), "precondition violation, name needs to be of type str, got {}".format(type(name)) if value is not None: assert isinstance(value, dtype), "precondition violation, value does not match dtype condition!" if default is not None: assert isinstance(default, dtype), "precondition violation, default does not match dtype condition!" setattr(self, name, value) self._child_members[name] = {"type": dtype, "value": value, "default": default} def _add_hyperparameter_signature(self, name, dtype, options=None): """ When designing your child solver class you need to implement the define_interface abstract method where you can call _add_hyperparameter_signature to define a hyperparamter signature which is automatically checked for consistency while solver execution. 
:param name: [str] hyperparameter name :param dtype: [type] hyperparameter data type :param options: [list] list of possible values the hp can be set, if None no option check is done """ assert isinstance(name, str), "precondition violation, name needs to be of type str, got {}".format(type(name)) self._hopt_signatures[name] = {"type": dtype, "options": options} def _check_project(self): """ The function checks the members and hyperparameter signatures read from the project instance to be consistent with the members and signatures defined in the child class via define_interface. """ assert isinstance(self.project, HyppopyProject), "Invalid project instance, either not set or setting failed!" # check hyperparameter signatures for name, param in self.project.hyperparameter.items(): for sig, settings in self._hopt_signatures.items(): if sig not in param.keys(): msg = "Missing hyperparameter signature {}!".format(sig) LOG.error(msg) raise LookupError(msg) else: if not isinstance(param[sig], settings["type"]): msg = "Hyperparameter signature type mismatch, expected type {} got {}!".format(settings["type"], param[sig]) LOG.error(msg) raise TypeError(msg) if settings["options"] is not None: if param[sig] not in settings["options"]: msg = "Wrong signature value, {} not found in signature options!".format(param[sig]) LOG.error(msg) raise LookupError(msg) # check child members for name in self._child_members.keys(): if name not in self.project.__dict__.keys(): msg = "missing settings field {}!".format(name) LOG.error(msg) raise LookupError(msg) self.__dict__[name] = self.project.settings[name] def __compute_time_statistics(self): """ Evaluates all timestatistic values available """ dts = [] for trial in self._trials.trials: if 'book_time' in trial.keys() and 'refresh_time' in trial.keys(): dt = trial['refresh_time'] - trial['book_time'] dts.append(dt.total_seconds()) self._time_per_iteration = np.mean(dts) * 1e3 self._accumulated_blackbox_time = np.sum(dts) * 1e3 tmp = 
self.total_duration - self._accumulated_blackbox_time self._solver_overhead = int(np.round(100.0 / (self.total_duration + 1e-12) * tmp)) def loss_function(self, **params): """ This function is called each iteration with a selected parameter set. The parameter set selection is driven by the solver lib itself. This function just calls loss_function_batch() with a batch size of one. It takes care of converting the params to CandidateDescriptors. :param params: [dict] hyperparameter space sample e.g. {'p1': 0.123, 'p2': 3.87, ...} :return: [float] loss """ newCandidate = CandidateDescriptor(**params) results = self.loss_function_batch([newCandidate]) return list(results.values())[0]['loss'] # Here 'results' will always contain a single dict. We extract the loss from it and return it. def loss_function_batch(self, candidates): """ This function is called with a list of candidates. This list is driven by the solver lib itself. The purpose of this function is to take care of the iteration reporting and the calling of the callback_func if available. As a developer you might want to overwrite this function (or the 'non-batch'-version completely (e.g. - HyperoptSolver) but then you need to take care for iteration reporting for yourself. The alternative is to only - implement loss_function_call (e.g. OptunitySolver). + HyperoptSolver). :param candidates: [list of CandidateDescriptors] :return: [dict] result e.g. {'loss': 0.5, 'book_time': ..., 'refresh_time': ...} """ # print('hello'*10) results = dict() try: candidates = self.loss_func_cand_preprocess(candidates) results = self.blackbox.call_batch(candidates) if results is None: results = np.nan results = self.loss_func_postprocess(results) except ZeroDivisionError as e: # Fallback: If call_batch is not supported in BlackboxFunction, we iterate over the candidates in the batch. 
message = "Script not started via MPI:\n {}".format(e) LOG.error(message) print(message) except Exception as e: message = "call_batch not supported in BlackboxFunction:\n {}".format(e) LOG.error(message) print(message) finally: for i, candidate in enumerate(candidates): cand_id = candidate.ID # params = candidate.get_values() cand_results = dict() cand_results['book_time'] = datetime.datetime.now() try: preprocessed_candidate_list = self.loss_func_cand_preprocess([candidate]) candidate = preprocessed_candidate_list[0] params = candidate.get_values() try: loss = self.blackbox(**params) except: loss = self.blackbox(params) if loss is None: loss = np.nan cand_results['loss'] = loss except Exception as e: LOG.error("computing loss failed due to:\n {}".format(e)) cand_results['loss'] = np.nan cand_results['refresh_time'] = datetime.datetime.now() results[cand_id] = cand_results results = self.loss_func_postprocess(results) # initialize trials for i, candidate in enumerate(candidates): self._idx += 1 vals = {} idx = {} for key in candidate.keys(): vals[key] = [candidate[key]] idx[key] = [self._idx] trial = {'tid': self._idx, 'result': {'loss': None, 'status': 'ok'}, 'misc': { 'tid': self._idx, 'idxs': idx, 'vals': vals }, 'book_time': results[candidate.ID]['book_time'], 'refresh_time': results[candidate.ID]['refresh_time'] } try: loss = results[candidate.ID]['loss'] trial['result']['loss'] = loss trial['result']['status'] = 'ok' if loss is np.nan: trial['result']['status'] = 'failed' except Exception as e: LOG.error("computing loss failed due to:\n {}".format(e)) loss = np.nan trial['result']['loss'] = np.nan trial['result']['status'] = 'failed' self._trials.trials.append(trial) cbd = copy.deepcopy(candidate.get_values()) cbd['iterations'] = self._idx cbd['loss'] = loss cbd['status'] = trial['result']['status'] cbd['book_time'] = trial['book_time'] cbd['refresh_time'] = trial['refresh_time'] if (isinstance(self.blackbox, BlackboxFunction) or isinstance(self.blackbox, 
MPIBlackboxFunction)) and self.blackbox.callback_func is not None: self.blackbox.callback_func(**cbd) return results def run(self, print_stats=True): """ This function starts the optimization process. :param print_stats: [bool] en- or disable console output """ self._idx = 0 self.trials = Trials() start_time = datetime.datetime.now() try: search_space = self.convert_searchspace(self.project.hyperparameter) except Exception as e: msg = "Failed to convert searchspace, error: {}".format(e) LOG.error(msg) raise AssertionError(msg) try: self.execute_solver(search_space) except Exception as e: msg = "Failed to execute solver, error: {}".format(e) LOG.error(msg) raise AssertionError(msg) end_time = datetime.datetime.now() dt = end_time - start_time days = divmod(dt.total_seconds(), 86400) hours = divmod(days[1], 3600) minutes = divmod(hours[1], 60) seconds = divmod(minutes[1], 1) milliseconds = divmod(seconds[1], 0.001) self._total_duration = [int(days[0]), int(hours[0]), int(minutes[0]), int(seconds[0]), int(milliseconds[0])] if print_stats: self.print_best() self.print_timestats() def get_results(self): """ This function returns a complete optimization history as pandas DataFrame and a dict with the optimal parameter set. :return: [DataFrame], [dict] history and optimal parameter set """ assert isinstance(self.trials, Trials), "precondition violation, wrong trials type! Maybe solver was not yet executed?" 
results = {'duration': [], 'losses': [], 'status': []} pset = self.trials.trials[0]['misc']['vals'] for p in pset.keys(): results[p] = [] for n, trial in enumerate(self.trials.trials): t1 = trial['book_time'] t2 = trial['refresh_time'] results['duration'].append((t2 - t1).microseconds / 1000.0) results['losses'].append(trial['result']['loss']) results['status'].append(trial['result']['status'] == 'ok') losses = np.array(results['losses']) results['losses'] = list(losses) pset = trial['misc']['vals'] for p in pset.items(): results[p[0]].append(p[1][0]) return pd.DataFrame.from_dict(results), self.best def print_best(self): """ Optimization result console output printing. """ print("\n") print("#" * 40) print("### Best Parameter Choice ###") print("#" * 40) for name, value in self.best.items(): print(" - {}\t:\t{}".format(name, value)) print("\n - number of iterations\t:\t{}".format(self.trials.trials[-1]['tid']+1)) print(" - total time\t:\t{}d:{}h:{}m:{}s:{}ms".format(self._total_duration[0], self._total_duration[1], self._total_duration[2], self._total_duration[3], self._total_duration[4])) print("#" * 40) def print_timestats(self): """ Time statistic console output printing. """ print("\n") print("#" * 40) print("### Timing Statistics ###") print("#" * 40) print(" - per iteration: {}ms".format(int(self.time_per_iteration*1e4)/10000)) print(" - total time: {}d:{}h:{}m:{}s:{}ms".format(self._total_duration[0], self._total_duration[1], self._total_duration[2], self._total_duration[3], self._total_duration[4])) print("#" * 40) print(" - solver overhead: {}%".format(self.solver_overhead)) def start_viewer(self, port=8097, server="http://localhost"): """ Starts the visdom viewer. :param port: [int] port number, default: 8097 :param server: [str] server name, default: http://localhost """ try: self._visdom_viewer = VisdomViewer(self._project, port, server) except Exception as e: import warnings warnings.warn("Failed starting VisdomViewer. Is the server running? 
If not start it via $visdom") LOG.error("Failed starting VisdomViewer: {}".format(e)) self._visdom_viewer = None @property def project(self): """ HyppopyProject instance :return: [HyppopyProject] project instance """ return self._project @project.setter def project(self, value): """ Set HyppopyProject instance :param value: [HyppopyProject] project instance """ if isinstance(value, dict): self._project = HyppopyProject(value) elif isinstance(value, HyppopyProject): self._project = value else: msg = "Input error, project_manager of type: {} not allowed!".format(type(value)) LOG.error(msg) raise TypeError(msg) self._check_project() @property def blackbox(self): """ Get the BlackboxFunction object. :return: [object] BlackboxFunction instance or function """ return self._blackbox @blackbox.setter def blackbox(self, value): """ Set the BlackboxFunction wrapper class encapsulating the loss function or a function accepting a hyperparameter set and returning a float. :return: [object] pointer to blackbox_func """ if isinstance(value, types.FunctionType) or isinstance(value, BlackboxFunction) or isinstance(value, FunctionSimulator) or isinstance(value, MPIBlackboxFunction): self._blackbox = value else: self._blackbox = None msg = "Input error, blackbox of type: {} not allowed!".format(type(value)) LOG.error(msg) raise TypeError(msg) @property def best(self): """ Returns best parameter set. :return: [dict] best parameter set """ return self._best @best.setter def best(self, value): """ Set the best parameter set. :param value: [dict] best parameter set """ if not isinstance(value, dict): msg = "Input error, best of type: {} not allowed!".format(type(value)) LOG.error(msg) raise TypeError(msg) self._best = value @property def trials(self): """ Get the Trials instance. :return: [object] Trials instance """ return self._trials @trials.setter def trials(self, value): """ Set the Trials object. 
:param value: [object] Trials instance """ self._trials = value @property def total_duration(self): """ Get total computation duration. :return: [float] total computation time """ return (self._total_duration[0]*86400 + self._total_duration[1] * 3600 + self._total_duration[2] * 60 + self._total_duration[3]) * 1000 + self._total_duration[4] @property def solver_overhead(self): """ Get the solver overhead, this is the total time minus the duration of the blackbox function calls. :return: [float] solver overhead duration """ if self._solver_overhead is None: self.__compute_time_statistics() return self._solver_overhead @property def time_per_iteration(self): """ Get the mean duration per iteration. :return: [float] time per iteration """ if self._time_per_iteration is None: self.__compute_time_statistics() return self._time_per_iteration @property def accumulated_blackbox_time(self): """ Get the summed blackbox function computation time. :return: [float] blackbox function computation time """ if self._accumulated_blackbox_time is None: self.__compute_time_statistics() return self._accumulated_blackbox_time diff --git a/hyppopy/solvers/OptunaSolver.py b/hyppopy/solvers/OptunaSolver.py index 677f580..126f455 100644 --- a/hyppopy/solvers/OptunaSolver.py +++ b/hyppopy/solvers/OptunaSolver.py @@ -1,118 +1,150 @@ # Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. 
# # See LICENSE import os import optuna import logging import warnings import numpy as np from pprint import pformat from hyppopy.globals import DEBUGLEVEL from hyppopy.solvers.HyppopySolver import HyppopySolver +from hyppopy.CandidateDescriptor import CandidateDescriptor + LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class OptunaSolver(HyppopySolver): def __init__(self, project=None): """ The constructor accepts a HyppopyProject. :param project: [HyppopyProject] project instance, default=None """ HyppopySolver.__init__(self, project) self._searchspace = None + self.candidates_list = list() def define_interface(self): """ This function is called when HyppopySolver.__init__ function finished. Child classes need to define their individual parameter here by calling the _add_member function for each class member variable need to be defined. Using _add_hyperparameter_signature the structure of a hyperparameter the solver expects must be defined. Both, members and hyperparameter signatures are later get checked, before executing the solver, ensuring settings passed fullfill solver needs. """ self._add_member("max_iterations", int) self._add_hyperparameter_signature(name="domain", dtype=str, options=["uniform", "categorical"]) self._add_hyperparameter_signature(name="data", dtype=list) self._add_hyperparameter_signature(name="type", dtype=type) + def get_candidates(self, trial=None): + """ + This function converts the searchspace to a candidate_list that can then be used to distribute via MPI. + + :param searchspace: converted hyperparameter space + """ + + candidates_list = list() + N = self.max_iterations + for n in range(N): + print(n) + # Todo: Ugly hack that does not even work... 
+ from optuna import trial as trial_module + # temp_study = optuna.create_study() + trial_id = self.study._storage.create_new_trial_id(0) + trial = trial_module.Trial(self.study, trial_id) + ## trial.report(result) + ## self._storage.set_trial_state(trial_id, structs.TrialState.COMPLETE) + ## self._log_completed_trial(trial_number, result) + + params = {} + for name, param in self._searchspace.items(): + if param["domain"] == "categorical": + params[name] = trial.suggest_categorical(name, param["data"]) + else: + params[name] = trial.suggest_uniform(name, param["data"][0], param["data"][1]) + candidates_list.append(CandidateDescriptor(**params)) + + return candidates_list + + N = self.max_iterations + for n in range(N): + params = {} + for name, param in self._searchspace.items(): + if param["domain"] == "categorical": + params[name] = trial.suggest_categorical(name, param["data"]) + else: + params[name] = trial.suggest_uniform(name, param["data"][0], param["data"][1]) + candidates_list.append(CandidateDescriptor(**params)) + + return candidates_list + def trial_cache(self, trial): """ Optuna specific loss function wrapper :param trial: [Trial] instance :return: [function] loss function """ + params = {} + for name, param in self._searchspace.items(): if param["domain"] == "categorical": params[name] = trial.suggest_categorical(name, param["data"]) else: params[name] = trial.suggest_uniform(name, param["data"][0], param["data"][1]) - return self.loss_function(**params) - - def loss_function_call(self, params): - """ - This function is called within the function loss_function and encapsulates the actual blackbox function call - in each iteration. The function loss_function takes care of the iteration driving and reporting, but each solver - lib might need some special treatment between the parameter set selection and the calling of the actual blackbox - function, e.g. parameter converting. - :param params: [dict] hyperparameter space sample e.g. 
{'p1': 0.123, 'p2': 3.87, ...} - - :return: [float] loss - """ - for key in params.keys(): - if self.project.get_typeof(key) is int: - params[key] = int(round(params[key])) - return self.blackbox(**params) + return self.loss_function(**params) def execute_solver(self, searchspace): """ This function is called immediately after convert_searchspace and get the output of the latter as input. It's purpose is to call the solver libs main optimization function. :param searchspace: converted hyperparameter space """ LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace))) self._searchspace = searchspace try: study = optuna.create_study() study.optimize(self.trial_cache, n_trials=self.max_iterations) self.best = study.best_trial.params except Exception as e: LOG.error("internal error in bayes_opt maximize occured. {}".format(e)) raise BrokenPipeError("internal error in bayes_opt maximize occured. {}".format(e)) def convert_searchspace(self, hyperparameter): """ This function gets the unified hyppopy-like parameterspace description as input and, if necessary, should convert it into a solver lib specific format. The function is invoked when run is called and what it returns is passed as searchspace argument to the function execute_solver. :param hyperparameter: [dict] nested parameter description dict e.g. {'name': {'domain':'uniform', 'data':[0,1], 'type':'float'}, ...} :return: [object] converted hyperparameter space """ LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) for name, param in hyperparameter.items(): if param["domain"] != "categorical" and param["domain"] != "uniform": msg = "Warning: Optuna cannot handle {} domain. 
Only uniform and categorical domains are supported!".format(param["domain"]) warnings.warn(msg) LOG.warning(msg) return hyperparameter diff --git a/hyppopy/solvers/OptunitySolver.py b/hyppopy/solvers/OptunitySolver.py index 858af11..bfb04b8 100644 --- a/hyppopy/solvers/OptunitySolver.py +++ b/hyppopy/solvers/OptunitySolver.py @@ -1,203 +1,123 @@ # Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE import os import logging import optunity from pprint import pformat from hyppopy.CandidateDescriptor import CandidateDescriptor, CandicateDescriptorWrapper from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) from hyppopy.solvers.HyppopySolver import HyppopySolver class OptunitySolver(HyppopySolver): def __init__(self, project=None): """ The constructor accepts a HyppopyProject. :param project: [HyppopyProject] project instance, default=None """ HyppopySolver.__init__(self, project) def define_interface(self): """ This function is called when HyppopySolver.__init__ function finished. Child classes need to define their individual parameter here by calling the _add_member function for each class member variable need to be defined. Using _add_hyperparameter_signature the structure of a hyperparameter the solver expects must be defined. Both, members and hyperparameter signatures are later get checked, before executing the solver, ensuring settings passed fullfill solver needs. 
""" self._add_member("max_iterations", int) self._add_hyperparameter_signature(name="domain", dtype=str, options=["uniform", "categorical"]) self._add_hyperparameter_signature(name="data", dtype=list) self._add_hyperparameter_signature(name="type", dtype=type) - def loss_function_call(self, params): - """ - This function is called within the function loss_function and encapsulates the actual blackbox function call - in each iteration. The function loss_function takes care of the iteration driving and reporting, but each solver - lib might need some special treatment between the parameter set selection and the calling of the actual blackbox - function, e.g. parameter converting. - - :param params: [dict] hyperparameter space sample e.g. {'p1': 0.123, 'p2': 3.87, ...} - - :return: [float] loss - """ - for key in params.keys(): - if self.project.get_typeof(key) is int: - params[key] = int(round(params[key])) - return self.blackbox(**params) - - def loss_function_batch(self, **candidates): - """ - This function is called with a list of candidates. This list is driven by the solver lib itself. - The purpose of this function is to take care of the iteration reporting and the calling - of the callback_func if available. As a developer you might want to overwrite this function (or the 'non-batch'-version completely (e.g. - HyperoptSolver) but then you need to take care for iteration reporting for yourself. The alternative is to only - implement loss_function_call (e.g. OptunitySolver). - - :param candidates: [list of CandidateDescriptors] - - :return: [dict] result e.g. 
def execute_solver(self, searchspace):
    """
    Run the optunity structured minimization on the converted search space.

    This function is called immediately after convert_searchspace and gets the output of the latter as input.
    The first element of the tuple returned by optunity.minimize_structured is stored in self.best, the two
    remaining elements are discarded.

    :param searchspace: converted hyperparameter space

    :raises BrokenPipeError: if optunity.minimize_structured raises; the original exception is attached as cause
    """
    LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace)))
    try:
        self.best, _, _ = optunity.minimize_structured(f=self.loss_function,
                                                       num_evals=self.max_iterations,
                                                       search_space=searchspace)
    except Exception as e:
        msg = "internal error in optunity.minimize_structured occured. {}".format(e)
        LOG.error(msg)
        # keep the original traceback reachable for whoever catches the BrokenPipeError
        raise BrokenPipeError(msg) from e


def split_categorical(self, pdict):
    """
    Split the incoming parameter description into categorical and non-categorical entries.

    Entries without a 'domain' key end up in neither of the two returned dicts.

    :param pdict: [dict] input parameter description dict

    :return: [dict],[dict] categorical only, others
    """
    categorical = {}
    uniform = {}
    for name, pset in pdict.items():
        if 'domain' not in pset:
            continue
        if pset['domain'] == 'categorical':
            categorical[name] = pset
        else:
            uniform[name] = pset
    return categorical, uniform


def convert_searchspace(self, hyperparameter):
    """
    Convert the unified hyppopy parameter space description into the nested dict optunity expects.

    Non-categorical parameters are reduced to their [left, right] bounds; if 'data' holds three entries only the
    first two are used. Each categorical parameter adds one nesting level of the form {name: {choice: inner}},
    where inner is the structure built so far (initially the dict of bounds). The function is invoked when run
    is called and what it returns is passed as searchspace argument to the function execute_solver.

    :param hyperparameter: [dict] nested parameter description dict e.g. {'name': {'domain':'uniform', 'data':[0,1], 'type':'float'}, ...}

    :return: [object] converted hyperparameter space

    :raises AssertionError: if a non-categorical 'data' list has neither two nor three entries
    """
    LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter)))
    # split input in categorical and non-categorical data
    cat, uni = self.split_categorical(hyperparameter)

    # build up dictionary keeping all non-categorical data
    uniforms = {}
    for name, pset in uni.items():
        if 'data' not in pset:
            continue
        bounds = pset['data']
        if len(bounds) == 3:
            uniforms[name] = bounds[0:2]
        elif len(bounds) == 2:
            uniforms[name] = bounds
        else:
            raise AssertionError("precondition violation, optunity searchspace needs list with left and right range bounds!")

    if not cat:
        return uniforms

    # build nested categorical structure, every choice of a level shares the same inner structure
    inner_level = uniforms
    for name, pset in cat.items():
        choices = {elem: inner_level for elem in pset.get('data', [])}
        inner_level = {name: choices}
    return inner_level
class HaltonSequenceGenerator(object):
    """
    This class generates Halton sequences (https://en.wikipedia.org/wiki/Halton_sequence). Every axis of the
    sample space uses the van der Corput sequence of its own prime base. The method get_unit_space returns one
    list of N_samples values per axis, each value lying in the interval [0, 1).
    """

    def __init__(self):
        pass

    def __next_prime(self):
        """
        Endless generator yielding the odd primes 3, 5, 7, 11, ... in ascending order.

        :return: [generator] generator of odd prime numbers
        """
        def is_prime(num):
            # trial division by every integer from 2 up to sqrt(num)
            return all(num % i for i in range(2, int(num ** 0.5) + 1))

        candidate = 3
        while True:
            if is_prime(candidate):
                yield candidate
            candidate += 2

    def __vdc(self, n, base):
        """
        Returns the n-th element of the van der Corput sequence for the given base.

        :param n: [int] sequence index
        :param base: [int] number base

        :return: [float] sequence value, 0 for n == 0
        """
        vdc, denom = 0, 1
        while n:
            denom *= base
            n, remainder = divmod(n, base)
            vdc += remainder / float(denom)
        return vdc

    def get_unit_space(self, N_samples, N_dims):
        """
        Returns a unit space in form of a sequence list keeping N_dims sequences with N_samples samplings each.

        :param N_samples: [int] Number of samples
        :param N_dims: [int] Number of dimensions

        :return: [list] samples list of length N_dims keeping lists each of length N_samples
        """
        primes = self.__next_prime()
        # the first prime delivered by the generator (3) is discarded, so the bases used are 5, 7, 11, ...
        next(primes)
        seq = []
        for _ in range(N_dims):
            base = next(primes)
            seq.append([self.__vdc(i, base) for i in range(N_samples)])
        return seq


class QuasiRandomSampleGenerator(object):
    """
    This class takes care of the hyperparameter space creation and next sample delivery.
    """

    def __init__(self, N_samples=None):
        # None until generate_samples ran successfully, used by next() to generate the samples only once
        self._axis = None
        self._samples = []
        self._numerical = []
        self._categorical = []
        self._N_samples = N_samples

    def set_axis(self, name, data, domain, dtype):
        """
        Add an axis description. For categorical axes the entries of data are cast to dtype if dtype is int,
        str or float. Every other domain is treated as numerical axis.

        :param name: [str] axis name
        :param data: [list] axis range [min, max]
        :param domain: [str] axis domain
        :param dtype: [type] axis data type
        """
        if domain == "categorical":
            if dtype is int or dtype is str or dtype is float:
                data = [dtype(i) for i in data]
            self._categorical.append({"name": name, "data": data, "type": dtype})
        else:
            self._numerical.append({"name": name, "data": data, "type": dtype, "domain": domain})

    def generate_samples(self, N_samples=None):
        """
        This function is called once when the first sample is requested. It generates the halton sequence space.
        Numerical axes are scaled from the unit interval by abs(data[1] - data[0]) and shifted by data[0], values
        of int axes are rounded. Categorical values are drawn for each sample via np.random.choice. The generated
        samples are appended to the internal sample list.

        :param N_samples: [int] number of samples, if None the number passed to the constructor is used

        :raises AssertionError: if no integer number of samples is available
        """
        if N_samples is not None:
            self._N_samples = N_samples
        elif not isinstance(self._N_samples, int):
            # raised explicitly instead of using assert, so the check is not stripped when running with -O
            raise AssertionError("Precondition violation, no number of samples specified!")
        self._axis = []

        axis_samples = {}
        if len(self._numerical) > 0:
            generator = HaltonSequenceGenerator()
            unit_space = generator.get_unit_space(self._N_samples, len(self._numerical))
            for n, axis in enumerate(self._numerical):
                offset = axis["data"][0]
                width = abs(axis["data"][1] - offset)
                values = [x * width + offset for x in unit_space[n]]
                if axis["type"] is int:
                    values = [int(round(x)) for x in values]
                axis_samples[axis["name"]] = values
        else:
            warnings.warn("No numerical axis defined, this warning can be ignored if searchspace is categorical only, otherwise check if axis was set!")

        for n in range(self._N_samples):
            sample = {name: data[n] for name, data in axis_samples.items()}
            for cat in self._categorical:
                choice = np.random.choice(len(cat["data"]), 1)[0]
                sample[cat["name"]] = cat["data"][choice]
            self._samples.append(sample)

    def next(self):
        """
        Returns the next sample, picked at a random position of the remaining samples. The samples are generated
        on the first request only. Returns None if all samples are requested.

        :return: [dict] sample dict {'name':value, ...}
        """
        if self._axis is None:
            # generate exactly once, an exhausted sample list must not trigger a new generation
            self.generate_samples()
        if not self._samples:
            return None
        next_index = np.random.choice(len(self._samples), 1)[0]
        return self._samples.pop(next_index)
class QuasiRandomsearchSolver(HyppopySolver):
    """
    The QuasiRandomsearchSolver class implements a quasi randomsearch optimization. The quasi randomsearch supports
    categorical and uniform sampling. The solver defines a Halton Sequence distributed hyperparameter space. This
    means a rather evenly distributed space sampling but no real randomness.
    """

    def __init__(self, project=None):
        """
        The constructor accepts a HyppopyProject.

        :param project: [HyppopyProject] project instance, default=None
        """
        HyppopySolver.__init__(self, project)
        self._sampler = None

    def define_interface(self):
        """
        Declares the member 'max_iterations' and the hyperparameter signature ('domain', 'data', 'type') this
        solver expects, using _add_member and _add_hyperparameter_signature.
        """
        self._add_member("max_iterations", int)
        self._add_hyperparameter_signature(name="domain", dtype=str, options=["uniform", "categorical"])
        self._add_hyperparameter_signature(name="data", dtype=list)
        self._add_hyperparameter_signature(name="type", dtype=type)

    def execute_solver(self, searchspace):
        """
        This function is called immediately after convert_searchspace and gets the output of the latter as input.
        It creates a QuasiRandomSampleGenerator for max_iterations samples, registers every axis of the
        searchspace and calls loss_function once per delivered sample. Afterwards self.best is set to
        self._trials.argmin.

        :param searchspace: converted hyperparameter space

        :raises BrokenPipeError: if sampling or a loss_function call raises; the original exception is attached
                                 as cause
        """
        N = self.max_iterations
        self._sampler = QuasiRandomSampleGenerator(N)
        for name, axis in searchspace.items():
            self._sampler.set_axis(name, axis["data"], axis["domain"], axis["type"])

        try:
            for _ in range(N):
                params = self._sampler.next()
                if params is None:
                    break
                self.loss_function(**params)
        except Exception as e:
            msg = "internal error in randomsearch execute_solver occured. {}".format(e)
            LOG.error(msg)
            # keep the original traceback reachable for whoever catches the BrokenPipeError
            raise BrokenPipeError(msg) from e
        self.best = self._trials.argmin

    def convert_searchspace(self, hyperparameter):
        """
        This function gets the unified hyppopy-like parameterspace description as input and returns it unchanged,
        no conversion is done for this solver. The function is invoked when run is called and what it returns is
        passed as searchspace argument to the function execute_solver.

        :param hyperparameter: [dict] nested parameter description dict e.g. {'name': {'domain':'uniform', 'data':[0,1], 'type':'float'}, ...}

        :return: [object] converted hyperparameter space
        """
        LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter)))
        return hyperparameter
class FooSolver1(HyppopySolver):
    """Fixture overriding none of the solver methods; test_class expects NotImplementedError on construction."""

    def __init__(self, project=None):
        super().__init__(project)
        self._searchspace = None


class FooSolver2(HyppopySolver):
    """Fixture overriding convert_searchspace only; test_class expects NotImplementedError on construction."""

    def __init__(self, project=None):
        super().__init__(project)
        self._searchspace = None

    def convert_searchspace(self, hyperparameter):
        pass


class FooSolver3(HyppopySolver):
    """Fixture overriding convert_searchspace and define_interface."""

    def __init__(self, project=None):
        super().__init__(project)
        self._searchspace = None

    def convert_searchspace(self, hyperparameter):
        pass

    def define_interface(self):
        pass


class FooSolver4(HyppopySolver):
    """Fixture without execute_solver; test_class expects NotImplementedError when execute_solver is called."""

    def __init__(self, project=None):
        super().__init__(project)
        self._searchspace = None

    def convert_searchspace(self, hyperparameter):
        pass

    def define_interface(self):
        pass


class GooSolver1(HyppopySolver):
    """Fixture whose _add_member call is expected by test_class to raise AssertionError on construction."""

    def __init__(self, project=None):
        super().__init__(project)
        self._searchspace = None

    def convert_searchspace(self, hyperparameter):
        pass

    def define_interface(self):
        self._add_member("max_iterations", int, 1.0, 100)

    def execute_solver(self, searchspace):
        pass


class GooSolver2(HyppopySolver):
    """Fixture whose _add_member call is expected by test_class to raise AssertionError on construction."""

    def __init__(self, project=None):
        super().__init__(project)
        self._searchspace = None

    def convert_searchspace(self, hyperparameter):
        pass

    def define_interface(self):
        self._add_member("max_iterations", int, 100, 5.0)

    def execute_solver(self, searchspace):
        pass


class TestSolver1(HyppopySolver):
    """Fixture whose config provides 'foo1' while the interface declares 'foo'; LookupError is expected."""

    def __init__(self, project=None):
        hyperparameter = {
            "gamma": {"domain": "uniform", "data": [0.0001, 20.0], "type": float},
            "kernel": {"domain": "categorical", "data": ["linear", "sigmoid", "poly", "rbf"], "type": str},
        }
        config = {"hyperparameter": hyperparameter, "foo1": 300, "goo": 1.0}
        project = HyppopyProject(config)
        super().__init__(project)
        self._searchspace = None

    def convert_searchspace(self, hyperparameter):
        pass

    def execute_solver(self, searchspace):
        pass

    def define_interface(self):
        self._add_member("foo", int)
        self._add_member("goo", float)
        self._add_hyperparameter_signature(name="domain", dtype=str, options=["uniform", "categorical"])
        self._add_hyperparameter_signature(name="data", dtype=list)
        self._add_hyperparameter_signature(name="type", dtype=type)


class TestSolver2(HyppopySolver):
    """Fixture using the domain 'normal' which is not part of the declared options; LookupError is expected."""

    def __init__(self, project=None):
        hyperparameter = {
            "gamma": {"domain": "normal", "data": [0.0001, 20.0], "type": float},
            "kernel": {"domain": "categorical", "data": ["linear", "sigmoid", "poly", "rbf"], "type": str},
        }
        config = {"hyperparameter": hyperparameter, "foo": 300, "goo": 1.0}
        project = HyppopyProject(config)
        super().__init__(project)
        self._searchspace = None

    def convert_searchspace(self, hyperparameter):
        pass

    def execute_solver(self, searchspace):
        pass

    def define_interface(self):
        self._add_member("foo", int)
        self._add_member("goo", float)
        self._add_hyperparameter_signature(name="domain", dtype=str, options=["uniform", "categorical"])
        self._add_hyperparameter_signature(name="data", dtype=list)
        self._add_hyperparameter_signature(name="type", dtype=type)


class TestSolver3(HyppopySolver):
    """Fixture passing the integer 100 as domain while the signature declares str; TypeError is expected."""

    def __init__(self, project=None):
        hyperparameter = {
            "gamma": {"domain": 100, "data": [0.0001, 20.0], "type": float},
            "kernel": {"domain": "categorical", "data": ["linear", "sigmoid", "poly", "rbf"], "type": str},
        }
        config = {"hyperparameter": hyperparameter, "foo": 300, "goo": 1.0}
        project = HyppopyProject(config)
        super().__init__(project)
        self._searchspace = None

    def convert_searchspace(self, hyperparameter):
        pass

    def execute_solver(self, searchspace):
        pass

    def define_interface(self):
        self._add_member("foo", int)
        self._add_member("goo", float)
        self._add_hyperparameter_signature(name="domain", dtype=str, options=["uniform", "categorical"])
        self._add_hyperparameter_signature(name="data", dtype=list)
        self._add_hyperparameter_signature(name="type", dtype=type)


class TestSolver4(HyppopySolver):
    """Fixture with the misspelled key 'domina' instead of 'domain'; LookupError is expected."""

    def __init__(self, project=None):
        hyperparameter = {
            "gamma": {"domina": "uniform", "data": [0.0001, 20.0], "type": float},
            "kernel": {"domain": "categorical", "data": ["linear", "sigmoid", "poly", "rbf"], "type": str},
        }
        config = {"hyperparameter": hyperparameter, "foo": 300, "goo": 1.0}
        project = HyppopyProject(config)
        super().__init__(project)
        self._searchspace = None

    def convert_searchspace(self, hyperparameter):
        pass

    def execute_solver(self, searchspace):
        pass

    def define_interface(self):
        self._add_member("foo", int)
        self._add_member("goo", float)
        self._add_hyperparameter_signature(name="domain", dtype=str, options=["uniform", "categorical"])
        self._add_hyperparameter_signature(name="data", dtype=list)
        self._add_hyperparameter_signature(name="type", dtype=type)


class TestRunSolver1(HyppopySolver):
    """Fixture whose convert_searchspace raises; test_run expects run to raise AssertionError."""

    def __init__(self, project=None):
        project = HyppopyProject({})
        super().__init__(project)
        self._searchspace = None

    def convert_searchspace(self, hyperparameter):
        raise EnvironmentError("ForTesting")

    def execute_solver(self, searchspace):
        pass

    def define_interface(self):
        pass


class TestRunSolver2(HyppopySolver):
    """Fixture whose execute_solver raises; test_run expects run to raise AssertionError."""

    def __init__(self, project=None):
        project = HyppopyProject({})
        super().__init__(project)
        self._searchspace = None

    def convert_searchspace(self, hyperparameter):
        pass

    def execute_solver(self, searchspace):
        raise EnvironmentError("ForTesting")

    def define_interface(self):
        pass


class TestLossFuncSolver1(HyppopySolver):
    """Fixture calling loss_function without parameters from execute_solver."""

    def __init__(self, project=None):
        project = HyppopyProject({})
        super().__init__(project)
        self._searchspace = None

    def convert_searchspace(self, hyperparameter):
        pass

    def execute_solver(self, searchspace):
        self.loss_function(**{})

    def define_interface(self):
        pass


class TestLossFuncSolver2(HyppopySolver):
    """Fixture calling loss_function without parameters from execute_solver."""

    def __init__(self, project=None):
        project = HyppopyProject({})
        super().__init__(project)
        self._searchspace = None

    def convert_searchspace(self, hyperparameter):
        pass

    def execute_solver(self, searchspace):
        self.loss_function(**{})

    def define_interface(self):
        pass


class HyppopySolverTestSuite(unittest.TestCase):
    """Unit tests checking the interface enforcement and the run behaviour of the HyppopySolver base class."""

    def setUp(self):
        pass

    def test_class(self):
        """Incomplete solver implementations and invalid member definitions have to be rejected."""
        for incomplete in (HyppopySolver, FooSolver1, FooSolver2):
            self.assertRaises(NotImplementedError, incomplete)
        foo = FooSolver4()
        self.assertRaises(NotImplementedError, foo.execute_solver, {})
        for invalid in (GooSolver1, GooSolver2):
            self.assertRaises(AssertionError, invalid)

    def test_check_project(self):
        """Project configs not matching the declared interface have to be rejected on construction."""
        expectations = (
            (LookupError, TestSolver1),
            (LookupError, TestSolver2),
            (TypeError, TestSolver3),
            (LookupError, TestSolver4),
        )
        for error, fixture in expectations:
            self.assertRaises(error, fixture)

    def test_run(self):
        """Errors inside convert_searchspace or execute_solver have to surface as AssertionError from run."""
        solver = TestRunSolver1()
        self.assertRaises(AssertionError, solver.run)
        solver = TestRunSolver2()
        self.assertRaises(AssertionError, solver.run)
        self.assertRaises(TypeError, solver.project, 100)
        self.assertRaises(TypeError, solver.blackbox, 100)
        self.assertRaises(TypeError, solver.best, 100)

    def test_lossfunccall(self):
        """Running the loss function fixtures must not raise."""
        TestLossFuncSolver1().run(print_stats=False)
        TestLossFuncSolver2().run(print_stats=False)