diff --git a/hyppopy/MPIBlackboxFunction.py b/hyppopy/MPIBlackboxFunction.py
index 182a46d..1bd583a 100644
--- a/hyppopy/MPIBlackboxFunction.py
+++ b/hyppopy/MPIBlackboxFunction.py
@@ -1,167 +1,157 @@
# Hyppopy - A Hyper-Parameter Optimization Toolbox
#
# Copyright (c) German Cancer Research Center,
# Division of Medical Image Computing.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE

__all__ = ['MPIBlackboxFunction']

import os
import logging
import functools
from hyppopy.globals import DEBUGLEVEL, MPI_TAGS

from mpi4py import MPI

LOG = logging.getLogger(os.path.basename(__file__))
LOG.setLevel(DEBUGLEVEL)


def default_kwargs(**defaultKwargs):
    """
    Decorator defining default args in **kwargs arguments
    """
    def actual_decorator(fn):
        @functools.wraps(fn)
        def g(*args, **kwargs):
            defaultKwargs.update(kwargs)
            return fn(*args, **defaultKwargs)
        return g
    return actual_decorator
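
The decorator above merges the defaults given at decoration time into the kwargs of each call. A minimal usage sketch (the function name demo and its defaults are illustrative, not part of hyppopy); note that defaultKwargs is updated in place, so overrides from earlier calls carry over to later ones:

@default_kwargs(verbose=False, retries=3)
def demo(**kwargs):
    return kwargs

demo()              # {'verbose': False, 'retries': 3}
demo(verbose=True)  # {'verbose': True, 'retries': 3}; subsequent demo() calls now also see verbose=True
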
class MPIBlackboxFunction(object):
    """
-    This class is a BlackboxFunction wrapper class encapsulating the loss function. Additional function pointer can be
-    set to get access at different pipelining steps:
-
-    - dataloader_func: data loading, the function must return a data object and is called first when the solver is executed.
-      The data object returned will be the input of the blackbox function.
-    - preprocess_func: data preprocessing is called after dataloader_func, the functions signature must be foo(data, params)
-      and must return a data object. The input is the data object set directly or via dataloader_func,
-      the params are passed from constructor params.
-    - callback_func: this function is called at each iteration step getting passed the trail info content, can be used for
-      custom visualization
-    - data: add a data object directly
-
+    This class is a BlackboxFunction wrapper class encapsulating the loss function.
+    # TODO: complete class documentation

    The constructor accepts several function pointers or a data object which are all None by default (see below).
    Additionally one can define an arbitrary number of arg pairs. These are passed as input to each function
    pointer as arguments.

    :param dataloader_func: data loading function pointer, default=None
    :param preprocess_func: data preprocessing function pointer, default=None
    :param callback_func: callback function pointer, default=None
    :param data: data object, default=None
    :param kwargs: additional arg=value pairs
    """
    @default_kwargs(blackbox_func=None, dataloader_func=None, preprocess_func=None, callback_func=None, data=None)
    def __init__(self, **kwargs):
        self._blackbox_func = None
        self._preprocess_func = None
        self._dataloader_func = None
        self._callback_func = None
        self._raw_data = None
        self._data = None
        self.setup(kwargs)

    def __call__(self, **kwargs):
        """
        The call method calls blackbox_func, passing the data object and the args passed

        :param kwargs: [dict] args

        :return: blackbox_func(data, kwargs)
        """
        return self.blackbox_func(self.data, kwargs)

    @staticmethod
    def call_batch(candidates):
        results = dict()
        size = MPI.COMM_WORLD.Get_size()

        for i, candidate in enumerate(candidates):
            dest = (i % (size-1)) + 1
            MPI.COMM_WORLD.send(candidate, dest=dest, tag=MPI_TAGS.MPI_SEND_CANDIDATE.value)

        while True:
            for i in range(size - 1):
                if len(candidates) == len(results):
                    print('All results received!')
                    return results
                cand_id, result_dict = MPI.COMM_WORLD.recv(source=i + 1, tag=MPI_TAGS.MPI_SEND_RESULTS.value)
                results[cand_id] = result_dict

    def setup(self, kwargs):
        """
        Alternative to the constructor; for the kwargs signature see __init__

        :param kwargs: (see __init__)
        """
        self._blackbox_func = kwargs['blackbox_func']
        del kwargs['blackbox_func']

    @property
    def blackbox_func(self):
        """
        BlackboxFunction wrapper class encapsulating the loss function or a function accepting a hyperparameter set
        and returning a float.

        :return: [object] pointer to blackbox_func
        """
        return self._blackbox_func

    @property
    def preprocess_func(self):
        """
        Data preprocessing is called after dataloader_func, the functions signature must be foo(data, params)
        and must return a data object. The input is the data object set directly or via dataloader_func,
        the params are passed from constructor params.

        :return: [object] preprocess_func
        """
        return self._preprocess_func

    @property
    def dataloader_func(self):
        """
        Data loading, the function must return a data object and is called first when the solver is executed.
        The data object returned will be the input of the blackbox function.

        :return: [object] dataloader_func
        """
        return self._dataloader_func

    @property
    def callback_func(self):
        """
        This function is called at each iteration step getting passed the trial info content, can be used for
        custom visualization

        :return: [object] callback_func
        """
        return self._callback_func

    @property
    def raw_data(self):
        """
        This data structure is used to store the return from dataloader_func to serve as input for preprocess_func
        if available.

        :return: [object] raw_data
        """
        return self._raw_data

    @property
    def data(self):
        """
        Data structure keeping the input data.

        :return: [object] data
        """
        return self._data
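
For orientation, a hedged usage sketch of the wrapper above. The loss function my_loss and the hyperparameter name x are placeholders; distributing candidates with call_batch additionally assumes MPI worker ranks listening for MPI_SEND_CANDIDATE messages, which hyppopy's MPI solver machinery is expected to set up.

def my_loss(data, params):
    # toy loss, ignores data and scores one hyperparameter sample
    return (params["x"] - 2.0) ** 2

blackbox = MPIBlackboxFunction(blackbox_func=my_loss)
loss = blackbox(x=0.5)   # forwards to my_loss(None, {'x': 0.5}) -> 2.25
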
diff --git a/hyppopy/solvers/GridsearchSolver.py b/hyppopy/solvers/GridsearchSolver.py
index 1ad7650..f11df27 100644
--- a/hyppopy/solvers/GridsearchSolver.py
+++ b/hyppopy/solvers/GridsearchSolver.py
@@ -1,276 +1,234 @@
# Hyppopy - A Hyper-Parameter Optimization Toolbox
#
# Copyright (c) German Cancer Research Center,
# Division of Medical Image Computing.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE

import os
import logging
import warnings
import numpy as np
from pprint import pformat
from scipy.stats import norm
from itertools import product

from hyppopy.globals import DEBUGLEVEL, DEFAULTGRIDFREQUENCY
from hyppopy.solvers.HyppopySolver import HyppopySolver
from hyppopy.CandidateDescriptor import CandidateDescriptor

LOG = logging.getLogger(os.path.basename(__file__))
LOG.setLevel(DEBUGLEVEL)


def get_uniform_axis_sample(a, b, N, dtype):
    """
    Returns a uniform sample x(n) in the range [a,b] sampled at N points

    :param a: left value range bound
    :param b: right value range bound
    :param N: discretization of interval [a,b]
    :param dtype: data type

    :return: [list] axis range
    """
    assert a < b, "condition a < b violated!"
    assert isinstance(N, int), "condition N of type int violated!"
    if dtype is int:
        return list(np.linspace(a, b, N).astype(int))
    elif dtype is float:
        return list(np.linspace(a, b, N))
    else:
        raise AssertionError("dtype {} not supported for uniform sampling!".format(dtype))


def get_norm_cdf(N):
    """
    Returns a normed gaussian cdf (range [0,1]) with N sampling points

    :param N: sampling points

    :return: [ndarray] gaussian cdf function values
    """
    assert isinstance(N, int), "condition N of type int violated!"
    even = True
    if N % 2 != 0:
        N -= 1
        even = False
    N = int(N/2)
    sigma = 1/3
    x = np.linspace(0, 1, N)
    y1 = norm.cdf(x, loc=0, scale=sigma)-0.5
    if not even:
        y1 = np.append(y1, [0.5])
    y2 = 1-(norm.cdf(x, loc=0, scale=sigma)-0.5)
    y2 = np.flip(y2, axis=0)
    y = np.concatenate((y1, y2), axis=0)
    return y


def get_gaussian_axis_sample(a, b, N, dtype):
    """
    Returns a function value f(n) where f is a gaussian cdf in range [a, b] with N sampling points

    :param a: left value range bound
    :param b: right value range bound
    :param N: discretization of interval [a,b]
    :param dtype: data type

    :return: [list] axis range
    """
    assert a < b, "condition a < b violated!"
    assert isinstance(N, int), "condition N of type int violated!"

    data = []
    for n in range(N):
        x = a + get_norm_cdf(N)[n]*(b-a)
        if dtype is int:
            data.append(int(x))
        elif dtype is float:
            data.append(x)
        else:
            raise AssertionError("dtype {} not supported for gaussian sampling!".format(dtype))
    return data


def get_logarithmic_axis_sample(a, b, N, dtype):
    """
    Returns a function value f(n) where f is the logarithmic function e^x, sampling the exponent range
    [log(a), log(b)] linearly at N sampling points. The function values returned are in the range [a, b].

    :param a: left value range bound
    :param b: right value range bound
    :param N: discretization of interval [a,b]
    :param dtype: data type

    :return: [list] axis range
    """
    assert a < b, "condition a < b violated!"
    assert a > 0, "condition a > 0 violated!"
    assert isinstance(N, int), "condition N of type int violated!"

    # convert input range into exponent range
    lexp = np.log(a)
    rexp = np.log(b)
    exp_range = np.linspace(lexp, rexp, N)

    data = []
    for n in range(exp_range.shape[0]):
        x = np.exp(exp_range[n])
        if dtype is int:
            data.append(int(x))
        elif dtype is float:
            data.append(x)
        else:
            raise AssertionError("dtype {} not supported for logarithmic sampling!".format(dtype))
    return data
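
To make the behaviour of the three axis samplers above concrete, a small sketch (values rounded; assumes numpy and scipy are available as imported above):

print(get_uniform_axis_sample(0, 10, 5, int))         # [0, 2, 5, 7, 10]
print(get_logarithmic_axis_sample(1, 100, 3, float))  # [1.0, 10.0, 100.0]
print(get_gaussian_axis_sample(0, 1, 5, float))       # endpoints at 0 and 1, interior points clustered around 0.5
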
class GridsearchSolver(HyppopySolver):
    """
    The GridsearchSolver class implements a gridsearch optimization. The gridsearch supports categorical, uniform,
    normal and loguniform sampling. To use the GridsearchSolver, besides a range, one must specify the number of
    samples in the domain, e.g. 'data': [0, 1, 100]
    """

    def __init__(self, project=None):
        """
        The constructor accepts a HyppopyProject.

        :param project: [HyppopyProject] project instance, default=None
        """
        HyppopySolver.__init__(self, project)

    def define_interface(self):
        """
        This function is called when the HyppopySolver.__init__ function has finished. Child classes need to define
        their individual parameters here by calling the _add_member function for each class member variable that
        needs to be defined. Using _add_hyperparameter_signature, the structure of a hyperparameter the solver
        expects must be defined. Both members and hyperparameter signatures are checked later, before executing the
        solver, to ensure that the settings passed fulfill the solver's needs.
        """
        self._add_hyperparameter_signature(name="domain", dtype=str,
                                           options=["uniform", "normal", "loguniform", "categorical"])
        self._add_hyperparameter_signature(name="data", dtype=list)
        self._add_hyperparameter_signature(name="frequency", dtype=int)
        self._add_hyperparameter_signature(name="type", dtype=type)

-    def loss_function_call(self, params):
-        """
-        This function is called within the function loss_function and encapsulates the actual blackbox function call
-        in each iteration. The function loss_function takes care of the iteration driving and reporting, but each solver
-        lib might need some special treatment between the parameter set selection and the calling of the actual blackbox
-        function, e.g. parameter converting.
-        :param params: [dict] hyperparameter space sample e.g. {'p1': 0.123, 'p2': 3.87, ...}
-        :return: [float] loss
-        """
-        loss = self.blackbox(**params)
-        if loss is None:
-            return np.nan
-        return loss
-
-    def loss_function_call(self, params):
-        """
-        This function is called within the function loss_function and encapsulates the actual blackbox function call
-        in each iteration. The function loss_function takes care of the iteration driving and reporting, but each solver
-        lib might need some special treatment between the parameter set selection and the calling of the actual blackbox
-        function, e.g. parameter converting.
-        :param params: [dict] hyperparameter space sample e.g. {'p1': 0.123, 'p2': 3.87, ...}
-        :return: [float] loss
-        """
-        loss = self.blackbox(**params)
-        if loss is None:
-            return np.nan
-        return loss
-
-    def loss_function_call_batch(self, candidates):
-        """
-        This function is called within the function loss_function and encapsulates the actual blackbox function call
-        in each iteration. The function loss_function takes care of the iteration driving and reporting, but each solver
-        lib might need some special treatment between the parameter set selection and the calling of the actual blackbox
-        function, e.g. parameter converting.
-        :param params: [dict] hyperparameter space sample e.g. {'p1': 0.123, 'p2': 3.87, ...}
-        :return: [float] loss
-        """
-        loss = self.blackbox.call_batch(candidates)
-        if loss is None:
-            return np.nan
-        return loss
-
    def get_candidates(self, searchspace):
        """
        This function converts the searchspace to a candidate_list that can then be used to distribute via MPI.

        :param searchspace: converted hyperparameter space
        """
        candidates_list = list()
        candidates = [x for x in product(*searchspace[1])]
        for c in candidates:
            params = {}
            for name, value in zip(searchspace[0], c):
                params[name] = value
            candidates_list.append(CandidateDescriptor(**params))

        return candidates_list
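
The searchspace argument consumed by get_candidates above is the two-list structure produced by convert_searchspace further below: a list of parameter names and a list of per-axis value lists. A small sketch of the expansion it performs, using hypothetical parameter names; get_candidates wraps each resulting dict in a CandidateDescriptor.

from itertools import product

searchspace = [["lr", "kernel"], [[0.01, 0.1], ["linear", "rbf"]]]
for combination in product(*searchspace[1]):
    print(dict(zip(searchspace[0], combination)))
# {'lr': 0.01, 'kernel': 'linear'}
# {'lr': 0.01, 'kernel': 'rbf'}
# {'lr': 0.1, 'kernel': 'linear'}
# {'lr': 0.1, 'kernel': 'rbf'}
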
    def execute_solver(self, searchspace):
        """
        This function is called immediately after convert_searchspace and gets the output of the latter as input.
        Its purpose is to call the solver lib's main optimization function.

        :param searchspace: converted hyperparameter space
        """
        candidates = self.get_candidates(searchspace)

        try:
            self.loss_function_batch(candidates)
        except Exception as e:
-            msg = "internal error in grdsearch execute_solver occured. {}".format(e)
+            msg = "internal error in gridsearch execute_solver occurred. {}".format(e)
            LOG.error(msg)
            raise BrokenPipeError(msg)
        self.best = self._trials.argmin

    def convert_searchspace(self, hyperparameter):
        """
        The function converts the standard parameter input into a range list depending on the domain. These range
        lists are later used with itertools product to create a parameter space sample of each combination.

        :param hyperparameter: [dict] hyperparameter space
        :return: [list] name and range for each parameter space axis
        """
        LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter)))
        searchspace = [[], []]
        for name, param in hyperparameter.items():
            if param["domain"] != "categorical" and "frequency" not in param.keys():
                param["frequency"] = DEFAULTGRIDFREQUENCY
                warnings.warn("No frequency field found, using default gridsearch frequency {}".format(DEFAULTGRIDFREQUENCY))

            if param["domain"] == "categorical":
                searchspace[0].append(name)
                searchspace[1].append(param["data"])
            elif param["domain"] == "uniform":
                searchspace[0].append(name)
                searchspace[1].append(get_uniform_axis_sample(param["data"][0],
                                                              param["data"][1],
                                                              param["frequency"],
                                                              param["type"]))
            elif param["domain"] == "normal":
                searchspace[0].append(name)
                searchspace[1].append(get_gaussian_axis_sample(param["data"][0],
                                                               param["data"][1],
                                                               param["frequency"],
                                                               param["type"]))
            elif param["domain"] == "loguniform":
                searchspace[0].append(name)
                searchspace[1].append(get_logarithmic_axis_sample(param["data"][0],
                                                                  param["data"][1],
                                                                  param["frequency"],
                                                                  param["type"]))
        return searchspace
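
Putting the pieces together, a sketch of the hyperparameter block convert_searchspace expects; the parameter names, ranges and frequencies are illustrative, and wiring the block into a HyppopyProject is not shown here.

hyperparameter = {
    "C": {"domain": "loguniform", "data": [0.01, 100], "frequency": 5, "type": float},
    "gamma": {"domain": "uniform", "data": [0.0, 1.0], "frequency": 10, "type": float},
    "kernel": {"domain": "categorical", "data": ["linear", "rbf"], "type": str},
}
# convert_searchspace(hyperparameter) returns two parallel lists:
#   names -> ["C", "gamma", "kernel"]
#   axes  -> [get_logarithmic_axis_sample(0.01, 100, 5, float),
#             get_uniform_axis_sample(0.0, 1.0, 10, float),
#             ["linear", "rbf"]]
# get_candidates then expands these axes into 5 * 10 * 2 = 100 CandidateDescriptor objects.
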