diff --git a/doc/_static/class_diagram.png b/doc/_static/class_diagram.png new file mode 100644 index 0000000..67a55cd Binary files /dev/null and b/doc/_static/class_diagram.png differ diff --git a/doc/conf.py b/doc/conf.py index 60de06a..f181e57 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,207 +1,208 @@ # -*- coding: utf-8 -*- # # Configuration file for the Sphinx documentation builder. # # This file does only contain a selection of the most common options. For a # full list see the documentation: # http://www.sphinx-doc.org/en/master/config # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os import sys from shutil import copyfile -ROOT = os.path.abspath('..') -print("ROOT", ROOT) +ROOT = os.path.abspath('../') +sys.path.insert(0, os.path.abspath('.')) sys.path.insert(0, ROOT) README_PATH_SRC = os.path.join(ROOT, "README.md") README_PATH_DST = os.path.join(ROOT, *("doc", "README.md")) print("copy", README_PATH_SRC, "to", README_PATH_DST) try: copyfile(README_PATH_SRC, README_PATH_DST) except: print("Missing README.md file in subdir!") LICENSE_PATH_SRC = os.path.join(ROOT, "LICENSE") LICENSE_PATH_DST = os.path.join(ROOT, *("doc", "LICENSE.rst")) print("copy", LICENSE_PATH_SRC, "to", LICENSE_PATH_DST) try: copyfile(LICENSE_PATH_SRC, LICENSE_PATH_DST) except: print("Missing LICENSE file in subdir!") # -- Project information ----------------------------------------------------- project = 'Hyppopy' copyright = '2019, DKFZ' author = 'S. Wanner' # The short X.Y version version = '0.5' # The full version, including alpha/beta/rc tags release = '0.5.0' # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'recommonmark', - 'autoapi.extension' + 'autoapi.extension', + 'sphinx.ext.napoleon', ] autoapi_type = 'python' autoapi_dirs = [ROOT, ''] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = '.rst' # The master toctree document. master_doc = 'index' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = None # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'alabaster' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # # html_theme_options = {} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # The default sidebars (for documents that don't match any pattern) are # defined by theme itself. Builtin themes are using these templates by # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', # 'searchbox.html']``. # # html_sidebars = {} # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. htmlhelp_basename = 'Hyppopydoc' # -- Options for LaTeX output ------------------------------------------------ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'Hyppopy.tex', 'Hyppopy Documentation', 'S. Wanner', 'manual'), ] # -- Options for manual page output ------------------------------------------ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'hyppopy', 'Hyppopy Documentation', [author], 1) ] # -- Options for Texinfo output ---------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'Hyppopy', 'Hyppopy Documentation', author, 'Hyppopy', 'One line description of project.', 'Miscellaneous'), ] # -- Options for Epub output ------------------------------------------------- # Bibliographic Dublin Core info. epub_title = project # The unique identifier of the text. This can be a ISBN number # or the project homepage. # # epub_identifier = '' # A unique identification for the text. # # epub_uid = '' # A list of files that should not be packed into the epub file. epub_exclude_files = ['search.html'] # -- Extension configuration ------------------------------------------------- # -- Options for todo extension ---------------------------------------------- # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = True diff --git a/hyppopy/ProjectManager.py b/hyppopy/ProjectManager.py deleted file mode 100644 index f37b260..0000000 --- a/hyppopy/ProjectManager.py +++ /dev/null @@ -1,68 +0,0 @@ -# Hyppopy - A Hyper-Parameter Optimization Toolbox -# -# Copyright (c) German Cancer Research Center, -# Division of Medical Image Computing. -# All rights reserved. -# -# This software is distributed WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. -# -# See LICENSE - -from .Singleton import * - -import os -import logging -from hyppopy.HyppopyProject import HyppopyProject -from hyppopy.globals import DEBUGLEVEL - -__all__ = ['ProjectManager'] - -LOG = logging.getLogger(os.path.basename(__file__)) -LOG.setLevel(DEBUGLEVEL) - - -@singleton_object -class ProjectManager(metaclass=Singleton): - - def __init__(self): - self._current_project = None - self._projects = {} - - def clear_all(self): - pass - - def new_project(self, name="HyppopyProject", config=None): - if name in self._projects.keys(): - name = self.check_projectname(name) - self._projects[name] = HyppopyProject(config) - self._current_project = self._projects[name] - return self._current_project - - def check_projectname(self, name): - split = name.split(".") - if len(split) == 0: - return split[0] + "." + str(0).zfill(3) - else: - try: - number = int(split[-1]) - del split[-1] - except: - number = 0 - return '.'.join(split) + "." + str(number).zfill(3) - - def get_current(self): - if self._current_project is None: - self.new_project() - return self._current_project - - def get_project(self, name): - if name in self._projects.keys(): - self._current_project = self._projects[name] - return self.get_current() - return self.new_project(name) - - def get_projectnames(self): - return self._projects.keys() - diff --git a/hyppopy/SolverPool.py b/hyppopy/SolverPool.py index d0d62de..71868bc 100644 --- a/hyppopy/SolverPool.py +++ b/hyppopy/SolverPool.py @@ -1,97 +1,96 @@ # Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE __all__ = ['SolverPool'] from .Singleton import * import os import logging from hyppopy.HyppopyProject import HyppopyProject from hyppopy.solvers.OptunaSolver import OptunaSolver from hyppopy.solvers.HyperoptSolver import HyperoptSolver from hyppopy.solvers.OptunitySolver import OptunitySolver from hyppopy.solvers.GridsearchSolver import GridsearchSolver from hyppopy.solvers.RandomsearchSolver import RandomsearchSolver from hyppopy.solvers.QuasiRandomsearchSolver import QuasiRandomsearchSolver from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) @singleton_object class SolverPool(metaclass=Singleton): """ The SolverPool is a helper singleton class to get the desired solver either by name and a HyppopyProject instance or by a HyppopyProject instance only, if it defines a setting field called solver. """ def __init__(self): self._solver_list = ["hyperopt", "optunity", "optuna", "randomsearch", "quasirandomsearch", "gridsearch"] def get_solver_names(self): """ Returns a list of available solvers :return: [list] solver list """ return self._solver_list def get(self, solver_name=None, project=None): """ Get the configured solver instance :param solver_name: [str] solver name, if None, the project must have an attribute solver keeping the solver name, default=None :param project: [HyppopyProject] HyppopyProject instance :return: [HyppopySolver] the configured solver instance """ if solver_name is not None: assert isinstance(solver_name, str), "precondition violation, solver_name type str expected, got {} instead!".format(type(solver_name)) if project is not None: assert isinstance(project, HyppopyProject), "precondition violation, project type HyppopyProject expected, got {} instead!".format(type(project)) if "solver" in project.__dict__: solver_name = project.solver if solver_name not in self._solver_list: raise AssertionError("Solver named [{}] not implemented!".format(solver_name)) if solver_name == "hyperopt": if project is not None: return HyperoptSolver(project) return HyperoptSolver() elif solver_name == "optunity": if project is not None: return OptunitySolver(project) return OptunitySolver() elif solver_name == "optuna": if project is not None: return OptunaSolver(project) return OptunaSolver() elif solver_name == "gridsearch": if project is not None: return GridsearchSolver(project) return GridsearchSolver() elif solver_name == "randomsearch": if project is not None: return RandomsearchSolver(project) return RandomsearchSolver() elif solver_name == "quasirandomsearch": if project is not None: return QuasiRandomsearchSolver(project) return QuasiRandomsearchSolver() - diff --git a/hyppopy/solvers/GridsearchSolver.py b/hyppopy/solvers/GridsearchSolver.py index 18560a7..1b43ea2 100644 --- a/hyppopy/solvers/GridsearchSolver.py +++ b/hyppopy/solvers/GridsearchSolver.py @@ -1,196 +1,206 @@ # Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE import os import logging import warnings import numpy as np from pprint import pformat from scipy.stats import norm from itertools import product from hyppopy.globals import DEBUGLEVEL, DEFAULTGRIDFREQUENCY from hyppopy.solvers.HyppopySolver import HyppopySolver LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) def get_uniform_axis_sample(a, b, N, dtype): """ - returns a uniform sample x(n) in the range [a,b] sampled at N pojnts + Returns a uniform sample x(n) in the range [a,b] sampled at N pojnts + :param a: left value range bound :param b: right value range bound :param N: discretization of intervall [a,b] :param dtype: data type + :return: [list] axis range """ assert a < b, "condition a < b violated!" assert isinstance(N, int), "condition N of type int violated!" if dtype is int: return list(np.linspace(a, b, N).astype(int)) elif dtype is float: return list(np.linspace(a, b, N)) else: raise AssertionError("dtype {} not supported for uniform sampling!".format(dtype)) def get_norm_cdf(N): """ - returns a normed gaussian cdf (range [0,1]) with N sampling points + Returns a normed gaussian cdf (range [0,1]) with N sampling points + :param N: sampling points + :return: [ndarray] gaussian cdf function values """ assert isinstance(N, int), "condition N of type int violated!" even = True if N % 2 != 0: N -= 1 even = False N = int(N/2) sigma = 1/3 x = np.linspace(0, 1, N) y1 = norm.cdf(x, loc=0, scale=sigma)-0.5 if not even: y1 = np.append(y1, [0.5]) y2 = 1-(norm.cdf(x, loc=0, scale=sigma)-0.5) y2 = np.flip(y2, axis=0) y = np.concatenate((y1, y2), axis=0) return y def get_gaussian_axis_sample(a, b, N, dtype): """ - returns a function value f(n) where f is a gaussian cdf in range [a, b] and N sampling points + Returns a function value f(n) where f is a gaussian cdf in range [a, b] and N sampling points + :param a: left value range bound :param b: right value range bound :param N: discretization of intervall [a,b] :param dtype: data type + :return: [list] axis range """ assert a < b, "condition a < b violated!" assert isinstance(N, int), "condition N of type int violated!" data = [] for n in range(N): x = a + get_norm_cdf(N)[n]*(b-a) if dtype is int: data.append(int(x)) elif dtype is float: data.append(x) else: raise AssertionError("dtype {} not supported for uniform sampling!".format(dtype)) return data def get_logarithmic_axis_sample(a, b, N, dtype): """ - returns a function value f(n) where f is logarithmic function e^x sampling + Returns a function value f(n) where f is logarithmic function e^x sampling the exponent range [log(a), log(b)] linear at N sampling points. The function values returned are in the range [a, b]. + :param a: left value range bound :param b: right value range bound :param N: discretization of intervall [a,b] :param dtype: data type + :return: [list] axis range """ assert a < b, "condition a < b violated!" assert a > 0, "condition a > 0 violated!" assert isinstance(N, int), "condition N of type int violated!" # convert input range into exponent range lexp = np.log(a) rexp = np.log(b) exp_range = np.linspace(lexp, rexp, N) data = [] for n in range(exp_range.shape[0]): x = np.exp(exp_range[n]) if dtype is int: data.append(int(x)) elif dtype is float: data.append(x) else: raise AssertionError("dtype {} not supported for uniform sampling!".format(dtype)) return data class GridsearchSolver(HyppopySolver): """ The GridsearchSolver class implements a gridsearch optimization. The gridsearch supports categorical, uniform, normal and loguniform sampling. To use the GridsearchSolver, besides a range, one must specifiy the number of samples in the domain, e.g. 'data': [0, 1, 100] """ def __init__(self, project=None): HyppopySolver.__init__(self, project) def define_interface(self): self._add_hyperparameter_signature(name="domain", dtype=str, options=["uniform", "normal", "loguniform", "categorical"]) self._add_hyperparameter_signature(name="data", dtype=list) self._add_hyperparameter_signature(name="frequency", dtype=int) self._add_hyperparameter_signature(name="type", dtype=type) def loss_function_call(self, params): loss = self.blackbox(**params) if loss is None: return np.nan return loss def execute_solver(self, searchspace): for x in product(*searchspace[1]): params = {} for name, value in zip(searchspace[0], x): params[name] = value try: self.loss_function(**params) except Exception as e: msg = "internal error in randomsearch execute_solver occured. {}".format(e) LOG.error(msg) raise BrokenPipeError(msg) self.best = self._trials.argmin def convert_searchspace(self, hyperparameter): """ - the function converts the standard parameter input into a range list depending + The function converts the standard parameter input into a range list depending on the domain. These rangelists are later used with itertools product to create a paramater space sample of each combination. + :param hyperparameter: [dict] hyperparameter space + :return: [list] name and range for each parameter space axis """ LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) searchspace = [[], []] for name, param in hyperparameter.items(): if param["domain"] != "categorical" and "frequency" not in param.keys(): param["frequency"] = DEFAULTGRIDFREQUENCY warnings.warn("No frequency field found, used default gridsearch frequency {}".format(DEFAULTGRIDFREQUENCY)) if param["domain"] == "categorical": searchspace[0].append(name) searchspace[1].append(param["data"]) elif param["domain"] == "uniform": searchspace[0].append(name) searchspace[1].append(get_uniform_axis_sample(param["data"][0], param["data"][1], param["frequency"], param["type"])) elif param["domain"] == "normal": searchspace[0].append(name) searchspace[1].append(get_gaussian_axis_sample(param["data"][0], param["data"][1], param["frequency"], param["type"])) elif param["domain"] == "loguniform": searchspace[0].append(name) searchspace[1].append(get_logarithmic_axis_sample(param["data"][0], param["data"][1], param["frequency"], param["type"])) return searchspace diff --git a/hyppopy/solvers/QuasiRandomsearchSolver.py b/hyppopy/solvers/QuasiRandomsearchSolver.py index 4d9b857..b9159ec 100644 --- a/hyppopy/solvers/QuasiRandomsearchSolver.py +++ b/hyppopy/solvers/QuasiRandomsearchSolver.py @@ -1,181 +1,201 @@ # Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE +__all__ = ['HaltonSequenceGenerator', 'QuasiRandomSampleGenerator', 'QuasiRandomsearchSolver'] + import os import logging import warnings import numpy as np from pprint import pformat from hyppopy.globals import DEBUGLEVEL from hyppopy.solvers.HyppopySolver import HyppopySolver LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) -def get_loguniform_ranges(a, b, N): - aL = np.log(a) - bL = np.log(b) - exp_range = np.linspace(aL, bL, N+1) - ranges = [] - for i in range(N): - ranges.append([np.exp(exp_range[i]), np.exp(exp_range[i+1])]) - return ranges - - class HaltonSequenceGenerator(object): + """ + This class generates Halton sequences (https://en.wikipedia.org/wiki/Halton_sequence). The class needs a total + number of samples and the number of dimensions to generate a quasirandom sequence for each axis. The method + get_unit_space returns a sequence list with N_samples for each axis representing N_samples vectors on a unit sphere. + """ + def __init__(self): + pass - def __init__(self, N_samples, dimensions): - self._N = N_samples - self._dims = dimensions - - def next_prime(self): + def __next_prime(self): + """ + Checks if num is a prime value + """ def is_prime(num): - "Checks if num is a prime value" for i in range(2, int(num ** 0.5) + 1): if (num % i) == 0: return False return True prime = 3 while 1: if is_prime(prime): yield prime prime += 2 - def vdc(self, n, base): + def __vdc(self, n, base): vdc, denom = 0, 1 while n: denom *= base n, remainder = divmod(n, base) vdc += remainder / float(denom) return vdc - def get_sequence(self): + def get_unit_space(self, N_samples, N_dims): + """ + Returns a unit space in form of a sequence list keeping N_dims sequences with N_sample samplings. Each sample + represents a N_dims dimensional vector on a unit sphere. + + :param N_samples: [int] Number of samples + :param N_dims: [int] Number of dimensions + + :return: [list] samples list of length N_dims keeping lists each of length N_samples + """ seq = [] - primeGen = self.next_prime() + primeGen = self.__next_prime() next(primeGen) - for d in range(self._dims): + for d in range(N_dims): base = next(primeGen) - seq.append([self.vdc(i, base) for i in range(self._N)]) + seq.append([self.__vdc(i, base) for i in range(N_samples)]) return seq class QuasiRandomSampleGenerator(object): - + """ + This class takes care of the hyperparameter space creation and next sample delivery. + """ def __init__(self, N_samples=None): self._axis = None self._samples = [] self._numerical = [] self._categorical = [] self._N_samples = N_samples def set_axis(self, name, data, domain, dtype): + """ + Add an axis description. + + :param name: [str] axis name + :param data: [list] axis range [min, max] + :param domain: [str] axis domain + :param dtype: [type] axis data type + """ if domain == "categorical": if dtype is int: data = [int(i) for i in data] elif dtype is str: data = [str(i) for i in data] elif dtype is float: data = [float(i) for i in data] self._categorical.append({"name": name, "data": data, "type": dtype}) else: self._numerical.append({"name": name, "data": data, "type": dtype, "domain": domain}) def generate_samples(self, N_samples=None): + """ + This function is called once when the first sample is requested. It generates the halton sequence space. + + :param N_samples: [int] number of samples + """ self._axis = [] if N_samples is None: assert isinstance(self._N_samples, int), "Precondition violation, no number of samples specified!" else: self._N_samples = N_samples axis_samples = {} if len(self._numerical) > 0: - generator = HaltonSequenceGenerator(self._N_samples, len(self._numerical)) - unit_space = generator.get_sequence() + generator = HaltonSequenceGenerator() + unit_space = generator.get_unit_space(self._N_samples, len(self._numerical)) for n, axis in enumerate(self._numerical): width = abs(axis["data"][1] - axis["data"][0]) unit_space[n] = [x * width for x in unit_space[n]] unit_space[n] = [x + axis["data"][0] for x in unit_space[n]] if axis["type"] is int: unit_space[n] = [int(round(x)) for x in unit_space[n]] axis_samples[axis["name"]] = unit_space[n] else: warnings.warn("No numerical axis defined, this warning can be ignored if searchspace is categorical only, otherwise check if axis was set!") for n in range(self._N_samples): sample = {} for name, data in axis_samples.items(): sample[name] = data[n] for cat in self._categorical: choice = np.random.choice(len(cat["data"]), 1)[0] sample[cat["name"]] = cat["data"][choice] self._samples.append(sample) def next(self): + """ + Returns the next sample. Returns None if all samples are requested. + + :return: [dict] sample dict {'name':value, ...} + """ if len(self._samples) == 0: self.generate_samples() if len(self._samples) == 0: return None next_index = np.random.choice(len(self._samples), 1)[0] sample = self._samples.pop(next_index) return sample class QuasiRandomsearchSolver(HyppopySolver): """ The QuasiRandomsearchSolver class implements a quasi randomsearch optimization. The quasi randomsearch supports - categorical, uniform, normal and loguniform sampling. The solver defines a grid which size and appearance depends - on the max_iterations parameter and the domain. The at each grid box a random value is drawn. This ensures both, - random parameter samples with the cosntraint that the space is evenly sampled and cluster building prevention.""" + categorical and uniform sampling. The solver defines a Halton Sequence distributed hyperparameter space. This + means a rather evenly distributed space sampling but no real randomness. + """ def __init__(self, project=None): HyppopySolver.__init__(self, project) self._sampler = None def define_interface(self): self._add_member("max_iterations", int) self._add_hyperparameter_signature(name="domain", dtype=str, options=["uniform", "categorical"]) self._add_hyperparameter_signature(name="data", dtype=list) self._add_hyperparameter_signature(name="type", dtype=type) def loss_function_call(self, params): loss = self.blackbox(**params) if loss is None: return np.nan return loss def execute_solver(self, searchspace): N = self.max_iterations self._sampler = QuasiRandomSampleGenerator(N) for name, axis in searchspace.items(): self._sampler.set_axis(name, axis["data"], axis["domain"], axis["type"]) try: for n in range(N): params = self._sampler.next() if params is None: break self.loss_function(**params) except Exception as e: msg = "internal error in randomsearch execute_solver occured. {}".format(e) LOG.error(msg) raise BrokenPipeError(msg) self.best = self._trials.argmin def convert_searchspace(self, hyperparameter): - """ - this function simply pipes the input parameter through, the sample - drawing functions are responsible for interpreting the parameter. - :param hyperparameter: [dict] hyperparameter space - :return: [dict] hyperparameter space - """ LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) return hyperparameter diff --git a/hyppopy/solvers/RandomsearchSolver.py b/hyppopy/solvers/RandomsearchSolver.py index afa9f9a..abbf85d 100644 --- a/hyppopy/solvers/RandomsearchSolver.py +++ b/hyppopy/solvers/RandomsearchSolver.py @@ -1,160 +1,172 @@ # Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE +__all__ = ['RandomsearchSolver', + 'draw_uniform_sample', + 'draw_normal_sample', + 'draw_loguniform_sample', + 'draw_categorical_sample', + 'draw_sample'] + import os import copy import random import logging import numpy as np from pprint import pformat from hyppopy.globals import DEBUGLEVEL from hyppopy.solvers.HyppopySolver import HyppopySolver LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) def draw_uniform_sample(param): """ - function draws a random sample from a uniform range + Function draws a random sample from a uniform range + :param param: [dict] input hyperparameter discription + :return: random sample value of type data['type'] """ assert param['type'] is not str, "cannot sample a string list!" assert param['data'][0] < param['data'][1], "precondition violation: data[0] > data[1]!" s = random.random() s *= np.abs(param['data'][1] - param['data'][0]) s += param['data'][0] if param['type'] is int: s = int(np.round(s)) if s < param['data'][0]: s = int(param['data'][0]) if s > param['data'][1]: s = int(param['data'][1]) return s def draw_normal_sample(param): """ - function draws a random sample from a normal distributed range + Function draws a random sample from a normal distributed range + :param param: [dict] input hyperparameter discription + :return: random sample value of type data['type'] """ assert param['type'] is not str, "cannot sample a string list!" assert param['data'][0] < param['data'][1], "precondition violation: data[0] > data[1]!" mu = (param['data'][1] - param['data'][0]) / 2 sigma = mu / 3 s = np.random.normal(loc=param['data'][0] + mu, scale=sigma) if s > param['data'][1]: s = param['data'][1] if s < param['data'][0]: s = param['data'][0] s = float(s) if param["type"] is int: s = int(np.round(s)) return s def draw_loguniform_sample(param): """ - function draws a random sample from a logarithmic distributed range + Function draws a random sample from a logarithmic distributed range + :param param: [dict] input hyperparameter discription + :return: random sample value of type data['type'] """ assert param['type'] is not str, "cannot sample a string list!" assert param['data'][0] < param['data'][1], "precondition violation: data[0] > data[1]!" p = copy.deepcopy(param) p['data'][0] = np.log(param['data'][0]) p['data'][1] = np.log(param['data'][1]) assert p['data'][0] is not np.nan, "Precondition violation, left bound input error, results in nan!" assert p['data'][1] is not np.nan, "Precondition violation, right bound input error, results in nan!" x = draw_uniform_sample(p) s = np.exp(x) if s > param['data'][1]: s = param['data'][1] if s < param['data'][0]: s = param['data'][0] return s def draw_categorical_sample(param): """ - function draws a random sample from a categorical list + Function draws a random sample from a categorical list + :param param: [dict] input hyperparameter discription + :return: random sample value of type data['type'] """ return random.sample(param['data'], 1)[0] def draw_sample(param): """ - function draws a sample from the input hyperparameter descriptor depending on it's domain + Function draws a sample from the input hyperparameter descriptor depending on it's domain + :param param: [dict] input hyperparameter discription + :return: random sample value of type data['type'] """ assert isinstance(param, dict), "input error, hyperparam descriptors of type {} not allowed!".format(type(param)) if param['domain'] == "uniform": return draw_uniform_sample(param) elif param['domain'] == "normal": return draw_normal_sample(param) elif param['domain'] == "loguniform": return draw_loguniform_sample(param) elif param['domain'] == "categorical": return draw_categorical_sample(param) else: raise LookupError("Unknown domain {}".format(param['domain'])) class RandomsearchSolver(HyppopySolver): """ The RandomsearchSolver class implements a randomsearch optimization. The randomsearch supports categorical, uniform, normal and loguniform sampling. The solver draws an independent sample - from the parameter space each iteration.""" + from the parameter space each iteration. + """ def __init__(self, project=None): HyppopySolver.__init__(self, project) def define_interface(self): self._add_member("max_iterations", int) self._add_hyperparameter_signature(name="domain", dtype=str, options=["uniform", "normal", "loguniform", "categorical"]) self._add_hyperparameter_signature(name="data", dtype=list) self._add_hyperparameter_signature(name="type", dtype=type) def loss_function_call(self, params): loss = self.blackbox(**params) if loss is None: return np.nan return loss def execute_solver(self, searchspace): N = self.max_iterations try: for n in range(N): params = {} for name, p in searchspace.items(): params[name] = draw_sample(p) self.loss_function(**params) except Exception as e: msg = "internal error in randomsearch execute_solver occured. {}".format(e) LOG.error(msg) raise BrokenPipeError(msg) self.best = self._trials.argmin def convert_searchspace(self, hyperparameter): - """ - this function simply pipes the input parameter through, the sample - drawing functions are responsible for interpreting the parameter. - :param hyperparameter: [dict] hyperparameter space - :return: [dict] hyperparameter space - """ LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) return hyperparameter diff --git a/hyppopy/tests/test_randomsearchsolver.py b/hyppopy/tests/test_randomsearchsolver.py index 18ca58e..0be138c 100644 --- a/hyppopy/tests/test_randomsearchsolver.py +++ b/hyppopy/tests/test_randomsearchsolver.py @@ -1,164 +1,165 @@ # Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE import unittest +import numpy as np import matplotlib.pylab as plt from hyppopy.solvers.RandomsearchSolver import * from hyppopy.FunctionSimulator import FunctionSimulator from hyppopy.HyppopyProject import HyppopyProject class RandomsearchTestSuite(unittest.TestCase): def setUp(self): pass def test_draw_uniform_sample(self): param = {"data": [0, 1, 10], "type": float} values = [] for i in range(10000): values.append(draw_uniform_sample(param)) self.assertTrue(0 <= values[-1] <= 1) self.assertTrue(isinstance(values[-1], float)) hist = plt.hist(values, bins=10, normed=True) std = np.std(hist[0]) mean = np.mean(hist[0]) self.assertTrue(std < 0.05) self.assertTrue(0.9 < mean < 1.1) param = {"data": [0, 10, 11], "type": int} values = [] for i in range(10000): values.append(draw_uniform_sample(param)) self.assertTrue(0 <= values[-1] <= 10) self.assertTrue(isinstance(values[-1], int)) hist = plt.hist(values, bins=11, normed=True) std = np.std(hist[0]) mean = np.mean(hist[0]) self.assertTrue(std < 0.05) self.assertTrue(0.09 < mean < 0.11) def test_draw_normal_sample(self): param = {"data": [0, 10, 11], "type": int} values = [] for i in range(10000): values.append(draw_normal_sample(param)) self.assertTrue(0 <= values[-1] <= 10) self.assertTrue(isinstance(values[-1], int)) hist = plt.hist(values, bins=11, normed=True) for i in range(1, 5): self.assertTrue(hist[0][i-1]-hist[0][i] < 0) for i in range(5, 10): self.assertTrue(hist[0][i] - hist[0][i+1] > 0) def test_draw_loguniform_sample(self): param = {"data": [1, 1000, 11], "type": float} values = [] for i in range(10000): values.append(draw_loguniform_sample(param)) self.assertTrue(1 <= values[-1] <= 1000) self.assertTrue(isinstance(values[-1], float)) hist = plt.hist(values, bins=11, normed=True) for i in range(4): self.assertTrue(hist[0][i] > hist[0][i+1]) self.assertTrue((hist[0][i] - hist[0][i+1]) > 0) def test_draw_categorical_sample(self): param = {"data": [1, 2, 3], "type": int} values = [] for i in range(10000): values.append(draw_categorical_sample(param)) self.assertTrue(values[-1] == 1 or values[-1] == 2 or values[-1] == 3) self.assertTrue(isinstance(values[-1], int)) hist = plt.hist(values, bins=3, normed=True) for i in range(3): self.assertTrue(0.45 < hist[0][i] < 0.55) def test_solver_uniform(self): config = { "hyperparameter": { "axis_00": { "domain": "uniform", "data": [0, 800], "type": float }, "axis_01": { "domain": "uniform", "data": [-1, 1], "type": float }, "axis_02": { "domain": "uniform", "data": [0, 10], "type": float } }, "max_iterations": 300 } project = HyppopyProject(config) solver = RandomsearchSolver(project) vfunc = FunctionSimulator() vfunc.load_default() solver.blackbox = vfunc solver.run(print_stats=False) df, best = solver.get_results() self.assertTrue(0 <= best['axis_00'] <= 800) self.assertTrue(-1 <= best['axis_01'] <= 1) self.assertTrue(0 <= best['axis_02'] <= 10) for status in df['status']: self.assertTrue(status) for loss in df['losses']: self.assertTrue(isinstance(loss, float)) def test_solver_normal(self): config = { "hyperparameter": { "axis_00": { "domain": "normal", "data": [500, 650], "type": float }, "axis_01": { "domain": "normal", "data": [0, 1], "type": float }, "axis_02": { "domain": "normal", "data": [4, 5], "type": float } }, "max_iterations": 500, } solver = RandomsearchSolver(config) vfunc = FunctionSimulator() vfunc.load_default() solver.blackbox = vfunc solver.run(print_stats=False) df, best = solver.get_results() self.assertTrue(500 <= best['axis_00'] <= 650) self.assertTrue(0 <= best['axis_01'] <= 1) self.assertTrue(4 <= best['axis_02'] <= 5) for status in df['status']: self.assertTrue(status) for loss in df['losses']: self.assertTrue(isinstance(loss, float)) if __name__ == '__main__': unittest.main()