diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..0780ab3 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,3 @@ +[run] +source=hyppopy +omit=hyppopy/tests/* \ No newline at end of file diff --git a/.gitignore b/.gitignore index 784180b..507df4e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,108 +1,111 @@ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # latex *.aux *.bbl *.blg *.log *.tcp # solver_comparison examples/solver_comparison/gfx/data_I examples/solver_comparison/gfx/data_II examples/solver_comparison/gfx/data_III *.vpp.lck .pytest_cache/ *.vpp.bak_* python_tests_xml +doc/CHANGELOG.md # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg .idea/ # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log # Sphinx documentation -docs/_build/ -doc/ +doc/_build/ +doc/LICENSE.rst +doc/README.md + # PyBuilder target/ #Ipython Notebook .ipynb_checkpoints #Pycharm files *.iml # merging stuff *.orig *~ # Paths in repository mcml.py # images etc *.tif *.nrrd *.caffemodel # C++ stuff build* *.user hyppopy/tests/test_snipped_000.py hyppopy/tests/test_snipped_001.py hyppopy/tests/test_snipped_002.py hyppopy/tests/test_snipped_003.py hyppopy/tests/test_snipped_004.py hyppopy/tests/test_snipped_005.py hyppopy/tests/test_snipped_006.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d245baa..5093a4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,16 +1,18 @@ +# Changelog + Release 0.5.0.0 - settings structure changed, additional settings now can be addded as 
additional entries in the config dict or using the methods add_setting or set_settings - sections solver and custom in config dict are removed completely - use_solver setting in config dict is renamed to solver - hyperparameter type now a native type, not a string anymore - automatic consistency check between config and solver conditions, each solver now defines its interface which is checked when executing the solver throwing exceptions if the project instance and the solver's interface don't work together - bayesOpt solver removed, extremely slow and not very good Release 0.4.2.0 New feature QuasiRandomSolver added. The QuasiRandomSolver provides a randomized gridsampling. This means that depending on max_iterations a grid over all numerical parameters is spanned and each cell is populated with a random value within the cell bounds for numerical and a random draw for each categorical parameter. This ensures a random sampling of the parameter space and a good space coverage without random cluster building. The solver also supports normal and loguniform sampling. \ No newline at end of file diff --git a/LICENSE b/LICENSE index 1422535..bc8ee80 100644 --- a/LICENSE +++ b/LICENSE @@ -1,38 +1,39 @@ -======================================================================= +License +======= + Copyright (c) 2019 German Cancer Research Center, Division of Medical Image Computing All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
* Neither the name of the German Cancer Research Center, nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -======================================================================= diff --git a/README.md b/README.md index 1e4e59b..9dcfc33 100644 --- a/README.md +++ b/README.md @@ -1,380 +1,384 @@                 ![docs_title_logo](./resources/docs_title_logo.png) # A Hyper-Parameter Optimization Toolbox
+## Project Status +[![Documentation Status](https://readthedocs.org/projects/hyppopy/badge/?version=latest)](https://hyppopy.readthedocs.io/en/latest/?badge=latest) +[![codecov](https://codecov.io/gh/mic-dkfz/hyppopy/branch/master/graph/badge.svg)](https://codecov.io/gh/mic-dkfz/hyppopy) + ## What is Hyppopy? Hyppopy is a python toolbox for blackbox optimization. Its purpose is to offer a unified and easy to use interface to a collection of solver libraries. Currently provided solvers are: * [Hyperopt](http://hyperopt.github.io/hyperopt/) * [Optunity](https://optunity.readthedocs.io/en/latest/user/index.html) * [Optuna](https://optuna.org/) * Quasi-Randomsearch Solver * Randomsearch Solver * Gridsearch Solver [See a solver analysis here: https://github.com/MIC-DKFZ/Hyppopy/blob/master/examples/solver_comparison/HyppopyReport.pdf] ## Installation 1. clone the [Hyppopy](https://github.com/MIC-DKFZ/Hyppopy) project from GitHub 2. (create a virtual environment), open a console (with your activated virtual env) and go to the hyppopy root folder 3. ```$ pip install -r requirements.txt``` 4. ```$ python setup.py install``` (for normal usage) or ```$ python setup.py develop``` (if you want to join the hyppopy development *hooray*) ## How to use Hyppopy? #### The Hyperparameterspace Hyppopy defines a common hyperparameterspace description, whatever solver is used.
A hyperparameter description includes the following fields: * domain: the domain defines how the solver samples the parameter space, options are: * uniform: samples the data range [a,b] evenly, where b>a * normal: samples the data range [a,b] using a normal distribution with mu=a+(b-a)/2, sigma=(b-a)/6, where b>a * loguniform: samples the data range [a,b] logarithmically using e^x by sampling the exponent range x=[log(a), log(b)] uniformly, where a>0 and b>a * categorical: is used to define a data list * data: in case of categorical domain data is a list, all other domains expect a range [a, b] * type: the parameter data type as a native Python type, e.g. int, float or str An exception must be kept in mind when using the GridsearchSolver. The gridsearch additionally needs a number of samples per domain, which must be set using the field: frequency. #### The HyppopyProject class The HyppopyProject class takes care of all settings necessary for the solver and your workflow. To set up a HyppopyProject instance we can use a nested dictionary or the class's member functions respectively. ```python # Import the HyppopyProject class from hyppopy.HyppopyProject import HyppopyProject # Create a nested dict with a section hyperparameter. We define a 2 dimensional # hyperparameter space with a numerical dimension named myNumber of type float and # a uniform sampling. The second dimension is a categorical parameter of type string. config = { "hyperparameter": { "myNumber": { "domain": "uniform", "data": [0, 100], "type": float }, "myOption": { "domain": "categorical", "data": ["a", "b", "c"], "type": str } }} # Create a HyppopyProject instance and pass the config dict to # the constructor. Alternatively one can use set_config method.
project = HyppopyProject(config=config) # We can also add hyperparameter using the add_hyperparameter method project = HyppopyProject() project.add_hyperparameter(name="myNumber", domain="uniform", data=[0, 100], dtype=float) project.add_hyperparameter(name="myOption", domain="categorical", data=["a", "b", "c"], dtype=str) ``` Additional settings for the solver or custom parameters can be set either as additional entries in the config dict, or via the methods set_settings or add_setting: ```python from hyppopy.HyppopyProject import HyppopyProject config = { "hyperparameter": { "myNumber": { "domain": "uniform", "data": [0, 100], "type": float }, "myOption": { "domain": "categorical", "data": ["a", "b", "c"], "type": str } }, "max_iterations": 500, "anything_you_want": 42 } project = HyppopyProject(config=config) print("max_iterations:", project.max_iterations) print("anything_you_want:", project.anything_you_want) #alternatively project = HyppopyProject() project.set_settings(max_iterations=500, anything_you_want=42) print("anything_you_want:", project.anything_you_want) #alternatively project = HyppopyProject() project.add_setting(name="max_iterations", value=500) project.add_setting(name="anything_you_want", value=42) print("anything_you_want:", project.anything_you_want) ``` #### The HyppopySolver classes Each solver is a child of the HyppopySolver class. This is only interesting if you're planning to write a new solver, we will discuss this in the section Solver Development. All solvers we can use to optimize our blackbox function are part of the module 'hyppopy.solver'. Below is a list of all solvers available along with their access key in squared brackets. 
* HyperoptSolver [hyperopt] _Bayes Optimization using Tree-Parzen Estimator, supports uniform, normal, loguniform and categorical parameter_ * OptunitySolver [optunity] _Particle Swarm Optimizer, supports uniform and categorical parameter_ * OptunaSolver [optuna] _Bayes Optimization, supports uniform and categorical parameter_ * RandomsearchSolver [randomsearch] _Naive randomized parameter search, supports uniform, normal, loguniform and categorical parameter_ * QuasiRandomsearchSolver [quasirandomsearch] _Randomized grid ensuring random sample drawing and a good space coverage, supports uniform, normal, loguniform and categorical parameter_ * GridsearchSolver [gridsearch] _Standard gridsearch, supports uniform, normal, loguniform and categorical parameter_ There are two options to get a solver, we can import directly from the hyppopy.solvers package or we use the SolverPool class. We look into both options by optimizing a simple function, starting with the direct import case. ```python # Import the HyppopyProject class from hyppopy.HyppopyProject import HyppopyProject # Import the HyperoptSolver class, in this case we use Hyperopt from hyppopy.solvers.HyperoptSolver import HyperoptSolver # Our function to optimize def my_loss_func(x, y): return x**2+y**2 # Creating a HyppopyProject instance project = HyppopyProject() project.add_hyperparameter(name="x", domain="uniform", data=[-10, 10], type=float) project.add_hyperparameter(name="y", domain="uniform", data=[-10, 10], type=float) project.add_setting(name="max_iterations", value=300) # create a solver instance solver = HyperoptSolver(project) # pass the loss function to the solver solver.blackbox = my_loss_func # run the solver solver.run() df, best = solver.get_results() print("\n") print("*"*100) print("Best Parameter Set:\n{}".format(best)) print("*"*100) ``` The SolverPool is a class keeping track of all solver classes. We have several options to ask the SolverPool for the desired solver.
We can add a setting called solver to our config or to the project instance respectively, or we can use the solver access key (see solver listing above) to ask for the solver directly. ```python # import the SolverPool class from hyppopy.SolverPool import SolverPool # Import the HyppopyProject class from hyppopy.HyppopyProject import HyppopyProject # Our function to optimize def my_loss_func(x, y): return x**2+y**2 # Creating a HyppopyProject instance project = HyppopyProject() project.add_hyperparameter(name="x", domain="uniform", data=[-10, 10], type=float) project.add_hyperparameter(name="y", domain="uniform", data=[-10, 10], type=float) project.set_settings(max_iterations=300, solver="hyperopt") # create a solver instance. The SolverPool class is a singleton # and can be used without instantiating. It looks in the project # instance for the solver option and returns the correct solver. solver = SolverPool.get(project=project) # Another option without the usage of the solver field would be: # solver = SolverPool.get(solver_name='hyperopt', project=project) # pass the loss function to the solver solver.blackbox = my_loss_func # run the solver solver.run() df, best = solver.get_results() print("\n") print("*"*100) print("Best Parameter Set:\n{}".format(best)) print("*"*100) ``` #### The BlackboxFunction class To extend the possibilities beyond using parameter-only loss functions as in the examples above, we can use the BlackboxFunction class. This class is a wrapper class around the actual loss_function providing a more advanced access interface to data handling and a callback_function for accessing the solver's iteration loop.
```python # import the HyppopyProject class keeping track of inputs from hyppopy.HyppopyProject import HyppopyProject # import the SolverPool singleton class from hyppopy.SolverPool import SolverPool # import the BlackboxFunction class wrapping your problem for Hyppopy from hyppopy.BlackboxFunction import BlackboxFunction # Create the HyppopyProject class instance project = HyppopyProject() project.add_hyperparameter(name="C", domain="uniform", data=[0.0001, 20], type=float) project.add_hyperparameter(name="gamma", domain="uniform", data=[0.0001, 20], type=float) project.add_hyperparameter(name="kernel", domain="categorical", data=["linear", "sigmoid", "poly", "rbf"], type=str) project.add_setting(name="max_iterations", value=500) project.add_setting(name="solver", value="optunity") # The BlackboxFunction signature is as follows: # BlackboxFunction(blackbox_func=None, # dataloader_func=None, # preprocess_func=None, # callback_func=None, # data=None, # **kwargs) # # - blackbox_func: a function pointer to the user's loss function # - dataloader_func: a function pointer for handling dataloading. The function is called once before # optimizing. What it returns is passed as first argument to your loss function's # data argument. # - preprocess_func: a function pointer for data preprocessing. The function is called once before # optimizing and gets via kwargs['data'] the raw data object set directly or returned # from dataloader_func. What this function returns is then what is passed as first # argument to your loss function. # - callback_func: a function pointer called after each iteration. The input kwargs is a dictionary # keeping the parameters used in this iteration, the 'iteration' index, the 'loss' # and the 'status'. The function in this example is used for realtime printing its # input but can also be used for realtime visualization.
# - data: if not done via dataloader_func one can set a raw_data object directly # - kwargs: dict whose content is passed to all functions above. from sklearn.svm import SVC from sklearn.datasets import load_iris from sklearn.model_selection import cross_val_score def my_dataloader_function(**kwargs): print("Dataloading...") # kwargs['params'] allows accessing additional parameters passed, # see below my_preproc_param, my_dataloader_input. print("my loading argument: {}".format(kwargs['params']['my_dataloader_input'])) iris_data = load_iris() return [iris_data.data, iris_data.target] def my_preprocess_function(**kwargs): print("Preprocessing...") # kwargs['data'] allows accessing the input data print("data:", kwargs['data'][0].shape, kwargs['data'][1].shape) # kwargs['params'] allows accessing additional parameters passed, # see below my_preproc_param, my_dataloader_input. print("kwargs['params']['my_preproc_param']={}".format(kwargs['params']['my_preproc_param']), "\n") # if the preprocessing function returns something, # the input data will be replaced with the data returned by this function. x = kwargs['data'][0] y = kwargs['data'][1] for i in range(x.shape[0]): x[i, :] += kwargs['params']['my_preproc_param'] return [x, y] def my_callback_function(**kwargs): print("\r{}".format(kwargs), end="") def my_loss_function(data, params): clf = SVC(**params) return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean() # We now create the BlackboxFunction object and pass all function pointers defined above, # as well as 2 dummy parameters (my_preproc_param, my_dataloader_input) for demonstration purposes.
blackbox = BlackboxFunction(blackbox_func=my_loss_function, dataloader_func=my_dataloader_function, preprocess_func=my_preprocess_function, callback_func=my_callback_function, my_preproc_param=1, my_dataloader_input='could/be/a/path') # Get the solver solver = SolverPool.get(project=project) # Give the solver your blackbox solver.blackbox = blackbox # Run the solver solver.run() # Get your results df, best = solver.get_results() print("\n") print("*"*100) print("Best Parameter Set:\n{}".format(best)) print("*"*100) ``` #### The Parameter Space Domains Each hyperparameter needs a range and a domain specifier. The range, specified via 'data', is the left and right bound of an interval (exception is the domain 'categorical', here 'data' is the actual list of data elements) and the domain specifier defines the way this interval is sampled. Currently supported domains are: * uniform (samples the interval [a,b] evenly) * normal* (a Gaussian sampling of the interval [a,b] such that mu=a+(b-a)/2 and sigma=(b-a)/6) * loguniform* (a logarithmic sampling of the interval [a,b], such that the exponent x of e^x is sampled evenly in [log(a),log(b)]) * categorical (in this case data is not interpreted as interval but as actual list of objects) *Not all domains are supported by all solvers, this might be fixed in the future, but until then the solver throws an error telling you that the domain is unknown. When using the GridsearchSolver we need to specify an interval and a number of samples using a frequency specifier. The max_iterations parameter is obsolete in this case, because each axis specifies an individual number of samples via frequency. This applies only to numerical space domains, categorical space domains need a frequency value of 1.
```python # import the GridsearchSolver class from hyppopy.solvers.GridsearchSolver import GridsearchSolver # Import the HyppopyProject class from hyppopy.HyppopyProject import HyppopyProject # Our function to optimize def my_loss_func(x, y): return x**2+y**2 # Creating a HyppopyProject instance project = HyppopyProject() project.add_hyperparameter(name="x", domain="uniform", data=[-1.1, 1], frequency=10, type=float) project.add_hyperparameter(name="y", domain="uniform", data=[-1.1, 1], frequency=12, type=float) solver = GridsearchSolver(project=project) # pass the loss function to the solver solver.blackbox = my_loss_func # run the solver solver.run() df, best = solver.get_results() print("\n") print("*"*100) print("Best Parameter Set:\n{}".format(best)) print("*"*100) ``` #### Using a Visdom Server to Visualize the Optimization Process We can simply create a realtime visualization using a visdom server. If installed, start your visdom server via console command: ``` >visdom ``` Go to your browser and open the site: http://localhost:8097 To enable the visualization call the function 'start_viewer' before running the solver: ``` #enable visualization solver.start_viewer() # Run the solver solver.run() ``` You can also change the port and the server name in start_viewer(port=8097, server="http://localhost") ## Acknowledgements: _This work is supported by the [Helmholtz Association Initiative and Networking](https://www.helmholtz.de/en/about_us/the_association/initiating_and_networking/) Fund under project number ZT-I-0003._
diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..298ea9e --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,19 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/hyppopy/solvers/__init__.py b/doc/__init__.py similarity index 100% copy from hyppopy/solvers/__init__.py copy to doc/__init__.py diff --git a/doc/_static/class_diagram.png b/doc/_static/class_diagram.png new file mode 100644 index 0000000..a9d762a Binary files /dev/null and b/doc/_static/class_diagram.png differ diff --git a/doc/api.rst b/doc/api.rst new file mode 100644 index 0000000..6d5d790 --- /dev/null +++ b/doc/api.rst @@ -0,0 +1,77 @@ +*********** +Hyppopy API +*********** + +Main Classes +############ + +HyppopyProject +************** +.. automodule:: hyppopy.HyppopyProject + :members: + +HyppopySolver +************* +.. automodule:: hyppopy.solvers.HyppopySolver + :members: + +BlackboxFunction +**************** +.. automodule:: hyppopy.BlackboxFunction + :members: + +SolverPool +********** +.. automodule:: hyppopy.SolverPool + :members: + +Solver Classes +############## + +HyperoptSolver +************** +.. automodule:: hyppopy.solvers.HyperoptSolver + :members: + +OptunitySolver +************** +.. automodule:: hyppopy.solvers.OptunitySolver + :members: + +OptunaSolver +************** +.. automodule:: hyppopy.solvers.OptunaSolver + :members: + +RandomsearchSolver +****************** +.. 
automodule:: hyppopy.solvers.RandomsearchSolver + :members: + +QuasiRandomsearchSolver +*********************** +.. automodule:: hyppopy.solvers.QuasiRandomsearchSolver + :members: + +GridsearchSolver +**************** +.. automodule:: hyppopy.solvers.GridsearchSolver + :members: + +Helpers +####### + +VisdomViewer +************ +.. automodule:: hyppopy.VisdomViewer + :members: + +FunctionSimulator +***************** +.. automodule:: hyppopy.FunctionSimulator + :members: + +Singleton +********* +.. automodule:: hyppopy.Singleton + :members: \ No newline at end of file diff --git a/doc/conf.py b/doc/conf.py new file mode 100644 index 0000000..1eda279 --- /dev/null +++ b/doc/conf.py @@ -0,0 +1,223 @@ +# -*- coding: utf-8 -*- +# +# Configuration file for the Sphinx documentation builder. +# +# This file does only contain a selection of the most common options. For a +# full list see the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here.
+# + +import os +import sys +from shutil import copyfile + +ROOT = os.path.abspath('../') +sys.path.insert(0, os.path.abspath('.')) +sys.path.insert(0, ROOT) + +README_PATH_SRC = os.path.join(ROOT, "README.md") +README_PATH_DST = os.path.join(ROOT, *("doc", "README.md")) +print("copy", README_PATH_SRC, "to", README_PATH_DST) +try: + copyfile(README_PATH_SRC, README_PATH_DST) +except: + print("Missing README.md file in subdir!") + +LICENSE_PATH_SRC = os.path.join(ROOT, "LICENSE") +LICENSE_PATH_DST = os.path.join(ROOT, *("doc", "LICENSE.rst")) +print("copy", LICENSE_PATH_SRC, "to", LICENSE_PATH_DST) +try: + copyfile(LICENSE_PATH_SRC, LICENSE_PATH_DST) +except: + print("Missing LICENSE file in subdir!") + +CHANGELOG_PATH_SRC = os.path.join(ROOT, "CHANGELOG.md") +CHANGELOG_PATH_DST = os.path.join(ROOT, *("doc", "CHANGELOG.md")) +print("copy", CHANGELOG_PATH_SRC, "to", CHANGELOG_PATH_DST) +try: + copyfile(CHANGELOG_PATH_SRC, CHANGELOG_PATH_DST) +except: + print("Missing CHANGELOG.md file in subdir!") + +# -- Project information ----------------------------------------------------- + +project = 'Hyppopy' +copyright = '2019, DKFZ' +author = 'S. Wanner' + +# The short X.Y version +version = '0.5' +# The full version, including alpha/beta/rc tags +release = '0.5.0' + + +# -- General configuration --------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+#extensions = [ + # 'sphinx.ext.autodoc', +# 'recommonmark', +# 'autoapi.extension', +# 'sphinx.ext.napoleon' +#] + +extensions = ['recommonmark', + 'sphinx.ext.autodoc', + 'sphinx.ext.coverage', + 'sphinx.ext.napoleon', + #'autoapi.extension', + 'sphinx.ext.inheritance_diagram'] + +#autoapi_type = 'python' +#autoapi_dirs = [ROOT, ''] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'doc', 'tests'] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = None + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ['_static'] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# The default sidebars (for documents that don't match any pattern) are +# defined by theme itself. Builtin themes are using these templates by +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', +# 'searchbox.html']``. +# +# html_sidebars = {} + + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = 'Hyppopydoc' + + +# -- Options for LaTeX output ------------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'Hyppopy.tex', 'Hyppopy Documentation', + 'S. Wanner', 'manual'), +] + + +# -- Options for manual page output ------------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'hyppopy', 'Hyppopy Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ---------------------------------------------- + +# Grouping the document tree into Texinfo files. 
List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'Hyppopy', 'Hyppopy Documentation', + author, 'Hyppopy', 'One line description of project.', + 'Miscellaneous'), +] + + +# -- Options for Epub output ------------------------------------------------- + +# Bibliographic Dublin Core info. +epub_title = project + +# The unique identifier of the text. This can be a ISBN number +# or the project homepage. +# +# epub_identifier = '' + +# A unique identification for the text. +# +# epub_uid = '' + +# A list of files that should not be packed into the epub file. +epub_exclude_files = ['search.html'] + + +# -- Extension configuration ------------------------------------------------- + +# -- Options for todo extension ---------------------------------------------- + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = True diff --git a/doc/developer_guide.rst b/doc/developer_guide.rst new file mode 100644 index 0000000..af19150 --- /dev/null +++ b/doc/developer_guide.rst @@ -0,0 +1,169 @@ +**************** +Developers Guide +**************** + +The main classes and their connections +************************************** + +The picture below depicts the relationships between the most important classes of hyppopy. + +.. image:: _static/class_diagram.png + +To understand the concept behind Hyppopy the following classes are important: + - :py:mod:`hyppopy.solvers.HyppopySolver` + - :py:mod:`hyppopy.HyppopyProject` + - :py:mod:`hyppopy.BlackboxFunction` + + +The :py:mod:`hyppopy.solvers.HyppopySolver` class is the parent class of all solvers in Hyppopy. It defines +an abstract interface that needs to be implemented by each custom solver class. The main idea is to +define a common interface for the different approaches the solver libraries are based on. When designing +Hyppopy there were three main challenges that drove the design.
Each solver library has a different +approach to define or describe the hyperparameter space, has a different approach to track the solver +information and is different in setting the blackbox function and running the optimization process. To +deal with those differences the :py:mod:`hyppopy.solvers.HyppopySolver` class defines the abstract interface +functions `convert_searchspace`, `execute_solver`, `loss_function_call` and `define_interface`. Those serve as an +abstraction layer to handle the individual needs of each solver library. + +Each solver needs a :py:mod:`hyppopy.HyppopyProject` instance keeping the user configuration input and a +:py:mod:`hyppopy.BlackboxFunction` instance, implementing the loss function. + +Implementing a custom solver +**************************** + +Adding a new solver is only about deriving a new class from :py:mod:`hyppopy.solvers.HyppopySolver` as well as +telling the :py:mod:`hyppopy.SolverPool` that it exists. We go through the whole process on the example of the +solver :py:mod:`hyppopy.solvers.OptunitySolver`: + +.. code-block:: python + + import os + import optunity + from pprint import pformat + + + from hyppopy.solvers.HyppopySolver import HyppopySolver + + + class OptunitySolver(HyppopySolver): + + def __init__(self, project=None): + HyppopySolver.__init__(self, project) + +The first step is to derive from the HyppopySolver class. Good practice would be that the project can be set via __init__ +and, if given, is passed through to the HyppopySolver.__init__. The next step is implementing the abstract interface methods. +We start with define_interface. This function's purpose is to define the relevant input parameters and the signature +of the hyperparameter space. Our solver needs a parameter called max_iterations of type int. The hyperparameter +space has a domain that allows values 'uniform' and 'categorical', a field data of type list and a field type of type +type.
This guarantees that exceptions are thrown if the user disrespects this signature or forgets to set max_iterations. + +.. code-block:: python + + def define_interface(self): + self._add_member("max_iterations", int) + self._add_hyperparameter_signature(name="domain", dtype=str, + options=["uniform", "categorical"]) + self._add_hyperparameter_signature(name="data", dtype=list) + self._add_hyperparameter_signature(name="type", dtype=type) + + +The next abstract method to implement is convert_searchspace. This method is responsible for interpreting the user's hyperparameter +input and converting it to the form the solver framework needs. An input can for example be: + +.. code-block:: python + + hyperparameter = { + 'C': {'domain': 'uniform', 'data': [0.0001, 20], 'type': float}, + 'gamma': {'domain': 'uniform', 'data': [0.0001, 20.0], 'type': float}, + 'kernel': {'domain': 'categorical', 'data': ['linear', 'sigmoid', 'poly', 'rbf'], 'type': str}, + 'decision_function_shape': {'domain': 'categorical', 'data': ['ovo', 'ovr'], 'type': str} + } + + +Optunity instead expects a hyperparameter space formulation as follows: + +.. code-block:: python + + optunity_space = {'decision_function_shape': + {'ovo': { + 'kernel': { + 'linear': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}, + 'sigmoid': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}, + 'poly': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}, + 'rbf': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}} + }, + 'ovr': { + 'kernel': { + 'linear': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}, + 'sigmoid': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}, + 'poly': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}, + 'rbf': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}} + } + }} + +This conversion is what convert_searchspace is meant for. + +..
code-block:: python + + def convert_searchspace(self, hyperparameter): + LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) + # split input in categorical and non-categorical data + cat, uni = self.split_categorical(hyperparameter) + # build up dictionary keeping all non-categorical data + uniforms = {} + for key, value in uni.items(): + for key2, value2 in value.items(): + if key2 == 'data': + if len(value2) == 3: + uniforms[key] = value2[0:2] + elif len(value2) == 2: + uniforms[key] = value2 + else: + raise AssertionError("precondition violation, optunity searchspace needs list with left and right range bounds!") + + if len(cat) == 0: + return uniforms + # build nested categorical structure + inner_level = uniforms + for key, value in cat.items(): + tmp = {} + optunity_space = {} + for key2, value2 in value.items(): + if key2 == 'data': + for elem in value2: + tmp[elem] = inner_level + optunity_space[key] = tmp + inner_level = optunity_space + return optunity_space + + +Now that we have defined how the solver looks from the outside and how to convert the incoming parameter space, we can define how the blackbox function +is called. The abstract method loss_function_call is a wrapper function that allows customizing the call of the blackbox function. In the case of Optunity +we only check whether a parameter is of type int and convert it, to ensure that no exceptions are thrown in case integers are expected in the blackbox. + +.. code-block:: python + + def loss_function_call(self, params): + for key in params.keys(): + if self.project.get_typeof(key) is int: + params[key] = int(round(params[key])) + return self.blackbox(**params) + + +In execute_solver the actual wrapping of the solver framework call is done. Here we call the Optunity optimizing function. A dictionary keeping the optimal +parameter set must be assigned to self.best. + + +..
code-block:: python + + def execute_solver(self, searchspace): + LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace))) + try: + self.best, _, _ = optunity.minimize_structured(f=self.loss_function, + num_evals=self.max_iterations, + search_space=searchspace) + except Exception as e: + LOG.error("internal error in optunity.minimize_structured occured. {}".format(e)) + raise BrokenPipeError("internal error in optunity.minimize_structured occured. {}".format(e)) + + diff --git a/doc/index.rst b/doc/index.rst new file mode 100644 index 0000000..92e6a66 --- /dev/null +++ b/doc/index.rst @@ -0,0 +1,19 @@ +Welcome to Hyppopy's documentation! +=================================== + +.. toctree:: + :maxdepth: 3 + :caption: Contents: + + README + api + developer_guide + CHANGELOG + LICENSE + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` \ No newline at end of file diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 0000000..7893348 --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/examples/solver_comparison.py b/examples/solver_comparison.py index f4743b0..8a09b41 100644 --- a/examples/solver_comparison.py +++ b/examples/solver_comparison.py @@ -1,364 +1,369 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE import os import sys import time import pickle import numpy as np from math import pi import matplotlib.pyplot as plt from hyppopy.SolverPool import SolverPool from hyppopy.HyppopyProject import HyppopyProject -from hyppopy.VirtualFunction import VirtualFunction +from hyppopy.FunctionSimulator import FunctionSimulator from hyppopy.BlackboxFunction import BlackboxFunction -#OUTPUTDIR = "C:\\Users\\s635r\\Desktop\\solver_comparison" -OUTPUTDIR = "D:\\Projects\\Python\\hyppopy\\examples\\solver_comparison\\gfx" +OUTPUTDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), *("solver_comparison", "gfx"))) +# The solvers to be evaluated SOLVER = [] - SOLVER.append("quasirandomsearch") SOLVER.append("randomsearch") SOLVER.append("hyperopt") SOLVER.append("optunity") SOLVER.append("optuna") +# number of iterations to be tested ITERATIONS = [] ITERATIONS.append(15) ITERATIONS.append(50) ITERATIONS.append(300) ITERATIONS.append(1000) +# number of repetitions for each solver and iteration the results +# plottet are the mean and std dev of these independent trials STATREPEATS = 50 +# evaluations are stored using pickle, if OVERWRITE is True these +# are ignored and overwritten each time, set to False when only the +# plottings need to be re-evaluated 
OVERWRITE = False


def compute_deviation(solver_name, vfunc_id, iterations, N, fname):
    """
    Run one solver on a simulated function and pickle the collected statistics.

    For every entry of `iterations` the solver is executed `N` times. The result
    dictionary written to `fname` has a "gt" entry (median position of the minima
    of each axis as reported by the simulator) and one entry per iteration count
    holding, per axis, mean/std of the best position found and of its distance to
    "gt", plus mean/std of the best loss, the sorted loss histories, the mean run
    duration and the summed difference between the best dataframe row and the
    `best` dict returned by the solver.

    :param solver_name: [str] solver identifier passed to the project as "solver"
    :param vfunc_id: [str] id of the default simulator data set to load
    :param iterations: [list] max_iterations values to evaluate
    :param N: [int] number of independent repetitions per iteration count
    :param fname: [str] path of the pickle file to write
    """
    vfunc = FunctionSimulator()
    vfunc.load_default(vfunc_id)
    minima = vfunc.minima()
    axis_names = ["axis_0{}".format(i) for i in range(vfunc.dims())]

    project = HyppopyProject()
    for name in axis_names:
        project.add_hyperparameter(name=name, domain="uniform", data=[0, 1], type=float)

    def my_loss_function(data, params):
        return vfunc(**params)

    blackbox = BlackboxFunction(data=[], blackbox_func=my_loss_function)

    results = {"gt": [np.median(mini[0]) for mini in minima]}

    for n_iter in iterations:
        results[n_iter] = {"minima": {}, "distance": {}, "duration": None,
                           "set_difference": None, "loss": None, "loss_history": {}}
        for name in axis_names:
            results[n_iter]["minima"][name] = []
            results[n_iter]["distance"][name] = []

        project.add_setting("max_iterations", n_iter)
        project.add_setting("solver", solver_name)

        solver = SolverPool.get(project=project)
        solver.blackbox = blackbox

        axis_minima = [[] for _ in axis_names]
        best_losses = []
        best_sets_diff = []
        loss_history = []
        durations = []
        for n in range(N):
            print("\rSolver={} iteration={} round={}".format(solver, n_iter, n), end="")

            start = time.time()
            solver.run(print_stats=False)
            end = time.time()
            durations.append(end - start)

            df, best = solver.get_results()

            # losses sorted in descending order
            loss_history.append(np.flip(np.sort(df['losses'].values)))
            best_row = df['losses'].idxmin()
            best_losses.append(df['losses'][best_row])
            # should be 0 if the solver reports the same parameter set as the dataframe
            best_sets_diff.append(sum(abs(df[name][best_row] - best[name]) for name in axis_names))
            for i, name in enumerate(axis_names):
                axis_minima[i].append(df[name][best_row])

        results[n_iter]["loss_history"] = loss_history
        for i, name in enumerate(axis_names):
            results[n_iter]["minima"][name] = [np.mean(axis_minima[i]), np.std(axis_minima[i])]
            dist = np.sqrt((np.asarray(axis_minima[i]) - results["gt"][i]) ** 2)
            results[n_iter]["distance"][name] = [np.mean(dist), np.std(dist)]
        results[n_iter]["loss"] = [np.mean(best_losses), np.std(best_losses)]
        results[n_iter]["set_difference"] = sum(best_sets_diff)
        results[n_iter]["duration"] = np.mean(durations)

    with open(fname, 'wb') as file:
        pickle.dump(results, file)


def make_radarplot(results, title, fname=None):
    """
    Draw a radar plot of the mean minimum position per axis for each iteration count.

    NOTE: the "gt" entry is removed from `results` in place. The caller in the
    __main__ section relies on this, because it later iterates the remaining
    entries as per-iteration dictionaries.

    :param results: [dict] result dictionary as written by compute_deviation
    :param title: [str] plot title
    :param fname: [str] output path without extension, the plot is shown if None
    """
    gt = results.pop("gt")
    categories = list(results[list(results.keys())[0]]["minima"].keys())
    N = len(categories)

    angles = [n / float(N) * 2 * pi for n in range(N)]
    angles += angles[:1]  # repeat the first angle to close the polygon

    ax = plt.subplot(1, 1, 1, polar=True, )

    ax.set_theta_offset(pi / 2)
    ax.set_theta_direction(-1)

    plt.xticks(angles[:-1], categories, color='grey', size=8)

    ax.set_rlabel_position(0)
    plt.yticks([0.2, 0.4, 0.6, 0.8, 1.0], ["0.2", "0.4", "0.6", "0.8", "1.0"], color="grey", size=7)
    plt.ylim(0, 1)

    gt += gt[:1]
    ax.fill(angles, gt, color=(0.2, 0.8, 0.2), alpha=0.2)

    # colormap index 2 is left out when more than two curves are drawn
    cm = plt.get_cmap('Set1')
    if len(results) > 2:
        indices = list(range(0, len(results) + 1))
        indices.pop(2)
    else:
        indices = list(range(0, len(results)))
    colors = [cm(indices[i]) for i in range(len(results))]

    for (n_iter, data), color in zip(results.items(), colors):
        values = [data["minima"]["axis_0{}".format(i)][0] for i in range(len(categories))]
        values += values[:1]
        ax.plot(angles, values, color=color, linewidth=2, linestyle='solid',
                label="iterations {}".format(n_iter))

    plt.title(title, size=11, color=(0.1, 0.1, 0.1), y=1.1)
    plt.legend(bbox_to_anchor=(0.08, 1.12))
    if fname is None:
        plt.show()
    else:
        plt.savefig(fname + ".png")
    plt.clf()


def make_errrorbars_plot(results, fname=None):
    """
    Plot, for every entry of ITERATIONS, mean and std of the per-axis distance for all solvers.

    :param results: [dict] solver name -> result dictionary (without "gt")
    :param fname: [str] output path prefix, "_<iterations>.png" is appended; shown if None
    """
    for n_iter in ITERATIONS:
        means = []
        stds = []
        names = []
        axis = []
        fig = plt.figure(figsize=(10, 8))
        for solver_name, numbers in results.items():
            names.append(solver_name)
            means.append([])
            stds.append([])

            for axis_name, data in numbers[n_iter]["distance"].items():
                means[-1].append(data[0])
                stds[-1].append(data[1])
                # collect every axis label exactly once, independent of the dimension count
                if axis_name not in axis:
                    axis.append(axis_name)

        colors = [plt.cm.Set2(c / len(names)) for c in range(len(names))]

        index = np.arange(len(axis))
        bar_width = 0.14
        opacity = 0.8
        error_config = {'ecolor': '0.3'}

        for k, name in enumerate(names):
            plt.bar(index + k*bar_width, means[k], bar_width,
                    alpha=opacity,
                    color=colors[k],
                    yerr=stds[k],
                    error_kw=error_config,
                    label=name)
        plt.xlabel('Axis')
        plt.ylabel('Mean [+/- std]')
        plt.title('Deviation per Axis and Solver for {} Iterations'.format(n_iter))
        plt.xticks(index + 2*bar_width, axis)
        plt.legend()

        if fname is None:
            plt.show()
        else:
            plt.savefig(fname + "_{}.png".format(n_iter))
        plt.clf()
        # one figure is created per loop pass, release it again
        plt.close(fig)


def plot_loss_histories(results, fname=None):
    """
    Plot all stored loss histories of every solver, one figure per entry of ITERATIONS.

    :param results: [dict] solver name -> result dictionary (without "gt")
    :param fname: [str] output path prefix, "_<iterations>.png" is appended; shown if None
    """
    colors = [plt.cm.Set2(c / len(SOLVER)) for c in range(len(SOLVER))]

    for n_iter in ITERATIONS:
        fig = plt.figure(figsize=(10, 8))
        added_solver = []
        for n, solver_name in enumerate(results.keys()):
            for history in results[solver_name][n_iter]["loss_history"]:
                # only the first curve of a solver gets a legend entry
                if solver_name not in added_solver:
                    plt.plot(history, color=colors[n], label=solver_name, alpha=0.5)
                    added_solver.append(solver_name)
                else:
                    plt.plot(history, color=colors[n], alpha=0.5)
        plt.legend()
        plt.ylabel('Loss')
        plt.xlabel('Iteration')
        if fname is None:
            plt.show()
        else:
            plt.savefig(fname + "_{}.png".format(n_iter))
        plt.clf()
        plt.close(fig)


def print_durations(results, fname=None):
    """
    Write the mean run durations to a tab separated text file and plot the mean
    duration per iteration for every solver.

    :param results: [dict] solver name -> result dictionary (without "gt")
    :param fname: [str] output path without extension; if None no file is written
                  and the plot is shown instead of saved
    """
    if fname is not None:
        lines = ["iterations\t"+"\t".join(SOLVER)+"\n"]
        for n_iter in ITERATIONS:
            txt = str(n_iter) + "\t"
            for solver_name in SOLVER:
                txt += str(results[solver_name][n_iter]["duration"]) + "\t"
            txt += "\n"
            lines.append(txt)
        with open(fname + ".txt", "w") as f:
            f.writelines(lines)

    # duration per single iteration, averaged over all iteration counts
    durations = {}
    for n_iter in ITERATIONS:
        for solver_name in SOLVER:
            duration = results[solver_name][n_iter]["duration"]
            if solver_name not in durations:
                durations[solver_name] = duration/n_iter
            else:
                durations[solver_name] += duration/n_iter
    for name in durations:
        durations[name] /= len(ITERATIONS)

    fig, ax = plt.subplots(figsize=(14, 6))

    y_pos = np.arange(len(durations.keys()))
    t = [durations[solver] for solver in SOLVER]
    print(SOLVER)
    print(t)
    ax.barh(y_pos, t, align='center', color='green')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(SOLVER)
    ax.invert_yaxis()
    ax.set_xscale('log')
    ax.set_xlabel('Duration in [s]')
    ax.set_title('Mean Solver Computation Time per Iteration')

    if fname is None:
        plt.show()
    else:
        plt.savefig(fname + ".png")
    plt.clf()
    plt.close(fig)


id2dirmapping = {"5D": "data_I", "5D2": "data_II", "5D3": "data_III"}

if __name__ == "__main__":
    vfunc_ID = "5D"
    if len(sys.argv) == 2:
        vfunc_ID = sys.argv[1]
    print("Start Evaluation on {}".format(vfunc_ID))

    OUTPUTDIR = os.path.join(OUTPUTDIR, id2dirmapping[vfunc_ID])
    if not os.path.isdir(OUTPUTDIR):
        os.makedirs(OUTPUTDIR)

    ##################################################
    ############### create datasets ##################
    fnames = []
    for solver_name in SOLVER:
        fname = os.path.join(OUTPUTDIR, solver_name)
        fnames.append(fname)
        if OVERWRITE or not os.path.isfile(fname):
            compute_deviation(solver_name, vfunc_ID, ITERATIONS, N=STATREPEATS, fname=fname)
    ##################################################
    ##################################################

    ##################################################
    ############## create radarplots #################
    all_results = {}
    for solver_name, fname in zip(SOLVER, fnames):
        # the files are the caches written by compute_deviation above;
        # never point this at pickle files from an untrusted source
        with open(fname, 'rb') as file:
            results = pickle.load(file)
        make_radarplot(results, solver_name, fname + "_deviation")
        all_results[solver_name] = results

    fname = os.path.join(OUTPUTDIR, "errorbars")
    make_errrorbars_plot(all_results, fname)

    fname = os.path.join(OUTPUTDIR, "losshistory")
    plot_loss_histories(all_results, fname)

    fname = os.path.join(OUTPUTDIR, "durations")
    print_durations(all_results, fname)

    for solver_name, iterations in all_results.items():
        for n_iter, numbers in iterations.items():
            if numbers["set_difference"] != 0:
                print("solver {} has a different parameter set match in iteration {}".format(solver_name, n_iter))

    ##################################################
    ##################################################

    plt.imsave(fname=os.path.join(OUTPUTDIR, "dummy.png"), arr=np.ones((800, 1000, 3), dtype=np.uint8)*255)
#
# See LICENSE

# This tutorial shows how a callback function can be used to visualize the
# optimization progress while the solver is running.

import matplotlib.pylab as plt

from hyppopy.SolverPool import SolverPool
from hyppopy.HyppopyProject import HyppopyProject
from hyppopy.FunctionSimulator import FunctionSimulator
from hyppopy.BlackboxFunction import BlackboxFunction

AXIS_NAMES = ["axis_00", "axis_01", "axis_02", "axis_03", "axis_04"]

project = HyppopyProject()
for axis_name in AXIS_NAMES:
    project.add_hyperparameter(name=axis_name, domain="uniform", data=[0, 1], type=float)
project.add_setting("max_iterations", 500)
project.add_setting("solver", "randomsearch")

plt.ion()
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(12, 8), sharey=True)
# history of everything the callback has received so far
plot_data = {"iterations": [], "loss": []}
for axis_name in AXIS_NAMES:
    plot_data[axis_name] = []


def my_visualization_function(**kwargs):
    """
    Callback redrawing all panels with the history collected so far.

    The first five panels show the loss over one hyperparameter axis each,
    the last panel shows the loss over the iterations.

    :param kwargs: values of the current iteration; the keys 'iterations', 'loss'
                   and one key per hyperparameter axis are read
    """
    print("\r{}".format(kwargs), end="")
    plot_data["iterations"].append(kwargs['iterations'])
    plot_data["loss"].append(kwargs['loss'])
    for axis_name in AXIS_NAMES:
        plot_data[axis_name].append(kwargs[axis_name])

    # axes.flat runs row by row, so the five axis plots fill the first five panels
    for panel, axis_name in zip(axes.flat, AXIS_NAMES):
        panel.clear()
        panel.scatter(plot_data[axis_name], plot_data["loss"], c=plot_data["loss"], cmap="jet", marker='.')
        panel.set_xlabel(axis_name)
    # only the left column carries a y label (the y axis is shared)
    axes[0, 0].set_ylabel("loss")
    axes[1, 0].set_ylabel("loss")

    axes[1, 2].clear()
    axes[1, 2].plot(plot_data["iterations"], plot_data["loss"], "--", c=(0.8, 0.8, 0.8, 0.5))
    axes[1, 2].scatter(plot_data["iterations"], plot_data["loss"], marker='.', c=(0.2, 0.2, 0.2))
    axes[1, 2].set_xlabel("iterations")
    plt.draw()
    plt.tight_layout()
    plt.pause(0.001)


# load the simulated function once instead of on every loss evaluation
vfunc = FunctionSimulator()
vfunc.load_default("5D")


def my_loss_function(data, params):
    """Return the value of the simulated 5D function at the given parameter set."""
    return vfunc(**params)


blackbox = BlackboxFunction(data=[], blackbox_func=my_loss_function, callback_func=my_visualization_function)

solver = SolverPool.get(project=project)
solver.blackbox = blackbox
solver.run()
df, best = solver.get_results()

print("\n")
print("*" * 100)
print("Best Parameter Set:\n{}".format(best))
print("*" * 100)
print("")
save_plot = input("Save Plot? [y/n] ")
if save_plot == "y":
    # the solver name is stored as the setting 'solver' (see add_setting above)
    plt.savefig('plot_{}.png'.format(project.solver))
# Hyperopt uses a Bayesian - Tree Parzen Estimator - Optimization approach, which means that each iteration computes a
# new function value of the blackbox, interpolates a guess for the whole energy function and predicts a point to
# compute the next function value at. This next point is not necessarily a "better" value, it's only the value with
# the highest uncertainty for the function interpolation.
#
# See a visual explanation e.g. here (http://philipperemy.github.io/visualization/)


# import the HyppopyProject class keeping track of inputs
from hyppopy.HyppopyProject import HyppopyProject

# import the SolverPool singleton class
from hyppopy.SolverPool import SolverPool

# import the Blackboxfunction class wrapping your problem for Hyppopy
from hyppopy.BlackboxFunction import BlackboxFunction


# Next step is defining the problem space and all settings Hyppopy needs to optimize your problem.
# The config is a simple dictionary. The entry hyperparameter defines your searchspace. Each
# hyperparameter is again a dictionary with:
#
# - a domain ['categorical', 'uniform', 'normal', 'loguniform']
# - the domain data [left bound, right bound] and
# - a type of your domain, given as native python type (str, int, float)
#
# All settings are added as additional entries next to hyperparameter, here:
#
# - 'max_iterations', the maximum number of iterations
# - 'solver', the name of the solver to be used
#
# Each of these entries can be accessed as a member variable of the HyppopyProject instance, e.g.
# project.max_iterations or project.solver (see below). The 'solver' entry works as a solver switch, this
# means with the script below you can try out every solver by changing solver to 'optunity',
# 'randomsearch', 'gridsearch',... If using the gridsearch solver, max_iterations is ignored, instead each
# hyperparameter must additionally specify a number of samples like so:
# 'data': [0, 1, 100] which means sampling the space from 0 to 1 in 100 intervals.

config = {
"hyperparameter": {
    "C": {
        "domain": "uniform",
        "data": [0.0001, 20],
        "type": float
    },
    "gamma": {
        "domain": "uniform",
        "data": [0.0001, 20.0],
        "type": float
    },
    "kernel": {
        "domain": "categorical",
        "data": ["linear", "sigmoid", "poly", "rbf"],
        "type": str
    },
    "decision_function_shape": {
        "domain": "categorical",
        "data": ["ovo", "ovr"],
        "type": str
    }
},
"max_iterations": 300,
"solver": "hyperopt"
}

# When creating a HyppopyProject instance we
# pass the config dictionary to the constructor.
project = HyppopyProject(config=config)

# demonstration of the settings access
print("-"*30)
print("max_iterations:\t{}".format(project.max_iterations))
print("solver chosen -> {}".format(project.solver))
print("-"*30)


# The BlackboxFunction signature is as follows:
# BlackboxFunction(blackbox_func=None,
#                  dataloader_func=None,
#                  preprocess_func=None,
#                  callback_func=None,
#                  data=None,
#                  **kwargs)
#
# - blackbox_func: a function pointer to the users loss function
# - dataloader_func: a function pointer for handling dataloading. The function is called once before
#                    optimizing. What it returns is passed as first argument to your loss functions
#                    data argument.
# - preprocess_func: a function pointer for data preprocessing. The function is called once before
#                    optimizing and gets via kwargs['data'] the raw data object set directly or returned
#                    from dataloader_func. What this function returns is then what is passed as first
#                    argument to your loss function.
# - callback_func: a function pointer called after each iteration. The input kwargs is a dictionary
#                  keeping the parameters used in this iteration, the 'iteration' index, the 'loss'
#                  and the 'status'. The function in this example is used for realtime printing its
#                  input but can also be used for realtime visualization.
# - data: if not done via dataloader_func one can set a raw_data object directly
# - kwargs: dict whose content is passed to all functions above.

from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score


def my_dataloader_function(**kwargs):
    # loads the iris dataset and returns it as [samples, labels]
    print("Dataloading...")
    # kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input.
    print("my loading argument: {}".format(kwargs['params']['my_dataloader_input']))
    iris_data = load_iris()
    return [iris_data.data, iris_data.target]


def my_preprocess_function(**kwargs):
    # adds my_preproc_param to every sample and returns the modified [samples, labels]
    print("Preprocessing...")
    # kwargs['data'] allows accessing the input data
    print("data:", kwargs['data'][0].shape, kwargs['data'][1].shape)
    # kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input.
    print("kwargs['params']['my_preproc_param']={}".format(kwargs['params']['my_preproc_param']), "\n")
    # if the preprocessing function returns something,
    # the input data will be replaced with the data returned by this function.
    x = kwargs['data'][0]
    y = kwargs['data'][1]
    for i in range(x.shape[0]):
        x[i, :] += kwargs['params']['my_preproc_param']
    return [x, y]


def my_callback_function(**kwargs):
    # prints the content of the current iteration, overwriting the previous console line
    print("\r{}".format(kwargs), end="")


def my_loss_function(data, params):
    # the negated mean cross validation score is returned as loss
    clf = SVC(**params)
    return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean()


# We now create the BlackboxFunction object and pass all function pointers defined above,
# as well as 2 dummy parameter (my_preproc_param, my_dataloader_input) for demonstration purposes.
blackbox = BlackboxFunction(blackbox_func=my_loss_function,
                            dataloader_func=my_dataloader_function,
                            preprocess_func=my_preprocess_function,
                            callback_func=my_callback_function,
                            my_preproc_param=1,
                            my_dataloader_input='could/be/a/path')


# As last step we use our SolverPool which automatically returns the correct solver.
# There are multiple ways to get the desired solver from the solver pool.
# 1. solver = SolverPool.get('hyperopt')
#    solver.project = project
# 2. solver = SolverPool.get('hyperopt', project)
# 3. The SolverPool will look for the field 'solver' in the project instance, if
#    it is present it will be used to specify the solver so that in this case it is enough
#    to pass the project instance.
solver = SolverPool.get(project=project)

# Give the solver your blackbox and run it. After execution we can get the result
# via get_results() which returns a pandas dataframe containing the complete history
# The dict best contains the best parameter set.
solver.blackbox = blackbox
#solver.start_viewer()
solver.run()
df, best = solver.get_results()

print("\n")
print("*"*100)
print("Best Parameter Set:\n{}".format(best))
print("*"*100)
import functools


def default_kwargs(**defaultKwargs):
    """
    Decorator defining default args in **kwargs arguments.

    The defaults are merged with the keyword arguments of each call into a
    fresh dictionary, so values passed in one call never leak into the next.

    :param defaultKwargs: default arg=value pairs

    :return: decorator function
    """
    def actual_decorator(fn):
        @functools.wraps(fn)
        def g(*args, **kwargs):
            # never update defaultKwargs itself, it is shared by all calls of fn
            merged = dict(defaultKwargs)
            merged.update(kwargs)
            return fn(*args, **merged)
        return g
    return actual_decorator


class BlackboxFunction(object):
    """
    This class is a BlackboxFunction wrapper class encapsulating the loss function. Additional function pointer can be
    set to get access at different pipelining steps:

    - dataloader_func: data loading, the function must return a data object and is called first when the instance is
                       set up. It is called as dataloader_func(params=params).
    - preprocess_func: data preprocessing, called after dataloader_func as preprocess_func(data=data, params=params).
                       If it returns something other than None, the returned object replaces the data object.
    - callback_func: only stored here and exposed via the callback_func property.
    - data: add a data object directly

    params is the dictionary of all additional arg=value pairs passed to the constructor. The entry callback_func
    is not removed from it and is therefore part of params as well.
    """

    @default_kwargs(blackbox_func=None, dataloader_func=None, preprocess_func=None, callback_func=None, data=None)
    def __init__(self, **kwargs):
        """
        Constructor accepts function pointer or a data object which are all None by default. Additionally one can define
        an arbitrary number of arg pairs. These are passed as params to dataloader_func and preprocess_func.

        :param blackbox_func: loss function pointer, called as blackbox_func(data, params), default=None
        :param dataloader_func: data loading function pointer, default=None
        :param preprocess_func: data preprocessing function pointer, default=None
        :param callback_func: callback function pointer, default=None
        :param data: data object, default=None
        :param kwargs: additional arg=value pairs

        :raises AssertionError: if no data object or no blackbox function is available
        """
        self._blackbox_func = None
        self._preprocess_func = None
        self._dataloader_func = None
        self._callback_func = None
        self._raw_data = None
        self._data = None
        self.setup(kwargs)

    def __call__(self, **kwargs):
        """
        Call method calls blackbox_func passing the data object and the args passed

        :param kwargs: [dict] args

        :return: blackbox_func(data, kwargs)
        """
        return self.blackbox_func(self.data, kwargs)

    def setup(self, kwargs):
        """
        Alternative to Constructor, kwargs signature see __init__

        The keys blackbox_func, preprocess_func, dataloader_func and data are removed from the dictionary passed,
        what remains is handed as params to dataloader_func and preprocess_func.

        :param kwargs: [dict] (see __init__), all keys named in __init__ must be present

        :raises AssertionError: if no data object or no blackbox function is available
        """
        self._blackbox_func = kwargs['blackbox_func']
        self._preprocess_func = kwargs['preprocess_func']
        self._dataloader_func = kwargs['dataloader_func']
        self._callback_func = kwargs['callback_func']
        self._raw_data = kwargs['data']
        self._data = self._raw_data
        del kwargs['blackbox_func']
        del kwargs['preprocess_func']
        del kwargs['dataloader_func']
        del kwargs['data']
        params = kwargs
        if self.dataloader_func is not None:
            self._raw_data = self.dataloader_func(params=params)
        # raised explicitly so the checks are not stripped when running with -O
        if self._raw_data is None:
            raise AssertionError("Missing data exception!")
        if self.blackbox_func is None:
            raise AssertionError("Missing blackbox fucntion exception!")
        if self.preprocess_func is not None:
            result = self.preprocess_func(data=self._raw_data, params=params)
            # a preprocessing function returning nothing leaves the data unchanged
            self._data = result if result is not None else self._raw_data
        else:
            self._data = self._raw_data

    @property
    def blackbox_func(self):
        return self._blackbox_func

    @property
    def preprocess_func(self):
        return self._preprocess_func

    @property
    def dataloader_func(self):
        return self._dataloader_func

    @property
    def callback_func(self):
        return self._callback_func

    @property
    def raw_data(self):
        return self._raw_data

    @property
    def data(self):
        return self._data
class FunctionSimulator(object):
    """
    The FunctionSimulator class serves as simulation tool for solver testing and evaluation purposes. It is
    designed to simulate an energy functional by setting axis data for each dimension via binary image files.
    The binary image files are sampled and a range interval is read from a config file. The class implements
    __call__ to act like a blackbox function when initialized.

    f = f(x1, x2, ..., xn) for n binary images and n axis sections in the config file
      - as image input .png grayscale images are expected
      - as range config a .cfg ascii file is expected, containing one section axis_XX per image with the
        options min_x, max_x, min_y and max_y
    """

    def __init__(self):
        self.config = None  # ConfigParser instance of the loaded .cfg file
        self.data = None    # ndarray of shape (dims, size), one sampled function per row
        self.axis = []      # one [min_x, max_x] entry per dimension

    def __call__(self, *args, **kwargs):
        """
        Evaluates the simulated function at the given position and returns the sum of the linearly
        interpolated values of all axes.

        :param args: one position value per dimension, in axis order
        :param kwargs: alternatively one name=value pair per dimension, the axis index is read from the number
                       behind the first underscore of the name (e.g. axis_00=0.5)

        :return: sum over all dimensions of the interpolated axis values
        """
        if len(kwargs) == self.dims():
            args = [0]*len(kwargs)
            for key, value in kwargs.items():
                index = int(key.split("_")[1])
                args[index] = value
        assert len(args) == self.dims(), "wrong number of arguments!"
        for i in range(len(args)):
            assert self.axis[i][0] <= args[i] <= self.axis[i][1], "out of range access on axis {}!".format(i)
        lpos, rpos, fracs = self.pos_to_indices(args)
        rows = list(range(self.dims()))
        fracs = np.array(fracs)
        fl = self.data[(rows, lpos)]
        fr = self.data[(rows, rpos)]
        return np.sum(fl*fracs + fr*(1-fracs))

    def clear(self):
        """
        Resets the instance to its empty state
        """
        self.axis.clear()
        self.data = None
        self.config = None

    def dims(self):
        """
        :return: [int] number of dimensions (rows of the data array)
        """
        return self.data.shape[0]

    def size(self):
        """
        :return: [int] number of samples per dimension (columns of the data array)
        """
        return self.data.shape[1]

    def range(self, dim):
        """
        :param dim: [int] dimension index

        :return: absolute width of the x interval of dimension dim
        """
        return np.abs(self.axis[dim][1] - self.axis[dim][0])

    def minima(self):
        """
        Returns the minimum of each dimension

        :return: [list] one entry [x_positions, minimum_value] per dimension, x_positions lists every position
                 at which the minimum value is reached
        """
        # NOTE(review): the index -> position mapping below divides by size(), whereas pos_to_indices maps
        # positions onto size()-1 intervals - confirm which of the two is intended.
        glob_mins = []
        for dim in range(self.dims()):
            x = []
            fmin = np.min(self.data[dim, :])
            for _x in range(self.size()):
                if self.data[dim, _x] <= fmin:
                    x.append(_x/self.size()*(self.axis[dim][1]-self.axis[dim][0])+self.axis[dim][0])
            glob_mins.append([x, fmin])
        return glob_mins

    def pos_to_indices(self, positions):
        """
        Converts positions into the neighbouring sample indices used for linear interpolation

        :param positions: one position value per dimension

        :return: (left indices, right indices, weights of the left samples), each a list with one entry per
                 dimension
        """
        last = self.data.shape[1]-1
        lpos = []
        rpos = []
        pfracs = []
        for n in range(self.dims()):
            pos = (positions[n] - self.axis[n][0]) / np.abs(self.axis[n][1]-self.axis[n][0]) * last
            # clamp both neighbours to the valid index range
            lpos.append(max(int(np.floor(pos)), 0))
            rpos.append(min(int(np.ceil(pos)), last))
            pfracs.append(1.0-(pos-np.floor(pos)))
        return lpos, rpos, pfracs

    def plot(self, dim=None, title=""):
        """
        Plots the sampled function of one or all dimensions

        :param dim: [int] dimension to plot, if None all dimensions are plotted, default=None
        :param title: [str] plot title, default=""
        """
        dims = list(range(self.dims())) if dim is None else [dim]
        plt.figure(figsize=(10, 8))
        for d in dims:
            # linspace always yields exactly size() positions; a float step handed to arange can produce
            # one element more or less, which makes the x and y vectors passed to plot disagree in length.
            ax = np.linspace(self.axis[d][0], self.axis[d][1], self.size(), endpoint=False)
            plt.plot(ax, self.data[d, :], '.', label='axis_{}'.format(str(d).zfill(2)))
        plt.legend()
        plt.grid()
        plt.title(title)
        plt.show()

    def add_dimension(self, data, x_range):
        """
        Appends a dimension to the simulated function

        :param data: [ndarray] function samples, 1D or of shape (1, size), must match the size of the
                     dimensions already present
        :param x_range: [list] x interval [min_x, max_x] of the new dimension
        """
        data = np.atleast_2d(data)
        if self.data is None:
            self.data = data
        else:
            assert self.data.shape[1] == data.shape[1], "shape mismatch while adding dimension!"
            dims = self.data.shape[0]
            size = self.data.shape[1]
            self.data = np.append(self.data, data).reshape((dims+1, size))
        self.axis.append(x_range)

    def load_default(self, name="3D"):
        """
        Loads one of the function definitions found in the FUNCTIONSIMULATOR_DATAPATH folder

        :param name: [str] name of the subfolder to load, default="3D"

        :raises FileExistsError: if no folder with the given name exists
        """
        path = os.path.join(FUNCTIONSIMULATOR_DATAPATH, "{}".format(name))
        if os.path.exists(path):
            self.load_images(path)
        else:
            # NOTE(review): FileNotFoundError would describe this situation better, the type is kept so that
            # existing except clauses keep working.
            raise FileExistsError("No FunctionSimulator of dimension {} available".format(name))

    def load_images(self, path):
        """
        Loads all .png images and the .cfg file found in path, images are assigned to the axes in sorted
        filename order. The process is terminated via sys.exit() if no config file could be read or if the
        number of images and config sections differ.

        :param path: [str] folder keeping the image and config files
        """
        self.clear()
        img_fnames = []
        for f in glob(os.path.join(path, "*")):
            if f.endswith(".png"):
                img_fnames.append(f)
            elif f.endswith(".cfg"):
                self.config = self.read_config(f)
            else:
                print("WARNING: files of type {} not supported, the file {} is ignored!".format(f.split(".")[-1], os.path.basename(f)))

        if self.config is None:
            print("Aborted, failed to read configfile!")
            sys.exit()

        sections = self.config.sections()
        if len(sections) != len(img_fnames):
            print("Aborted, inconsistent number of image tmplates and axis specifications!")
            sys.exit()

        img_fnames.sort()
        size_x = None
        size_y = None
        for n, fname in enumerate(img_fnames):
            img = mpimg.imread(fname)
            if len(img.shape) > 2:
                # multi channel image, only the first channel is evaluated
                img = img[:, :, 0]
            if size_x is None:
                size_x = img.shape[1]
            if size_y is None:
                # the first image defines the shape every other image has to match
                size_y = img.shape[0]
                self.data = np.zeros((len(img_fnames), size_x), dtype=np.float32)
            assert img.shape[0] == size_y, "Shape mismatch in dimension y {} is not {}".format(img.shape[0], size_y)
            assert img.shape[1] == size_x, "Shape mismatch in dimension x {} is not {}".format(img.shape[1], size_x)
            self.sample_image(img, n)

    def sample_image(self, img, dim):
        """
        Samples the function drawn into img and stores it as dimension dim. For each image column the first
        row with a value > 0 defines the function value, scaled into the y interval given in the config.

        :param img: [ndarray] 2D image
        :param dim: [int] dimension index, selects the config section axis_XX
        """
        sec_name = "axis_{}".format(str(dim).zfill(2))
        assert sec_name in self.config.sections(), "config section {} not found!".format(sec_name)
        settings = self.get_axis_settings(sec_name)
        self.axis.append([float(settings['min_x']), float(settings['max_x'])])
        y_range = [float(settings['min_y']), float(settings['max_y'])]
        mask = img > 0
        assert mask.any(axis=0).all(), "non function value in image detected, ensure each column has at least one value > 0!"
        # argmax on a boolean array returns the index of the first True entry of each column
        first_hit = mask.argmax(axis=0)
        self.data[dim, :] = 1 - first_hit/img.shape[0]
        self.data[dim, :] *= np.abs(y_range[1] - y_range[0])
        self.data[dim, :] += y_range[0]

    def read_config(self, fname):
        """
        Reads a config file

        :param fname: [str] config filename

        :return: [ConfigParser] the parsed config or None if parsing failed
        """
        try:
            config = configparser.ConfigParser()
            config.read(fname)
            return config
        except Exception as e:
            print(e)
            return None

    def get_axis_settings(self, section):
        """
        Returns all options of a config section

        :param section: [str] section name

        :return: [dict] option name -> option value (as string), None for options that could not be read
        """
        settings = {}
        for option in self.config.options(section):
            try:
                settings[option] = self.config.get(section, option)
            except Exception:
                # best effort, a broken option must not prevent reading the remaining ones
                print("exception on %s!" % option)
                settings[option] = None
        return settings
class HyppopyProject(object):
    """
    The HyppopyProject class takes care of the optimization settings. An instance can be configured using a
    config dictionary or by using the hyperparameter and settings methods. When initializing via dict, the dict
    can be passed to the constructor or to the set_config method. The class distinguishes between two
    categories, hyperparameter and general settings.

    The hyperparameter are a dictionary structure as follows and can be accessed via hyperparameter
        {'param_name': {'domain': 'uniform', ...}, ...}

    General settings are additionally converted to instance attributes and can be accessed directly or via
    settings

    An example config could look like:
        config = {'hyperparameter': {'myparam': {'domain': 'uniform', 'data': [0, 100], 'type': float}, ...},
                  'my_setting_1': 3.1415,
                  'my_setting_2': 'hello world'}
        project = HyppopyProject(config)

    The same can be achieved using:
        project = HyppopyProject()
        project.add_hyperparameter(name='myparam', domain='uniform', data=[0, 100], type=float)
        project.add_setting('my_setting_1', 3.1415)
        project.add_setting('my_setting_2', 'hello world')
    """

    def __init__(self, config=None):
        """
        Constructor

        :param config: [dict] config dictionary of the form {'hyperparameter': {...}, ...}, default=None
        """
        self._data = {HYPERPARAMETERPATH: {}, SETTINGSPATH: {}}
        if config is not None:
            self.set_config(config)

    def __parse_members(self):
        """
        Mirrors every entry of the settings dict as an instance attribute
        """
        for key, entry in self.settings.items():
            setattr(self, key, entry)

    def set_config(self, config):
        """
        Set a config dict, the entry stored under the hyperparameter key becomes the hyperparameter
        description, every other entry is treated as a general setting. The config is deep copied.

        :param config: [dict] configuration dict defining hyperparameter and general settings
        """
        assert isinstance(config, dict), "precondition violation, config needs to be of type dict, got {}".format(type(config))
        remaining = copy.deepcopy(config)
        if HYPERPARAMETERPATH in remaining:
            self._data[HYPERPARAMETERPATH] = remaining.pop(HYPERPARAMETERPATH)
        self._data[SETTINGSPATH] = remaining
        self.__parse_members()

    def set_hyperparameter(self, params):
        """
        Set the complete hyperparameter description at once by passing the hyperparameter section of a config
        dict (see class description). Use add_hyperparameter to add them one by one instead.

        :param params: [dict] configuration dict defining hyperparameter
        """
        assert isinstance(params, dict), "precondition violation, params needs to be of type dict, got {}".format(type(params))
        self._data[HYPERPARAMETERPATH] = params

    def add_hyperparameter(self, name, **kwargs):
        """
        Add (or replace) a single hyperparameter description. Use set_hyperparameter to set all at once
        instead.

        :param name: [str] hyperparameter name
        :param kwargs: [dict] configuration of the hyperparameter e.g. domain='uniform', data=[1,100], ...
        """
        assert isinstance(name, str), "precondition violation, name needs to be of type str, got {}".format(type(name))
        self._data[HYPERPARAMETERPATH][name] = kwargs

    def set_settings(self, **kwargs):
        """
        Set the complete general settings at once by passing them as name=value pairs. Use add_setting to add
        them one by one instead.

        :param kwargs: [dict] settings e.g. my_setting_1=3.1415, my_setting_2='hello world', ...
        """
        self._data[SETTINGSPATH] = kwargs
        self.__parse_members()

    def add_setting(self, name, value):
        """
        Add (or replace) a single general setting. Use set_settings to set all at once instead.

        :param name: [str] setting name
        :param value: [object] setting value
        """
        assert isinstance(name, str), "precondition violation, name needs to be of type str, got {}".format(type(name))
        self._data[SETTINGSPATH][name] = value
        self.__parse_members()

    def get_typeof(self, name):
        """
        Returns a hyperparameter type by name

        :param name: [str] hyperparameter name

        :return: [type] hyperparameter type

        :raises LookupError: if the hyperparameter is unknown or does not define a type
        """
        if name not in self.hyperparameter:
            raise LookupError("Typechecking failed, couldn't find hyperparameter {}!".format(name))
        description = self.hyperparameter[name]
        if "type" not in description.keys():
            raise LookupError("Typechecking failed, couldn't find hyperparameter signature type!")
        return description["type"]

    @property
    def hyperparameter(self):
        return self._data[HYPERPARAMETERPATH]

    @property
    def settings(self):
        return self._data[SETTINGSPATH]
-# -# See LICENSE - -from .Singleton import * - -import os -import logging -from hyppopy.HyppopyProject import HyppopyProject -from hyppopy.globals import DEBUGLEVEL - -LOG = logging.getLogger(os.path.basename(__file__)) -LOG.setLevel(DEBUGLEVEL) - - -@singleton_object -class ProjectManager(metaclass=Singleton): - - def __init__(self): - self._current_project = None - self._projects = {} - - def clear_all(self): - pass - - def new_project(self, name="HyppopyProject", config=None): - if name in self._projects.keys(): - name = self.check_projectname(name) - self._projects[name] = HyppopyProject(config) - self._current_project = self._projects[name] - return self._current_project - - def check_projectname(self, name): - split = name.split(".") - if len(split) == 0: - return split[0] + "." + str(0).zfill(3) - else: - try: - number = int(split[-1]) - del split[-1] - except: - number = 0 - return '.'.join(split) + "." + str(number).zfill(3) - - def get_current(self): - if self._current_project is None: - self.new_project() - return self._current_project - - def get_project(self, name): - if name in self._projects.keys(): - self._current_project = self._projects[name] - return self.get_current() - return self.new_project(name) - - def get_projectnames(self): - return self._projects.keys() - diff --git a/hyppopy/Singleton.py b/hyppopy/Singleton.py index fac0bc8..39bd2b2 100644 --- a/hyppopy/Singleton.py +++ b/hyppopy/Singleton.py @@ -1,50 +1,49 @@ -# DKFZ -# +# Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. 
class Singleton(type):
    """
    Metaclass creating at most one instance per class, the instance is created on the first call of the class
    and returned again on every later call.
    """
    _instances = {}

    def __call__(cls, *args, **kwargs):
        try:
            return cls._instances[cls]
        except KeyError:
            instance = super(Singleton, cls).__call__(*args, **kwargs)
            cls._instances[cls] = instance
            return instance

    @classmethod
    def __instancecheck__(mcs, instance):
        owner = instance.__class__
        return owner is mcs or isinstance(owner, mcs)


def singleton_object(cls):
    """Class decorator that transforms (and replaces) a class definition (which
    must have a Singleton metaclass) with the actual singleton object. Ensures
    that the resulting object can still be "instantiated" (i.e., called),
    returning the same object. Also ensures the object can be pickled, is
    hashable, and has the correct string representation (the name of the
    singleton)
    """
    assert isinstance(cls, Singleton), cls.__name__ + " must use Singleton metaclass"

    class_name = cls.__name__

    def self_instantiate(self):
        return self

    def hash_by_class(self):
        return hash(cls)

    def name_of_class(self):
        return cls.__name__

    cls.__call__ = self_instantiate
    cls.__hash__ = hash_by_class
    cls.__repr__ = name_of_class
    # returning a plain string from __reduce__ makes pickle store the object as a global of that name
    cls.__reduce__ = name_of_class

    instance = cls()
    instance.__name__ = class_name
    return instance
@singleton_object
class SolverPool(metaclass=Singleton):
    """
    The SolverPool is a helper singleton class to get the desired solver either by name and a HyppopyProject
    instance or by a HyppopyProject instance only, if it defines a setting field called solver.
    """

    def __init__(self):
        self._solver_list = ["hyperopt",
                             "optunity",
                             "optuna",
                             "randomsearch",
                             "quasirandomsearch",
                             "gridsearch"]

    def get_solver_names(self):
        """
        Returns a list of available solvers

        :return: [list] solver list (a copy, changing it does not affect the pool)
        """
        return list(self._solver_list)

    def get(self, solver_name=None, project=None):
        """
        Get the configured solver instance. If the project defines a setting called solver, that setting is
        used and a solver_name passed in addition is ignored.

        :param solver_name: [str] solver name, if None, the project must have an attribute solver keeping the solver name, default=None
        :param project: [HyppopyProject] HyppopyProject instance, if None the solver is created without a project, default=None

        :return: [HyppopySolver] the configured solver instance

        :raises AssertionError: if an argument has the wrong type or the solver name is unknown
        """
        if solver_name is not None:
            assert isinstance(solver_name, str), "precondition violation, solver_name type str expected, got {} instead!".format(type(solver_name))
        if project is not None:
            assert isinstance(project, HyppopyProject), "precondition violation, project type HyppopyProject expected, got {} instead!".format(type(project))
            if "solver" in project.__dict__:
                solver_name = project.solver

        # single place mapping the solver names onto their classes
        solver_classes = {"hyperopt": HyperoptSolver,
                          "optunity": OptunitySolver,
                          "optuna": OptunaSolver,
                          "gridsearch": GridsearchSolver,
                          "randomsearch": RandomsearchSolver,
                          "quasirandomsearch": QuasiRandomsearchSolver}

        # a name that is listed but has no class is reported as well instead of silently returning None
        if solver_name not in self._solver_list or solver_name not in solver_classes:
            raise AssertionError("Solver named [{}] not implemented!".format(solver_name))

        solver_class = solver_classes[solver_name]
        if project is not None:
            return solver_class(project)
        return solver_class()
Using this class make it necessary starting a visdom server beforehand $ python -m visdom.server + """ def __init__(self, project, port=8097, server="http://localhost"): self._viz = Visdom(port=port, server=server) self._enabled = self._viz.check_connection(timeout_seconds=3) if not self._enabled: warnings.warn("No connection to visdom server established. Visualization cannot be displayed!") self._project = project self._best_win = None self._best_loss = None self._loss_iter_plot = None self._status_report = None self._axis_tags = None self._axis_plots = None def plot_losshistory(self, input_data): + """ + This function plots the loss history loss over iteration + + :param input_data: [dict] trail infos + """ loss = np.array([input_data["loss"]]) iter = np.array([input_data["iterations"]]) if self._loss_iter_plot is None: self._loss_iter_plot = self._viz.line(loss, X=iter, opts=dict( markers=True, markersize=5, dash=np.array(['dashdot']), title="Loss History", xlabel='iteration', ylabel='loss' )) else: self._viz.line(loss, X=iter, win=self._loss_iter_plot, update='append') def plot_hyperparameter(self, input_data): + """ + This function plots each hyperparameter axis + + :param input_data: [dict] trail infos + """ if self._axis_plots is None: self._axis_tags = [] self._axis_plots = {} for item in input_data.keys(): if item == "refresh_time" or item == "book_time" or item == "iterations" or item == "status" or item == "loss": continue self._axis_tags.append(item) for axis in self._axis_tags: xlabel = "value" if isinstance(input_data[axis], str): if self._project.hyperparameter[axis]["domain"] == "categorical": xlabel = '-'.join(self._project.hyperparameter[axis]["data"]) input_data[axis] = self._project.hyperparameter[axis]["data"].index(input_data[axis]) axis_loss = np.array([input_data[axis], input_data["loss"]]).reshape(1, -1) self._axis_plots[axis] = self._viz.scatter(axis_loss, opts=dict( markersize=5, title=axis, xlabel=xlabel, ylabel='loss')) else: for axis 
in self._axis_tags: if isinstance(input_data[axis], str): if self._project.hyperparameter[axis]["domain"] == "categorical": input_data[axis] = self._project.hyperparameter[axis]["data"].index(input_data[axis]) axis_loss = np.array([input_data[axis], input_data["loss"]]).reshape(1, -1) self._viz.scatter(axis_loss, win=self._axis_plots[axis], update='append') def show_statusreport(self, input_data): + """ + This function prints status report per iteration + + :param input_data: [dict] trail infos + """ duration = input_data['refresh_time'] - input_data['book_time'] duration, time_format = time_formatter(duration.total_seconds()) report = "Iteration {}: {}{} -> {}\n".format(input_data["iterations"], duration, time_format, input_data["status"]) if self._status_report is None: self._status_report = self._viz.text(report) else: self._viz.text(report, win=self._status_report, append=True) def show_best(self, input_data): + """ + Shows best parameter set + + :param input_data: [dict] trail infos + """ if self._best_win is None: self._best_loss = input_data["loss"] txt = "Best Parameter Set:
Loss: {}
" self._best_win = self._viz.text(txt) else: if input_data["loss"] < self._best_loss: self._best_loss = input_data["loss"] txt = "Best Parameter Set:
Loss: {}
" self._viz.text(txt, win=self._best_win, append=False) def update(self, input_data): + """ + This function calls all visdom displaying routines + + :param input_data: [dict] trail infos + """ if self._enabled: self.show_statusreport(input_data) self.plot_losshistory(input_data) self.plot_hyperparameter(input_data) self.show_best(input_data) diff --git a/hyppopy/__init__.py b/hyppopy/__init__.py index bea4fb7..34a3a76 100644 --- a/hyppopy/__init__.py +++ b/hyppopy/__init__.py @@ -1,14 +1,13 @@ -# DKFZ -# +# Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE __version__ = '0.5.0.0' diff --git a/hyppopy/globals.py b/hyppopy/globals.py index ff68aba..7d74106 100644 --- a/hyppopy/globals.py +++ b/hyppopy/globals.py @@ -1,36 +1,34 @@ -# DKFZ -# +# Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. 
# # See LICENSE import os import sys import logging ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) sys.path.insert(0, ROOT) LIBNAME = "hyppopy" TESTDATA_DIR = os.path.join(ROOT, *(LIBNAME, "tests", "data")) HYPERPARAMETERPATH = "hyperparameter" SETTINGSPATH = "settings" -VFUNCDATAPATH = os.path.join(os.path.join(ROOT, LIBNAME), "virtualparameterspace") +FUNCTIONSIMULATOR_DATAPATH = os.path.join(os.path.join(ROOT, LIBNAME), "virtualparameterspace") SUPPORTED_DOMAINS = ["uniform", "normal", "loguniform", "categorical"] SUPPORTED_DTYPES = ["int", "float", "str"] -#DEFAULTITERATIONS = 500 DEFAULTGRIDFREQUENCY = 10 LOGFILENAME = os.path.join(ROOT, '{}_log.log'.format(LIBNAME)) DEBUGLEVEL = logging.DEBUG logging.basicConfig(filename=LOGFILENAME, filemode='w', format='%(levelname)s: %(name)s - %(message)s') diff --git a/hyppopy/solvers/GridsearchSolver.py b/hyppopy/solvers/GridsearchSolver.py index 0daad85..1b43ea2 100644 --- a/hyppopy/solvers/GridsearchSolver.py +++ b/hyppopy/solvers/GridsearchSolver.py @@ -1,197 +1,206 @@ -# DKFZ -# +# Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. 
def get_uniform_axis_sample(a, b, N, dtype):
    """
    Returns a uniform sample x(n) in the range [a,b] sampled at N points

    :param a: left value range bound
    :param b: right value range bound
    :param N: discretization of interval [a,b]
    :param dtype: data type, int or float

    :return: [list] axis range

    :raises AssertionError: if a >= b, N is not an int or dtype is not supported
    """
    assert a < b, "condition a < b violated!"
    assert isinstance(N, int), "condition N of type int violated!"
    if dtype is int:
        return list(np.linspace(a, b, N).astype(int))
    elif dtype is float:
        return list(np.linspace(a, b, N))
    else:
        raise AssertionError("dtype {} not supported for uniform sampling!".format(dtype))


def get_norm_cdf(N):
    """
    Returns a normed gaussian cdf (range [0,1]) with N sampling points. The curve is assembled from two
    mirrored halves of N//2 points each, for odd N the center value 0.5 is inserted in between.

    :param N: sampling points

    :return: [ndarray] gaussian cdf function values

    :raises AssertionError: if N is not an int
    """
    assert isinstance(N, int), "condition N of type int violated!"
    is_odd = N % 2 != 0
    half = N // 2
    sigma = 1/3
    x = np.linspace(0, 1, half)
    rising = norm.cdf(x, loc=0, scale=sigma) - 0.5
    # the upper half is the mirrored lower half
    falling = np.flip(1 - rising, axis=0)
    if is_odd:
        rising = np.append(rising, [0.5])
    return np.concatenate((rising, falling), axis=0)


def get_gaussian_axis_sample(a, b, N, dtype):
    """
    Returns a function value f(n) where f is a gaussian cdf in range [a, b] and N sampling points

    :param a: left value range bound
    :param b: right value range bound
    :param N: discretization of interval [a,b]
    :param dtype: data type, int or float

    :return: [list] axis range

    :raises AssertionError: if a >= b, N is not an int or dtype is not supported
    """
    assert a < b, "condition a < b violated!"
    assert isinstance(N, int), "condition N of type int violated!"
    if dtype is not int and dtype is not float:
        raise AssertionError("dtype {} not supported for gaussian sampling!".format(dtype))
    # the cdf is computed once for all sampling points instead of once per point
    values = a + get_norm_cdf(N)*(b-a)
    if dtype is int:
        return [int(x) for x in values]
    return list(values)


def get_logarithmic_axis_sample(a, b, N, dtype):
    """
    Returns a function value f(n) where f is logarithmic function e^x sampling
    the exponent range [log(a), log(b)] linear at N sampling points.
    The function values returned are in the range [a, b].

    :param a: left value range bound
    :param b: right value range bound
    :param N: discretization of interval [a,b]
    :param dtype: data type, int or float

    :return: [list] axis range

    :raises AssertionError: if a >= b, a <= 0, N is not an int or dtype is not supported
    """
    assert a < b, "condition a < b violated!"
    assert a > 0, "condition a > 0 violated!"
    assert isinstance(N, int), "condition N of type int violated!"
    if dtype is not int and dtype is not float:
        raise AssertionError("dtype {} not supported for logarithmic sampling!".format(dtype))
    # convert input range into exponent range
    lexp = np.log(a)
    rexp = np.log(b)
    values = np.exp(np.linspace(lexp, rexp, N))
    # exp(log(x)) is not guaranteed to reproduce x exactly, a result slightly below an integer bound would
    # be truncated to the next lower integer, so the bounds themselves are written back.
    if N >= 1:
        values[0] = a
    if N >= 2:
        values[-1] = b
    if dtype is int:
        return [int(x) for x in values]
    return list(values)
def convert_searchspace(self, hyperparameter):
    """
    The function converts the standard parameter input into a range list depending
    on the domain. These rangelists are later used with itertools product to create
    a parameter space sample of each combination.

    A non categorical parameter without a frequency entry gets the default gridsearch frequency written into
    its description and a warning is issued. Parameters of an unknown domain are left out.

    :param hyperparameter: [dict] hyperparameter space

    :return: [list] name and range for each parameter space axis
    """
    LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter)))
    samplers = (("uniform", get_uniform_axis_sample),
                ("normal", get_gaussian_axis_sample),
                ("loguniform", get_logarithmic_axis_sample))
    axis_names = []
    axis_ranges = []
    for name, param in hyperparameter.items():
        domain = param["domain"]
        if domain != "categorical" and "frequency" not in param.keys():
            param["frequency"] = DEFAULTGRIDFREQUENCY
            warnings.warn("No frequency field found, used default gridsearch frequency {}".format(DEFAULTGRIDFREQUENCY))

        if domain == "categorical":
            # categorical axes are used as given
            axis_names.append(name)
            axis_ranges.append(param["data"])
            continue

        sampler = next((func for key, func in samplers if domain == key), None)
        if sampler is None:
            continue
        axis_names.append(name)
        axis_ranges.append(sampler(param["data"][0], param["data"][1], param["frequency"], param["type"]))
    return [axis_names, axis_ranges]
# # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE import os import copy import logging import numpy as np from pprint import pformat from hyperopt import fmin, tpe, hp, STATUS_OK, STATUS_FAIL, Trials from hyppopy.globals import DEBUGLEVEL from hyppopy.solvers.HyppopySolver import HyppopySolver from hyppopy.BlackboxFunction import BlackboxFunction LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class HyperoptSolver(HyppopySolver): def __init__(self, project=None): HyppopySolver.__init__(self, project) self._searchspace = None def define_interface(self): - self.add_member("max_iterations", int) - self.add_hyperparameter_signature(name="domain", dtype=str, + self._add_member("max_iterations", int) + self._add_hyperparameter_signature(name="domain", dtype=str, options=["uniform", "normal", "loguniform", "categorical"]) - self.add_hyperparameter_signature(name="data", dtype=list) - self.add_hyperparameter_signature(name="type", dtype=type) + self._add_hyperparameter_signature(name="data", dtype=list) + self._add_hyperparameter_signature(name="type", dtype=type) def loss_function(self, params): for name, p in self._searchspace.items(): if p["domain"] != "categorical": if params[name] < p["data"][0]: params[name] = p["data"][0] if params[name] > p["data"][1]: params[name] = p["data"][1] status = STATUS_FAIL try: loss = self.blackbox(**params) if loss is not None: status = STATUS_OK else: loss = 1e9 except Exception as e: LOG.error("execution of self.blackbox(**params) failed due to:\n {}".format(e)) status = STATUS_FAIL loss = 1e9 cbd = copy.deepcopy(params) cbd['iterations'] = self._trials.trials[-1]['tid'] + 1 cbd['loss'] = loss cbd['status'] = status cbd['book_time'] = self._trials.trials[-1]['book_time'] cbd['refresh_time'] = self._trials.trials[-1]['refresh_time'] if isinstance(self.blackbox, BlackboxFunction) and 
self.blackbox.callback_func is not None: self.blackbox.callback_func(**cbd) if self._visdom_viewer is not None: self._visdom_viewer.update(cbd) return {'loss': loss, 'status': status} def execute_solver(self, searchspace): LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace))) self.trials = Trials() try: self.best = fmin(fn=self.loss_function, space=searchspace, algo=tpe.suggest, max_evals=self.max_iterations, trials=self.trials) except Exception as e: msg = "internal error in hyperopt.fmin occured. {}".format(e) LOG.error(msg) raise BrokenPipeError(msg) def convert_searchspace(self, hyperparameter): self._searchspace = hyperparameter solution_space = {} for name, content in hyperparameter.items(): param_settings = {'name': name} for key, value in content.items(): if key == 'domain': param_settings['domain'] = value elif key == 'data': param_settings['data'] = value elif key == 'type': param_settings['dtype'] = value solution_space[name] = self.convert(param_settings) return solution_space def convert(self, param_settings): name = param_settings["name"] domain = param_settings["domain"] dtype = param_settings["dtype"] data = param_settings["data"] if domain == "uniform": if dtype is float: return hp.uniform(name, data[0], data[1]) elif dtype is int: data = list(np.arange(int(data[0]), int(data[1] + 1))) return hp.choice(name, data) else: msg = "cannot convert the type {} in domain {}".format(dtype, domain) LOG.error(msg) raise LookupError(msg) elif domain == "loguniform": if dtype is float: if data[0] == 0: data[0] += 1e-23 assert data[0] > 0, "precondition Violation, a < 0!" assert data[0] < data[1], "precondition Violation, a > b!" assert data[1] > 0, "precondition Violation, b < 0!" lexp = np.log(data[0]) rexp = np.log(data[1]) assert lexp is not np.nan, "precondition violation, left bound input error, results in nan!" assert rexp is not np.nan, "precondition violation, right bound input error, results in nan!" 
return hp.loguniform(name, lexp, rexp) else: msg = "cannot convert the type {} in domain {}".format(dtype, domain) LOG.error(msg) raise LookupError(msg) elif domain == "normal": if dtype is float: mu = (data[1] - data[0]) / 2.0 sigma = mu / 3 return hp.normal(name, data[0] + mu, sigma) else: msg = "cannot convert the type {} in domain {}".format(dtype, domain) LOG.error(msg) raise LookupError(msg) elif domain == "categorical": if dtype is str: return hp.choice(name, data) elif dtype is bool: data = [] for elem in data: if elem == "true" or elem == "True" or elem == 1 or elem == "1": data.append(True) elif elem == "false" or elem == "False" or elem == 0 or elem == "0": data.append(False) else: msg = "cannot convert the type {} in domain {}, unknown bool type value".format(dtype, domain) LOG.error(msg) raise LookupError(msg) return hp.choice(name, data) else: msg = "Precondition violation, domain named {} not available!".format(domain) LOG.error(msg) raise IOError(msg) diff --git a/hyppopy/solvers/HyppopySolver.py b/hyppopy/solvers/HyppopySolver.py index e6e5773..c774ad4 100644 --- a/hyppopy/solvers/HyppopySolver.py +++ b/hyppopy/solvers/HyppopySolver.py @@ -1,374 +1,445 @@ -# DKFZ -# +# Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. 
# # See LICENSE +__all__ = ['HyppopySolver'] + import abc import copy import types import datetime import numpy as np import pandas as pd from hyperopt import Trials from hyppopy.globals import * from hyppopy.VisdomViewer import VisdomViewer from hyppopy.HyppopyProject import HyppopyProject from hyppopy.BlackboxFunction import BlackboxFunction -from hyppopy.VirtualFunction import VirtualFunction +from hyppopy.FunctionSimulator import FunctionSimulator from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class HyppopySolver(object): """ The HyppopySolver class is the base class for all solver addons. It defines virtual functions a child class has to implement to deal with the front-end communication, orchestrating the optimization process and ensuring a proper process information storing. The key idea is that the HyppopySolver class defines an interface to configure and run an object instance of itself independently from the concrete solver lib used to optimize in the background. To achieve this goal an addon developer needs to implement the abstract methods 'convert_searchspace', 'execute_solver' and 'loss_function_call'. These methods abstract the peculiarities of the solver libs to offer, on the user side, a simple and consistent parameter space configuration and optimization procedure. The method 'convert_searchspace' transforms the hyppopy parameter space description into the solver lib specific description. The method loss_function_call is used to handle solver lib specifics of calling the actual blackbox function and execute_solver is executed when the run method is invoked und takes care of calling the solver lib solving routine. + + The class HyppopySolver defines an interface to be implemented when writing a custom solver. 
Each solver derivative + needs to implement the abstract methods: + + - convert_searchspace + - execute_solver + - loss_function_call + - define_interface + + The dev-user interface consists of the methods: + + - _add_member + - _add_hyperparameter_signature + - _check_project + + The end-user interface consists of the methods: + + - run + - get_results + - print_best + - print_timestats + - start_viewer """ def __init__(self, project=None): self._idx = None # current iteration counter self._best = None # best parameter set self._trials = None # trials object, hyppopy uses the Trials object from hyperopt self._blackbox = None # blackbox function, eiter a function or a BlackboxFunction instance self._total_duration = None # keeps track of the solvers running time self._solver_overhead = None # stores the time overhead of the solver, means total time minus time in blackbox self._time_per_iteration = None # mean time per iterration self._accumulated_blackbox_time = None # summed time the solver was in the blackbox function self._visdom_viewer = None # visdom viewer instance self._child_members = {} # this dict keeps track of the settings the child solver defines self._hopt_signatures = {} # this dict keeps track of the hyperparameter signatures the child solver defines self.define_interface() # the child define interface function is called which defines settings and hyperparameter signatures if project is not None: self.project = project @abc.abstractmethod def convert_searchspace(self, hyperparameter): """ This function gets the unified hyppopy-like parameterspace description as input and, if necessary, should convert it into a solver lib specific format. The function is invoked when run is called and what it returns is passed as searchspace argument to the function execute_solver. + :param hyperparameter: [dict] nested parameter description dict e.g. 
{'name': {'domain':'uniform', 'data':[0,1], 'type':'float'}, ...} + :return: [object] converted hyperparameter space """ raise NotImplementedError('users must define convert_searchspace to use this class') @abc.abstractmethod def execute_solver(self, searchspace): """ This function is called immediatly after convert_searchspace and get the output of the latter as input. It's purpose is to call the solver libs main optimization function. + :param searchspace: converted hyperparameter space """ raise NotImplementedError('users must define execute_solver to use this class') @abc.abstractmethod def loss_function_call(self, params): """ This function is called within the function loss_function and encapsulates the actual blackbox function call in each iteration. The function loss_function takes care of the iteration driving and reporting, but each solver lib might need some special treatment between the parameter set selection and the calling of the actual blackbox function, e.g. parameter converting. + :param params: [dict] hyperparameter space sample e.g. {'p1': 0.123, 'p2': 3.87, ...} + :return: [float] loss """ raise NotImplementedError('users must define loss_function_call to use this class') @abc.abstractmethod def define_interface(self): """ This function is called when HyppopySolver.__init__ function finished. Child classes need to define their - individual parameter here by calling the add_member function for each class member variable need to be defined. - Using add_hyperparameter_signature the structure of a hyperparameter the solver expects must be defined. + individual parameter here by calling the _add_member function for each class member variable need to be defined. + Using _add_hyperparameter_signature the structure of a hyperparameter the solver expects must be defined. Both, members and hyperparameter signatures are later get checked, before executing the solver, ensuring settings passed fullfill solver needs. 
""" raise NotImplementedError('users must define define_interface to use this class') - def add_member(self, name, dtype, value=None, default=None): + def _add_member(self, name, dtype, value=None, default=None): + """ + When designing your child solver class you need to implement the define_interface abstract method where you can + call _add_member to define custom solver options that are automatically converted to class attributes. + + :param name: [str] option name + :param dtype: [type] option data type + :param value: [object] option value + :param default: [object] option default value + """ assert isinstance(name, str), "precondition violation, name needs to be of type str, got {}".format(type(name)) if value is not None: assert isinstance(value, dtype), "precondition violation, value does not match dtype condition!" if default is not None: assert isinstance(default, dtype), "precondition violation, default does not match dtype condition!" setattr(self, name, value) self._child_members[name] = {"type": dtype, "value": value, "default": default} - def add_hyperparameter_signature(self, name, dtype, options=None): + def _add_hyperparameter_signature(self, name, dtype, options=None): + """ + When designing your child solver class you need to implement the define_interface abstract method where you can + call _add_hyperparameter_signature to define a hyperparamter signature which is automatically checked for + consistency while solver execution. 
+ + :param name: [str] hyperparameter name + :param dtype: [type] hyperparameter data type + :param options: [list] list of possible values the hp can be set, if None no option check is done + """ assert isinstance(name, str), "precondition violation, name needs to be of type str, got {}".format(type(name)) self._hopt_signatures[name] = {"type": dtype, "options": options} + def _check_project(self): + """ + The function checks the members and hyperparameter signatures read from the project instance to be consistent + with the members and signatures defined in the child class via define_interface. + """ + assert isinstance(self.project, HyppopyProject), "Invalid project instance, either not set or setting failed!" + + # check hyperparameter signatures + for name, param in self.project.hyperparameter.items(): + for sig, settings in self._hopt_signatures.items(): + if sig not in param.keys(): + msg = "Missing hyperparameter signature {}!".format(sig) + LOG.error(msg) + raise LookupError(msg) + else: + if not isinstance(param[sig], settings["type"]): + msg = "Hyperparameter signature type mismatch, expected type {} got {}!".format(settings["type"], param[sig]) + LOG.error(msg) + raise TypeError(msg) + if settings["options"] is not None: + if param[sig] not in settings["options"]: + msg = "Wrong signature value, {} not found in signature options!".format(param[sig]) + LOG.error(msg) + raise LookupError(msg) + + # check child members + for name in self._child_members.keys(): + if name not in self.project.__dict__.keys(): + msg = "missing settings field {}!".format(name) + LOG.error(msg) + raise LookupError(msg) + self.__dict__[name] = self.project.settings[name] + + def __compute_time_statistics(self): + """ + Evaluates all timestatistic values available + """ + dts = [] + for trial in self._trials.trials: + if 'book_time' in trial.keys() and 'refresh_time' in trial.keys(): + dt = trial['refresh_time'] - trial['book_time'] + dts.append(dt.total_seconds()) + 
self._time_per_iteration = np.mean(dts) * 1e3 + self._accumulated_blackbox_time = np.sum(dts) * 1e3 + tmp = self.total_duration - self._accumulated_blackbox_time + self._solver_overhead = int(np.round(100.0 / (self.total_duration + 1e-12) * tmp)) + def loss_function(self, **params): """ This function is called each iteration with a selected parameter set. The parameter set selection is driven by the solver lib itself. The purpose of this function is to take care of the iteration reporting and the calling of the callback_func is available. As a developer you might want to overwrite this function completely (e.g. HyperoptSolver) but then you need to take care for iteration reporting for yourself. The alternative is to only implement loss_function_call (e.g. OptunitySolver). + :param params: [dict] hyperparameter space sample e.g. {'p1': 0.123, 'p2': 3.87, ...} + :return: [float] loss """ self._idx += 1 vals = {} idx = {} for key, value in params.items(): vals[key] = [value] idx[key] = [self._idx] trial = {'tid': self._idx, 'result': {'loss': None, 'status': 'ok'}, 'misc': { 'tid': self._idx, 'idxs': idx, 'vals': vals }, 'book_time': datetime.datetime.now(), 'refresh_time': None } try: loss = self.loss_function_call(params) trial['result']['loss'] = loss trial['result']['status'] = 'ok' if loss == np.nan: trial['result']['status'] = 'failed' except Exception as e: LOG.error("computing loss failed due to:\n {}".format(e)) loss = np.nan trial['result']['loss'] = np.nan trial['result']['status'] = 'failed' trial['refresh_time'] = datetime.datetime.now() self._trials.trials.append(trial) cbd = copy.deepcopy(params) cbd['iterations'] = self._idx cbd['loss'] = loss cbd['status'] = trial['result']['status'] cbd['book_time'] = trial['book_time'] cbd['refresh_time'] = trial['refresh_time'] if isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None: self.blackbox.callback_func(**cbd) if self._visdom_viewer is not None: 
self._visdom_viewer.update(cbd) return loss def run(self, print_stats=True): """ This function starts the optimization process. + :param print_stats: [bool] en- or disable console output """ self._idx = 0 self.trials = Trials() start_time = datetime.datetime.now() try: search_space = self.convert_searchspace(self.project.hyperparameter) except Exception as e: msg = "Failed to convert searchspace, error: {}".format(e) LOG.error(msg) raise AssertionError(msg) try: self.execute_solver(search_space) except Exception as e: msg = "Failed to execute solver, error: {}".format(e) LOG.error(msg) raise AssertionError(msg) end_time = datetime.datetime.now() dt = end_time - start_time days = divmod(dt.total_seconds(), 86400) hours = divmod(days[1], 3600) minutes = divmod(hours[1], 60) seconds = divmod(minutes[1], 1) milliseconds = divmod(seconds[1], 0.001) self._total_duration = [int(days[0]), int(hours[0]), int(minutes[0]), int(seconds[0]), int(milliseconds[0])] if print_stats: self.print_best() self.print_timestats() def get_results(self): """ This function returns a complete optimization history as pandas DataFrame and a dict with the optimal parameter set. + :return: [DataFrame], [dict] history and optimal parameter set """ assert isinstance(self.trials, Trials), "precondition violation, wrong trials type! Maybe solver was not yet executed?" 
results = {'duration': [], 'losses': [], 'status': []} pset = self.trials.trials[0]['misc']['vals'] for p in pset.keys(): results[p] = [] for n, trial in enumerate(self.trials.trials): t1 = trial['book_time'] t2 = trial['refresh_time'] results['duration'].append((t2 - t1).microseconds / 1000.0) results['losses'].append(trial['result']['loss']) results['status'].append(trial['result']['status'] == 'ok') losses = np.array(results['losses']) results['losses'] = list(losses) pset = trial['misc']['vals'] for p in pset.items(): results[p[0]].append(p[1][0]) return pd.DataFrame.from_dict(results), self.best def print_best(self): + """ + Optimization result console output printing. + """ print("\n") print("#" * 40) print("### Best Parameter Choice ###") print("#" * 40) for name, value in self.best.items(): print(" - {}\t:\t{}".format(name, value)) print("\n - number of iterations\t:\t{}".format(self.trials.trials[-1]['tid']+1)) print(" - total time\t:\t{}d:{}h:{}m:{}s:{}ms".format(self._total_duration[0], self._total_duration[1], self._total_duration[2], self._total_duration[3], self._total_duration[4])) print("#" * 40) - def compute_time_statistics(self): - dts = [] - for trial in self._trials.trials: - if 'book_time' in trial.keys() and 'refresh_time' in trial.keys(): - dt = trial['refresh_time'] - trial['book_time'] - dts.append(dt.total_seconds()) - self._time_per_iteration = np.mean(dts) * 1e3 - self._accumulated_blackbox_time = np.sum(dts) * 1e3 - tmp = self.total_duration - self._accumulated_blackbox_time - self._solver_overhead = int(np.round(100.0 / (self.total_duration+1e-12) * tmp)) - def print_timestats(self): + """ + Time statistic console output printing. 
+ """ print("\n") print("#" * 40) print("### Timing Statistics ###") print("#" * 40) print(" - per iteration: {}ms".format(int(self.time_per_iteration*1e4)/10000)) print(" - total time: {}d:{}h:{}m:{}s:{}ms".format(self._total_duration[0], self._total_duration[1], self._total_duration[2], self._total_duration[3], self._total_duration[4])) print("#" * 40) print(" - solver overhead: {}%".format(self.solver_overhead)) def start_viewer(self, port=8097, server="http://localhost"): + """ + Starts the visdom viewer. + + :param port: [int] port number, default: 8097 + :param server: [str] server name, default: http://localhost + """ try: self._visdom_viewer = VisdomViewer(self._project, port, server) except Exception as e: import warnings warnings.warn("Failed starting VisdomViewer. Is the server running? If not start it via $visdom") LOG.error("Failed starting VisdomViewer: {}".format(e)) self._visdom_viewer = None - def check_project(self): - # check hyperparameter signatures - for name, param in self.project.hyperparameter.items(): - for sig, settings in self._hopt_signatures.items(): - if sig not in param.keys(): - msg = "Missing hyperparameter signature {}!".format(sig) - LOG.error(msg) - raise LookupError(msg) - else: - if not isinstance(param[sig], settings["type"]): - msg = "Hyperparameter signature type mismatch, expected type {} got {}!".format(settings["type"], param[sig]) - LOG.error(msg) - raise TypeError(msg) - if settings["options"] is not None: - if param[sig] not in settings["options"]: - msg = "Wrong signature value, {} not found in signature options!".format(param[sig]) - LOG.error(msg) - raise LookupError(msg) - - # check child members - for name in self._child_members.keys(): - if name not in self.project.__dict__.keys(): - msg = "missing settings field {}!".format(name) - LOG.error(msg) - raise LookupError(msg) - self.__dict__[name] = self.project.settings[name] - @property def project(self): return self._project @project.setter def project(self, 
value): if isinstance(value, dict): self._project = HyppopyProject(value) elif isinstance(value, HyppopyProject): self._project = value else: msg = "Input error, project_manager of type: {} not allowed!".format(type(value)) LOG.error(msg) raise TypeError(msg) - self.check_project() + self._check_project() @property def blackbox(self): return self._blackbox @blackbox.setter def blackbox(self, value): - if isinstance(value, types.FunctionType) or isinstance(value, BlackboxFunction) or isinstance(value, VirtualFunction): + if isinstance(value, types.FunctionType) or isinstance(value, BlackboxFunction) or isinstance(value, FunctionSimulator): self._blackbox = value else: self._blackbox = None msg = "Input error, blackbox of type: {} not allowed!".format(type(value)) LOG.error(msg) raise TypeError(msg) @property def best(self): return self._best @best.setter def best(self, value): if not isinstance(value, dict): msg = "Input error, best of type: {} not allowed!".format(type(value)) LOG.error(msg) raise TypeError(msg) self._best = value @property def trials(self): return self._trials @trials.setter def trials(self, value): self._trials = value @property def total_duration(self): return (self._total_duration[0]*86400 + self._total_duration[1] * 3600 + self._total_duration[2] * 60 + self._total_duration[3]) * 1000 + self._total_duration[4] @property def solver_overhead(self): if self._solver_overhead is None: - self.compute_time_statistics() + self.__compute_time_statistics() return self._solver_overhead @property def time_per_iteration(self): if self._time_per_iteration is None: - self.compute_time_statistics() + self.__compute_time_statistics() return self._time_per_iteration @property def accumulated_blackbox_time(self): if self._accumulated_blackbox_time is None: - self.compute_time_statistics() + self.__compute_time_statistics() return self._accumulated_blackbox_time diff --git a/hyppopy/solvers/OptunaSolver.py b/hyppopy/solvers/OptunaSolver.py index 4514bc1..f02b086 
100644 --- a/hyppopy/solvers/OptunaSolver.py +++ b/hyppopy/solvers/OptunaSolver.py @@ -1,87 +1,86 @@ -# DKFZ -# +# Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE import os import optuna import logging import warnings import numpy as np from pprint import pformat from hyppopy.globals import DEBUGLEVEL from hyppopy.solvers.HyppopySolver import HyppopySolver LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class OptunaSolver(HyppopySolver): def __init__(self, project=None): HyppopySolver.__init__(self, project) self._searchspace = None def define_interface(self): - self.add_member("max_iterations", int) - self.add_hyperparameter_signature(name="domain", dtype=str, + self._add_member("max_iterations", int) + self._add_hyperparameter_signature(name="domain", dtype=str, options=["uniform", "categorical"]) - self.add_hyperparameter_signature(name="data", dtype=list) - self.add_hyperparameter_signature(name="type", dtype=type) + self._add_hyperparameter_signature(name="data", dtype=list) + self._add_hyperparameter_signature(name="type", dtype=type) def reformat_parameter(self, params): out_params = {} for name, value in params.items(): if self._searchspace[name]["domain"] == "categorical": out_params[name] = self._searchspace[name]["data"][int(np.round(value))] else: if self._searchspace[name]["type"] is int: out_params[name] = int(np.round(value)) else: out_params[name] = value return out_params def trial_cache(self, trial): params = {} for name, param in self._searchspace.items(): if param["domain"] == "categorical": params[name] = trial.suggest_categorical(name, param["data"]) else: params[name] = trial.suggest_uniform(name, param["data"][0], param["data"][1]) return 
self.loss_function(**params) def loss_function_call(self, params): for key in params.keys(): if self.project.get_typeof(key) is int: params[key] = int(round(params[key])) return self.blackbox(**params) def execute_solver(self, searchspace): LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace))) self._searchspace = searchspace try: study = optuna.create_study() study.optimize(self.trial_cache, n_trials=self.max_iterations) self.best = study.best_trial.params except Exception as e: LOG.error("internal error in bayes_opt maximize occured. {}".format(e)) raise BrokenPipeError("internal error in bayes_opt maximize occured. {}".format(e)) def convert_searchspace(self, hyperparameter): LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) for name, param in hyperparameter.items(): if param["domain"] != "categorical" and param["domain"] != "uniform": msg = "Warning: Optuna cannot handle {} domain. Only uniform and categorical domains are supported!".format(param["domain"]) warnings.warn(msg) LOG.warning(msg) return hyperparameter diff --git a/hyppopy/solvers/OptunitySolver.py b/hyppopy/solvers/OptunitySolver.py index bf4519a..2c894af 100644 --- a/hyppopy/solvers/OptunitySolver.py +++ b/hyppopy/solvers/OptunitySolver.py @@ -1,99 +1,93 @@ -# DKFZ -# +# Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. 
# # See LICENSE import os import logging import optunity -import warnings from pprint import pformat from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) from hyppopy.solvers.HyppopySolver import HyppopySolver class OptunitySolver(HyppopySolver): def __init__(self, project=None): HyppopySolver.__init__(self, project) - self._solver_info = None - self.opt_trials = None def define_interface(self): - self.add_member("max_iterations", int) - self.add_hyperparameter_signature(name="domain", dtype=str, + self._add_member("max_iterations", int) + self._add_hyperparameter_signature(name="domain", dtype=str, options=["uniform", "categorical"]) - self.add_hyperparameter_signature(name="data", dtype=list) - self.add_hyperparameter_signature(name="type", dtype=type) + self._add_hyperparameter_signature(name="data", dtype=list) + self._add_hyperparameter_signature(name="type", dtype=type) def loss_function_call(self, params): for key in params.keys(): if self.project.get_typeof(key) is int: params[key] = int(round(params[key])) return self.blackbox(**params) def execute_solver(self, searchspace): LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace))) try: self.best, _, _ = optunity.minimize_structured(f=self.loss_function, num_evals=self.max_iterations, search_space=searchspace) except Exception as e: LOG.error("internal error in optunity.minimize_structured occured. {}".format(e)) raise BrokenPipeError("internal error in optunity.minimize_structured occured. 
{}".format(e)) def split_categorical(self, pdict): categorical = {} uniform = {} for name, pset in pdict.items(): for key, value in pset.items(): if key == 'domain' and value == 'categorical': categorical[name] = pset elif key == 'domain': uniform[name] = pset return categorical, uniform def convert_searchspace(self, hyperparameter): LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) - solution_space = {} # split input in categorical and non-categorical data cat, uni = self.split_categorical(hyperparameter) # build up dictionary keeping all non-categorical data uniforms = {} for key, value in uni.items(): for key2, value2 in value.items(): if key2 == 'data': if len(value2) == 3: uniforms[key] = value2[0:2] elif len(value2) == 2: uniforms[key] = value2 else: raise AssertionError("precondition violation, optunity searchspace needs list with left and right range bounds!") if len(cat) == 0: return uniforms # build nested categorical structure inner_level = uniforms for key, value in cat.items(): tmp = {} - tmp2 = {} + optunity_space = {} for key2, value2 in value.items(): if key2 == 'data': for elem in value2: tmp[elem] = inner_level - tmp2[key] = tmp - inner_level = tmp2 - solution_space = tmp2 - return solution_space + optunity_space[key] = tmp + inner_level = optunity_space + return optunity_space diff --git a/hyppopy/solvers/QuasiRandomsearchSolver.py b/hyppopy/solvers/QuasiRandomsearchSolver.py index 28e028f..b9159ec 100644 --- a/hyppopy/solvers/QuasiRandomsearchSolver.py +++ b/hyppopy/solvers/QuasiRandomsearchSolver.py @@ -1,182 +1,201 @@ -# DKFZ -# +# Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. 
# # See LICENSE +__all__ = ['HaltonSequenceGenerator', 'QuasiRandomSampleGenerator', 'QuasiRandomsearchSolver'] + import os import logging import warnings import numpy as np from pprint import pformat from hyppopy.globals import DEBUGLEVEL from hyppopy.solvers.HyppopySolver import HyppopySolver LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) -def get_loguniform_ranges(a, b, N): - aL = np.log(a) - bL = np.log(b) - exp_range = np.linspace(aL, bL, N+1) - ranges = [] - for i in range(N): - ranges.append([np.exp(exp_range[i]), np.exp(exp_range[i+1])]) - return ranges - - class HaltonSequenceGenerator(object): + """ + This class generates Halton sequences (https://en.wikipedia.org/wiki/Halton_sequence). The class needs a total + number of samples and the number of dimensions to generate a quasirandom sequence for each axis. The method + get_unit_space returns a sequence list with N_samples for each axis representing N_samples vectors on a unit sphere. + """ + def __init__(self): + pass - def __init__(self, N_samples, dimensions): - self._N = N_samples - self._dims = dimensions - - def next_prime(self): + def __next_prime(self): + """ + Checks if num is a prime value + """ def is_prime(num): - "Checks if num is a prime value" for i in range(2, int(num ** 0.5) + 1): if (num % i) == 0: return False return True prime = 3 while 1: if is_prime(prime): yield prime prime += 2 - def vdc(self, n, base): + def __vdc(self, n, base): vdc, denom = 0, 1 while n: denom *= base n, remainder = divmod(n, base) vdc += remainder / float(denom) return vdc - def get_sequence(self): + def get_unit_space(self, N_samples, N_dims): + """ + Returns a unit space in form of a sequence list keeping N_dims sequences with N_sample samplings. Each sample + represents a N_dims dimensional vector on a unit sphere. 
+ + :param N_samples: [int] Number of samples + :param N_dims: [int] Number of dimensions + + :return: [list] samples list of length N_dims keeping lists each of length N_samples + """ seq = [] - primeGen = self.next_prime() + primeGen = self.__next_prime() next(primeGen) - for d in range(self._dims): + for d in range(N_dims): base = next(primeGen) - seq.append([self.vdc(i, base) for i in range(self._N)]) + seq.append([self.__vdc(i, base) for i in range(N_samples)]) return seq class QuasiRandomSampleGenerator(object): - + """ + This class takes care of the hyperparameter space creation and next sample delivery. + """ def __init__(self, N_samples=None): self._axis = None self._samples = [] self._numerical = [] self._categorical = [] self._N_samples = N_samples def set_axis(self, name, data, domain, dtype): + """ + Add an axis description. + + :param name: [str] axis name + :param data: [list] axis range [min, max] + :param domain: [str] axis domain + :param dtype: [type] axis data type + """ if domain == "categorical": if dtype is int: data = [int(i) for i in data] elif dtype is str: data = [str(i) for i in data] elif dtype is float: data = [float(i) for i in data] self._categorical.append({"name": name, "data": data, "type": dtype}) else: self._numerical.append({"name": name, "data": data, "type": dtype, "domain": domain}) def generate_samples(self, N_samples=None): + """ + This function is called once when the first sample is requested. It generates the halton sequence space. + + :param N_samples: [int] number of samples + """ self._axis = [] if N_samples is None: assert isinstance(self._N_samples, int), "Precondition violation, no number of samples specified!" 
else: self._N_samples = N_samples axis_samples = {} if len(self._numerical) > 0: - generator = HaltonSequenceGenerator(self._N_samples, len(self._numerical)) - unit_space = generator.get_sequence() + generator = HaltonSequenceGenerator() + unit_space = generator.get_unit_space(self._N_samples, len(self._numerical)) for n, axis in enumerate(self._numerical): width = abs(axis["data"][1] - axis["data"][0]) unit_space[n] = [x * width for x in unit_space[n]] unit_space[n] = [x + axis["data"][0] for x in unit_space[n]] if axis["type"] is int: unit_space[n] = [int(round(x)) for x in unit_space[n]] axis_samples[axis["name"]] = unit_space[n] else: warnings.warn("No numerical axis defined, this warning can be ignored if searchspace is categorical only, otherwise check if axis was set!") for n in range(self._N_samples): sample = {} for name, data in axis_samples.items(): sample[name] = data[n] for cat in self._categorical: choice = np.random.choice(len(cat["data"]), 1)[0] sample[cat["name"]] = cat["data"][choice] self._samples.append(sample) def next(self): + """ + Returns the next sample. Returns None if all samples are requested. + + :return: [dict] sample dict {'name':value, ...} + """ if len(self._samples) == 0: self.generate_samples() if len(self._samples) == 0: return None next_index = np.random.choice(len(self._samples), 1)[0] sample = self._samples.pop(next_index) return sample class QuasiRandomsearchSolver(HyppopySolver): """ The QuasiRandomsearchSolver class implements a quasi randomsearch optimization. The quasi randomsearch supports - categorical, uniform, normal and loguniform sampling. The solver defines a grid which size and appearance depends - on the max_iterations parameter and the domain. The at each grid box a random value is drawn. This ensures both, - random parameter samples with the cosntraint that the space is evenly sampled and cluster building prevention.""" + categorical and uniform sampling. 
The solver defines a Halton Sequence distributed hyperparameter space. This + means a rather evenly distributed space sampling but no real randomness. + """ def __init__(self, project=None): HyppopySolver.__init__(self, project) self._sampler = None def define_interface(self): - self.add_member("max_iterations", int) - self.add_hyperparameter_signature(name="domain", dtype=str, + self._add_member("max_iterations", int) + self._add_hyperparameter_signature(name="domain", dtype=str, options=["uniform", "categorical"]) - self.add_hyperparameter_signature(name="data", dtype=list) - self.add_hyperparameter_signature(name="type", dtype=type) + self._add_hyperparameter_signature(name="data", dtype=list) + self._add_hyperparameter_signature(name="type", dtype=type) def loss_function_call(self, params): loss = self.blackbox(**params) if loss is None: return np.nan return loss def execute_solver(self, searchspace): N = self.max_iterations self._sampler = QuasiRandomSampleGenerator(N) for name, axis in searchspace.items(): self._sampler.set_axis(name, axis["data"], axis["domain"], axis["type"]) try: for n in range(N): params = self._sampler.next() if params is None: break self.loss_function(**params) except Exception as e: msg = "internal error in randomsearch execute_solver occured. {}".format(e) LOG.error(msg) raise BrokenPipeError(msg) self.best = self._trials.argmin def convert_searchspace(self, hyperparameter): - """ - this function simply pipes the input parameter through, the sample - drawing functions are responsible for interpreting the parameter. 
- :param hyperparameter: [dict] hyperparameter space - :return: [dict] hyperparameter space - """ LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) return hyperparameter diff --git a/hyppopy/solvers/RandomsearchSolver.py b/hyppopy/solvers/RandomsearchSolver.py index d5aa50d..abbf85d 100644 --- a/hyppopy/solvers/RandomsearchSolver.py +++ b/hyppopy/solvers/RandomsearchSolver.py @@ -1,161 +1,172 @@ -# DKFZ -# +# Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE +__all__ = ['RandomsearchSolver', + 'draw_uniform_sample', + 'draw_normal_sample', + 'draw_loguniform_sample', + 'draw_categorical_sample', + 'draw_sample'] + import os import copy import random import logging import numpy as np from pprint import pformat from hyppopy.globals import DEBUGLEVEL from hyppopy.solvers.HyppopySolver import HyppopySolver LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) def draw_uniform_sample(param): """ - function draws a random sample from a uniform range + Function draws a random sample from a uniform range + :param param: [dict] input hyperparameter discription + :return: random sample value of type data['type'] """ assert param['type'] is not str, "cannot sample a string list!" assert param['data'][0] < param['data'][1], "precondition violation: data[0] > data[1]!" 
s = random.random() s *= np.abs(param['data'][1] - param['data'][0]) s += param['data'][0] if param['type'] is int: s = int(np.round(s)) if s < param['data'][0]: s = int(param['data'][0]) if s > param['data'][1]: s = int(param['data'][1]) return s def draw_normal_sample(param): """ - function draws a random sample from a normal distributed range + Function draws a random sample from a normal distributed range + :param param: [dict] input hyperparameter discription + :return: random sample value of type data['type'] """ assert param['type'] is not str, "cannot sample a string list!" assert param['data'][0] < param['data'][1], "precondition violation: data[0] > data[1]!" mu = (param['data'][1] - param['data'][0]) / 2 sigma = mu / 3 s = np.random.normal(loc=param['data'][0] + mu, scale=sigma) if s > param['data'][1]: s = param['data'][1] if s < param['data'][0]: s = param['data'][0] s = float(s) if param["type"] is int: s = int(np.round(s)) return s def draw_loguniform_sample(param): """ - function draws a random sample from a logarithmic distributed range + Function draws a random sample from a logarithmic distributed range + :param param: [dict] input hyperparameter discription + :return: random sample value of type data['type'] """ assert param['type'] is not str, "cannot sample a string list!" assert param['data'][0] < param['data'][1], "precondition violation: data[0] > data[1]!" p = copy.deepcopy(param) p['data'][0] = np.log(param['data'][0]) p['data'][1] = np.log(param['data'][1]) assert p['data'][0] is not np.nan, "Precondition violation, left bound input error, results in nan!" assert p['data'][1] is not np.nan, "Precondition violation, right bound input error, results in nan!" 
x = draw_uniform_sample(p) s = np.exp(x) if s > param['data'][1]: s = param['data'][1] if s < param['data'][0]: s = param['data'][0] return s def draw_categorical_sample(param): """ - function draws a random sample from a categorical list + Function draws a random sample from a categorical list + :param param: [dict] input hyperparameter discription + :return: random sample value of type data['type'] """ return random.sample(param['data'], 1)[0] def draw_sample(param): """ - function draws a sample from the input hyperparameter descriptor depending on it's domain + Function draws a sample from the input hyperparameter descriptor depending on it's domain + :param param: [dict] input hyperparameter discription + :return: random sample value of type data['type'] """ assert isinstance(param, dict), "input error, hyperparam descriptors of type {} not allowed!".format(type(param)) if param['domain'] == "uniform": return draw_uniform_sample(param) elif param['domain'] == "normal": return draw_normal_sample(param) elif param['domain'] == "loguniform": return draw_loguniform_sample(param) elif param['domain'] == "categorical": return draw_categorical_sample(param) else: raise LookupError("Unknown domain {}".format(param['domain'])) class RandomsearchSolver(HyppopySolver): """ The RandomsearchSolver class implements a randomsearch optimization. The randomsearch supports categorical, uniform, normal and loguniform sampling. The solver draws an independent sample - from the parameter space each iteration.""" + from the parameter space each iteration. 
+ """ def __init__(self, project=None): HyppopySolver.__init__(self, project) def define_interface(self): - self.add_member("max_iterations", int) - self.add_hyperparameter_signature(name="domain", dtype=str, + self._add_member("max_iterations", int) + self._add_hyperparameter_signature(name="domain", dtype=str, options=["uniform", "normal", "loguniform", "categorical"]) - self.add_hyperparameter_signature(name="data", dtype=list) - self.add_hyperparameter_signature(name="type", dtype=type) + self._add_hyperparameter_signature(name="data", dtype=list) + self._add_hyperparameter_signature(name="type", dtype=type) def loss_function_call(self, params): loss = self.blackbox(**params) if loss is None: return np.nan return loss def execute_solver(self, searchspace): N = self.max_iterations try: for n in range(N): params = {} for name, p in searchspace.items(): params[name] = draw_sample(p) self.loss_function(**params) except Exception as e: msg = "internal error in randomsearch execute_solver occured. {}".format(e) LOG.error(msg) raise BrokenPipeError(msg) self.best = self._trials.argmin def convert_searchspace(self, hyperparameter): - """ - this function simply pipes the input parameter through, the sample - drawing functions are responsible for interpreting the parameter. - :param hyperparameter: [dict] hyperparameter space - :return: [dict] hyperparameter space - """ LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter))) return hyperparameter diff --git a/hyppopy/solvers/__init__.py b/hyppopy/solvers/__init__.py index e69de29..72ec40a 100644 --- a/hyppopy/solvers/__init__.py +++ b/hyppopy/solvers/__init__.py @@ -0,0 +1,11 @@ +# Hyppopy - A Hyper-Parameter Optimization Toolbox +# +# Copyright (c) German Cancer Research Center, +# Division of Medical Image Computing. +# All rights reserved. +# +# This software is distributed WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. 
+# +# See LICENSE \ No newline at end of file diff --git a/hyppopy/tests/__init__.py b/hyppopy/tests/__init__.py index 8f5ccae..875003c 100644 --- a/hyppopy/tests/__init__.py +++ b/hyppopy/tests/__init__.py @@ -1,46 +1,58 @@ +# Hyppopy - A Hyper-Parameter Optimization Toolbox +# +# Copyright (c) German Cancer Research Center, +# Division of Medical Image Computing. +# All rights reserved. +# +# This software is distributed WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# See LICENSE + import os from hyppopy.globals import ROOT def create_readmesnippeds(): fname = os.path.join(ROOT, "README.md") f = open(fname, "r") codes = [] snipped = None for line in f.readlines(): if snipped is not None: snipped.append("\t\t{}".format(line)) if line.startswith("```"): if line.startswith("```python"): snipped = [] else: if snipped is not None: snipped.pop(-1) codes.append(snipped) snipped = None for n, snipped in enumerate(codes): f = open(os.path.join(ROOT, *("hyppopy", "tests", "test_snipped_{}.py".format(str(n).zfill(3)))), "w") test_code = "# DKFZ\n" test_code += "#\n" test_code += "#\n" test_code += "# Copyright (c) German Cancer Research Center,\n" test_code += "# Division of Medical Image Computing.\n" test_code += "# All rights reserved.\n" test_code += "#\n" test_code += "# This software is distributed WITHOUT ANY WARRANTY; without\n" test_code += "# even the implied warranty of MERCHANTABILITY or FITNESS FOR\n" test_code += "# A PARTICULAR PURPOSE.\n" test_code += "#\n" test_code += "# See LICENSE\n\n" test_code += "import os\n" test_code += "import unittest\n\n" test_code += "class ReadmeSnipped_{}TestSuite(unittest.TestCase):\n\n".format(str(n).zfill(3)) test_code += "\tdef test_scripts(self):\n\n" snipped.insert(0, test_code) f.writelines(snipped) f.close() create_readmesnippeds() diff --git a/hyppopy/tests/test_gridsearchsolver.py b/hyppopy/tests/test_gridsearchsolver.py index 
86b309c..7f148e2 100644 --- a/hyppopy/tests/test_gridsearchsolver.py +++ b/hyppopy/tests/test_gridsearchsolver.py @@ -1,285 +1,284 @@ -# DKFZ -# +# Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE import unittest from hyppopy.solvers.GridsearchSolver import * -from hyppopy.VirtualFunction import VirtualFunction from hyppopy.HyppopyProject import HyppopyProject +from hyppopy.FunctionSimulator import FunctionSimulator class GridsearchTestSuite(unittest.TestCase): def setUp(self): pass def test_get_uniform_axis_sample(self): drange = [0, 10] N = 11 data = get_uniform_axis_sample(drange[0], drange[1], N, float) for i in range(11): self.assertEqual(float(i), data[i]) drange = [-10, 10] N = 21 data = get_uniform_axis_sample(drange[0], drange[1], N, int) self.assertEqual(data[0], -10) self.assertEqual(data[20], 10) self.assertEqual(data[10], 0) def test_get_norm_cdf(self): res = [0, 0.27337265, 0.4331928, 0.48777553, 0.4986501, 0.5013499, 0.51222447, 0.5668072, 0.72662735, 1] f = get_norm_cdf(10) for n, v in enumerate(res): self.assertAlmostEqual(v, f[n]) res = [0.0, 0.27337264762313174, 0.4331927987311419, 0.48777552734495533, 0.4986501019683699, 0.5, 0.5013498980316301, 0.5122244726550447, 0.5668072012688581, 0.7266273523768683, 1.0] f = get_norm_cdf(11) for n, v in enumerate(res): self.assertAlmostEqual(v, f[n]) def test_get_gaussian_axis_sampling(self): res = [-5.0, -2.2662735237686826, -0.6680720126885813, -0.12224472655044671, -0.013498980316301257, 0.013498980316301257, 0.12224472655044671, 0.6680720126885813, 2.2662735237686826, 5.0] bounds = (-5, 5) N = 10 data = get_gaussian_axis_sample(bounds[0], bounds[1], N, float) for n in range(N): self.assertAlmostEqual(res[n], data[n]) res = [-5.0, 
-2.2662735237686826, -0.6680720126885813, -0.12224472655044671, -0.013498980316301257, 0.0, 0.013498980316301257, 0.12224472655044671, 0.6680720126885813, 2.2662735237686826, 5.0] bounds = (-5, 5) N = 11 data = get_gaussian_axis_sample(bounds[0], bounds[1], N, float) for n in range(N): self.assertAlmostEqual(res[n], data[n]) def test_get_logarithmic_axis_sample(self): res = [0.0010000000000000002, 0.0035938136638046297, 0.012915496650148841, 0.046415888336127795, 0.1668100537200059, 0.5994842503189414, 2.154434690031884, 7.7426368268112675, 27.825594022071247, 100.00000000000004] bounds = (0.001, 1e2) N = 10 data = get_logarithmic_axis_sample(bounds[0], bounds[1], N, float) for n in range(N): self.assertAlmostEqual(res[n], data[n]) res = [0.0010000000000000002, 0.003162277660168382, 0.010000000000000004, 0.03162277660168381, 0.10000000000000006, 0.31622776601683833, 1.0000000000000009, 3.1622776601683813, 10.00000000000001, 31.622776601683846, 100.00000000000004] bounds = (0.001, 1e2) N = 11 data = get_logarithmic_axis_sample(bounds[0], bounds[1], N, float) for n in range(N): self.assertAlmostEqual(res[n], data[n]) def test_solver(self): config = { "hyperparameter": { "value 1": { "domain": "uniform", "data": [0, 20], "type": int, "frequency": 11 }, "value 2": { "domain": "normal", "data": [0, 20.0], "type": float, "frequency": 11 }, "value 3": { "domain": "loguniform", "data": [1, 10000], "type": float, "frequency": 11 }, "categorical": { "domain": "categorical", "data": ["a", "b"], "type": str, "frequency": 1 } }} res_labels = ['value 1', 'value 2', 'value 3', 'categorical'] res_values = [[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20], [0.0, 5.467452952462635, 8.663855974622837, 9.755510546899107, 9.973002039367397, 10.0, 10.026997960632603, 10.244489453100893, 11.336144025377163, 14.532547047537365, 20.0], [1.0, 2.51188643150958, 6.309573444801933, 15.848931924611136, 39.810717055349734, 100.00000000000004, 251.18864315095806, 630.9573444801938, 1584.8931924611143, 
3981.071705534977, 10000.00000000001], ['a', 'b']] solver = GridsearchSolver(config) searchspace = solver.convert_searchspace(config["hyperparameter"]) for n in range(len(res_labels)): self.assertEqual(res_labels[n], searchspace[0][n]) for i in range(3): self.assertAlmostEqual(res_values[i], searchspace[1][i]) self.assertEqual(res_values[3], searchspace[1][3]) def test_solver_uniform(self): config = { "hyperparameter": { "axis_00": { "domain": "uniform", "data": [0, 800], "type": float, "frequency": 11 }, "axis_01": { "domain": "uniform", "data": [-1, 1], "type": float, "frequency": 11 }, "axis_02": { "domain": "uniform", "data": [0, 10], "type": float, "frequency": 11 } }} project = HyppopyProject(config) solver = GridsearchSolver(project) - vfunc = VirtualFunction() + vfunc = FunctionSimulator() vfunc.load_default() solver.blackbox = vfunc solver.run(print_stats=False) df, best = solver.get_results() self.assertAlmostEqual(best['axis_00'], 240, places=1) self.assertAlmostEqual(best['axis_01'], 0.2, places=1) self.assertAlmostEqual(best['axis_02'], 5.0, places=1) for status in df['status']: self.assertTrue(status) for loss in df['losses']: self.assertTrue(isinstance(loss, float)) def test_solver_normal(self): config = { "hyperparameter": { "axis_00": { "domain": "normal", "data": [100, 300], "type": float, "frequency": 11 }, "axis_01": { "domain": "normal", "data": [0, 0.8], "type": float, "frequency": 11 }, "axis_02": { "domain": "normal", "data": [4, 6], "type": float, "frequency": 11 } }} project = HyppopyProject(config) solver = GridsearchSolver(project) - vfunc = VirtualFunction() + vfunc = FunctionSimulator() vfunc.load_default() solver.blackbox = vfunc solver.run(print_stats=False) df, best = solver.get_results() self.assertAlmostEqual(best['axis_00'], 197.555, places=1) self.assertAlmostEqual(best['axis_01'], 0.21869, places=1) self.assertAlmostEqual(best['axis_02'], 5.13361, places=1) for status in df['status']: self.assertTrue(status) for loss in 
df['losses']: self.assertTrue(isinstance(loss, float)) def test_solver_loguniform(self): config = { "hyperparameter": { "axis_00": { "domain": "loguniform", "data": [0.00001, 300], "type": float, "frequency": 21 }, "axis_01": { "domain": "loguniform", "data": [0.00001, 0.8], "type": float, "frequency": 21 }, "axis_02": { "domain": "loguniform", "data": [4, 6], "type": float, "frequency": 21 } }} project = HyppopyProject(config) solver = GridsearchSolver(project) - vfunc = VirtualFunction() + vfunc = FunctionSimulator() vfunc.load_default() solver.blackbox = vfunc solver.run(print_stats=False) df, best = solver.get_results() self.assertAlmostEqual(best['axis_00'], 299.999, places=1) self.assertAlmostEqual(best['axis_01'], 0.25869, places=1) self.assertAlmostEqual(best['axis_02'], 5.10169, places=1) for status in df['status']: self.assertTrue(status) for loss in df['losses']: self.assertTrue(isinstance(loss, float)) if __name__ == '__main__': unittest.main() diff --git a/hyppopy/tests/test_hyperoptsolver.py b/hyppopy/tests/test_hyperoptsolver.py index 4a968eb..8552450 100644 --- a/hyppopy/tests/test_hyperoptsolver.py +++ b/hyppopy/tests/test_hyperoptsolver.py @@ -1,105 +1,103 @@ -# DKFZ -# +# Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. 
# # See LICENSE import unittest -import matplotlib.pylab as plt from hyppopy.solvers.HyperoptSolver import * -from hyppopy.VirtualFunction import VirtualFunction +from hyppopy.FunctionSimulator import FunctionSimulator from hyppopy.HyppopyProject import HyppopyProject class HyperoptSolverTestSuite(unittest.TestCase): def setUp(self): pass def test_solver_complete(self): config = { "hyperparameter": { "axis_00": { "domain": "uniform", "data": [300, 700], "type": float }, "axis_01": { "domain": "uniform", "data": [0, 0.8], "type": float }, "axis_02": { "domain": "uniform", "data": [3.5, 6.5], "type": float } }, "max_iterations": 500 } project = HyppopyProject(config) solver = HyperoptSolver(project) - vfunc = VirtualFunction() + vfunc = FunctionSimulator() vfunc.load_default() solver.blackbox = vfunc solver.run(print_stats=False) df, best = solver.get_results() self.assertTrue(575 <= best['axis_00'] <= 585) self.assertTrue(0.1 <= best['axis_01'] <= 0.8) self.assertTrue(4.7 <= best['axis_02'] <= 5.3) for status in df['status']: self.assertTrue(status) for loss in df['losses']: self.assertTrue(isinstance(loss, float)) def test_solver_normal(self): config = { "hyperparameter": { "axis_00": { "domain": "normal", "data": [500, 650], "type": float }, "axis_01": { "domain": "normal", "data": [0.1, 0.8], "type": float }, "axis_02": { "domain": "normal", "data": [4.5, 5.5], "type": float } }, "max_iterations": 500, } project = HyppopyProject(config) solver = HyperoptSolver(project) - vfunc = VirtualFunction() + vfunc = FunctionSimulator() vfunc.load_default() solver.blackbox = vfunc solver.run(print_stats=False) df, best = solver.get_results() self.assertTrue(575 <= best['axis_00'] <= 585) self.assertTrue(0.1 <= best['axis_01'] <= 0.8) self.assertTrue(4.7 <= best['axis_02'] <= 5.3) for status in df['status']: self.assertTrue(status) for loss in df['losses']: self.assertTrue(isinstance(loss, float)) if __name__ == '__main__': unittest.main() diff --git 
a/hyppopy/tests/test_hyppopyproject.py b/hyppopy/tests/test_hyppopyproject.py index 39ad65d..04477e4 100644 --- a/hyppopy/tests/test_hyppopyproject.py +++ b/hyppopy/tests/test_hyppopyproject.py @@ -1,83 +1,82 @@ -# DKFZ -# +# Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE import unittest from hyppopy.HyppopyProject import HyppopyProject def foo(a, b): return a + b class HyppopyProjectTestSuite(unittest.TestCase): def setUp(self): pass def test_project_creation(self): config = { "hyperparameter": { "C": { "domain": "uniform", "data": [0.0001, 20], "type": float }, "kernel": { "domain": "categorical", "data": ["linear", "sigmoid", "poly", "rbf"], "type": str } }, "max_iterations": 300, "param1": 1, "param2": 2, "function": foo } project = HyppopyProject() project.set_config(config) self.assertEqual(project.hyperparameter["C"]["domain"], "uniform") self.assertEqual(project.hyperparameter["C"]["data"], [0.0001, 20]) self.assertTrue(project.hyperparameter["C"]["type"] is float) self.assertEqual(project.hyperparameter["kernel"]["domain"], "categorical") self.assertEqual(project.hyperparameter["kernel"]["data"], ["linear", "sigmoid", "poly", "rbf"]) self.assertTrue(project.hyperparameter["kernel"]["type"] is str) self.assertEqual(project.max_iterations, 300) self.assertEqual(project.param1, 1) self.assertEqual(project.param2, 2) self.assertEqual(project.function(2, 3), 5) self.assertTrue(project.get_typeof("C") is float) self.assertTrue(project.get_typeof("kernel") is str) project = HyppopyProject() project.add_hyperparameter(name="C", domain="uniform", data=[0.0001, 20], type=float) project.add_hyperparameter(name="kernel", domain="categorical", data=["linear", "sigmoid", "poly", "rbf"], type=str) 
self.assertEqual(project.hyperparameter["C"]["domain"], "uniform") self.assertEqual(project.hyperparameter["C"]["data"], [0.0001, 20]) self.assertTrue(project.hyperparameter["C"]["type"] is float) self.assertEqual(project.hyperparameter["kernel"]["domain"], "categorical") self.assertEqual(project.hyperparameter["kernel"]["data"], ["linear", "sigmoid", "poly", "rbf"]) self.assertTrue(project.hyperparameter["kernel"]["type"] is str) project.set_settings(max_iterations=500) self.assertEqual(project.max_iterations, 500) project.add_setting("my_param", 42) self.assertEqual(project.my_param, 42) project.add_setting("max_iterations", 200) self.assertEqual(project.max_iterations, 200) diff --git a/hyppopy/tests/test_optunasolver.py b/hyppopy/tests/test_optunasolver.py index 9c185cf..0622bd2 100644 --- a/hyppopy/tests/test_optunasolver.py +++ b/hyppopy/tests/test_optunasolver.py @@ -1,67 +1,65 @@ -# DKFZ -# +# Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. 
# # See LICENSE import unittest -import matplotlib.pylab as plt from hyppopy.solvers.OptunaSolver import * -from hyppopy.VirtualFunction import VirtualFunction +from hyppopy.FunctionSimulator import FunctionSimulator from hyppopy.HyppopyProject import HyppopyProject class OptunaSolverTestSuite(unittest.TestCase): def setUp(self): pass def test_solver_complete(self): config = { "hyperparameter": { "axis_00": { "domain": "uniform", "data": [300, 800], "type": float }, "axis_01": { "domain": "uniform", "data": [-1, 1], "type": float }, "axis_02": { "domain": "uniform", "data": [0, 10], "type": float } }, "max_iterations": 100 } project = HyppopyProject(config) solver = OptunaSolver(project) - vfunc = VirtualFunction() + vfunc = FunctionSimulator() vfunc.load_default() solver.blackbox = vfunc solver.run(print_stats=False) df, best = solver.get_results() self.assertTrue(300 <= best['axis_00'] <= 800) self.assertTrue(-1 <= best['axis_01'] <= 1) self.assertTrue(0 <= best['axis_02'] <= 10) for status in df['status']: self.assertTrue(status) for loss in df['losses']: self.assertTrue(isinstance(loss, float)) if __name__ == '__main__': unittest.main() diff --git a/hyppopy/tests/test_optunitysolver.py b/hyppopy/tests/test_optunitysolver.py index 683cd9b..4707bd6 100644 --- a/hyppopy/tests/test_optunitysolver.py +++ b/hyppopy/tests/test_optunitysolver.py @@ -1,67 +1,65 @@ -# DKFZ -# +# Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. 
# # See LICENSE import unittest -import matplotlib.pylab as plt from hyppopy.solvers.OptunitySolver import * -from hyppopy.VirtualFunction import VirtualFunction +from hyppopy.FunctionSimulator import FunctionSimulator from hyppopy.HyppopyProject import HyppopyProject class OptunitySolverTestSuite(unittest.TestCase): def setUp(self): pass def test_solver_complete(self): config = { "hyperparameter": { "axis_00": { "domain": "uniform", "data": [300, 800], "type": float }, "axis_01": { "domain": "uniform", "data": [-1, 1], "type": float }, "axis_02": { "domain": "uniform", "data": [0, 10], "type": float } }, "max_iterations": 100 } project = HyppopyProject(config) solver = OptunitySolver(project) - vfunc = VirtualFunction() + vfunc = FunctionSimulator() vfunc.load_default() solver.blackbox = vfunc solver.run(print_stats=False) df, best = solver.get_results() self.assertTrue(300 <= best['axis_00'] <= 800) self.assertTrue(-1 <= best['axis_01'] <= 1) self.assertTrue(0 <= best['axis_02'] <= 10) for status in df['status']: self.assertTrue(status) for loss in df['losses']: self.assertTrue(isinstance(loss, float)) if __name__ == '__main__': unittest.main() diff --git a/hyppopy/tests/test_quasirandomsearchsolver.py b/hyppopy/tests/test_quasirandomsearchsolver.py index e571a92..703247b 100644 --- a/hyppopy/tests/test_quasirandomsearchsolver.py +++ b/hyppopy/tests/test_quasirandomsearchsolver.py @@ -1,67 +1,65 @@ -# DKFZ -# +# Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. 
# # See LICENSE import unittest -import matplotlib.pylab as plt from hyppopy.solvers.QuasiRandomsearchSolver import * -from hyppopy.VirtualFunction import VirtualFunction +from hyppopy.FunctionSimulator import FunctionSimulator from hyppopy.HyppopyProject import HyppopyProject class QuasiRandomsearchTestSuite(unittest.TestCase): def setUp(self): pass def test_solver_uniform(self): config = { "hyperparameter": { "axis_00": { "domain": "uniform", "data": [0, 800], "type": float }, "axis_01": { "domain": "uniform", "data": [-1, 1], "type": float }, "axis_02": { "domain": "uniform", "data": [0, 10], "type": float } }, "max_iterations": 300 } project = HyppopyProject(config) solver = QuasiRandomsearchSolver(project) - vfunc = VirtualFunction() + vfunc = FunctionSimulator() vfunc.load_default() solver.blackbox = vfunc solver.run(print_stats=False) df, best = solver.get_results() self.assertTrue(0 <= best['axis_00'] <= 800) self.assertTrue(-1 <= best['axis_01'] <= 1) self.assertTrue(0 <= best['axis_02'] <= 10) for status in df['status']: self.assertTrue(status) for loss in df['losses']: self.assertTrue(isinstance(loss, float)) if __name__ == '__main__': unittest.main() diff --git a/hyppopy/tests/test_randomsearchsolver.py b/hyppopy/tests/test_randomsearchsolver.py index 10a7117..0be138c 100644 --- a/hyppopy/tests/test_randomsearchsolver.py +++ b/hyppopy/tests/test_randomsearchsolver.py @@ -1,165 +1,165 @@ -# DKFZ -# +# Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. 
# # See LICENSE import unittest +import numpy as np import matplotlib.pylab as plt from hyppopy.solvers.RandomsearchSolver import * -from hyppopy.VirtualFunction import VirtualFunction +from hyppopy.FunctionSimulator import FunctionSimulator from hyppopy.HyppopyProject import HyppopyProject class RandomsearchTestSuite(unittest.TestCase): def setUp(self): pass def test_draw_uniform_sample(self): param = {"data": [0, 1, 10], "type": float} values = [] for i in range(10000): values.append(draw_uniform_sample(param)) self.assertTrue(0 <= values[-1] <= 1) self.assertTrue(isinstance(values[-1], float)) hist = plt.hist(values, bins=10, normed=True) std = np.std(hist[0]) mean = np.mean(hist[0]) self.assertTrue(std < 0.05) self.assertTrue(0.9 < mean < 1.1) param = {"data": [0, 10, 11], "type": int} values = [] for i in range(10000): values.append(draw_uniform_sample(param)) self.assertTrue(0 <= values[-1] <= 10) self.assertTrue(isinstance(values[-1], int)) hist = plt.hist(values, bins=11, normed=True) std = np.std(hist[0]) mean = np.mean(hist[0]) self.assertTrue(std < 0.05) self.assertTrue(0.09 < mean < 0.11) def test_draw_normal_sample(self): param = {"data": [0, 10, 11], "type": int} values = [] for i in range(10000): values.append(draw_normal_sample(param)) self.assertTrue(0 <= values[-1] <= 10) self.assertTrue(isinstance(values[-1], int)) hist = plt.hist(values, bins=11, normed=True) for i in range(1, 5): self.assertTrue(hist[0][i-1]-hist[0][i] < 0) for i in range(5, 10): self.assertTrue(hist[0][i] - hist[0][i+1] > 0) def test_draw_loguniform_sample(self): param = {"data": [1, 1000, 11], "type": float} values = [] for i in range(10000): values.append(draw_loguniform_sample(param)) self.assertTrue(1 <= values[-1] <= 1000) self.assertTrue(isinstance(values[-1], float)) hist = plt.hist(values, bins=11, normed=True) for i in range(4): self.assertTrue(hist[0][i] > hist[0][i+1]) self.assertTrue((hist[0][i] - hist[0][i+1]) > 0) def test_draw_categorical_sample(self): param = 
{"data": [1, 2, 3], "type": int} values = [] for i in range(10000): values.append(draw_categorical_sample(param)) self.assertTrue(values[-1] == 1 or values[-1] == 2 or values[-1] == 3) self.assertTrue(isinstance(values[-1], int)) hist = plt.hist(values, bins=3, normed=True) for i in range(3): self.assertTrue(0.45 < hist[0][i] < 0.55) def test_solver_uniform(self): config = { "hyperparameter": { "axis_00": { "domain": "uniform", "data": [0, 800], "type": float }, "axis_01": { "domain": "uniform", "data": [-1, 1], "type": float }, "axis_02": { "domain": "uniform", "data": [0, 10], "type": float } }, "max_iterations": 300 } project = HyppopyProject(config) solver = RandomsearchSolver(project) - vfunc = VirtualFunction() + vfunc = FunctionSimulator() vfunc.load_default() solver.blackbox = vfunc solver.run(print_stats=False) df, best = solver.get_results() self.assertTrue(0 <= best['axis_00'] <= 800) self.assertTrue(-1 <= best['axis_01'] <= 1) self.assertTrue(0 <= best['axis_02'] <= 10) for status in df['status']: self.assertTrue(status) for loss in df['losses']: self.assertTrue(isinstance(loss, float)) def test_solver_normal(self): config = { "hyperparameter": { "axis_00": { "domain": "normal", "data": [500, 650], "type": float }, "axis_01": { "domain": "normal", "data": [0, 1], "type": float }, "axis_02": { "domain": "normal", "data": [4, 5], "type": float } }, "max_iterations": 500, } solver = RandomsearchSolver(config) - vfunc = VirtualFunction() + vfunc = FunctionSimulator() vfunc.load_default() solver.blackbox = vfunc solver.run(print_stats=False) df, best = solver.get_results() self.assertTrue(500 <= best['axis_00'] <= 650) self.assertTrue(0 <= best['axis_01'] <= 1) self.assertTrue(4 <= best['axis_02'] <= 5) for status in df['status']: self.assertTrue(status) for loss in df['losses']: self.assertTrue(isinstance(loss, float)) if __name__ == '__main__': unittest.main() diff --git a/hyppopy/tests/test_virtualfunction.py b/hyppopy/tests/test_virtualfunction.py index 
9481111..d4dbc38 100644 --- a/hyppopy/tests/test_virtualfunction.py +++ b/hyppopy/tests/test_virtualfunction.py @@ -1,94 +1,93 @@ -# DKFZ -# +# Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE import os import unittest import numpy as np -from hyppopy.VirtualFunction import VirtualFunction +from hyppopy.FunctionSimulator import FunctionSimulator from hyppopy.globals import TESTDATA_DIR -class VirtualFunctionTestSuite(unittest.TestCase): +class FunctionSimulatorTestSuite(unittest.TestCase): def setUp(self): pass def test_imagereading(self): - vfunc = VirtualFunction() + vfunc = FunctionSimulator() vfunc.load_images(os.path.join(TESTDATA_DIR, 'functionsimulator')) self.assertTrue(isinstance(vfunc.data, np.ndarray)) self.assertEqual(vfunc.data.shape[0], 5) self.assertEqual(vfunc.data.shape[1], 512) gt = [0.83984375*5, 0.44140625*20-10, 0.25390625*20, 0.81640625*8-10, 0.67578125*2+2] for i in range(5): self.assertAlmostEqual(vfunc.data[i][0], gt[i]) gt = [[0, 1], [-10, 10], [0, 20], [-30, 5], [5, 10]] for i in range(5): self.assertEqual(vfunc.axis[i][0], gt[i][0]) self.assertEqual(vfunc.axis[i][1], gt[i][1]) def test_data_adding(self): gt = [[-10, 10], [-30, 5]] - vfunc = VirtualFunction() + vfunc = FunctionSimulator() dim0 = np.arange(0, 1.1, 0.1) dim1 = np.arange(1.0, -0.1, -0.1) vfunc.add_dimension(dim0, gt[0]) self.assertEqual(len(vfunc.data.shape), 2) self.assertEqual(vfunc.data.shape[0], 1) self.assertEqual(vfunc.data.shape[1], 11) vfunc.add_dimension(dim1, gt[1]) self.assertEqual(vfunc.data.shape[0], 2) self.assertEqual(vfunc.data.shape[1], 11) for n in range(11): self.assertAlmostEqual(dim0[n], vfunc.data[0, n]) self.assertAlmostEqual(dim1[n], vfunc.data[1, n]) for i in range(2): 
self.assertEqual(vfunc.axis[i][0], gt[i][0]) self.assertEqual(vfunc.axis[i][1], gt[i][1]) def test_minima(self): - vfunc = VirtualFunction() + vfunc = FunctionSimulator() vfunc.load_images(os.path.join(TESTDATA_DIR, 'functionsimulator')) minima = vfunc.minima() gt = [[[0.7265625], 0.48828125], [[-4.0234375], -7.890625], [[2.265625], 0.859375], [ [-17.421875, -17.353515625, -17.28515625, -17.216796875, -17.1484375, -17.080078125, -17.01171875, -16.943359375, -16.875, -16.806640625, -16.73828125, -16.669921875, -16.6015625, -16.533203125, -16.46484375, -16.396484375, -16.328125, -16.259765625, -16.19140625, -16.123046875, -16.0546875, -15.986328125, -15.91796875, -15.849609375, -15.78125, -15.712890625, -15.64453125, -15.576171875, -15.5078125, -15.439453125, -15.37109375, -15.302734375, -15.234375, -15.166015625, -15.09765625, -15.029296875, -14.9609375, -14.892578125, -14.82421875, -14.755859375, -14.6875, -14.619140625, -14.55078125, -14.482421875, -14.4140625, -14.345703125, -14.27734375, -14.208984375, -14.140625, -14.072265625, -14.00390625, -13.935546875, -13.8671875, -13.798828125, -13.73046875, -13.662109375, -13.59375, -13.525390625, -13.45703125, -13.388671875, -13.3203125, -13.251953125, -13.18359375, -13.115234375, -13.046875, -12.978515625, -12.91015625, -12.841796875, -12.7734375, -12.705078125, -12.63671875, -12.568359375, -12.5, -12.431640625, -12.36328125, -12.294921875, -12.2265625, -12.158203125, -12.08984375, -12.021484375, -11.953125, -11.884765625, -11.81640625, -11.748046875, -11.6796875, -11.611328125, -11.54296875, -11.474609375, -11.40625, -11.337890625, -11.26953125, -11.201171875, -11.1328125, -11.064453125, -10.99609375, -10.927734375, -10.859375, -10.791015625, -10.72265625, -10.654296875, -10.5859375, -10.517578125, -10.44921875, -10.380859375, -10.3125, -10.244140625, -10.17578125, -10.107421875, -10.0390625, -9.970703125, -9.90234375, -9.833984375, -9.765625, -9.697265625, -9.62890625, -9.560546875, -9.4921875, -9.423828125, 
-9.35546875, -9.287109375, -9.21875, -9.150390625, -9.08203125, -9.013671875, -8.9453125, -8.876953125, -8.80859375, -8.740234375, -8.671875, -8.603515625, -8.53515625, -8.466796875, -8.3984375, -8.330078125, -8.26171875, -8.193359375, -8.125, -8.056640625, -7.98828125, -7.919921875, -7.8515625, -7.783203125, -7.71484375, -7.646484375, -7.578125, -7.509765625, -7.44140625, -7.373046875, -7.3046875, -7.236328125, -7.16796875, -7.099609375, -7.03125], -9.125], [[5.44921875, 5.458984375, 5.46875, 5.478515625, 5.48828125, 5.498046875, 5.5078125, 5.517578125, 5.52734375], 2.09375]] self.assertAlmostEqual(minima, gt) if __name__ == '__main__': unittest.main() diff --git a/requirements.txt b/requirements.txt index 2311210..2c5bae8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,12 @@ -bayesian-optimization>=1.0.1 hyperopt>=0.1.2 matplotlib>=3.0.3 numpy>=1.16.2 optuna>=0.9.0 Optunity>=1.1.1 pandas>=0.24.2 pytest>=4.3.1 scikit-learn>=0.20.3 scipy>=1.2.1 visdom>=0.1.8.8 +xmlrunner>=1.7.7 +Sphinx>=1.8.3 \ No newline at end of file diff --git a/resources/hyppopy.vpp b/resources/hyppopy.vpp new file mode 100644 index 0000000..3da44d0 Binary files /dev/null and b/resources/hyppopy.vpp differ