diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000..0780ab3
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,3 @@
+[run]
+source=hyppopy
+omit=hyppopy/tests/*
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 784180b..507df4e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,108 +1,111 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# latex
*.aux
*.bbl
*.blg
*.log
*.tcp
# solver_comparison
examples/solver_comparison/gfx/data_I
examples/solver_comparison/gfx/data_II
examples/solver_comparison/gfx/data_III
*.vpp.lck
.pytest_cache/
*.vpp.bak_*
python_tests_xml
+doc/CHANGELOG.md
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
.idea/
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
-docs/_build/
-doc/
+doc/_build/
+doc/LICENSE.rst
+doc/README.md
+
# PyBuilder
target/
#Ipython Notebook
.ipynb_checkpoints
#Pycharm files
*.iml
# merging stuff
*.orig
*~
# Paths in repository
mcml.py
# images etc
*.tif
*.nrrd
*.caffemodel
# C++ stuff
build*
*.user
hyppopy/tests/test_snipped_000.py
hyppopy/tests/test_snipped_001.py
hyppopy/tests/test_snipped_002.py
hyppopy/tests/test_snipped_003.py
hyppopy/tests/test_snipped_004.py
hyppopy/tests/test_snipped_005.py
hyppopy/tests/test_snipped_006.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d245baa..5093a4e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,16 +1,18 @@
+# Changelog
+
Release 0.5.0.0
- settings structure changed, additional settings can now be added as additional entries in the config dict or using the methods add_setting or set_settings
- sections solver and custom in config dict are removed completely
- use_solver setting in config dict is renamed to solver
- hyperparameter type now a native type, not a string anymore
- automatic consistency check between config and solver conditions: each solver now defines its interface, which is checked when executing the solver, throwing exceptions if the project instance and the solver's interface don't work together
- bayesOpt solver removed, extremely slow and not very good
Release 0.4.2.0
New feature QuasiRandomSolver added. The QuasiRandomSolver provides a randomized gridsampling. This means that depending
on max_iterations a grid over all numerical parameters is spanned and each cell is populated with a random value within the
cell bounds for numerical and a random draw for each categorical parameter. This ensures a random sampling of the
parameter space and a good space coverage without random cluster building. The solver also supports normal and
loguniform sampling.
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
index 1422535..bc8ee80 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,38 +1,39 @@
-=======================================================================
+License
+=======
+
Copyright (c) 2019 German Cancer Research Center,
Division of Medical Image Computing
All rights reserved.
Redistribution and use in source and binary forms, with or
without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above
copyright notice, this list of conditions and the
following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other
materials provided with the distribution.
* Neither the name of the German Cancer Research Center,
nor the names of its contributors may be used to endorse
or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-=======================================================================
diff --git a/README.md b/README.md
index 1e4e59b..9dcfc33 100644
--- a/README.md
+++ b/README.md
@@ -1,380 +1,384 @@
![docs_title_logo](./resources/docs_title_logo.png)
# A Hyper-Parameter Optimization Toolbox
+## Project Status
+[![Documentation Status](https://readthedocs.org/projects/hyppopy/badge/?version=latest)](https://hyppopy.readthedocs.io/en/latest/?badge=latest)
+[![codecov](https://codecov.io/gh/mic-dkfz/hyppopy/branch/master/graph/badge.svg)](https://codecov.io/gh/mic-dkfz/hyppopy)
+
## What is Hyppopy?
Hyppopy is a python toolbox for blackbox optimization. Its purpose is to offer a unified and easy to use interface to a collection of solver libraries. Currently provided solvers are:
* [Hyperopt](http://hyperopt.github.io/hyperopt/)
* [Optunity](https://optunity.readthedocs.io/en/latest/user/index.html)
* [Optuna](https://optuna.org/)
* Quasi-Randomsearch Solver
* Randomsearch Solver
* Gridsearch Solver
[See a solver analysis here: https://github.com/MIC-DKFZ/Hyppopy/blob/master/examples/solver_comparison/HyppopyReport.pdf]
## Installation
1. clone the [Hyppopy](https://github.com/MIC-DKFZ/Hyppopy) project from GitHub
2. (create a virtual environment), open a console (with your activated virtual env) and go to the hyppopy root folder
3. ```$ pip install -r requirements.txt```
4. ```$ python setup.py install``` (for normal usage) or ```$ python setup.py develop``` (if you want to join the hyppopy development *hooray*)
## How to use Hyppopy?
#### The Hyperparameterspace
Hyppopy defines a common hyperparameterspace description, whatever solver is used. A hyperparameter description includes the following fields:
* domain: the domain defines how the solver samples the parameter space, options are:
* uniform: samples the data range [a,b] evenly, whereas b>a
* normal: samples the data range [a,b] using a normal distribution with mu=a+(b-a)/2, sigma=(b-a)/6, whereas b>a
* loguniform: samples the data range [a,b] logarithmic using e^x by sampling the exponent range x=[log(a), log(b)] uniformly, whereas a>0 and b>a
* categorical: is used to define a data list
* data: in case of categorical domain data is a list, all other domains expect a range [a, b]
* type: the parameter data type as string 'int', 'float' or 'str'
An exception must be kept in mind when using the GridsearchSolver. The gridsearch additionally needs a number of samples per domain, which must be set using the field: frequency.
#### The HyppopyProject class
The HyppopyProject class takes care of all settings necessary for the solver and your workflow. To set up a HyppopyProject instance we can use a nested dictionary or the class's member functions respectively.
```python
# Import the HyppopyProject class
from hyppopy.HyppopyProject import HyppopyProject
# Create a nested dict with a section hyperparameter. We define a 2 dimensional
# hyperparameter space with a numerical dimension named myNumber of type float and
# a uniform sampling. The second dimension is a categorical parameter of type string.
config = {
"hyperparameter": {
"myNumber": {
"domain": "uniform",
"data": [0, 100],
"type": float
},
"myOption": {
"domain": "categorical",
"data": ["a", "b", "c"],
"type": str
}
}}
# Create a HyppopyProject instance and pass the config dict to
# the constructor. Alternatively one can use set_config method.
project = HyppopyProject(config=config)
# We can also add hyperparameter using the add_hyperparameter method
project = HyppopyProject()
project.add_hyperparameter(name="myNumber", domain="uniform", data=[0, 100], dtype=float)
project.add_hyperparameter(name="myOption", domain="categorical", data=["a", "b", "c"], dtype=str)
```
Additional settings for the solver or custom parameters can be set either as additional entries in the config dict, or via the methods set_settings or add_setting:
```python
from hyppopy.HyppopyProject import HyppopyProject
config = {
"hyperparameter": {
"myNumber": {
"domain": "uniform",
"data": [0, 100],
"type": float
},
"myOption": {
"domain": "categorical",
"data": ["a", "b", "c"],
"type": str
}
},
"max_iterations": 500,
"anything_you_want": 42
}
project = HyppopyProject(config=config)
print("max_iterations:", project.max_iterations)
print("anything_you_want:", project.anything_you_want)
#alternatively
project = HyppopyProject()
project.set_settings(max_iterations=500, anything_you_want=42)
print("anything_you_want:", project.anything_you_want)
#alternatively
project = HyppopyProject()
project.add_setting(name="max_iterations", value=500)
project.add_setting(name="anything_you_want", value=42)
print("anything_you_want:", project.anything_you_want)
```
#### The HyppopySolver classes
Each solver is a child of the HyppopySolver class. This is only interesting if you're planning to write a new solver, we will discuss this in the section Solver Development. All solvers we can use to optimize our blackbox function are part of the module 'hyppopy.solvers'. Below is a list of all solvers available along with their access key in square brackets.
* HyperoptSolver [hyperopt]
_Bayes Optimization using Tree-Parzen Estimator, supports uniform, normal, loguniform and categorical parameter_
* OptunitySolver [optunity]
_Particle Swarm Optimizer, supports uniform and categorical parameter_
* OptunaSolver [optuna]
_Bayes Optimization, supports uniform, and categorical parameter_
* RandomsearchSolver [randomsearch]
_Naive randomized parameter search, supports uniform, normal, loguniform and categorical parameter_
* QuasiRandomsearchSolver [quasirandomsearch]
_Randomized grid ensuring random sample drawing and a good space coverage, supports uniform, normal, loguniform and categorical parameter_
* GridsearchSolver [gridsearch]
_Standard gridsearch, supports uniform, normal, loguniform and categorical parameter_
There are two options to get a solver, we can import directly from the hyppopy.solvers package or we use the SolverPool class. We look into both options by optimizing a simple function, starting with the direct import case.
```python
# Import the HyppopyProject class
from hyppopy.HyppopyProject import HyppopyProject
# Import the HyperoptSolver class, in this case we use Hyperopt
from hyppopy.solvers.HyperoptSolver import HyperoptSolver
# Our function to optimize
def my_loss_func(x, y):
return x**2+y**2
# Creating a HyppopyProject instance
project = HyppopyProject()
project.add_hyperparameter(name="x", domain="uniform", data=[-10, 10], type=float)
project.add_hyperparameter(name="y", domain="uniform", data=[-10, 10], type=float)
project.add_setting(name="max_iterations", value=300)
# create a solver instance
solver = HyperoptSolver(project)
# pass the loss function to the solver
solver.blackbox = my_loss_func
# run the solver
solver.run()
df, best = solver.get_results()
print("\n")
print("*"*100)
print("Best Parameter Set:\n{}".format(best))
print("*"*100)
```
The SolverPool is a class keeping track of all solver classes. We have several options to ask the SolverPool for the desired solver. We can add a setting called solver to our config or to the project instance respectively, or we can use the solver access key (see solver listing above) to ask for the solver directly.
```python
# import the SolverPool class
from hyppopy.SolverPool import SolverPool
# Import the HyppopyProject class
from hyppopy.HyppopyProject import HyppopyProject
# Our function to optimize
def my_loss_func(x, y):
return x**2+y**2
# Creating a HyppopyProject instance
project = HyppopyProject()
project.add_hyperparameter(name="x", domain="uniform", data=[-10, 10], type=float)
project.add_hyperparameter(name="y", domain="uniform", data=[-10, 10], type=float)
project.set_settings(max_iterations=300, solver="hyperopt")
# create a solver instance. The SolverPool class is a singleton
# and can be used without instantiating. It looks in the project
# instance for the solver option and returns the correct solver.
solver = SolverPool.get(project=project)
# Another option without the usage of the solver field would be:
# solver = SolverPool.get(solver_name='hyperopt', project=project)
# pass the loss function to the solver
solver.blackbox = my_loss_func
# run the solver
solver.run()
df, best = solver.get_results()
print("\n")
print("*"*100)
print("Best Parameter Set:\n{}".format(best))
print("*"*100)
```
#### The BlackboxFunction class
To extend the possibilities beyond using parameter only loss functions as in the examples above, we can use the BlackboxFunction class. This class is a wrapper class around the actual loss_function providing a more advanced access interface to data handling and a callback_function for accessing the solvers iteration loop.
```python
# import the HyppopyProject class keeping track of inputs
from hyppopy.HyppopyProject import HyppopyProject
# import the SolverPool singleton class
from hyppopy.SolverPool import SolverPool
# import the Blackboxfunction class wrapping your problem for Hyppopy
from hyppopy.BlackboxFunction import BlackboxFunction
# Create the HyppopyProject class instance
project = HyppopyProject()
project.add_hyperparameter(name="C", domain="uniform", data=[0.0001, 20], type=float)
project.add_hyperparameter(name="gamma", domain="uniform", data=[0.0001, 20], type=float)
project.add_hyperparameter(name="kernel", domain="categorical", data=["linear", "sigmoid", "poly", "rbf"], type=str)
project.add_setting(name="max_iterations", value=500)
project.add_setting(name="solver", value="optunity")
# The BlackboxFunction signature is as follows:
# BlackboxFunction(blackbox_func=None,
# dataloader_func=None,
# preprocess_func=None,
# callback_func=None,
# data=None,
# **kwargs)
#
# - blackbox_func: a function pointer to the users loss function
# - dataloader_func: a function pointer for handling dataloading. The function is called once before
# optimizing. What it returns is passed as first argument to your loss functions
# data argument.
# - preprocess_func: a function pointer for data preprocessing. The function is called once before
# optimizing and gets via kwargs['data'] the raw data object set directly or returned
# from dataloader_func. What this function returns is then what is passed as first
# argument to your loss function.
# - callback_func: a function pointer called after each iteration. The input kwargs is a dictionary
# keeping the parameters used in this iteration, the 'iteration' index, the 'loss'
# and the 'status'. The function in this example is used for realtime printing its
# input but can also be used for realtime visualization.
# - data: if not done via dataloader_func one can set a raw_data object directly
# - kwargs: dict whose content is passed to all functions above.
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
def my_dataloader_function(**kwargs):
print("Dataloading...")
# kwargs['params'] allows accessing additional parameter passed,
# see below my_preproc_param, my_dataloader_input.
print("my loading argument: {}".format(kwargs['params']['my_dataloader_input']))
iris_data = load_iris()
return [iris_data.data, iris_data.target]
def my_preprocess_function(**kwargs):
print("Preprocessing...")
# kwargs['data'] allows accessing the input data
print("data:", kwargs['data'][0].shape, kwargs['data'][1].shape)
# kwargs['params'] allows accessing additional parameter passed,
# see below my_preproc_param, my_dataloader_input.
print("kwargs['params']['my_preproc_param']={}".format(kwargs['params']['my_preproc_param']), "\n")
# if the preprocessing function returns something,
# the input data will be replaced with the data returned by this function.
x = kwargs['data'][0]
y = kwargs['data'][1]
for i in range(x.shape[0]):
x[i, :] += kwargs['params']['my_preproc_param']
return [x, y]
def my_callback_function(**kwargs):
print("\r{}".format(kwargs), end="")
def my_loss_function(data, params):
clf = SVC(**params)
return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean()
# We now create the BlackboxFunction object and pass all function pointers defined above,
# as well as 2 dummy parameter (my_preproc_param, my_dataloader_input) for demonstration purposes.
blackbox = BlackboxFunction(blackbox_func=my_loss_function,
dataloader_func=my_dataloader_function,
preprocess_func=my_preprocess_function,
callback_func=my_callback_function,
my_preproc_param=1,
my_dataloader_input='could/be/a/path')
# Get the solver
solver = SolverPool.get(project=project)
# Give the solver your blackbox
solver.blackbox = blackbox
# Run the solver
solver.run()
# Get your results
df, best = solver.get_results()
print("\n")
print("*"*100)
print("Best Parameter Set:\n{}".format(best))
print("*"*100)
```
#### The Parameter Space Domains
Each hyperparameter needs a range and a domain specifier. The range, specified via 'data', is the left and right bound of an interval (exception is the domain 'categorical', here 'data' is the actual list of data elements) and the domain specifier the way this interval is sampled. Currently supported domains are:
* uniform (samples the interval [a,b] evenly)
* normal* (a gaussian sampling of the interval [a,b] such that mu=a+(b-a)/2 and sigma=(b-a)/6)
* loguniform* (a logarithmic sampling of the interval [a,b], such that the exponent e^x is sampled evenly x=[log(a),log(b)])
* categorical (in this case data is not interpreted as interval but as actual list of objects)
*Not all domains are supported by all solvers, this might be fixed in the future, but until then, the solver throws an error telling you that the domain is unknown.
When using the GridsearchSolver we need to specify an interval and a number of samples using a frequency specifier. The max_iterations parameter is obsolete in this case, because each axis specifies an individual number of samples via frequency. This applies only to numerical space domains, categorical space domains need a frequency value of 1.
```python
# import the GridsearchSolver class
from hyppopy.solvers.GridsearchSolver import GridsearchSolver
# Import the HyppopyProject class
from hyppopy.HyppopyProject import HyppopyProject
# Our function to optimize
def my_loss_func(x, y):
return x**2+y**2
# Creating a HyppopyProject instance
project = HyppopyProject()
project.add_hyperparameter(name="x", domain="uniform", data=[-1.1, 1], frequency=10, type=float)
project.add_hyperparameter(name="y", domain="uniform", data=[-1.1, 1], frequency=12, type=float)
solver = GridsearchSolver(project=project)
# pass the loss function to the solver
solver.blackbox = my_loss_func
# run the solver
solver.run()
df, best = solver.get_results()
print("\n")
print("*"*100)
print("Best Parameter Set:\n{}".format(best))
print("*"*100)
```
#### Using a Visdom Server to Visualize the Optimization Process
We can simply create a realtime visualization using a visdom server. If installed, start your visdom server via console command:
```
>visdom
```
Go to your browser and open the site: http://localhost:8097
To enable the visualization call the function 'start_viewer' before running the solver:
```
#enable visualization
solver.start_viewer()
# Run the solver
solver.run()
```
You can also change the port and the server name in start_viewer(port=8097, server="http://localhost")
## Acknowledgements:
_This work is supported by the [Helmholtz Association Initiative and Networking](https://www.helmholtz.de/en/about_us/the_association/initiating_and_networking/) Fund under project number ZT-I-0003._
diff --git a/doc/Makefile b/doc/Makefile
new file mode 100644
index 0000000..298ea9e
--- /dev/null
+++ b/doc/Makefile
@@ -0,0 +1,19 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+SOURCEDIR = .
+BUILDDIR = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
diff --git a/hyppopy/solvers/__init__.py b/doc/__init__.py
similarity index 100%
copy from hyppopy/solvers/__init__.py
copy to doc/__init__.py
diff --git a/doc/_static/class_diagram.png b/doc/_static/class_diagram.png
new file mode 100644
index 0000000..a9d762a
Binary files /dev/null and b/doc/_static/class_diagram.png differ
diff --git a/doc/api.rst b/doc/api.rst
new file mode 100644
index 0000000..6d5d790
--- /dev/null
+++ b/doc/api.rst
@@ -0,0 +1,77 @@
+***********
+Hyppopy API
+***********
+
+Main Classes
+############
+
+HyppopyProject
+**************
+.. automodule:: hyppopy.HyppopyProject
+ :members:
+
+HyppopySolver
+*************
+.. automodule:: hyppopy.solvers.HyppopySolver
+ :members:
+
+BlackboxFunction
+****************
+.. automodule:: hyppopy.BlackboxFunction
+ :members:
+
+SolverPool
+**********
+.. automodule:: hyppopy.SolverPool
+ :members:
+
+Solver Classes
+##############
+
+HyperoptSolver
+**************
+.. automodule:: hyppopy.solvers.HyperoptSolver
+ :members:
+
+OptunitySolver
+**************
+.. automodule:: hyppopy.solvers.OptunitySolver
+ :members:
+
+OptunaSolver
+**************
+.. automodule:: hyppopy.solvers.OptunaSolver
+ :members:
+
+RandomsearchSolver
+******************
+.. automodule:: hyppopy.solvers.RandomsearchSolver
+ :members:
+
+QuasiRandomsearchSolver
+***********************
+.. automodule:: hyppopy.solvers.QuasiRandomsearchSolver
+ :members:
+
+GridsearchSolver
+****************
+.. automodule:: hyppopy.solvers.GridsearchSolver
+ :members:
+
+Helpers
+#######
+
+VisdomViewer
+************
+.. automodule:: hyppopy.VisdomViewer
+ :members:
+
+FunctionSimulator
+*****************
+.. automodule:: hyppopy.FunctionSimulator
+ :members:
+
+Singleton
+*********
+.. automodule:: hyppopy.Singleton
+ :members:
\ No newline at end of file
diff --git a/doc/conf.py b/doc/conf.py
new file mode 100644
index 0000000..1eda279
--- /dev/null
+++ b/doc/conf.py
@@ -0,0 +1,223 @@
+# -*- coding: utf-8 -*-
+#
+# Configuration file for the Sphinx documentation builder.
+#
+# This file does only contain a selection of the most common options. For a
+# full list see the documentation:
+# http://www.sphinx-doc.org/en/master/config
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+
+import os
+import sys
+from shutil import copyfile
+
+ROOT = os.path.abspath('../')
+sys.path.insert(0, os.path.abspath('.'))
+sys.path.insert(0, ROOT)
+
+README_PATH_SRC = os.path.join(ROOT, "README.md")
+README_PATH_DST = os.path.join(ROOT, *("doc", "README.md"))
+print("copy", README_PATH_SRC, "to", README_PATH_DST)
+try:
+ copyfile(README_PATH_SRC, README_PATH_DST)
+except:
+ print("Missing README.md file in subdir!")
+
+LICENSE_PATH_SRC = os.path.join(ROOT, "LICENSE")
+LICENSE_PATH_DST = os.path.join(ROOT, *("doc", "LICENSE.rst"))
+print("copy", LICENSE_PATH_SRC, "to", LICENSE_PATH_DST)
+try:
+ copyfile(LICENSE_PATH_SRC, LICENSE_PATH_DST)
+except:
+ print("Missing LICENSE file in subdir!")
+
+CHANGELOG_PATH_SRC = os.path.join(ROOT, "CHANGELOG.md")
+CHANGELOG_PATH_DST = os.path.join(ROOT, *("doc", "CHANGELOG.md"))
+print("copy", CHANGELOG_PATH_SRC, "to", CHANGELOG_PATH_DST)
+try:
+ copyfile(CHANGELOG_PATH_SRC, CHANGELOG_PATH_DST)
+except:
+ print("Missing CHANGELOG.md file in subdir!")
+
+# -- Project information -----------------------------------------------------
+
+project = 'Hyppopy'
+copyright = '2019, DKFZ'
+author = 'S. Wanner'
+
+# The short X.Y version
+version = '0.5'
+# The full version, including alpha/beta/rc tags
+release = '0.5.0'
+
+
+# -- General configuration ---------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+#extensions = [
+ # 'sphinx.ext.autodoc',
+# 'recommonmark',
+# 'autoapi.extension',
+# 'sphinx.ext.napoleon'
+#]
+
+extensions = ['recommonmark',
+ 'sphinx.ext.autodoc',
+ 'sphinx.ext.coverage',
+ 'sphinx.ext.napoleon',
+ #'autoapi.extension',
+ 'sphinx.ext.inheritance_diagram']
+
+#autoapi_type = 'python'
+#autoapi_dirs = [ROOT, '']
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = '.rst'
+
+# The master toctree document.
+master_doc = 'index'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'doc', 'tests']
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = None
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'alabaster'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Custom sidebar templates, must be a dictionary that maps document names
+# to template names.
+#
+# The default sidebars (for documents that don't match any pattern) are
+# defined by theme itself. Builtin themes are using these templates by
+# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
+# 'searchbox.html']``.
+#
+# html_sidebars = {}
+
+
+# -- Options for HTMLHelp output ---------------------------------------------
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'Hyppopydoc'
+
+
+# -- Options for LaTeX output ------------------------------------------------
+
+latex_elements = {
+ # The paper size ('letterpaper' or 'a4paper').
+ #
+ # 'papersize': 'letterpaper',
+
+ # The font size ('10pt', '11pt' or '12pt').
+ #
+ # 'pointsize': '10pt',
+
+ # Additional stuff for the LaTeX preamble.
+ #
+ # 'preamble': '',
+
+ # Latex figure (float) alignment
+ #
+ # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+# author, documentclass [howto, manual, or own class]).
+latex_documents = [
+ (master_doc, 'Hyppopy.tex', 'Hyppopy Documentation',
+ 'S. Wanner', 'manual'),
+]
+
+
+# -- Options for manual page output ------------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+ (master_doc, 'hyppopy', 'Hyppopy Documentation',
+ [author], 1)
+]
+
+
+# -- Options for Texinfo output ----------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+# dir menu entry, description, category)
+texinfo_documents = [
+ (master_doc, 'Hyppopy', 'Hyppopy Documentation',
+ author, 'Hyppopy', 'One line description of project.',
+ 'Miscellaneous'),
+]
+
+
+# -- Options for Epub output -------------------------------------------------
+
+# Bibliographic Dublin Core info.
+epub_title = project
+
+# The unique identifier of the text. This can be a ISBN number
+# or the project homepage.
+#
+# epub_identifier = ''
+
+# A unique identification for the text.
+#
+# epub_uid = ''
+
+# A list of files that should not be packed into the epub file.
+epub_exclude_files = ['search.html']
+
+
+# -- Extension configuration -------------------------------------------------
+
+# -- Options for todo extension ----------------------------------------------
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = True
diff --git a/doc/developer_guide.rst b/doc/developer_guide.rst
new file mode 100644
index 0000000..af19150
--- /dev/null
+++ b/doc/developer_guide.rst
@@ -0,0 +1,169 @@
+****************
+Developers Guide
+****************
+
+The main classes and their connections
+**************************************
+
+The picture below depicts the relationships between the most important classes of hyppopy.
+
+.. image:: _static/class_diagram.png
+
+To understand the concept behind Hyppopy the following classes are important:
+ - :py:mod:`hyppopy.solvers.HyppopySolver`
+ - :py:mod:`hyppopy.HyppopyProject`
+ - :py:mod:`hyppopy.BlackboxFunction`
+
+
+The :py:mod:`hyppopy.solvers.HyppopySolver` class is the parent class of all solvers in Hyppopy. It defines
+an abstract interface that needs to be implemented by each custom solver class. The main idea is to
+define a common interface for the different approaches the solver libraries are based on. When designing
+Hyppopy there were three main challenges that drove the design. Each solver library has a different
+approach to define or describe the hyperparameter space, has a different approach to track the solver
+information and is different in setting the blackbox function and running the optimization process. To
+deal with those differences the :py:mod:`hyppopy.solvers.HyppopySolver` class defines the abstract interface
+functions `convert_searchspace`, `execute_solver`, `loss_function_call` and `define_interface`. Those serve as
+abstraction layer to handle the individual needs of each solver library.
+
+Each solver needs a :py:mod:`hyppopy.HyppopyProject` instance keeping the user configuration input and a
+:py:mod:`hyppopy.BlackboxFunction` instance, implementing the loss function.
+
+Implementing a custom solver
+****************************
+
+Adding a new solver is only about deriving a new class from :py:mod:`hyppopy.solvers.HyppopySolver` as well as
+telling the :py:mod:`hyppopy.SolverPool` that it exists. We go through the whole process on the example of the
+solver :py:mod:`hyppopy.solvers.OptunitySolver`:
+
+.. code-block:: python
+
+ import os
+ import optunity
+ from pprint import pformat
+
+
+ from hyppopy.solvers.HyppopySolver import HyppopySolver
+
+
+ class OptunitySolver(HyppopySolver):
+
+ def __init__(self, project=None):
+ HyppopySolver.__init__(self, project)
+
+The first step is to derive from the HyppopySolver class. It is good practice to let the project be set via __init__
+and, if it is given, to pass it on to HyppopySolver.__init__. The next step is implementing the abstract interface methods.
+We start with define_interface. This function's purpose is to define the relevant input parameters and the signature
+of the hyperparameter space. Our solver needs a parameter called max_iterations of type int. Each hyperparameter
+has a field domain that allows the values 'uniform' and 'categorical', a field data of type list and a field type of type
+type. This guarantees that exceptions are thrown if the user violates this signature or forgets to set max_iterations.
+
+.. code-block:: python
+
+ def define_interface(self):
+ self._add_member("max_iterations", int)
+ self._add_hyperparameter_signature(name="domain", dtype=str,
+ options=["uniform", "categorical"])
+ self._add_hyperparameter_signature(name="data", dtype=list)
+ self._add_hyperparameter_signature(name="type", dtype=type)
+
+
+The next abstract method to implement is convert_searchspace. This method is responsible for interpreting the user's hyperparameter
+input and converting it to the form the solver framework needs. An input can, for example, be:
+
+.. code-block:: python
+
+ hyperparameter = {
+ 'C': {'domain': 'uniform', 'data': [0.0001, 20], 'type': float},
+ 'gamma': {'domain': 'uniform', 'data': [0.0001, 20.0], 'type': float},
+ 'kernel': {'domain': 'categorical', 'data': ['linear', 'sigmoid', 'poly', 'rbf'], 'type': str},
+ 'decision_function_shape': {'domain': 'categorical', 'data': ['ovo', 'ovr'], 'type': str}
+ }
+
+
+Optunity instead expects a hyperparameter space formulation as follows:
+
+.. code-block:: python
+
+ optunity_space = {'decision_function_shape':
+ {'ovo': {
+ 'kernel': {
+ 'linear': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]},
+ 'sigmoid': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]},
+ 'poly': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]},
+ 'rbf': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}}
+ },
+ 'ovr': {
+ 'kernel': {
+ 'linear': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]},
+ 'sigmoid': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]},
+ 'poly': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]},
+ 'rbf': {'C': [0.0001, 20], 'gamma': [0.0001, 20.0]}}
+ }
+ }}
+
+This conversion is what convert_searchspace is meant for.
+
+.. code-block:: python
+
+ def convert_searchspace(self, hyperparameter):
+ LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(hyperparameter)))
+ # split input in categorical and non-categorical data
+ cat, uni = self.split_categorical(hyperparameter)
+ # build up dictionary keeping all non-categorical data
+ uniforms = {}
+ for key, value in uni.items():
+ for key2, value2 in value.items():
+ if key2 == 'data':
+ if len(value2) == 3:
+ uniforms[key] = value2[0:2]
+ elif len(value2) == 2:
+ uniforms[key] = value2
+ else:
+ raise AssertionError("precondition violation, optunity searchspace needs list with left and right range bounds!")
+
+ if len(cat) == 0:
+ return uniforms
+ # build nested categorical structure
+ inner_level = uniforms
+ for key, value in cat.items():
+ tmp = {}
+ optunity_space = {}
+ for key2, value2 in value.items():
+ if key2 == 'data':
+ for elem in value2:
+ tmp[elem] = inner_level
+ optunity_space[key] = tmp
+ inner_level = optunity_space
+ return optunity_space
+
+
+Now that we have defined how the solver looks from the outside and how to convert the incoming parameter space, we can define how the blackbox function
+is called. The abstract method loss_function_call is a wrapper function that allows customizing the call of the blackbox function. In the case of Optunity
+we only check whether a parameter is of type int and convert it, to ensure that no exceptions are thrown when the blackbox expects integers.
+
+.. code-block:: python
+
+ def loss_function_call(self, params):
+ for key in params.keys():
+ if self.project.get_typeof(key) is int:
+ params[key] = int(round(params[key]))
+ return self.blackbox(**params)
+
+
+In execute_solver the actual wrapping of the solver framework call is done. Here we call the Optunity optimizing function. A dictionary keeping the optimal
+parameter set must be assigned to self.best.
+
+
+.. code-block:: python
+
+ def execute_solver(self, searchspace):
+ LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(searchspace)))
+ try:
+ self.best, _, _ = optunity.minimize_structured(f=self.loss_function,
+ num_evals=self.max_iterations,
+ search_space=searchspace)
+ except Exception as e:
+ LOG.error("internal error in optunity.minimize_structured occured. {}".format(e))
+ raise BrokenPipeError("internal error in optunity.minimize_structured occured. {}".format(e))
+
+
diff --git a/doc/index.rst b/doc/index.rst
new file mode 100644
index 0000000..92e6a66
--- /dev/null
+++ b/doc/index.rst
@@ -0,0 +1,19 @@
+Welcome to Hyppopy's documentation!
+===================================
+
+.. toctree::
+ :maxdepth: 3
+ :caption: Contents:
+
+ README
+ api
+ developer_guide
+ CHANGELOG
+ LICENSE
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
\ No newline at end of file
diff --git a/doc/make.bat b/doc/make.bat
new file mode 100644
index 0000000..7893348
--- /dev/null
+++ b/doc/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.http://sphinx-doc.org/
+ exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+
+:end
+popd
diff --git a/examples/solver_comparison.py b/examples/solver_comparison.py
index f4743b0..8a09b41 100644
--- a/examples/solver_comparison.py
+++ b/examples/solver_comparison.py
@@ -1,364 +1,369 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical Image Computing.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE
import os
import sys
import time
import pickle
import numpy as np
from math import pi
import matplotlib.pyplot as plt
from hyppopy.SolverPool import SolverPool
from hyppopy.HyppopyProject import HyppopyProject
-from hyppopy.VirtualFunction import VirtualFunction
+from hyppopy.FunctionSimulator import FunctionSimulator
from hyppopy.BlackboxFunction import BlackboxFunction
-#OUTPUTDIR = "C:\\Users\\s635r\\Desktop\\solver_comparison"
-OUTPUTDIR = "D:\\Projects\\Python\\hyppopy\\examples\\solver_comparison\\gfx"
+OUTPUTDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), *("solver_comparison", "gfx")))
+# The solvers to be evaluated
SOLVER = []
-
SOLVER.append("quasirandomsearch")
SOLVER.append("randomsearch")
SOLVER.append("hyperopt")
SOLVER.append("optunity")
SOLVER.append("optuna")
+# number of iterations to be tested
ITERATIONS = []
ITERATIONS.append(15)
ITERATIONS.append(50)
ITERATIONS.append(300)
ITERATIONS.append(1000)
+# number of repetitions for each solver and iteration; the results
+# plotted are the mean and std dev of these independent trials
STATREPEATS = 50
+# evaluations are stored using pickle; if OVERWRITE is True the stored
+# files are ignored and overwritten each time. Set to False when only the
+# plots need to be regenerated
OVERWRITE = False
def compute_deviation(solver_name, vfunc_id, iterations, N, fname):
    """
    Run one solver repeatedly on a default FunctionSimulator function and pickle the statistics.

    For every value in ``iterations`` the solver is executed ``N`` times. Per value the
    following is stored: mean/std of the best parameter per axis ("minima"), mean/std of
    its distance to the reference values stored under "gt" ("distance"), mean/std of the
    best loss ("loss"), the descending sorted losses of every run ("loss_history"), the
    mean run time ("duration") and the summed absolute difference between the best
    dataframe row and the parameter set reported by the solver ("set_difference").

    :param solver_name: value of the project setting "solver"
    :param vfunc_id: id handed to FunctionSimulator.load_default
    :param iterations: iterable of max_iterations values to evaluate
    :param N: number of repetitions per max_iterations value
    :param fname: path of the pickle file the results dict is written to
    """
    # the search space of this benchmark is fixed to five uniform axes
    axis_names = ["axis_0{}".format(i) for i in range(5)]
    project = HyppopyProject()
    for name in axis_names:
        project.add_hyperparameter(name=name, domain="uniform", data=[0, 1], type=float)

    vfunc = FunctionSimulator()
    vfunc.load_default(vfunc_id)
    minima = vfunc.minima()

    def my_loss_function(data, params):
        return vfunc(**params)

    blackbox = BlackboxFunction(data=[], blackbox_func=my_loss_function)

    results = {}
    results["gt"] = [np.median(mini[0]) for mini in minima]
    dim_names = ["axis_0{}".format(i) for i in range(vfunc.dims())]

    for n_iter in iterations:
        results[n_iter] = {"minima": {name: [] for name in dim_names},
                           "distance": {name: [] for name in dim_names},
                           "duration": None,
                           "set_difference": None,
                           "loss": None,
                           "loss_history": {}}

        project.add_setting("max_iterations", n_iter)
        project.add_setting("solver", solver_name)

        solver = SolverPool.get(project=project)
        solver.blackbox = blackbox

        axis_minima = [[] for _ in dim_names]
        best_losses = []
        best_sets_diff = []
        loss_history = []
        durations = []
        for n in range(N):
            print("\rSolver={} iteration={} round={}".format(solver, n_iter, n), end="")
            start = time.time()
            solver.run(print_stats=False)
            end = time.time()
            durations.append(end-start)

            df, best = solver.get_results()
            loss_history.append(np.flip(np.sort(df['losses'].values)))
            best_row = df['losses'].idxmin()
            best_losses.append(df['losses'][best_row])
            # should be 0 when the solver reports the row with the smallest loss as best
            best_sets_diff.append(sum(abs(df[name][best_row] - best[name]) for name in axis_names))
            for i, name in enumerate(dim_names):
                axis_minima[i].append(df[name][best_row])

        results[n_iter]["loss_history"] = loss_history
        for i, name in enumerate(dim_names):
            results[n_iter]["minima"][name] = [np.mean(axis_minima[i]), np.std(axis_minima[i])]
            dist = np.sqrt((axis_minima[i]-results["gt"][i])**2)
            results[n_iter]["distance"][name] = [np.mean(dist), np.std(dist)]
        results[n_iter]["loss"] = [np.mean(best_losses), np.std(best_losses)]
        results[n_iter]["set_difference"] = sum(best_sets_diff)
        results[n_iter]["duration"] = np.mean(durations)

    # context manager closes the file even if pickling fails
    with open(fname, 'wb') as file:
        pickle.dump(results, file)
def make_radarplot(results, title, fname=None):
    """
    Draw a polar plot of the mean per-axis minima, one curve per iteration count.

    Note that the entry "gt" is removed from ``results`` in place; it is drawn
    as a filled polygon behind the curves.

    :param results: results dict of a single solver
    :param title: plot title
    :param fname: output path without extension, "<fname>.png" is written;
                  if None the plot is shown instead
    """
    ground_truth = results.pop("gt")
    first_entry = results[list(results)[0]]
    categories = list(first_entry["minima"])
    n_axes = len(categories)

    # one angle per axis; the first angle is repeated to close the polygon
    angles = [k / float(n_axes) * 2 * pi for k in range(n_axes)]
    angles.extend(angles[:1])

    ax = plt.subplot(1, 1, 1, polar=True)
    ax.set_theta_offset(pi / 2)
    ax.set_theta_direction(-1)

    plt.xticks(angles[:-1], categories, color='grey', size=8)
    ax.set_rlabel_position(0)
    plt.yticks([0.2, 0.4, 0.6, 0.8, 1.0], ["0.2", "0.4", "0.6", "0.8", "1.0"], color="grey", size=7)
    plt.ylim(0, 1)

    ground_truth.extend(ground_truth[:1])
    ax.fill(angles, ground_truth, color=(0.2, 0.8, 0.2), alpha=0.2)

    # colormap index 2 is skipped as soon as more than two curves are drawn
    colormap = plt.get_cmap('Set1')
    n_curves = len(results)
    if n_curves > 2:
        indices = [k for k in range(n_curves + 1) if k != 2]
    else:
        indices = list(range(n_curves))
    colors = [colormap(indices[k]) for k in range(n_curves)]

    for (n_iter, data), color in zip(results.items(), colors):
        values = [data["minima"]["axis_0{}".format(k)][0] for k in range(n_axes)]
        values.extend(values[:1])
        ax.plot(angles, values, color=color, linewidth=2, linestyle='solid', label="iterations {}".format(n_iter))

    plt.title(title, size=11, color=(0.1, 0.1, 0.1), y=1.1)
    plt.legend(bbox_to_anchor=(0.08, 1.12))

    if fname is not None:
        plt.savefig(fname + ".png")
    else:
        plt.show()
    plt.clf()
def make_errrorbars_plot(results, fname=None):
    """
    Plot, for every entry of ITERATIONS, grouped bars of the per-axis distance (mean with std error bars).

    :param results: dict mapping solver name -> iteration count -> result dict
    :param fname: output path prefix, "<fname>_<iterations>.png" is written;
                  if None each plot is shown instead
    """
    bar_width = 0.14
    opacity = 0.8
    error_config = {'ecolor': '0.3'}

    for n_iter in ITERATIONS:
        plt.figure(figsize=(10, 8))

        names = []
        means = []
        stds = []
        axis = []
        for solver_name, numbers in results.items():
            solver_means = []
            solver_stds = []
            for axis_name, data in numbers[n_iter]["distance"].items():
                solver_means.append(data[0])
                solver_stds.append(data[1])
                # collect at most five tick labels
                if len(axis) < 5:
                    axis.append(axis_name)
            names.append(solver_name)
            means.append(solver_means)
            stds.append(solver_stds)

        colors = [plt.cm.Set2(c/len(names)) for c in range(len(names))]
        index = np.arange(len(axis))

        for k, name in enumerate(names):
            plt.bar(index + k*bar_width, means[k], bar_width,
                    alpha=opacity,
                    color=colors[k],
                    yerr=stds[k],
                    error_kw=error_config,
                    label=name)

        plt.xlabel('Axis')
        plt.ylabel('Mean [+/- std]')
        plt.title('Deviation per Axis and Solver for {} Iterations'.format(n_iter))
        plt.xticks(index + 2*bar_width, axis)
        plt.legend()

        if fname is not None:
            plt.savefig(fname + "_{}.png".format(n_iter))
        else:
            plt.show()
        plt.clf()
def plot_loss_histories(results, fname=None):
    """
    Plot the stored loss histories of all solvers, one figure per entry of ITERATIONS.

    :param results: dict mapping solver name -> iteration count -> result dict
                    holding the key "loss_history"
    :param fname: output path prefix, "<fname>_<iterations>.png" is written;
                  if None each plot is shown instead
    """
    colors = [plt.cm.Set2(c / len(SOLVER)) for c in range(len(SOLVER))]

    for n_iter in ITERATIONS:
        plt.figure(figsize=(10, 8))
        for n, solver_name in enumerate(results.keys()):
            # only the first curve of a solver carries the legend label
            labeled = False
            for history in results[solver_name][n_iter]["loss_history"]:
                if labeled:
                    plt.plot(history, color=colors[n], alpha=0.5)
                else:
                    plt.plot(history, color=colors[n], label=solver_name, alpha=0.5)
                    labeled = True

        plt.legend()
        plt.ylabel('Loss')
        plt.xlabel('Iteration')

        if fname is not None:
            plt.savefig(fname + "_{}.png".format(n_iter))
        else:
            plt.show()
        plt.clf()
def print_durations(results, fname=None):
    """
    Write the run time table and plot the mean run time per iteration of each solver.

    :param results: dict mapping solver name -> iteration count -> result dict
                    holding the key "duration"
    :param fname: output path without extension; "<fname>.txt" and "<fname>.png"
                  are written. If None no file is written and the plot is shown.
    """
    # The table can only be written when a file name is given; previously
    # fname=None raised a TypeError here although None is the default.
    if fname is not None:
        lines = ["iterations\t"+"\t".join(SOLVER)+"\n"]
        for n_iter in ITERATIONS:
            txt = str(n_iter) + "\t"
            for solver_name in SOLVER:
                txt += str(results[solver_name][n_iter]["duration"]) + "\t"
            txt += "\n"
            lines.append(txt)
        with open(fname + ".txt", "w") as f:
            f.writelines(lines)

    # mean duration of a single iteration, averaged over all iteration counts
    durations = {}
    for n_iter in ITERATIONS:
        for solver_name in SOLVER:
            per_iteration = results[solver_name][n_iter]["duration"] / n_iter
            if solver_name not in durations:
                durations[solver_name] = per_iteration
            else:
                durations[solver_name] += per_iteration
    for name in durations:
        durations[name] /= len(ITERATIONS)

    fig, ax = plt.subplots(figsize=(14, 6))

    y_pos = np.arange(len(durations))
    t = [durations[solver] for solver in SOLVER]
    print(SOLVER)
    print(t)
    ax.barh(y_pos, t, align='center', color='green')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(SOLVER)
    ax.invert_yaxis()
    ax.set_xscale('log')
    ax.set_xlabel('Duration in [s]')
    ax.set_title('Mean Solver Computation Time per Iteration')

    if fname is None:
        plt.show()
    else:
        plt.savefig(fname + ".png")
    plt.clf()
id2dirmapping = {"5D": "data_I", "5D2": "data_II", "5D3": "data_III"}
if __name__ == "__main__":
vfunc_ID = "5D"
if len(sys.argv) == 2:
vfunc_ID = sys.argv[1]
print("Start Evaluation on {}".format(vfunc_ID))
OUTPUTDIR = os.path.join(OUTPUTDIR, id2dirmapping[vfunc_ID])
if not os.path.isdir(OUTPUTDIR):
os.makedirs(OUTPUTDIR)
##################################################
############### create datasets ##################
fnames = []
for solver_name in SOLVER:
fname = os.path.join(OUTPUTDIR, solver_name)
fnames.append(fname)
if OVERWRITE or not os.path.isfile(fname):
compute_deviation(solver_name, vfunc_ID, ITERATIONS, N=STATREPEATS, fname=fname)
##################################################
##################################################
##################################################
############## create radarplots #################
all_results = {}
for solver_name, fname in zip(SOLVER, fnames):
file = open(fname, 'rb')
results = pickle.load(file)
file.close()
make_radarplot(results, solver_name, fname + "_deviation")
all_results[solver_name] = results
fname = os.path.join(OUTPUTDIR, "errorbars")
make_errrorbars_plot(all_results, fname)
fname = os.path.join(OUTPUTDIR, "losshistory")
plot_loss_histories(all_results, fname)
fname = os.path.join(OUTPUTDIR, "durations")
print_durations(all_results, fname)
for solver_name, iterations in all_results.items():
for iter, numbers in iterations.items():
if numbers["set_difference"] != 0:
print("solver {} has a different parameter set match in iteration {}".format(solver_name, iter))
##################################################
##################################################
plt.imsave(fname=os.path.join(OUTPUTDIR, "dummy.png"), arr=np.ones((800, 1000, 3), dtype=np.uint8)*255)
diff --git a/examples/tutorial_custom_visualization.py b/examples/tutorial_custom_visualization.py
index 1b624ab..c5da2de 100644
--- a/examples/tutorial_custom_visualization.py
+++ b/examples/tutorial_custom_visualization.py
@@ -1,105 +1,105 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical Image Computing.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE
import matplotlib.pylab as plt
from hyppopy.SolverPool import SolverPool
from hyppopy.HyppopyProject import HyppopyProject
-from hyppopy.VirtualFunction import VirtualFunction
+from hyppopy.FunctionSimulator import FunctionSimulator
from hyppopy.BlackboxFunction import BlackboxFunction
project = HyppopyProject()
project.add_hyperparameter(name="axis_00", domain="uniform", data=[0, 1], type=float)
project.add_hyperparameter(name="axis_01", domain="uniform", data=[0, 1], type=float)
project.add_hyperparameter(name="axis_02", domain="uniform", data=[0, 1], type=float)
project.add_hyperparameter(name="axis_03", domain="uniform", data=[0, 1], type=float)
project.add_hyperparameter(name="axis_04", domain="uniform", data=[0, 1], type=float)
project.add_setting("max_iterations", 500)
project.add_setting("solver", "randomsearch")
plt.ion()
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(12, 8), sharey=True)
plot_data = {"iterations": [],
"loss": [],
"axis_00": [],
"axis_01": [],
"axis_02": [],
"axis_03": [],
"axis_04": []}
def my_visualization_function(**kwargs):
    """
    Callback that records the values of one iteration and redraws all panels.

    :param kwargs: must contain 'iterations', 'loss' and 'axis_00' ... 'axis_04'
    """
    print("\r{}".format(kwargs), end="")
    for key in ("iterations", "loss", "axis_00", "axis_01", "axis_02", "axis_03", "axis_04"):
        plot_data[key].append(kwargs[key])

    losses = plot_data["loss"]

    # (panel position, hyperparameter shown, whether the panel gets a y label)
    panels = [((0, 0), "axis_00", True),
              ((0, 1), "axis_01", False),
              ((0, 2), "axis_02", False),
              ((1, 0), "axis_03", True),
              ((1, 1), "axis_04", False)]
    for position, axis_name, with_ylabel in panels:
        panel = axes[position]
        panel.clear()
        panel.scatter(plot_data[axis_name], losses, c=losses, cmap="jet", marker='.')
        if with_ylabel:
            panel.set_ylabel("loss")
        panel.set_xlabel(axis_name)

    # last panel: loss over iterations
    history = axes[1, 2]
    history.clear()
    history.plot(plot_data["iterations"], losses, "--", c=(0.8, 0.8, 0.8, 0.5))
    history.scatter(plot_data["iterations"], losses, marker='.', c=(0.2, 0.2, 0.2))
    history.set_xlabel("iterations")

    plt.draw()
    plt.tight_layout()
    plt.pause(0.001)
def my_loss_function(data, params):
- vfunc = VirtualFunction()
+ vfunc = FunctionSimulator()
vfunc.load_default("5D")
return vfunc(**params)
blackbox = BlackboxFunction(data=[],
blackbox_func=my_loss_function,
callback_func=my_visualization_function)
solver = SolverPool.get(project=project)
solver.blackbox = blackbox
solver.run()
df, best = solver.get_results()
print("\n")
print("*" * 100)
print("Best Parameter Set:\n{}".format(best))
print("*" * 100)
print("")
save_plot = input("Save Plot? [y/n] ")
if save_plot == "y":
plt.savefig('plot_{}.png'.format(project.custom_use_solver))
diff --git a/examples/tutorial_multisolver.py b/examples/tutorial_multisolver.py
index c1e0d96..9a7fe52 100644
--- a/examples/tutorial_multisolver.py
+++ b/examples/tutorial_multisolver.py
@@ -1,183 +1,183 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical Image Computing.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE
# In this tutorial we solve an optimization problem using the Hyperopt Solver (http://hyperopt.github.io/hyperopt/).
# Hyperopt uses a Bayesian - Tree Parzen Estimator - Optimization approach, which means that each iteration computes a
# new function value of the blackbox, interpolates a guess for the whole energy function and predicts a point to
# compute the next function value at. This next point is not necessarily a "better" value, it's only the value with
# the highest uncertainty for the function interpolation.
#
# See a visual explanation e.g. here (http://philipperemy.github.io/visualization/)
# import the HyppopyProject class keeping track of inputs
from hyppopy.HyppopyProject import HyppopyProject
# import the SolverPool singleton class
from hyppopy.SolverPool import SolverPool
# import the Blackboxfunction class wrapping your problem for Hyppopy
from hyppopy.BlackboxFunction import BlackboxFunction
# Next step is defining the problem space and all settings Hyppopy needs to optimize your problem.
# The config is a simple nested dictionary with two obligatory main sections, hyperparameter and settings.
# The hyperparameter section defines your searchspace. Each hyperparameter is again a dictionary with:
#
# - a domain ['categorical', 'uniform', 'normal', 'loguniform']
# - the domain data [left bound, right bound] and
# - a type of your domain ['str', 'int', 'float']
#
# The settings section has two subcategories, solver and custom. The first contains settings for the solver,
# here 'max_iterations' - is the maximum number of iterations.
#
# The custom section allows defining custom parameters. An entry here is transformed to a member variable of the
# HyppopyProject class. These can be useful when implementing new solver classes or for controlling your hyppopy script.
# Here we use it as a solver switch to control the usage of our solver via the config. This means with the script
# below you can try out every solver by changing use_solver to 'optunity', 'randomsearch', 'gridsearch',...
# It can be used like so: project.custom_use_plugin (see below). If using the gridsearch solver, max_iterations is
# ignored, instead each hyperparameter must specify a number of samples additionally to the range like so:
# 'data': [0, 1, 100] which means sampling the space from 0 to 1 in 100 intervals.
config = {
"hyperparameter": {
"C": {
"domain": "uniform",
"data": [0.0001, 20],
"type": float
},
"gamma": {
"domain": "uniform",
"data": [0.0001, 20.0],
"type": float
},
"kernel": {
"domain": "categorical",
"data": ["linear", "sigmoid", "poly", "rbf"],
"type": str
},
"decision_function_shape": {
"domain": "categorical",
"data": ["ovo", "ovr"],
"type": str
}
},
"max_iterations": 300,
-"solver": "quasirandomsearch"
+"solver": "hyperopt"
}
# When creating a HyppopyProject instance we
# pass the config dictionary to the constructor.
project = HyppopyProject(config=config)
# demonstration of the custom parameter access
print("-"*30)
print("max_iterations:\t{}".format(project.max_iterations))
print("solver chosen -> {}".format(project.solver))
print("-"*30)
# The BlackboxFunction signature is as follows:
# BlackboxFunction(blackbox_func=None,
# dataloader_func=None,
# preprocess_func=None,
# callback_func=None,
# data=None,
# **kwargs)
#
# - blackbox_func: a function pointer to the users loss function
# - dataloader_func: a function pointer for handling dataloading. The function is called once before
# optimizing. What it returns is passed as first argument to your loss functions
# data argument.
# - preprocess_func: a function pointer for data preprocessing. The function is called once before
# optimizing and gets via kwargs['data'] the raw data object set directly or returned
# from dataloader_func. What this function returns is then what is passed as first
# argument to your loss function.
# - callback_func: a function pointer called after each iteration. The input kwargs is a dictionary
# keeping the parameters used in this iteration, the 'iteration' index, the 'loss'
# and the 'status'. The function in this example is used for realtime printing it's
# input but can also be used for realtime visualization.
# - data: if not done via dataloader_func one can set a raw_data object directly
# - kwargs: dict that whose content is passed to all functions above.
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
def my_dataloader_function(**kwargs):
print("Dataloading...")
# kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input.
print("my loading argument: {}".format(kwargs['params']['my_dataloader_input']))
iris_data = load_iris()
return [iris_data.data, iris_data.target]
def my_preprocess_function(**kwargs):
print("Preprocessing...")
# kwargs['data'] allows accessing the input data
print("data:", kwargs['data'][0].shape, kwargs['data'][1].shape)
# kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input.
print("kwargs['params']['my_preproc_param']={}".format(kwargs['params']['my_preproc_param']), "\n")
# if the preprocessing function returns something,
# the input data will be replaced with the data returned by this function.
x = kwargs['data'][0]
y = kwargs['data'][1]
for i in range(x.shape[0]):
x[i, :] += kwargs['params']['my_preproc_param']
return [x, y]
def my_callback_function(**kwargs):
print("\r{}".format(kwargs), end="")
def my_loss_function(data, params):
clf = SVC(**params)
return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean()
# We now create the BlackboxFunction object and pass all function pointers defined above,
# as well as 2 dummy parameter (my_preproc_param, my_dataloader_input) for demonstration purposes.
blackbox = BlackboxFunction(blackbox_func=my_loss_function,
dataloader_func=my_dataloader_function,
preprocess_func=my_preprocess_function,
callback_func=my_callback_function,
my_preproc_param=1,
my_dataloader_input='could/be/a/path')
# Last step, is we use our SolverPool which automatically returns the correct solver.
# There are multiple ways to get the desired solver from the solver pool.
# 1. solver = SolverPool.get('hyperopt')
# solver.project = project
# 2. solver = SolverPool.get('hyperopt', project)
# 3. The SolverPool will look for the field 'use_solver' in the project instance, if
# it is present it will be used to specify the solver so that in this case it is enough
# to pass the project instance.
solver = SolverPool.get(project=project)
# Give the solver your blackbox and run it. After execution we can get the result
# via get_results() which returns a pandas dataframe containing the complete history.
# The dict best contains the best parameter set.
solver.blackbox = blackbox
#solver.start_viewer()
solver.run()
df, best = solver.get_results()
print("\n")
print("*"*100)
print("Best Parameter Set:\n{}".format(best))
print("*"*100)
diff --git a/hyppopy/BlackboxFunction.py b/hyppopy/BlackboxFunction.py
index 1348d4f..32cce46 100644
--- a/hyppopy/BlackboxFunction.py
+++ b/hyppopy/BlackboxFunction.py
@@ -1,96 +1,135 @@
-# DKFZ
-#
+# Hyppopy - A Hyper-Parameter Optimization Toolbox
#
# Copyright (c) German Cancer Research Center,
# Division of Medical Image Computing.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE
+__all__ = ['BlackboxFunction']
+
import os
import logging
import functools
from hyppopy.globals import DEBUGLEVEL
LOG = logging.getLogger(os.path.basename(__file__))
LOG.setLevel(DEBUGLEVEL)
def default_kwargs(**defaultKwargs):
    """
    Decorator factory defining default values for a function's **kwargs.

    The decorated function is called with ``defaultKwargs`` overlaid by the
    keyword arguments of the actual call, so explicitly passed values win.

    :param defaultKwargs: keyword=value pairs used when the caller omits them
    :return: decorator to apply to the target function
    """
    def actual_decorator(fn):
        @functools.wraps(fn)
        def g(*args, **kwargs):
            # Merge into a fresh dict on every call; updating defaultKwargs in
            # place would leak the arguments of one call into all later calls.
            merged = dict(defaultKwargs)
            merged.update(kwargs)
            return fn(*args, **merged)
        return g
    return actual_decorator
class BlackboxFunction(object):
+ """
+ This class is a BlackboxFunction wrapper class encapsulating the loss function. Additional function pointer can be
+ set to get access at different pipelining steps:
+
+ - dataloader_func: data loading, the function must return a data object and is called first when the solver is executed.
+ The data object returned will be the input of the blackbox function.
+ - preprocess_func: data preprocessing is called after dataloader_func, the functions signature must be foo(data, params)
+ and must return a data object. The input is the data object set directly or via dataloader_func,
+ the params are passed from constructor params.
+ - callback_func: this function is called at each iteration step getting passed the trial info content, can be used for
+ custom visualization
+ - data: add a data object directly
+ """
@default_kwargs(blackbox_func=None, dataloader_func=None, preprocess_func=None, callback_func=None, data=None)
def __init__(self, **kwargs):
+ """
+ Constructor accepts function pointers and a data object, all of which are None by default. Additionally one can define
+ an arbitrary number of arg pairs. These are passed as params to dataloader_func and preprocess_func.
+
+ :param dataloader_func: data loading function pointer, default=None
+ :param preprocess_func: data preprocessing function pointer, default=None
+ :param callback_func: callback function pointer, default=None
+ :param data: data object, default=None
+ :param kwargs: additional arg=value pairs
+ """
self._blackbox_func = None
self._preprocess_func = None
self._dataloader_func = None
self._callback_func = None
self._raw_data = None
self._data = None
self.setup(kwargs)
def __call__(self, **kwargs):
+ """
+ Call method calls blackbox_func passing the data object and the args passed
+
+ :param kwargs: [dict] args
+
+ :return: blackbox_func(data, kwargs)
+ """
return self.blackbox_func(self.data, kwargs)
def setup(self, kwargs):
+ """
+ Alternative to Constructor, kwargs signature see __init__
+
+ :param kwargs: (see __init__)
+ """
self._blackbox_func = kwargs['blackbox_func']
self._preprocess_func = kwargs['preprocess_func']
self._dataloader_func = kwargs['dataloader_func']
self._callback_func = kwargs['callback_func']
self._raw_data = kwargs['data']
self._data = self._raw_data
del kwargs['blackbox_func']
del kwargs['preprocess_func']
del kwargs['dataloader_func']
del kwargs['data']
params = kwargs
if self.dataloader_func is not None:
self._raw_data = self.dataloader_func(params=params)
assert self._raw_data is not None, "Missing data exception!"
assert self.blackbox_func is not None, "Missing blackbox fucntion exception!"
if self.preprocess_func is not None:
result = self.preprocess_func(data=self._raw_data, params=params)
if result is not None:
self._data = result
else:
self._data = self._raw_data
else:
self._data = self._raw_data
@property
def blackbox_func(self):
return self._blackbox_func
@property
def preprocess_func(self):
return self._preprocess_func
@property
def dataloader_func(self):
return self._dataloader_func
@property
def callback_func(self):
return self._callback_func
@property
def raw_data(self):
return self._raw_data
@property
def data(self):
return self._data
diff --git a/hyppopy/VirtualFunction.py b/hyppopy/FunctionSimulator.py
similarity index 85%
rename from hyppopy/VirtualFunction.py
rename to hyppopy/FunctionSimulator.py
index b7af171..38174ff 100644
--- a/hyppopy/VirtualFunction.py
+++ b/hyppopy/FunctionSimulator.py
@@ -1,223 +1,240 @@
-# DKFZ
-#
+# Hyppopy - A Hyper-Parameter Optimization Toolbox
#
# Copyright (c) German Cancer Research Center,
# Division of Medical Image Computing.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE
########################################################################################################################
# USAGE
#
-# The class VirtualFunction is meant to be a virtual energy function with an arbitrary dimensionality. The user can
+# The class FunctionSimulator is meant to be a virtual energy function with an arbitrary dimensionality. The user can
# simply scribble functions as a binary image using e.g. Gimp, defining their ranges using .cfg file and loading them
-# into the VirtualFunction. An instance of the class can then be used like a normal function returning the sampling of
+# into the FunctionSimulator. An instance of the class can then be used like a normal function returning the sampling of
# each dimension loaded.
#
# 1. create binary images (IMPORTANT same shape for each), background black the function signature white, ensure that
# each column has a white pixel. If more than one pixel appears in a column, only the lowest will be used.
#
# 2. create a .cfg file, see an example in hyppopy/virtualparameterspace
#
-# 3. vfunc = VirtualFunction()
+# 3. vfunc = FunctionSimulator()
# vfunc.load_images(path/of/your/binaryfiles/and/the/configfile)
#
# 4. use vfunc like a normal function, if you loaded 4 dimension binary images use it like f = vfunc(a,b,c,d)
########################################################################################################################
+__all__ = ['FunctionSimulator']
+
import os
import sys
import numpy as np
import configparser
from glob import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
-from hyppopy.globals import VFUNCDATAPATH
+from hyppopy.globals import FUNCTIONSIMULATOR_DATAPATH
+
+class FunctionSimulator(object):
+ """
+ The FunctionSimulator class serves as simulation tool for solver testing and evaluation purposes. It's designed to
+ simulate an energy functional by setting axis data for each dimension via binary image files. The binary image files
+ are sampled and a range interval is read from a config file. The class implements __call__ to act like a blackbox function
+ when initialized.
-class VirtualFunction(object):
+ f=f(x1,x2,...,xn) for n binary images and n axis sections in the config file;
+ as image input .png grayscale images are expected,
+ as range config a .cfg ASCII file is expected containing one section axis_XX (min_x, max_x, min_y, max_y) per image
+ """
def __init__(self):
self.config = None
self.data = None
self.axis = []
def __call__(self, *args, **kwargs):
+ """
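+ Evaluates the simulated function at the given position, expecting one hyperparameter value per dimension
+ :param args: positional axis values, one per dimension
+ :param kwargs: alternatively axis values keyed '<name>_<index>', used when len(kwargs) equals the number of dimensions
+ :return: sum over all dimensions of the linearly interpolated axis samples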
+ """
if len(kwargs) == self.dims():
args = [0]*len(kwargs)
for key, value in kwargs.items():
index = int(key.split("_")[1])
args[index] = value
assert len(args) == self.dims(), "wrong number of arguments!"
for i in range(len(args)):
assert self.axis[i][0] <= args[i] <= self.axis[i][1], "out of range access on axis {}!".format(i)
lpos, rpos, fracs = self.pos_to_indices(args)
fl = self.data[(list(range(self.dims())), lpos)]
fr = self.data[(list(range(self.dims())), rpos)]
return np.sum(fl*np.array(fracs) + fr*(1-np.array(fracs)))
def clear(self):
self.axis.clear()
self.data = None
self.config = None
def dims(self):
return self.data.shape[0]
def size(self):
return self.data.shape[1]
def range(self, dim):
return np.abs(self.axis[dim][1] - self.axis[dim][0])
def minima(self):
glob_mins = []
for dim in range(self.dims()):
x = []
fmin = np.min(self.data[dim, :])
for _x in range(self.size()):
if self.data[dim, _x] <= fmin:
x.append(_x/self.size()*(self.axis[dim][1]-self.axis[dim][0])+self.axis[dim][0])
glob_mins.append([x, fmin])
return glob_mins
def pos_to_indices(self, positions):
lpos = []
rpos = []
pfracs = []
for n in range(self.dims()):
pos = positions[n]
pos -= self.axis[n][0]
pos /= np.abs(self.axis[n][1]-self.axis[n][0])
pos *= self.data.shape[1]-1
lp = int(np.floor(pos))
if lp < 0:
lp = 0
rp = int(np.ceil(pos))
if rp > self.data.shape[1]-1:
rp = self.data.shape[1]-1
pfracs.append(1.0-(pos-np.floor(pos)))
lpos.append(lp)
rpos.append(rp)
return lpos, rpos, pfracs
def plot(self, dim=None, title=""):
if dim is None:
dim = list(range(self.dims()))
else:
dim = [dim]
fig = plt.figure(figsize=(10, 8))
for i in range(len(dim)):
width = np.abs(self.axis[dim[i]][1]-self.axis[dim[i]][0])
ax = np.arange(self.axis[dim[i]][0], self.axis[dim[i]][1], width/self.size())
plt.plot(ax, self.data[dim[i], :], '.', label='axis_{}'.format(str(dim[i]).zfill(2)))
plt.legend()
plt.grid()
plt.title(title)
plt.show()
def add_dimension(self, data, x_range):
if self.data is None:
self.data = data
if len(self.data.shape) == 1:
self.data = self.data.reshape((1, self.data.shape[0]))
else:
if len(data.shape) == 1:
data = data.reshape((1, data.shape[0]))
assert self.data.shape[1] == data.shape[1], "shape mismatch while adding dimension!"
dims = self.data.shape[0]
size = self.data.shape[1]
tmp = np.append(self.data, data)
self.data = tmp.reshape((dims+1, size))
self.axis.append(x_range)
def load_default(self, name="3D"):
- path = os.path.join(VFUNCDATAPATH, "{}".format(name))
+ path = os.path.join(FUNCTIONSIMULATOR_DATAPATH, "{}".format(name))
if os.path.exists(path):
self.load_images(path)
else:
- raise FileExistsError("No virtualfunction of dimension {} available".format(name))
+ raise FileExistsError("No FunctionSimulator of dimension {} available".format(name))
def load_images(self, path):
self.config = None
self.data = None
self.axis.clear()
img_fnames = []
for f in glob(path + os.sep + "*"):
if f.endswith(".png"):
img_fnames.append(f)
elif f.endswith(".cfg"):
self.config = self.read_config(f)
else:
print("WARNING: files of type {} not supported, the file {} is ignored!".format(f.split(".")[-1],
os.path.basename(f)))
if self.config is None:
print("Aborted, failed to read configfile!")
sys.exit()
sections = self.config.sections()
if len(sections) != len(img_fnames):
print("Aborted, inconsistent number of image tmplates and axis specifications!")
sys.exit()
img_fnames.sort()
size_x = None
size_y = None
for n, fname in enumerate(img_fnames):
img = mpimg.imread(fname)
if len(img.shape) > 2:
img = img[:, :, 0]
if size_x is None:
size_x = img.shape[1]
if size_y is None:
size_y = img.shape[0]
self.data = np.zeros((len(img_fnames), size_x), dtype=np.float32)
assert img.shape[0] == size_y, "Shape mismatch in dimension y {} is not {}".format(img.shape[0], size_y)
assert img.shape[1] == size_x, "Shape mismatch in dimension x {} is not {}".format(img.shape[1], size_x)
self.sample_image(img, n)
def sample_image(self, img, dim):
sec_name = "axis_{}".format(str(dim).zfill(2))
assert sec_name in self.config.sections(), "config section {} not found!".format(sec_name)
settings = self.get_axis_settings(sec_name)
self.axis.append([float(settings['min_x']), float(settings['max_x'])])
y_range = [float(settings['min_y']), float(settings['max_y'])]
for x in range(img.shape[1]):
candidates = np.where(img[:, x] > 0)
assert len(candidates[0]) > 0, "non function value in image detected, ensure each column has at least one value > 0!"
y_pos = candidates[0][0]/img.shape[0]
self.data[dim, x] = 1-y_pos
self.data[dim, :] *= np.abs(y_range[1] - y_range[0])
self.data[dim, :] += y_range[0]
def read_config(self, fname):
try:
config = configparser.ConfigParser()
config.read(fname)
return config
except Exception as e:
print(e)
return None
def get_axis_settings(self, section):
dict1 = {}
options = self.config.options(section)
for option in options:
try:
dict1[option] = self.config.get(section, option)
if dict1[option] == -1:
print("skip: %s" % option)
except:
print("exception on %s!" % option)
dict1[option] = None
return dict1
diff --git a/hyppopy/HyppopyProject.py b/hyppopy/HyppopyProject.py
index 7bfdc93..719f386 100644
--- a/hyppopy/HyppopyProject.py
+++ b/hyppopy/HyppopyProject.py
@@ -1,77 +1,147 @@
-# DKFZ
-#
+# Hyppopy - A Hyper-Parameter Optimization Toolbox
#
# Copyright (c) German Cancer Research Center,
# Division of Medical Image Computing.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE
-import copy
+__all__ = ['HyppopyProject']
+
+import copy
from hyppopy.globals import *
+
LOG = logging.getLogger(os.path.basename(__file__))
LOG.setLevel(DEBUGLEVEL)
class HyppopyProject(object):
+ """
+ The HyppopyProject class takes care of the optimization settings. An instance can be configured using a config
+ dictionary or by using the hyperparameter and settings methods. In case of initializing via dicts those can be
+ passed to the constructor or by using the set_config method. After initialization a HyppopyProject instance is
+ passed to a solver class which internally checks for consistency with its needs. The class distinguishes
+ between two categories, hyperparameter and general settings.
+
+ The hyperparameters are stored in a dictionary structured as follows and can be accessed via hyperparameter
+ {'param_name': {'domain': 'uniform', ...}, ...}
+
+ General settings are internally converted to class attributes and can be accessed directly or via settings
+
+ An example config could look like:
+ config = {'hyperparameter': {'myparam': {'domain': 'uniform', 'data': [0, 100], 'type': float}, ...},
+ 'my_setting_1': 3.1415,
+ 'my_setting_2': 'hello world'}
+ project = HyppopyProject(config)
+
+ The same can be achieved using:
+ project = HyppopyProject()
+ project.add_hyperparameter(name='myparam', domain='uniform', data=[0, 100], type=float)
+ project.add_setting('my_setting_1', 3.1415)
+ project.add_setting('my_setting_2', 'hello world')
+ """
def __init__(self, config=None):
+ """
+ Constructor
+
+ :param config: [dict] config dictionary of the form {'hyperparameter': {...}, ...}
+ """
self._data = {HYPERPARAMETERPATH: {}, SETTINGSPATH: {}}
if config is not None:
self.set_config(config)
+ def __parse_members(self):
+ """
+ The function converts settings into class attributes
+ """
+ for name, value in self.settings.items():
+ if name not in self.__dict__.keys():
+ setattr(self, name, value)
+ else:
+ self.__dict__[name] = value
+
def set_config(self, config):
+ """
+ Set a config dict
+
+ :param config: [dict] configuration dict defining hyperparameter and general settings
+ """
assert isinstance(config, dict), "precondition violation, config needs to be of type dict, got {}".format(type(config))
confic_cp = copy.deepcopy(config)
if HYPERPARAMETERPATH in confic_cp.keys():
self._data[HYPERPARAMETERPATH] = confic_cp[HYPERPARAMETERPATH]
del confic_cp[HYPERPARAMETERPATH]
self._data[SETTINGSPATH] = confic_cp
- self.parse_members()
+ self.__parse_members()
def set_hyperparameter(self, params):
+ """
+ This function can be used to set the hyperparameter description directly by passing the hyperparameter section
+ of a config dict (see class description). Alternatively use add_hyperparameter to add them one after another.
+
+ :param params: [dict] configuration dict defining hyperparameter
+ """
assert isinstance(params, dict), "precondition violation, params needs to be of type dict, got {}".format(type(params))
self._data[HYPERPARAMETERPATH] = params
- def set_settings(self, **kwargs):
- self._data[SETTINGSPATH] = kwargs
- self.parse_members()
-
def add_hyperparameter(self, name, **kwargs):
+ """
+ This function can be used to set hyperparameter descriptions. Alternatively use set_hyperparameter to set all at
+ once.
+
+ :param name: [str] hyperparameter name
+ :param kwargs: [dict] configuration dict defining a hyperparameter e.g. domain='uniform', data=[1,100], ...
+ """
assert isinstance(name, str), "precondition violation, name needs to be of type str, got {}".format(type(name))
self._data[HYPERPARAMETERPATH][name] = kwargs
+ def set_settings(self, **kwargs):
+ """
+ This function can be used to set the general settings directly by passing the settings as name=value pairs.
+ Alternatively use add_setting to add them one after another.
+
+ :param kwargs: [dict] settings dict e.g. my_setting_1=3.1415, my_setting_2='hello world', ...
+ """
+ self._data[SETTINGSPATH] = kwargs
+ self.__parse_members()
+
def add_setting(self, name, value):
+ """
+ This function can be used to set a single general setting. Alternatively use set_settings to set all at once.
+
+ :param name: [str] setting name
+ :param value: [object] settings value
+ """
assert isinstance(name, str), "precondition violation, name needs to be of type str, got {}".format(type(name))
self._data[SETTINGSPATH][name] = value
- self.parse_members()
-
- def parse_members(self):
- for name, value in self.settings.items():
- if name not in self.__dict__.keys():
- setattr(self, name, value)
- else:
- self.__dict__[name] = value
+ self.__parse_members()
def get_typeof(self, name):
+ """
+ Returns a hyperparameter type by name
+
+ :param name: [str] hyperparameter name
+ :return: [type] hyperparameter type
+ """
if not name in self.hyperparameter.keys():
raise LookupError("Typechecking failed, couldn't find hyperparameter {}!".format(name))
if not "type" in self.hyperparameter[name].keys():
raise LookupError("Typechecking failed, couldn't find hyperparameter signature type!")
dtype = self.hyperparameter[name]["type"]
return dtype
@property
def hyperparameter(self):
return self._data[HYPERPARAMETERPATH]
@property
def settings(self):
return self._data[SETTINGSPATH]
diff --git a/hyppopy/ProjectManager.py b/hyppopy/ProjectManager.py
deleted file mode 100644
index 0d072dd..0000000
--- a/hyppopy/ProjectManager.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical Image Computing.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE
-
-from .Singleton import *
-
-import os
-import logging
-from hyppopy.HyppopyProject import HyppopyProject
-from hyppopy.globals import DEBUGLEVEL
-
-LOG = logging.getLogger(os.path.basename(__file__))
-LOG.setLevel(DEBUGLEVEL)
-
-
-@singleton_object
-class ProjectManager(metaclass=Singleton):
-
- def __init__(self):
- self._current_project = None
- self._projects = {}
-
- def clear_all(self):
- pass
-
- def new_project(self, name="HyppopyProject", config=None):
- if name in self._projects.keys():
- name = self.check_projectname(name)
- self._projects[name] = HyppopyProject(config)
- self._current_project = self._projects[name]
- return self._current_project
-
- def check_projectname(self, name):
- split = name.split(".")
- if len(split) == 0:
- return split[0] + "." + str(0).zfill(3)
- else:
- try:
- number = int(split[-1])
- del split[-1]
- except:
- number = 0
- return '.'.join(split) + "." + str(number).zfill(3)
-
- def get_current(self):
- if self._current_project is None:
- self.new_project()
- return self._current_project
-
- def get_project(self, name):
- if name in self._projects.keys():
- self._current_project = self._projects[name]
- return self.get_current()
- return self.new_project(name)
-
- def get_projectnames(self):
- return self._projects.keys()
-
diff --git a/hyppopy/Singleton.py b/hyppopy/Singleton.py
index fac0bc8..39bd2b2 100644
--- a/hyppopy/Singleton.py
+++ b/hyppopy/Singleton.py
@@ -1,50 +1,49 @@
-# DKFZ
-#
+# Hyppopy - A Hyper-Parameter Optimization Toolbox
#
# Copyright (c) German Cancer Research Center,
# Division of Medical Image Computing.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE
class Singleton(type):
_instances = {}
def __call__(cls, *args, **kwargs):
if cls not in cls._instances:
cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
return cls._instances[cls]
@classmethod
def __instancecheck__(mcs, instance):
if instance.__class__ is mcs:
return True
else:
return isinstance(instance.__class__, mcs)
def singleton_object(cls):
"""Class decorator that transforms (and replaces) a class definition (which
must have a Singleton metaclass) with the actual singleton object. Ensures
that the resulting object can still be "instantiated" (i.e., called),
returning the same object. Also ensures the object can be pickled, is
hashable, and has the correct string representation (the name of the
singleton)
"""
assert isinstance(cls, Singleton), cls.__name__ + " must use Singleton metaclass"
def self_instantiate(self):
return self
cls.__call__ = self_instantiate
cls.__hash__ = lambda self: hash(cls)
cls.__repr__ = lambda self: cls.__name__
cls.__reduce__ = lambda self: cls.__name__
obj = cls()
obj.__name__ = cls.__name__
return obj
diff --git a/hyppopy/SolverPool.py b/hyppopy/SolverPool.py
index 1e4fd6b..71868bc 100644
--- a/hyppopy/SolverPool.py
+++ b/hyppopy/SolverPool.py
@@ -1,79 +1,96 @@
-# DKFZ
-#
+# Hyppopy - A Hyper-Parameter Optimization Toolbox
#
# Copyright (c) German Cancer Research Center,
# Division of Medical Image Computing.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE
+__all__ = ['SolverPool']
+
from .Singleton import *
import os
import logging
from hyppopy.HyppopyProject import HyppopyProject
from hyppopy.solvers.OptunaSolver import OptunaSolver
from hyppopy.solvers.HyperoptSolver import HyperoptSolver
from hyppopy.solvers.OptunitySolver import OptunitySolver
from hyppopy.solvers.GridsearchSolver import GridsearchSolver
from hyppopy.solvers.RandomsearchSolver import RandomsearchSolver
from hyppopy.solvers.QuasiRandomsearchSolver import QuasiRandomsearchSolver
from hyppopy.globals import DEBUGLEVEL
LOG = logging.getLogger(os.path.basename(__file__))
LOG.setLevel(DEBUGLEVEL)
@singleton_object
class SolverPool(metaclass=Singleton):
+ """
+ The SolverPool is a helper singleton class to get the desired solver either by name and a HyppopyProject instance or
+ by a HyppopyProject instance only, if it defines a setting field called solver.
+ """
def __init__(self):
self._solver_list = ["hyperopt",
"optunity",
"optuna",
"randomsearch",
"quasirandomsearch",
"gridsearch"]
def get_solver_names(self):
+ """
+ Returns a list of available solvers
+
+ :return: [list] solver list
+ """
return self._solver_list
def get(self, solver_name=None, project=None):
+ """
+ Get the configured solver instance
+
+ :param solver_name: [str] solver name, if None, the project must have an attribute solver keeping the solver name, default=None
+ :param project: [HyppopyProject] HyppopyProject instance
+
+ :return: [HyppopySolver] the configured solver instance
+ """
if solver_name is not None:
assert isinstance(solver_name, str), "precondition violation, solver_name type str expected, got {} instead!".format(type(solver_name))
if project is not None:
assert isinstance(project, HyppopyProject), "precondition violation, project type HyppopyProject expected, got {} instead!".format(type(project))
if "solver" in project.__dict__:
solver_name = project.solver
if solver_name not in self._solver_list:
raise AssertionError("Solver named [{}] not implemented!".format(solver_name))
if solver_name == "hyperopt":
if project is not None:
return HyperoptSolver(project)
return HyperoptSolver()
elif solver_name == "optunity":
if project is not None:
return OptunitySolver(project)
return OptunitySolver()
elif solver_name == "optuna":
if project is not None:
return OptunaSolver(project)
return OptunaSolver()
elif solver_name == "gridsearch":
if project is not None:
return GridsearchSolver(project)
return GridsearchSolver()
elif solver_name == "randomsearch":
if project is not None:
return RandomsearchSolver(project)
return RandomsearchSolver()
elif solver_name == "quasirandomsearch":
if project is not None:
return QuasiRandomsearchSolver(project)
return QuasiRandomsearchSolver()
-
diff --git a/hyppopy/VisdomViewer.py b/hyppopy/VisdomViewer.py
index b10d151..d21ee51 100644
--- a/hyppopy/VisdomViewer.py
+++ b/hyppopy/VisdomViewer.py
@@ -1,114 +1,160 @@
+# Hyppopy - A Hyper-Parameter Optimization Toolbox
+#
+# Copyright (c) German Cancer Research Center,
+# Division of Medical Image Computing.
+# All rights reserved.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.
+#
+# See LICENSE
+
+__all__ = ['VisdomViewer']
+
import warnings
import numpy as np
from visdom import Visdom
-import matplotlib.pyplot as plt
def time_formatter(time_s):
+ """
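+ Formats a time given in seconds into a more intuitive form (h, min, s or ms), depending on magnitude
+ :param time_s: [float] time in seconds
+ :return: [tuple] (value, unit) with the value truncated to three decimals and unit one of "ms", "s", "min", "h"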
+ """
if time_s < 0.01:
return int(time_s * 1000.0 * 1000) / 1000.0, "ms"
elif 100 < time_s < 3600:
return int(time_s / 60 * 1000) / 1000.0, "min"
elif time_s >= 3600:
return int(time_s / 3600 * 1000) / 1000.0, "h"
else:
return int(time_s * 1000) / 1000.0, "s"
class VisdomViewer(object):
-
+ """
+ The VisdomViewer class implements the live viewer plots via visdom. When extending, implement your plot as a method and
+ call it in update. Using this class requires starting a visdom server beforehand: $ python -m visdom.server
+ """
def __init__(self, project, port=8097, server="http://localhost"):
self._viz = Visdom(port=port, server=server)
self._enabled = self._viz.check_connection(timeout_seconds=3)
if not self._enabled:
warnings.warn("No connection to visdom server established. Visualization cannot be displayed!")
self._project = project
self._best_win = None
self._best_loss = None
self._loss_iter_plot = None
self._status_report = None
self._axis_tags = None
self._axis_plots = None
def plot_losshistory(self, input_data):
+ """
+ This function plots the loss history, i.e. the loss over iterations
+
+ :param input_data: [dict] trial info
+ """
loss = np.array([input_data["loss"]])
iter = np.array([input_data["iterations"]])
if self._loss_iter_plot is None:
self._loss_iter_plot = self._viz.line(loss, X=iter, opts=dict(
markers=True,
markersize=5,
dash=np.array(['dashdot']),
title="Loss History",
xlabel='iteration',
ylabel='loss'
))
else:
self._viz.line(loss, X=iter, win=self._loss_iter_plot, update='append')
def plot_hyperparameter(self, input_data):
+ """
+ This function plots each hyperparameter axis
+
+ :param input_data: [dict] trial info
+ """
if self._axis_plots is None:
self._axis_tags = []
self._axis_plots = {}
for item in input_data.keys():
if item == "refresh_time" or item == "book_time" or item == "iterations" or item == "status" or item == "loss":
continue
self._axis_tags.append(item)
for axis in self._axis_tags:
xlabel = "value"
if isinstance(input_data[axis], str):
if self._project.hyperparameter[axis]["domain"] == "categorical":
xlabel = '-'.join(self._project.hyperparameter[axis]["data"])
input_data[axis] = self._project.hyperparameter[axis]["data"].index(input_data[axis])
axis_loss = np.array([input_data[axis], input_data["loss"]]).reshape(1, -1)
self._axis_plots[axis] = self._viz.scatter(axis_loss, opts=dict(
markersize=5,
title=axis,
xlabel=xlabel,
ylabel='loss'))
else:
for axis in self._axis_tags:
if isinstance(input_data[axis], str):
if self._project.hyperparameter[axis]["domain"] == "categorical":
input_data[axis] = self._project.hyperparameter[axis]["data"].index(input_data[axis])
axis_loss = np.array([input_data[axis], input_data["loss"]]).reshape(1, -1)
self._viz.scatter(axis_loss, win=self._axis_plots[axis], update='append')
def show_statusreport(self, input_data):
+ """
+ This function prints status report per iteration
+
+ :param input_data: [dict] trial info
+ """
duration = input_data['refresh_time'] - input_data['book_time']
duration, time_format = time_formatter(duration.total_seconds())
report = "Iteration {}: {}{} -> {}\n".format(input_data["iterations"], duration, time_format, input_data["status"])
if self._status_report is None:
self._status_report = self._viz.text(report)
else:
self._viz.text(report, win=self._status_report, append=True)
def show_best(self, input_data):
+ """
+ Shows best parameter set
+
+ :param input_data: [dict] trial info
+ """
if self._best_win is None:
self._best_loss = input_data["loss"]
txt = "Best Parameter Set: