diff --git a/.gitignore b/.gitignore index 379200a..224a2ee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,108 +1,110 @@ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # latex *.aux *.bbl *.blg *.log *.tcp # solver_comparison examples/solver_comparison/gfx/data_I examples/solver_comparison/gfx/data_II examples/solver_comparison/gfx/data_III *.vpp.lck .pytest_cache/ *.vpp.bak_* python_tests_xml # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg .idea/ # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log # Sphinx documentation -docs/_build/ +doc/_build/ +doc/LICENSE.rst +doc/README.md # PyBuilder target/ #Ipython Notebook .ipynb_checkpoints #Pycharm files *.iml # merging stuff *.orig *~ # Paths in repository mcml.py # images etc *.tif *.nrrd *.caffemodel # C++ stuff build* *.user hyppopy/tests/test_snipped_000.py hyppopy/tests/test_snipped_001.py hyppopy/tests/test_snipped_002.py hyppopy/tests/test_snipped_003.py hyppopy/tests/test_snipped_004.py hyppopy/tests/test_snipped_005.py hyppopy/tests/test_snipped_006.py diff --git a/doc/conf.py b/doc/conf.py index 646d8a0..60de06a 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,207 +1,207 @@ # -*- coding: utf-8 -*- # # Configuration file for the Sphinx documentation builder. # # This file does only contain a selection of the most common options. 
# For a full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
from shutil import copyfile

# Repository root, resolved relative to the current working directory.
ROOT = os.path.abspath('..')
print("ROOT", ROOT)
sys.path.insert(0, ROOT)

# Copy the top-level README into the doc folder. The copy is best-effort:
# a missing or unreadable file is reported but does not abort the build.
# Only OSError (which covers FileNotFoundError, PermissionError and
# shutil.SameFileError) is caught, so that KeyboardInterrupt/SystemExit
# and genuine programming errors are no longer silently swallowed.
README_PATH_SRC = os.path.join(ROOT, "README.md")
README_PATH_DST = os.path.join(ROOT, "doc", "README.md")
print("copy", README_PATH_SRC, "to", README_PATH_DST)
try:
    copyfile(README_PATH_SRC, README_PATH_DST)
except OSError:
    print("Missing README.md file in subdir!")

# Same best-effort copy for the LICENSE file (stored with an .rst suffix).
LICENSE_PATH_SRC = os.path.join(ROOT, "LICENSE")
LICENSE_PATH_DST = os.path.join(ROOT, "doc", "LICENSE.rst")
print("copy", LICENSE_PATH_SRC, "to", LICENSE_PATH_DST)
try:
    copyfile(LICENSE_PATH_SRC, LICENSE_PATH_DST)
except OSError:
    print("Missing LICENSE file in subdir!")


# -- Project information -----------------------------------------------------

project = 'Hyppopy'
copyright = '2019, DKFZ'
author = 'S. Wanner'

# The short X.Y version
version = '0.5'
# The full version, including alpha/beta/rc tags
release = '0.5.0'


# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'recommonmark',
    'autoapi.extension'
]

autoapi_type = 'python'
autoapi_dirs = [ROOT, '']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = None


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}


# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'Hyppopydoc'


# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'Hyppopy.tex', 'Hyppopy Documentation',
     'S. Wanner', 'manual'),
]


# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'hyppopy', 'Hyppopy Documentation',
     [author], 1)
]


# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'Hyppopy', 'Hyppopy Documentation',
     author, 'Hyppopy', 'One line description of project.',
     'Miscellaneous'),
]


# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project

# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''

# A unique identification for the text.
#
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']


# -- Extension configuration -------------------------------------------------

# -- Options for todo extension ----------------------------------------------

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True

# diff --git a/hyppopy/solvers/HyppopySolver.py b/hyppopy/solvers/HyppopySolver.py
# index 1325f0a..2caf24b 100644
# --- a/hyppopy/solvers/HyppopySolver.py
# +++ b/hyppopy/solvers/HyppopySolver.py
# @@ -1,373 +1,382 @@

# Hyppopy - A Hyper-Parameter Optimization Toolbox
#
# Copyright (c) German Cancer Research Center,
# Division of Medical Image Computing.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE

import abc
import copy
import types
import datetime
import numpy as np
import pandas as pd
from hyperopt import Trials
from hyppopy.globals import *
from hyppopy.VisdomViewer import VisdomViewer
from hyppopy.HyppopyProject import HyppopyProject
from hyppopy.BlackboxFunction import BlackboxFunction
from hyppopy.VirtualFunction import VirtualFunction

from hyppopy.globals import DEBUGLEVEL
LOG = logging.getLogger(os.path.basename(__file__))
LOG.setLevel(DEBUGLEVEL)


class HyppopySolver(object):
    """
    The HyppopySolver class is the base class for all solver addons. It defines virtual functions a child class has
    to implement to deal with the front-end communication, orchestrating the optimization process and ensuring a proper
    process information storing.
    The key idea is that the HyppopySolver class defines an interface to configure and run an object instance of itself
    independently from the concrete solver lib used to optimize in the background. To achieve this goal an addon
    developer needs to implement the abstract methods 'convert_searchspace', 'execute_solver' and 'loss_function_call'.
    These methods abstract the peculiarities of the solver libs to offer, on the user side, a simple and consistent
    parameter space configuration and optimization procedure. The method 'convert_searchspace' transforms the hyppopy
    parameter space description into the solver lib specific description. The method loss_function_call is used to
    handle solver lib specifics of calling the actual blackbox function and execute_solver is executed when the run
    method is invoked and takes care of calling the solver lib solving routine.
    """

    def __init__(self, project=None):
        """
        Initialize the bookkeeping members, let the child class declare its interface via define_interface and,
        if given, attach the project (which triggers check_project through the project setter).

        :param project: [dict or HyppopyProject] optional project description, default=None
        """
        self._idx = None                        # current iteration counter
        self._best = None                       # best parameter set
        self._trials = None                     # trials object, hyppopy uses the Trials object from hyperopt
        self._blackbox = None                   # blackbox function, either a function or a BlackboxFunction instance
        self._total_duration = None             # keeps track of the solvers running time
        self._solver_overhead = None            # stores the time overhead of the solver, means total time minus time in blackbox
        self._time_per_iteration = None         # mean time per iteration
        self._accumulated_blackbox_time = None  # summed time the solver was in the blackbox function
        self._visdom_viewer = None              # visdom viewer instance

        self._child_members = {}                # this dict keeps track of the settings the child solver defines
        self._hopt_signatures = {}              # this dict keeps track of the hyperparameter signatures the child solver defines
        self.define_interface()                 # the child define interface function is called which defines settings and hyperparameter signatures
        if project is not None:
            self.project = project

    @abc.abstractmethod
    def convert_searchspace(self, hyperparameter):
        """
        This function gets the unified hyppopy-like parameterspace description as input and, if necessary, should
        convert it into a solver lib specific format. The function is invoked when run is called and what it returns
        is passed as searchspace argument to the function execute_solver.

        :param hyperparameter: [dict] nested parameter description dict e.g. {'name': {'domain':'uniform', 'data':[0,1], 'type':'float'}, ...}

        :return: [object] converted hyperparameter space
        """
        raise NotImplementedError('users must define convert_searchspace to use this class')

    @abc.abstractmethod
    def execute_solver(self, searchspace):
        """
        This function is called immediately after convert_searchspace and gets the output of the latter as input. Its
        purpose is to call the solver libs main optimization function.

        :param searchspace: converted hyperparameter space
        """
        raise NotImplementedError('users must define execute_solver to use this class')

    @abc.abstractmethod
    def loss_function_call(self, params):
        """
        This function is called within the function loss_function and encapsulates the actual blackbox function call
        in each iteration. The function loss_function takes care of the iteration driving and reporting, but each solver
        lib might need some special treatment between the parameter set selection and the calling of the actual blackbox
        function, e.g. parameter converting.

        :param params: [dict] hyperparameter space sample e.g. {'p1': 0.123, 'p2': 3.87, ...}

        :return: [float] loss
        """
        raise NotImplementedError('users must define loss_function_call to use this class')

    @abc.abstractmethod
    def define_interface(self):
        """
        This function is called from within HyppopySolver.__init__. Child classes need to define their individual
        parameters here by calling the add_member function for each class member variable that needs to be defined.
        Using add_hyperparameter_signature the structure of a hyperparameter the solver expects must be defined.
        Both, members and hyperparameter signatures, are checked later (see check_project), ensuring the settings
        passed fulfill the solver needs.
        """
        raise NotImplementedError('users must define define_interface to use this class')

    def add_member(self, name, dtype, value=None, default=None):
        """
        Register a solver setting: the value is set as attribute on the solver instance and the type, value and
        default are recorded so that check_project can look the setting up in the project later.

        :param name: [str] name of the member
        :param dtype: [type] type the value and default have to be an instance of
        :param value: [object] initial value, default=None
        :param default: [object] default value, default=None
        """
        assert isinstance(name, str), "precondition violation, name needs to be of type str, got {}".format(type(name))
        if value is not None:
            assert isinstance(value, dtype), "precondition violation, value does not match dtype condition!"
        if default is not None:
            assert isinstance(default, dtype), "precondition violation, default does not match dtype condition!"
        setattr(self, name, value)
        self._child_members[name] = {"type": dtype, "value": value, "default": default}

    def add_hyperparameter_signature(self, name, dtype, options=None):
        """
        Register a field every hyperparameter description of the project must provide.

        :param name: [str] name of the hyperparameter field, e.g. 'domain'
        :param dtype: [type] type the field value has to be an instance of
        :param options: [list] optional collection of allowed field values, default=None
        """
        assert isinstance(name, str), "precondition violation, name needs to be of type str, got {}".format(type(name))
        self._hopt_signatures[name] = {"type": dtype, "options": options}

    def loss_function(self, **params):
        """
        This function is called each iteration with a selected parameter set. The parameter set selection is driven by
        the solver lib itself. The purpose of this function is to take care of the iteration reporting and the calling
        of the callback_func if available. As a developer you might want to overwrite this function completely (e.g.
        HyperoptSolver) but then you need to take care of the iteration reporting yourself. The alternative is to only
        implement loss_function_call (e.g. OptunitySolver).

        :param params: [dict] hyperparameter space sample e.g. {'p1': 0.123, 'p2': 3.87, ...}

        :return: [float] loss
        """
        self._idx += 1
        vals = {key: [value] for key, value in params.items()}
        idx = {key: [self._idx] for key in params}
        trial = {'tid': self._idx,
                 'result': {'loss': None, 'status': 'ok'},
                 'misc': {
                     'tid': self._idx,
                     'idxs': idx,
                     'vals': vals
                 },
                 'book_time': datetime.datetime.now(),
                 'refresh_time': None
                 }
        try:
            loss = self.loss_function_call(params)
            trial['result']['loss'] = loss
            trial['result']['status'] = 'ok'
            # NaN never compares equal to anything (not even itself), so an equality test
            # against np.nan can never detect it; np.isnan is required to flag a NaN loss.
            if isinstance(loss, (float, np.floating)) and np.isnan(loss):
                trial['result']['status'] = 'failed'
        except Exception as e:
            LOG.error("computing loss failed due to:\n {}".format(e))
            loss = np.nan
            trial['result']['loss'] = np.nan
            trial['result']['status'] = 'failed'
        trial['refresh_time'] = datetime.datetime.now()
        self._trials.trials.append(trial)

        # assemble the per-iteration report handed to the callback and the viewer
        cbd = copy.deepcopy(params)
        cbd['iterations'] = self._idx
        cbd['loss'] = loss
        cbd['status'] = trial['result']['status']
        cbd['book_time'] = trial['book_time']
        cbd['refresh_time'] = trial['refresh_time']
        if isinstance(self.blackbox, BlackboxFunction) and self.blackbox.callback_func is not None:
            self.blackbox.callback_func(**cbd)
        if self._visdom_viewer is not None:
            self._visdom_viewer.update(cbd)
        return loss

    def run(self, print_stats=True):
        """
        This function starts the optimization process.

        :param print_stats: [bool] en- or disable console output

        :raises AssertionError: if converting the searchspace or executing the solver fails
        """
        self._idx = 0
        self.trials = Trials()

        start_time = datetime.datetime.now()
        try:
            search_space = self.convert_searchspace(self.project.hyperparameter)
        except Exception as e:
            msg = "Failed to convert searchspace, error: {}".format(e)
            LOG.error(msg)
            raise AssertionError(msg)
        try:
            self.execute_solver(search_space)
        except Exception as e:
            msg = "Failed to execute solver, error: {}".format(e)
            LOG.error(msg)
            raise AssertionError(msg)
        end_time = datetime.datetime.now()

        # split the elapsed time into [days, hours, minutes, seconds, milliseconds]
        dt = end_time - start_time
        days = divmod(dt.total_seconds(), 86400)
        hours = divmod(days[1], 3600)
        minutes = divmod(hours[1], 60)
        seconds = divmod(minutes[1], 1)
        milliseconds = divmod(seconds[1], 0.001)
        self._total_duration = [int(days[0]), int(hours[0]), int(minutes[0]), int(seconds[0]), int(milliseconds[0])]
        if print_stats:
            self.print_best()
            self.print_timestats()

    def get_results(self):
        """
        This function returns a complete optimization history as pandas DataFrame and a dict with the optimal parameter set.

        The DataFrame holds one row per trial with the columns 'duration' (time between book_time and refresh_time
        in milliseconds), 'losses', 'status' (True if the trial status is 'ok') and one column per hyperparameter.

        :return: [DataFrame], [dict] history and optimal parameter set
        """
        assert isinstance(self.trials, Trials), "precondition violation, wrong trials type! Maybe solver was not yet executed?"
        results = {'duration': [], 'losses': [], 'status': []}
        if not self.trials.trials:
            # nothing recorded yet, return an empty history instead of failing on trials[0]
            return pd.DataFrame.from_dict(results), self.best
        pset = self.trials.trials[0]['misc']['vals']
        for p in pset.keys():
            results[p] = []

        for trial in self.trials.trials:
            t1 = trial['book_time']
            t2 = trial['refresh_time']
            # total_seconds() covers the whole interval; timedelta.microseconds is only the
            # sub-second component and would drop every full second of a long running trial
            results['duration'].append((t2 - t1).total_seconds() * 1000.0)
            results['losses'].append(trial['result']['loss'])
            results['status'].append(trial['result']['status'] == 'ok')
            for name, values in trial['misc']['vals'].items():
                results[name].append(values[0])
        return pd.DataFrame.from_dict(results), self.best

    def print_best(self):
        """
        Print the best parameter set, the tid of the last trial plus one as number of iterations and the total
        running time to the console.
        """
        print("\n")
        print("#" * 40)
        print("### Best Parameter Choice ###")
        print("#" * 40)
        for name, value in self.best.items():
            print(" - {}\t:\t{}".format(name, value))
        print("\n - number of iterations\t:\t{}".format(self.trials.trials[-1]['tid']+1))
        print(" - total time\t:\t{}d:{}h:{}m:{}s:{}ms".format(self._total_duration[0],
                                                              self._total_duration[1],
                                                              self._total_duration[2],
                                                              self._total_duration[3],
                                                              self._total_duration[4]))
        print("#" * 40)

    def compute_time_statistics(self):
        """
        Compute the mean time per iteration, the accumulated blackbox time (both in milliseconds) and the solver
        overhead (percentage of the total duration not spent between book_time and refresh_time of the trials).
        """
        dts = []
        for trial in self._trials.trials:
            if 'book_time' in trial.keys() and 'refresh_time' in trial.keys():
                dt = trial['refresh_time'] - trial['book_time']
                dts.append(dt.total_seconds())
        self._time_per_iteration = np.mean(dts) * 1e3
        self._accumulated_blackbox_time = np.sum(dts) * 1e3
        tmp = self.total_duration - self._accumulated_blackbox_time
        # the small epsilon avoids a division by zero for a zero total duration
        self._solver_overhead = int(np.round(100.0 / (self.total_duration+1e-12) * tmp))

    def print_timestats(self):
        """
        Print the timing statistics (time per iteration, total time and solver overhead) to the console.
        """
        print("\n")
        print("#" * 40)
        print("### Timing Statistics ###")
        print("#" * 40)
        print(" - per iteration: {}ms".format(int(self.time_per_iteration*1e4)/10000))
        print(" - total time: {}d:{}h:{}m:{}s:{}ms".format(self._total_duration[0],
                                                           self._total_duration[1],
                                                           self._total_duration[2],
                                                           self._total_duration[3],
                                                           self._total_duration[4]))
        print("#" * 40)
        print(" - solver overhead: {}%".format(self.solver_overhead))

    def start_viewer(self, port=8097, server="http://localhost"):
        """
        Try to create a VisdomViewer for the current project. On failure a warning is emitted, the error is logged
        and the viewer stays disabled.

        :param port: [int] port passed to the VisdomViewer, default=8097
        :param server: [str] server address passed to the VisdomViewer, default="http://localhost"
        """
        try:
            self._visdom_viewer = VisdomViewer(self._project, port, server)
        except Exception as e:
            import warnings
            warnings.warn("Failed starting VisdomViewer. Is the server running? If not start it via $visdom")
            LOG.error("Failed starting VisdomViewer: {}".format(e))
            self._visdom_viewer = None

    def check_project(self):
        """
        Check the project against the interface the child solver defined. Every hyperparameter has to provide all
        registered signature fields with the right type and, if options are given, an allowed value. Every registered
        member has to be available in the project and its value is copied from the project settings to the solver.

        :raises LookupError: if a signature field or a settings field is missing or a value is not in the options
        :raises TypeError: if a signature field has the wrong type
        """
        # check hyperparameter signatures
        for name, param in self.project.hyperparameter.items():
            for sig, settings in self._hopt_signatures.items():
                if sig not in param.keys():
                    msg = "Missing hyperparameter signature {}!".format(sig)
                    LOG.error(msg)
                    raise LookupError(msg)
                else:
                    if not isinstance(param[sig], settings["type"]):
                        msg = "Hyperparameter signature type mismatch, expected type {} got {}!".format(settings["type"], param[sig])
                        LOG.error(msg)
                        raise TypeError(msg)
                    if settings["options"] is not None:
                        if param[sig] not in settings["options"]:
                            msg = "Wrong signature value, {} not found in signature options!".format(param[sig])
                            LOG.error(msg)
                            raise LookupError(msg)

        # check child members
        # NOTE(review): presence is tested on project.__dict__ but the value is read from
        # project.settings - confirm against HyppopyProject that both always agree.
        for name in self._child_members.keys():
            if name not in self.project.__dict__.keys():
                msg = "missing settings field {}!".format(name)
                LOG.error(msg)
                raise LookupError(msg)
            self.__dict__[name] = self.project.settings[name]

    @property
    def project(self):
        """[HyppopyProject] the project attached to this solver."""
        return self._project

    @project.setter
    def project(self, value):
        # a dict is wrapped into a HyppopyProject; any accepted project is checked right away
        if isinstance(value, dict):
            self._project = HyppopyProject(value)
        elif isinstance(value, HyppopyProject):
            self._project = value
        else:
            msg = "Input error, project_manager of type: {} not allowed!".format(type(value))
            LOG.error(msg)
            raise TypeError(msg)
        self.check_project()

    @property
    def blackbox(self):
        """[function, BlackboxFunction or VirtualFunction] the function to optimize."""
        return self._blackbox

    @blackbox.setter
    def blackbox(self, value):
        if isinstance(value, (types.FunctionType, BlackboxFunction, VirtualFunction)):
            self._blackbox = value
        else:
            # an invalid input resets the blackbox before the error is raised
            self._blackbox = None
            msg = "Input error, blackbox of type: {} not allowed!".format(type(value))
            LOG.error(msg)
            raise TypeError(msg)

    @property
    def best(self):
        """[dict] the best parameter set."""
        return self._best

    @best.setter
    def best(self, value):
        if not isinstance(value, dict):
            msg = "Input error, best of type: {} not allowed!".format(type(value))
            LOG.error(msg)
            raise TypeError(msg)
        self._best = value

    @property
    def trials(self):
        """[Trials] the trials object storing the optimization history."""
        return self._trials

    @trials.setter
    def trials(self, value):
        self._trials = value

    @property
    def total_duration(self):
        """[int] total running time in milliseconds, computed from the [d, h, m, s, ms] list set by run."""
        return (self._total_duration[0]*86400 + self._total_duration[1] * 3600 + self._total_duration[2] * 60 + self._total_duration[3]) * 1000 + self._total_duration[4]

    @property
    def solver_overhead(self):
        """[int] solver overhead in percent, computed on first access."""
        if self._solver_overhead is None:
            self.compute_time_statistics()
        return self._solver_overhead

    @property
    def time_per_iteration(self):
        """[float] mean time per iteration in milliseconds, computed on first access."""
        if self._time_per_iteration is None:
            self.compute_time_statistics()
        return self._time_per_iteration

    @property
    def accumulated_blackbox_time(self):
        """[float] summed time spent in the blackbox function in milliseconds, computed on first access."""
        if self._accumulated_blackbox_time is None:
            self.compute_time_statistics()
        return self._accumulated_blackbox_time