diff --git a/examples/solver_comparison.py b/examples/solver_comparison.py index 84d061b..8a09b41 100644 --- a/examples/solver_comparison.py +++ b/examples/solver_comparison.py @@ -1,363 +1,369 @@ -# Hyppopy - A Hyper-Parameter Optimization Toolbox +# DKFZ +# # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE import os import sys import time import pickle import numpy as np from math import pi import matplotlib.pyplot as plt from hyppopy.SolverPool import SolverPool from hyppopy.HyppopyProject import HyppopyProject -from hyppopy.VirtualFunction import VirtualFunction +from hyppopy.FunctionSimulator import FunctionSimulator from hyppopy.BlackboxFunction import BlackboxFunction -#OUTPUTDIR = "C:\\Users\\s635r\\Desktop\\solver_comparison" -OUTPUTDIR = "D:\\Projects\\Python\\hyppopy\\examples\\solver_comparison\\gfx" +OUTPUTDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), *("solver_comparison", "gfx"))) +# The solvers to be evaluated SOLVER = [] - SOLVER.append("quasirandomsearch") SOLVER.append("randomsearch") SOLVER.append("hyperopt") SOLVER.append("optunity") SOLVER.append("optuna") +# number of iterations to be tested ITERATIONS = [] ITERATIONS.append(15) ITERATIONS.append(50) ITERATIONS.append(300) ITERATIONS.append(1000) +# number of repetitions for each solver and iteration the results +# plottet are the mean and std dev of these independent trials STATREPEATS = 50 +# evaluations are stored using pickle, if OVERWRITE is True these +# are ignored and overwritten each time, set to False when only the +# plottings need to be re-evaluated OVERWRITE = False def compute_deviation(solver_name, vfunc_id, iterations, N, fname): project = HyppopyProject() project.add_hyperparameter(name="axis_00", domain="uniform", data=[0, 1], 
type=float) project.add_hyperparameter(name="axis_01", domain="uniform", data=[0, 1], type=float) project.add_hyperparameter(name="axis_02", domain="uniform", data=[0, 1], type=float) project.add_hyperparameter(name="axis_03", domain="uniform", data=[0, 1], type=float) project.add_hyperparameter(name="axis_04", domain="uniform", data=[0, 1], type=float) - vfunc = VirtualFunction() + vfunc = FunctionSimulator() vfunc.load_default(vfunc_id) minima = vfunc.minima() def my_loss_function(data, params): return vfunc(**params) blackbox = BlackboxFunction(data=[], blackbox_func=my_loss_function) results = {} results["gt"] = [] for mini in minima: results["gt"].append(np.median(mini[0])) for iter in iterations: results[iter] = {"minima": {}, "distance": {}, "duration": None, "set_difference": None, "loss": None, "loss_history": {}} for i in range(vfunc.dims()): results[iter]["minima"]["axis_0{}".format(i)] = [] results[iter]["distance"]["axis_0{}".format(i)] = [] project.add_setting("max_iterations", iter) project.add_setting("solver", solver_name) solver = SolverPool.get(project=project) solver.blackbox = blackbox axis_minima = [] best_losses = [] best_sets_diff = [] for i in range(vfunc.dims()): axis_minima.append([]) loss_history = [] durations = [] for n in range(N): print("\rSolver={} iteration={} round={}".format(solver, iter, n), end="") start = time.time() solver.run(print_stats=False) end = time.time() durations.append(end-start) df, best = solver.get_results() loss_history.append(np.flip(np.sort(df['losses'].values))) best_row = df['losses'].idxmin() best_losses.append(df['losses'][best_row]) best_sets_diff.append(abs(df['axis_00'][best_row] - best['axis_00'])+ abs(df['axis_01'][best_row] - best['axis_01'])+ abs(df['axis_02'][best_row] - best['axis_02'])+ abs(df['axis_03'][best_row] - best['axis_03'])+ abs(df['axis_04'][best_row] - best['axis_04'])) for i in range(vfunc.dims()): tmp = df['axis_0{}'.format(i)][best_row] axis_minima[i].append(tmp) 
results[iter]["loss_history"] = loss_history for i in range(vfunc.dims()): results[iter]["minima"]["axis_0{}".format(i)] = [np.mean(axis_minima[i]), np.std(axis_minima[i])] dist = np.sqrt((axis_minima[i]-results["gt"][i])**2) results[iter]["distance"]["axis_0{}".format(i)] = [np.mean(dist), np.std(dist)] results[iter]["loss"] = [np.mean(best_losses), np.std(best_losses)] results[iter]["set_difference"] = sum(best_sets_diff) results[iter]["duration"] = np.mean(durations) file = open(fname, 'wb') pickle.dump(results, file) file.close() def make_radarplot(results, title, fname=None): gt = results.pop("gt") categories = list(results[list(results.keys())[0]]["minima"].keys()) N = len(categories) angles = [n / float(N) * 2 * pi for n in range(N)] angles += angles[:1] ax = plt.subplot(1, 1, 1, polar=True, ) ax.set_theta_offset(pi / 2) ax.set_theta_direction(-1) plt.xticks(angles[:-1], categories, color='grey', size=8) ax.set_rlabel_position(0) plt.yticks([0.2, 0.4, 0.6, 0.8, 1.0], ["0.2", "0.4", "0.6", "0.8", "1.0"], color="grey", size=7) plt.ylim(0, 1) gt += gt[:1] ax.fill(angles, gt, color=(0.2, 0.8, 0.2), alpha=0.2) colors = [] cm = plt.get_cmap('Set1') if len(results) > 2: indices = list(range(0, len(results) + 1)) indices.pop(2) else: indices = list(range(0, len(results))) for i in range(len(results)): colors.append(cm(indices[i])) for iter, data in results.items(): values = [] for i in range(len(categories)): values.append(data["minima"]["axis_0{}".format(i)][0]) values += values[:1] color = colors.pop(0) ax.plot(angles, values, color=color, linewidth=2, linestyle='solid', label="iterations {}".format(iter)) plt.title(title, size=11, color=(0.1, 0.1, 0.1), y=1.1) plt.legend(bbox_to_anchor=(0.08, 1.12)) if fname is None: plt.show() else: plt.savefig(fname + ".png") #plt.savefig(fname + ".svg") plt.clf() def make_errrorbars_plot(results, fname=None): n_groups = len(results) for iter in ITERATIONS: means = [] stds = [] names = [] colors = [] axis = [] fig = 
plt.figure(figsize=(10, 8)) for solver_name, numbers in results.items(): names.append(solver_name) means.append([]) stds.append([]) for axis_name, data in numbers[iter]["distance"].items(): means[-1].append(data[0]) stds[-1].append(data[1]) if len(axis) < 5: axis.append(axis_name) for c in range(len(names)): colors.append(plt.cm.Set2(c/len(names))) index = np.arange(len(axis)) bar_width = 0.14 opacity = 0.8 error_config = {'ecolor': '0.3'} for k, name in enumerate(names): plt.bar(index + k*bar_width, means[k], bar_width, alpha=opacity, color=colors[k], yerr=stds[k], error_kw=error_config, label=name) plt.xlabel('Axis') plt.ylabel('Mean [+/- std]') plt.title('Deviation per Axis and Solver for {} Iterations'.format(iter)) plt.xticks(index + 2*bar_width, axis) plt.legend() if fname is None: plt.show() else: plt.savefig(fname + "_{}.png".format(iter)) #plt.savefig(fname + "_{}.svg".format(iter)) plt.clf() def plot_loss_histories(results, fname=None): colors = [] for c in range(len(SOLVER)): colors.append(plt.cm.Set2(c / len(SOLVER))) for iter in ITERATIONS: fig = plt.figure(figsize=(10, 8)) added_solver = [] for n, solver_name in enumerate(results.keys()): for history in results[solver_name][iter]["loss_history"]: if solver_name not in added_solver: plt.plot(history, color=colors[n], label=solver_name, alpha=0.5) added_solver.append(solver_name) else: plt.plot(history, color=colors[n], alpha=0.5) plt.legend() plt.ylabel('Loss') plt.xlabel('Iteration') if fname is None: plt.show() else: plt.savefig(fname + "_{}.png".format(iter)) plt.clf() def print_durations(results, fname=None): # colors = [] # for c in range(len(SOLVER)): # colors.append(plt.cm.Set2(c / len(SOLVER))) f = open(fname + ".txt", "w") lines = ["iterations\t"+"\t".join(SOLVER)+"\n"] for iter in ITERATIONS: txt = str(iter) + "\t" for solver_name in SOLVER: duration = results[solver_name][iter]["duration"] txt += str(duration) + "\t" txt += "\n" lines.append(txt) f.writelines(lines) f.close() durations = {} 
for iter in ITERATIONS: for solver_name in SOLVER: duration = results[solver_name][iter]["duration"] if not solver_name in durations: durations[solver_name] = duration/iter else: durations[solver_name] += duration/iter for name in durations.keys(): durations[name] /= len(ITERATIONS) fig, ax = plt.subplots(figsize=(14, 6)) # Example data y_pos = np.arange(len(durations.keys())) t = [] for solver in SOLVER: t.append(durations[solver]) print(SOLVER) print(t) ax.barh(y_pos, t, align='center', color='green') ax.set_yticks(y_pos) ax.set_yticklabels(SOLVER) ax.invert_yaxis() ax.set_xscale('log') ax.set_xlabel('Duration in [s]') ax.set_title('Mean Solver Computation Time per Iteration') if fname is None: plt.show() else: plt.savefig(fname + ".png") # plt.savefig(fname + "_{}.svg".format(iter)) plt.clf() id2dirmapping = {"5D": "data_I", "5D2": "data_II", "5D3": "data_III"} if __name__ == "__main__": vfunc_ID = "5D" if len(sys.argv) == 2: vfunc_ID = sys.argv[1] print("Start Evaluation on {}".format(vfunc_ID)) OUTPUTDIR = os.path.join(OUTPUTDIR, id2dirmapping[vfunc_ID]) if not os.path.isdir(OUTPUTDIR): os.makedirs(OUTPUTDIR) ################################################## ############### create datasets ################## fnames = [] for solver_name in SOLVER: fname = os.path.join(OUTPUTDIR, solver_name) fnames.append(fname) if OVERWRITE or not os.path.isfile(fname): compute_deviation(solver_name, vfunc_ID, ITERATIONS, N=STATREPEATS, fname=fname) ################################################## ################################################## ################################################## ############## create radarplots ################# all_results = {} for solver_name, fname in zip(SOLVER, fnames): file = open(fname, 'rb') results = pickle.load(file) file.close() make_radarplot(results, solver_name, fname + "_deviation") all_results[solver_name] = results fname = os.path.join(OUTPUTDIR, "errorbars") make_errrorbars_plot(all_results, fname) fname = 
os.path.join(OUTPUTDIR, "losshistory") plot_loss_histories(all_results, fname) fname = os.path.join(OUTPUTDIR, "durations") print_durations(all_results, fname) for solver_name, iterations in all_results.items(): for iter, numbers in iterations.items(): if numbers["set_difference"] != 0: print("solver {} has a different parameter set match in iteration {}".format(solver_name, iter)) ################################################## ################################################## plt.imsave(fname=os.path.join(OUTPUTDIR, "dummy.png"), arr=np.ones((800, 1000, 3), dtype=np.uint8)*255) diff --git a/examples/tutorial_custom_visualization.py b/examples/tutorial_custom_visualization.py index f40cde5..c5da2de 100644 --- a/examples/tutorial_custom_visualization.py +++ b/examples/tutorial_custom_visualization.py @@ -1,104 +1,105 @@ -# Hyppopy - A Hyper-Parameter Optimization Toolbox +# DKFZ +# # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. 
# # See LICENSE import matplotlib.pylab as plt from hyppopy.SolverPool import SolverPool from hyppopy.HyppopyProject import HyppopyProject -from hyppopy.VirtualFunction import VirtualFunction +from hyppopy.FunctionSimulator import FunctionSimulator from hyppopy.BlackboxFunction import BlackboxFunction project = HyppopyProject() project.add_hyperparameter(name="axis_00", domain="uniform", data=[0, 1], type=float) project.add_hyperparameter(name="axis_01", domain="uniform", data=[0, 1], type=float) project.add_hyperparameter(name="axis_02", domain="uniform", data=[0, 1], type=float) project.add_hyperparameter(name="axis_03", domain="uniform", data=[0, 1], type=float) project.add_hyperparameter(name="axis_04", domain="uniform", data=[0, 1], type=float) project.add_setting("max_iterations", 500) project.add_setting("solver", "randomsearch") plt.ion() fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(12, 8), sharey=True) plot_data = {"iterations": [], "loss": [], "axis_00": [], "axis_01": [], "axis_02": [], "axis_03": [], "axis_04": []} def my_visualization_function(**kwargs): print("\r{}".format(kwargs), end="") plot_data["iterations"].append(kwargs['iterations']) plot_data["loss"].append(kwargs['loss']) plot_data["axis_00"].append(kwargs['axis_00']) plot_data["axis_01"].append(kwargs['axis_01']) plot_data["axis_02"].append(kwargs['axis_02']) plot_data["axis_03"].append(kwargs['axis_03']) plot_data["axis_04"].append(kwargs['axis_04']) axes[0, 0].clear() axes[0, 0].scatter(plot_data["axis_00"], plot_data["loss"], c=plot_data["loss"], cmap="jet", marker='.') axes[0, 0].set_ylabel("loss") axes[0, 0].set_xlabel("axis_00") axes[0, 1].clear() axes[0, 1].scatter(plot_data["axis_01"], plot_data["loss"], c=plot_data["loss"], cmap="jet", marker='.') axes[0, 1].set_xlabel("axis_01") axes[0, 2].clear() axes[0, 2].scatter(plot_data["axis_02"], plot_data["loss"], c=plot_data["loss"], cmap="jet", marker='.') axes[0, 2].set_xlabel("axis_02") axes[1, 0].clear() axes[1, 
0].scatter(plot_data["axis_03"], plot_data["loss"], c=plot_data["loss"], cmap="jet", marker='.') axes[1, 0].set_ylabel("loss") axes[1, 0].set_xlabel("axis_03") axes[1, 1].clear() axes[1, 1].scatter(plot_data["axis_04"], plot_data["loss"], c=plot_data["loss"], cmap="jet", marker='.') axes[1, 1].set_xlabel("axis_04") axes[1, 2].clear() axes[1, 2].plot(plot_data["iterations"], plot_data["loss"], "--", c=(0.8, 0.8, 0.8, 0.5)) axes[1, 2].scatter(plot_data["iterations"], plot_data["loss"], marker='.', c=(0.2, 0.2, 0.2)) axes[1, 2].set_xlabel("iterations") plt.draw() plt.tight_layout() plt.pause(0.001) def my_loss_function(data, params): - vfunc = VirtualFunction() + vfunc = FunctionSimulator() vfunc.load_default("5D") return vfunc(**params) blackbox = BlackboxFunction(data=[], blackbox_func=my_loss_function, callback_func=my_visualization_function) solver = SolverPool.get(project=project) solver.blackbox = blackbox solver.run() df, best = solver.get_results() print("\n") print("*" * 100) print("Best Parameter Set:\n{}".format(best)) print("*" * 100) print("") save_plot = input("Save Plot? [y/n] ") if save_plot == "y": plt.savefig('plot_{}.png'.format(project.custom_use_solver)) diff --git a/examples/tutorial_gridsearch.py b/examples/tutorial_gridsearch.py index 1f8a9ec..6f45995 100644 --- a/examples/tutorial_gridsearch.py +++ b/examples/tutorial_gridsearch.py @@ -1,127 +1,128 @@ -# Hyppopy - A Hyper-Parameter Optimization Toolbox +# DKFZ +# # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE # In this tutorial we solve an optimization problem using the GridsearchSolver # Gridsearch is very inefficient a Randomsearch might most of the time be the # better choice. 
# import the HyppopyProject class keeping track of inputs from hyppopy.HyppopyProject import HyppopyProject # import the GridsearchSolver classes from hyppopy.solvers.GridsearchSolver import GridsearchSolver # import the Blackboxfunction class wrapping your problem for Hyppopy from hyppopy.BlackboxFunction import BlackboxFunction # To configure the GridsearchSolver we only need the hyperparameter section. Another # difference to the other solvers is that we need to define a gridsampling in addition # to the range: 'data': [0, 1, 100] which means sampling the space from 0 to 1 in 100 # intervals. Gridsearch also supports categorical, uniform, normal and lognormal sampling config = { "hyperparameter": { "C": { "domain": "uniform", "data": [0.0001, 20], "type": float, "frequency": 20 }, "gamma": { "domain": "uniform", "data": [0.0001, 20.0], "type": float, "frequency": 20 }, "kernel": { "domain": "categorical", "data": ["linear", "sigmoid", "poly", "rbf"], "type": str, "frequency": 1 } }} # When creating a HyppopyProject instance we # pass the config dictionary to the constructor. project = HyppopyProject(config=config) # Hyppopy offers a class called BlackboxFunction to wrap your problem for Hyppopy. # The function signature is as follows: # BlackboxFunction(blackbox_func=None, # dataloader_func=None, # preprocess_func=None, # callback_func=None, # data=None, # **kwargs) # # Means we can set a couple of function pointers, a data object and an arbitrary number of custom parameter via kwargs. 
# # - blackbox_func: a function pointer to the actual, user defined, blackbox function that is computing our loss # - dataloader_func: a function pointer to a function handling the dataloading # - preprocess_func: a function pointer to a function automatically executed before starting the optimization process # - callback_func: a function pointer to a function that is called after each iteration with the trail object as input # - data: setting data can be done via dataloader_func or directly # - kwargs are passed to all functions above and thus can be used for parameter sharing between the functions # # (more details see in the documentation) # # Below we demonstrate the usage of all the above by defining a my_dataloader_function which in fact only grabs the # iris dataset from sklearn and returns it. A my_preprocess_function which also does nothing useful here but # demonstrating that a custom parameter can be set via kwargs and used in all of our functions when called within # Hyppopy. The my_callback_function gets as input the dictionary containing the state of the iteration and thus can be # used to access the current state of each solver iteration. Finally we define the actual loss_function # my_loss_function, which gets as input a data object and params. Both parameter are fixed, the first is defined by # the user depending on what is dataloader returns or the data object set in the constructor, the second is a dictionary # with a sample of your hyperparameter space which content is in the choice of the solver. 
from sklearn.svm import SVC from sklearn.datasets import load_iris from sklearn.model_selection import cross_val_score def my_dataloader_function(**kwargs): print("Dataloading...") iris_data = load_iris() return [iris_data.data, iris_data.target] def my_callback_function(**kwargs): print("\r{}".format(kwargs), end="") def my_loss_function(data, params): clf = SVC(**params) return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean() # We now create the BlackboxFunction object and pass all function pointers defined above, # as well as 2 dummy parameter (my_preproc_param, my_dataloader_input) for demonstration purposes. blackbox = BlackboxFunction(blackbox_func=my_loss_function, dataloader_func=my_dataloader_function, callback_func=my_callback_function) # create a solver instance solver = GridsearchSolver(project) # pass the loss function to the solver solver.blackbox = blackbox # run the solver solver.run() # get the result via get_result() which returns a pandas dataframe # containing the complete history and a dict best containing the # best parameter set. df, best = solver.get_results() print("\n") print("*"*100) print("Best Parameter Set:\n{}".format(best)) print("*"*100) diff --git a/examples/tutorial_hyppopyprojectclass.py b/examples/tutorial_hyppopyprojectclass.py index b6d89a4..ec98ad7 100644 --- a/examples/tutorial_hyppopyprojectclass.py +++ b/examples/tutorial_hyppopyprojectclass.py @@ -1,63 +1,64 @@ -# Hyppopy - A Hyper-Parameter Optimization Toolbox +# DKFZ +# # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. 
# # See LICENSE # In this tutorial we demonstrate the HyppopyProject class usage # import the HyppopyProject class from hyppopy.HyppopyProject import HyppopyProject # To configure a solver we need to instanciate a HyppopyProject class. # This class can be configured using a nested dict. This dict has two # obligatory sections, hyperparameter and settings. A hyperparameter # is described using a dict containing a section, data and type field # and thus the hyperparameter section is a collection of hyperparameter # dicts. The settings section keeps solver settings. These might depend # on the solver used and need to be checked for each. E.g. Randomsearch, # Hyperopt and Optunity need a solver setting max_iterations, the Grid- # searchSolver don't. config = { "hyperparameter": { "C": { "domain": "uniform", "data": [0.0001, 20], "type": float }, "gamma": { "domain": "uniform", "data": [0.0001, 20.0], "type": float }, "kernel": { "domain": "categorical", "data": ["linear", "sigmoid", "poly", "rbf"], "type": str }}, "max_iterations": 500 } # When creating a HyppopyProject instance we # pass the config dictionary to the constructor. 
project = HyppopyProject(config=config) # When building the project programmatically we can also use the methods # add_hyperparameter and add_settings project = HyppopyProject() project.add_hyperparameter(name="C", domain="uniform", data=[0.0001, 20], dtype="float") project.add_hyperparameter(name="kernel", domain="categorical", data=["linear", "sigmoid"], dtype="str") project.set_settings(max_iterations=500) # The custom section can be used freely project.add_setting("my_var", 10) # Settings are automatically transformed to member variables of the project class with the section as prefix if project.max_iterations < 1000 and project.my_var == 10: print("Project configured!") diff --git a/examples/tutorial_multisolver.py b/examples/tutorial_multisolver.py index 8ef9581..c1e0d96 100644 --- a/examples/tutorial_multisolver.py +++ b/examples/tutorial_multisolver.py @@ -1,182 +1,183 @@ -# Hyppopy - A Hyper-Parameter Optimization Toolbox +# DKFZ +# # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE # In this tutorial we solve an optimization problem using the Hyperopt Solver (http://hyperopt.github.io/hyperopt/). # Hyperopt uses a Baysian - Tree Parzen Estimator - Optimization approach, which means that each iteration computes a # new function value of the blackbox, interpolates a guess for the whole energy function and predicts a point to # compute the next function value at. This next point is not necessarily a "better" value, it's only the value with # the highest uncertainty for the function interpolation. # # See a visual explanation e.g. 
here (http://philipperemy.github.io/visualization/) # import the HyppopyProject class keeping track of inputs from hyppopy.HyppopyProject import HyppopyProject # import the SolverPool singleton class from hyppopy.SolverPool import SolverPool # import the Blackboxfunction class wrapping your problem for Hyppopy from hyppopy.BlackboxFunction import BlackboxFunction # Next step is defining the problem space and all settings Hyppopy needs to optimize your problem. # The config is a simple nested dictionary with two obligatory main sections, hyperparameter and settings. # The hyperparameter section defines your searchspace. Each hyperparameter is again a dictionary with: # # - a domain ['categorical', 'uniform', 'normal', 'loguniform'] # - the domain data [left bound, right bound] and # - a type of your domain ['str', 'int', 'float'] # # The settings section has two subcategories, solver and custom. The first contains settings for the solver, # here 'max_iterations' - is the maximum number of iteration. # # The custom section allows defining custom parameter. An entry here is transformed to a member variable of the # HyppopyProject class. These can be useful when implementing new solver classes or for control your hyppopy script. # Here we use it as a solver switch to control the usage of our solver via the config. This means with the script # below your can try out every solver by changing use_solver to 'optunity', 'randomsearch', 'gridsearch',... # It can be used like so: project.custom_use_plugin (see below) If using the gridsearch solver, max_iterations is # ignored, instead each hyperparameter must specifiy a number of samples additionally to the range like so: # 'data': [0, 1, 100] which means sampling the space from 0 to 1 in 100 intervals. 
config = { "hyperparameter": { "C": { "domain": "uniform", "data": [0.0001, 20], "type": float }, "gamma": { "domain": "uniform", "data": [0.0001, 20.0], "type": float }, "kernel": { "domain": "categorical", "data": ["linear", "sigmoid", "poly", "rbf"], "type": str }, "decision_function_shape": { "domain": "categorical", "data": ["ovo", "ovr"], "type": str } }, "max_iterations": 300, "solver": "quasirandomsearch" } # When creating a HyppopyProject instance we # pass the config dictionary to the constructor. project = HyppopyProject(config=config) # demonstration of the custom parameter access print("-"*30) print("max_iterations:\t{}".format(project.max_iterations)) print("solver chosen -> {}".format(project.solver)) print("-"*30) # The BlackboxFunction signature is as follows: # BlackboxFunction(blackbox_func=None, # dataloader_func=None, # preprocess_func=None, # callback_func=None, # data=None, # **kwargs) # # - blackbox_func: a function pointer to the users loss function # - dataloader_func: a function pointer for handling dataloading. The function is called once before # optimizing. What it returns is passed as first argument to your loss functions # data argument. # - preprocess_func: a function pointer for data preprocessing. The function is called once before # optimizing and gets via kwargs['data'] the raw data object set directly or returned # from dataloader_func. What this function returns is then what is passed as first # argument to your loss function. # - callback_func: a function pointer called after each iteration. The input kwargs is a dictionary # keeping the parameters used in this iteration, the 'iteration' index, the 'loss' # and the 'status'. The function in this example is used for realtime printing it's # input but can also be used for realtime visualization. # - data: if not done via dataloader_func one can set a raw_data object directly # - kwargs: dict that whose content is passed to all functions above. 
from sklearn.svm import SVC from sklearn.datasets import load_iris from sklearn.model_selection import cross_val_score def my_dataloader_function(**kwargs): print("Dataloading...") # kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input. print("my loading argument: {}".format(kwargs['params']['my_dataloader_input'])) iris_data = load_iris() return [iris_data.data, iris_data.target] def my_preprocess_function(**kwargs): print("Preprocessing...") # kwargs['data'] allows accessing the input data print("data:", kwargs['data'][0].shape, kwargs['data'][1].shape) # kwargs['params'] allows accessing additional parameter passed, see below my_preproc_param, my_dataloader_input. print("kwargs['params']['my_preproc_param']={}".format(kwargs['params']['my_preproc_param']), "\n") # if the preprocessing function returns something, # the input data will be replaced with the data returned by this function. x = kwargs['data'][0] y = kwargs['data'][1] for i in range(x.shape[0]): x[i, :] += kwargs['params']['my_preproc_param'] return [x, y] def my_callback_function(**kwargs): print("\r{}".format(kwargs), end="") def my_loss_function(data, params): clf = SVC(**params) return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean() # We now create the BlackboxFunction object and pass all function pointers defined above, # as well as 2 dummy parameter (my_preproc_param, my_dataloader_input) for demonstration purposes. blackbox = BlackboxFunction(blackbox_func=my_loss_function, dataloader_func=my_dataloader_function, preprocess_func=my_preprocess_function, callback_func=my_callback_function, my_preproc_param=1, my_dataloader_input='could/be/a/path') # Last step, is we use our SolverPool which automatically returns the correct solver. # There are multiple ways to get the desired solver from the solver pool. # 1. solver = SolverPool.get('hyperopt') # solver.project = project # 2. 
solver = SolverPool.get('hyperopt', project) # 3. The SolverPool will look for the field 'use_solver' in the project instance, if # it is present it will be used to specify the solver so that in this case it is enough # to pass the project instance. solver = SolverPool.get(project=project) # Give the solver your blackbox and run it. After execution we can get the result # via get_result() which returns a pandas dataframe containing the complete history # The dict best contains the best parameter set. solver.blackbox = blackbox #solver.start_viewer() solver.run() df, best = solver.get_results() print("\n") print("*"*100) print("Best Parameter Set:\n{}".format(best)) print("*"*100) diff --git a/examples/tutorial_simple.py b/examples/tutorial_simple.py index 0e7b8f8..199bc9e 100644 --- a/examples/tutorial_simple.py +++ b/examples/tutorial_simple.py @@ -1,79 +1,80 @@ -# Hyppopy - A Hyper-Parameter Optimization Toolbox +# DKFZ +# # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE # A hyppopy minimal example optimizing a simple demo function f(x,y) = x**2+y**2 # import the HyppopyProject class keeping track of inputs from hyppopy.HyppopyProject import HyppopyProject # import the HyperoptSolver class from hyppopy.solvers.HyperoptSolver import HyperoptSolver # To configure the Hyppopy solver we use a simple nested dictionary with two obligatory main sections, # hyperparameter and settings. The hyperparameter section defines your searchspace. Each hyperparameter # is again a dictionary with: # # - a domain ['categorical', 'uniform', 'normal', 'loguniform'] # - the domain data [left bound, right bound] and # - a type of your domain ['str', 'int', 'float'] # # The settings section has two subcategories, solver and custom. 
The first contains settings for the solver, # here 'max_iterations' - is the maximum number of iteration. # # The custom section allows defining custom parameter. An entry here is transformed to a member variable of the # HyppopyProject class. These can be useful when implementing new solver classes or for control your hyppopy script. # Here we use it as a solver switch to control the usage of our solver via the config. This means with the script # below your can try out every solver by changing use_solver to 'optunity', 'randomsearch', 'gridsearch',... # It can be used like so: project.custom_use_plugin (see below) If using the gridsearch solver, max_iterations is # ignored, instead each hyperparameter must specifiy a number of samples additionally to the range like so: # 'data': [0, 1, 100] which means sampling the space from 0 to 1 in 100 intervals. config = { "hyperparameter": { "x": { "domain": "normal", "data": [-10.0, 10.0], "type": float }, "y": { "domain": "uniform", "data": [-10.0, 10.0], "type": float } }, "max_iterations": 500 } # When creating a HyppopyProject instance we # pass the config dictionary to the constructor. project = HyppopyProject(config=config) # The user defined loss function def my_loss_function(x, y): return x**2+y**2 # create a solver instance solver = HyperoptSolver(project) # pass the loss function to the solver solver.blackbox = my_loss_function # run the solver solver.run() df, best = solver.get_results() print("\n") print("*"*100) print("Best Parameter Set:\n{}".format(best)) print("*"*100) diff --git a/hyppopy/BlackboxFunction.py b/hyppopy/BlackboxFunction.py index 658af48..32cce46 100644 --- a/hyppopy/BlackboxFunction.py +++ b/hyppopy/BlackboxFunction.py @@ -1,98 +1,135 @@ # Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. 
def default_kwargs(**defaultKwargs):
    """
    Decorator injecting default values for **kwargs-style arguments.

    The defaults given to the decorator are merged with the kwargs of each
    call; call-site kwargs win over the defaults.
    """
    def actual_decorator(fn):
        @functools.wraps(fn)
        def g(*args, **kwargs):
            # Merge into a fresh dict on every call. The previous
            # implementation called defaultKwargs.update(kwargs), mutating
            # the shared default dict so that overrides from one call leaked
            # into all subsequent calls of the decorated function.
            merged = dict(defaultKwargs)
            merged.update(kwargs)
            return fn(*args, **merged)
        return g
    return actual_decorator


class BlackboxFunction(object):
    """
    Wrapper class encapsulating the user's loss function. Additional function
    pointers can be set to hook into different pipelining steps:

    - dataloader_func: data loading, called first when the solver is executed;
      its return value becomes the input of the blackbox function.
    - preprocess_func: data preprocessing, called after dataloader_func with
      keyword signature foo(data=..., params=...); if it returns a non-None
      value, that value replaces the data object.
    - callback_func: called at each iteration with the trial info content,
      can be used for custom visualization.
    - data: a data object set directly instead of via dataloader_func.
    """

    @default_kwargs(blackbox_func=None, dataloader_func=None, preprocess_func=None, callback_func=None, data=None)
    def __init__(self, **kwargs):
        """
        Constructor accepts function pointers or a data object, all None by
        default. Any additional name=value pairs are forwarded as 'params'
        to dataloader_func and preprocess_func.

        :param blackbox_func: loss function pointer, default=None
        :param dataloader_func: data loading function pointer, default=None
        :param preprocess_func: data preprocessing function pointer, default=None
        :param callback_func: callback function pointer, default=None
        :param data: data object, default=None
        :param kwargs: additional arg=value pairs
        """
        self._blackbox_func = None
        self._preprocess_func = None
        self._dataloader_func = None
        self._callback_func = None
        self._raw_data = None
        self._data = None
        self.setup(kwargs)

    def __call__(self, **kwargs):
        """
        Evaluates blackbox_func passing the data object and the args passed.

        :param kwargs: [dict] hyperparameter args

        :return: blackbox_func(data, kwargs)
        """
        return self.blackbox_func(self.data, kwargs)

    def setup(self, kwargs):
        """
        Alternative to the constructor; same kwargs signature as __init__.

        Resolves the data pipeline: data is loaded via dataloader_func (if
        set), then preprocessed via preprocess_func (if set). Raises
        AssertionError when no data or no blackbox function is available.

        :param kwargs: (see __init__)
        """
        self._blackbox_func = kwargs['blackbox_func']
        self._preprocess_func = kwargs['preprocess_func']
        self._dataloader_func = kwargs['dataloader_func']
        self._callback_func = kwargs['callback_func']
        self._raw_data = kwargs['data']
        self._data = self._raw_data
        del kwargs['blackbox_func']
        del kwargs['preprocess_func']
        del kwargs['dataloader_func']
        del kwargs['data']
        # NOTE(review): 'callback_func' is not removed from kwargs in the
        # original, so it ends up inside the params dict handed to
        # dataloader_func/preprocess_func — kept as-is, confirm intended.
        params = kwargs
        if self.dataloader_func is not None:
            self._raw_data = self.dataloader_func(params=params)
        assert self._raw_data is not None, "Missing data exception!"
        # typo fix in the message: 'fucntion' -> 'function'
        assert self.blackbox_func is not None, "Missing blackbox function exception!"
        if self.preprocess_func is not None:
            result = self.preprocess_func(data=self._raw_data, params=params)
            if result is not None:
                self._data = result
            else:
                self._data = self._raw_data
        else:
            self._data = self._raw_data

    @property
    def blackbox_func(self):
        # user loss function pointer
        return self._blackbox_func

    @property
    def preprocess_func(self):
        # optional preprocessing function pointer
        return self._preprocess_func

    @property
    def dataloader_func(self):
        # optional data loading function pointer
        return self._dataloader_func

    @property
    def callback_func(self):
        # optional per-iteration callback pointer
        return self._callback_func

    @property
    def raw_data(self):
        # data as set directly or returned by dataloader_func
        return self._raw_data

    @property
    def data(self):
        # data after optional preprocessing; input to the blackbox function
        return self._data
create binary images (IMPORTANT same shape for each), background black the function signature white, ensure that # each column has a white pixel. If more than one pixel appears in a column, only the lowest will be used. # # 2. create a .cfg file, see an example in hyppopy/virtualparameterspace # -# 3. vfunc = VirtualFunction() +# 3. vfunc = FunctionSimulator() # vfunc.load_images(path/of/your/binaryfiles/and/the/configfile) # # 4. use vfunc like a normal function, if you loaded 4 dimension binary images use it like f = vfunc(a,b,c,d) ######################################################################################################################## +__all__ = ['FunctionSimulator'] + import os import sys import numpy as np import configparser from glob import glob import matplotlib.pyplot as plt import matplotlib.image as mpimg -from hyppopy.globals import VFUNCDATAPATH +from hyppopy.globals import FUNCTIONSIMULATOR_DATAPATH + +class FunctionSimulator(object): + """ + The FunctionSimulator class serves as simulation tool for solver testing and evaluation purposes. It's designed to + simulate an energy functional by setting axis data for each dimension via binary image files. The binary image files + are sampled and a range interval is read from a config file. The class implements __call__ to act like a blackbox function + when initialized. -class VirtualFunction(object): + f=f(x1,x2,...,xn) [for n binary images and n range config files + as image input .png grayscale images are expected + as range config .cfg ascii files are expected containing + """ def __init__(self): self.config = None self.data = None self.axis = [] def __call__(self, *args, **kwargs): + """ + the call function expects the hyperparameter + :param args: + :param kwargs: + :return: + """ if len(kwargs) == self.dims(): args = [0]*len(kwargs) for key, value in kwargs.items(): index = int(key.split("_")[1]) args[index] = value assert len(args) == self.dims(), "wrong number of arguments!" 
for i in range(len(args)): assert self.axis[i][0] <= args[i] <= self.axis[i][1], "out of range access on axis {}!".format(i) lpos, rpos, fracs = self.pos_to_indices(args) fl = self.data[(list(range(self.dims())), lpos)] fr = self.data[(list(range(self.dims())), rpos)] return np.sum(fl*np.array(fracs) + fr*(1-np.array(fracs))) def clear(self): self.axis.clear() self.data = None self.config = None def dims(self): return self.data.shape[0] def size(self): return self.data.shape[1] def range(self, dim): return np.abs(self.axis[dim][1] - self.axis[dim][0]) def minima(self): glob_mins = [] for dim in range(self.dims()): x = [] fmin = np.min(self.data[dim, :]) for _x in range(self.size()): if self.data[dim, _x] <= fmin: x.append(_x/self.size()*(self.axis[dim][1]-self.axis[dim][0])+self.axis[dim][0]) glob_mins.append([x, fmin]) return glob_mins def pos_to_indices(self, positions): lpos = [] rpos = [] pfracs = [] for n in range(self.dims()): pos = positions[n] pos -= self.axis[n][0] pos /= np.abs(self.axis[n][1]-self.axis[n][0]) pos *= self.data.shape[1]-1 lp = int(np.floor(pos)) if lp < 0: lp = 0 rp = int(np.ceil(pos)) if rp > self.data.shape[1]-1: rp = self.data.shape[1]-1 pfracs.append(1.0-(pos-np.floor(pos))) lpos.append(lp) rpos.append(rp) return lpos, rpos, pfracs def plot(self, dim=None, title=""): if dim is None: dim = list(range(self.dims())) else: dim = [dim] fig = plt.figure(figsize=(10, 8)) for i in range(len(dim)): width = np.abs(self.axis[dim[i]][1]-self.axis[dim[i]][0]) ax = np.arange(self.axis[dim[i]][0], self.axis[dim[i]][1], width/self.size()) plt.plot(ax, self.data[dim[i], :], '.', label='axis_{}'.format(str(dim[i]).zfill(2))) plt.legend() plt.grid() plt.title(title) plt.show() def add_dimension(self, data, x_range): if self.data is None: self.data = data if len(self.data.shape) == 1: self.data = self.data.reshape((1, self.data.shape[0])) else: if len(data.shape) == 1: data = data.reshape((1, data.shape[0])) assert self.data.shape[1] == data.shape[1], 
"shape mismatch while adding dimension!" dims = self.data.shape[0] size = self.data.shape[1] tmp = np.append(self.data, data) self.data = tmp.reshape((dims+1, size)) self.axis.append(x_range) def load_default(self, name="3D"): - path = os.path.join(VFUNCDATAPATH, "{}".format(name)) + path = os.path.join(FUNCTIONSIMULATOR_DATAPATH, "{}".format(name)) if os.path.exists(path): self.load_images(path) else: - raise FileExistsError("No virtualfunction of dimension {} available".format(name)) + raise FileExistsError("No FunctionSimulator of dimension {} available".format(name)) def load_images(self, path): self.config = None self.data = None self.axis.clear() img_fnames = [] for f in glob(path + os.sep + "*"): if f.endswith(".png"): img_fnames.append(f) elif f.endswith(".cfg"): self.config = self.read_config(f) else: print("WARNING: files of type {} not supported, the file {} is ignored!".format(f.split(".")[-1], os.path.basename(f))) if self.config is None: print("Aborted, failed to read configfile!") sys.exit() sections = self.config.sections() if len(sections) != len(img_fnames): print("Aborted, inconsistent number of image tmplates and axis specifications!") sys.exit() img_fnames.sort() size_x = None size_y = None for n, fname in enumerate(img_fnames): img = mpimg.imread(fname) if len(img.shape) > 2: img = img[:, :, 0] if size_x is None: size_x = img.shape[1] if size_y is None: size_y = img.shape[0] self.data = np.zeros((len(img_fnames), size_x), dtype=np.float32) assert img.shape[0] == size_y, "Shape mismatch in dimension y {} is not {}".format(img.shape[0], size_y) assert img.shape[1] == size_x, "Shape mismatch in dimension x {} is not {}".format(img.shape[1], size_x) self.sample_image(img, n) def sample_image(self, img, dim): sec_name = "axis_{}".format(str(dim).zfill(2)) assert sec_name in self.config.sections(), "config section {} not found!".format(sec_name) settings = self.get_axis_settings(sec_name) self.axis.append([float(settings['min_x']), 
float(settings['max_x'])]) y_range = [float(settings['min_y']), float(settings['max_y'])] for x in range(img.shape[1]): candidates = np.where(img[:, x] > 0) assert len(candidates[0]) > 0, "non function value in image detected, ensure each column has at least one value > 0!" y_pos = candidates[0][0]/img.shape[0] self.data[dim, x] = 1-y_pos self.data[dim, :] *= np.abs(y_range[1] - y_range[0]) self.data[dim, :] += y_range[0] def read_config(self, fname): try: config = configparser.ConfigParser() config.read(fname) return config except Exception as e: print(e) return None def get_axis_settings(self, section): dict1 = {} options = self.config.options(section) for option in options: try: dict1[option] = self.config.get(section, option) if dict1[option] == -1: print("skip: %s" % option) except: print("exception on %s!" % option) dict1[option] = None return dict1 diff --git a/hyppopy/HyppopyProject.py b/hyppopy/HyppopyProject.py index 8da4843..2d3e116 100644 --- a/hyppopy/HyppopyProject.py +++ b/hyppopy/HyppopyProject.py @@ -1,76 +1,146 @@ # Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE +__all__ = ['HyppopyProject'] + import copy from hyppopy.globals import * LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class HyppopyProject(object): + """ + The HyppopyProject class takes care of the optimization settings. An instance can be configured using a config + dictionary or by using the hyperparameter and settings methods. In case of initializing via dicts those can be + passed to the constructor or by using the set_config method. After initialization a HyppopyProject instance is + passed to a solver class which internally checks for consistency with it's needs. 
The class distinguished + between two categories, hyperparameter and general settings. + + The hyperparameter are a dictionary structure as follows and can be accessed via hyperparameter + {'param_name: {'domain': 'uniform', ...}, ...} + + General settings are internally converted to class attributes and can accessed directly or via settings + + An example config could look like: + config = {'hyperparameter': {'myparam': {'domain': 'uniform', 'data': [0, 100], 'type': float}, ...}, + 'my_setting_1': 3.1415, + 'my_setting_2': 'hello world'} + project = HyppopyProject(config) + + The same can be achieved using: + project = HyppopyProject() + project.add_hyperparameter(name='myparam', domain='uniform', data=[0, 100], type=float}) + project.add_setting('my_setting_1', 3.1415) + project.add_setting('my_setting_2', 'hello world') + """ def __init__(self, config=None): + """ + Constructor + + :param config: [dict] config dictionary of the form {'hyperparameter': {...}, ...} + """ self._data = {HYPERPARAMETERPATH: {}, SETTINGSPATH: {}} if config is not None: self.set_config(config) + def __parse_members(self): + """ + The function converts settings into class attributes + """ + for name, value in self.settings.items(): + if name not in self.__dict__.keys(): + setattr(self, name, value) + else: + self.__dict__[name] = value + def set_config(self, config): + """ + Set a config dict + + :param config: [dict] configuration dict defining hyperparameter and general settings + """ assert isinstance(config, dict), "precondition violation, config needs to be of type dict, got {}".format(type(config)) confic_cp = copy.deepcopy(config) if HYPERPARAMETERPATH in confic_cp.keys(): self._data[HYPERPARAMETERPATH] = confic_cp[HYPERPARAMETERPATH] del confic_cp[HYPERPARAMETERPATH] self._data[SETTINGSPATH] = confic_cp - self.parse_members() + self.__parse_members() def set_hyperparameter(self, params): + """ + This function can be used to set the hyperparameter description directly by passing 
the hyperparameter section + of a config dict (see class description). Alternatively use add_hyperparameter to add one after each other. + + :param params: [dict] configuration dict defining hyperparameter + """ assert isinstance(params, dict), "precondition violation, params needs to be of type dict, got {}".format(type(params)) self._data[HYPERPARAMETERPATH] = params - def set_settings(self, **kwargs): - self._data[SETTINGSPATH] = kwargs - self.parse_members() - def add_hyperparameter(self, name, **kwargs): + """ + This function can be used to set hyperparameter descriptions. Alternatively use set_hyperparameter to set all at + once. + + :param name: [str] hyperparameter name + :param **kwargs: [dict] configuration dict defining a hyperparameter e.g. domain='uniform', data=[1,100], ... + """ assert isinstance(name, str), "precondition violation, name needs to be of type str, got {}".format(type(name)) self._data[HYPERPARAMETERPATH][name] = kwargs + def set_settings(self, **kwargs): + """ + This function can be used to set the general settings directly by passing the settings as name=value pairs. + Alternatively use add_setting to add one after each other. + + :param **kwargs: [dict] settings dict e.g. my_setting_1=3.1415, my_setting_2='hello world', ... + """ + self._data[SETTINGSPATH] = kwargs + self.__parse_members() + def add_setting(self, name, value): + """ + This function can be used to set a general settings. Alternatively use set_settings to set all at once. 
+ + :param name: [str] setting name + :param value: [object] settings value + """ assert isinstance(name, str), "precondition violation, name needs to be of type str, got {}".format(type(name)) self._data[SETTINGSPATH][name] = value - self.parse_members() - - def parse_members(self): - for name, value in self.settings.items(): - if name not in self.__dict__.keys(): - setattr(self, name, value) - else: - self.__dict__[name] = value + self.__parse_members() def get_typeof(self, name): + """ + Returns a hyperparameter type by name + + :param name: [str] hyperparameter name + :return: [type] hyperparameter type + """ if not name in self.hyperparameter.keys(): raise LookupError("Typechecking failed, couldn't find hyperparameter {}!".format(name)) if not "type" in self.hyperparameter[name].keys(): raise LookupError("Typechecking failed, couldn't find hyperparameter signature type!") dtype = self.hyperparameter[name]["type"] return dtype @property def hyperparameter(self): return self._data[HYPERPARAMETERPATH] @property def settings(self): return self._data[SETTINGSPATH] diff --git a/hyppopy/ProjectManager.py b/hyppopy/ProjectManager.py index fe29a8e..f37b260 100644 --- a/hyppopy/ProjectManager.py +++ b/hyppopy/ProjectManager.py @@ -1,66 +1,68 @@ # Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. 
@singleton_object
class ProjectManager(metaclass=Singleton):
    """
    Singleton registry keeping track of HyppopyProject instances by name.
    One project is always the 'current' one; accessors fall back to creating
    a default project when none exists yet.
    """

    def __init__(self):
        # name -> HyppopyProject mapping and the project used by default
        self._current_project = None
        self._projects = {}

    def clear_all(self):
        """Placeholder, not implemented yet."""
        pass

    def new_project(self, name="HyppopyProject", config=None):
        """
        Create a new HyppopyProject under the given name and make it current.
        If the name is already registered a unique 'name.NNN' variant is used
        instead, so existing projects are never overwritten.

        :param name: [str] project name, default='HyppopyProject'
        :param config: [dict] config forwarded to HyppopyProject, default=None

        :return: [HyppopyProject] the newly created project
        """
        if name in self._projects.keys():
            name = self.check_projectname(name)
        self._projects[name] = HyppopyProject(config)
        self._current_project = self._projects[name]
        return self._current_project

    def check_projectname(self, name):
        """
        Derive a project name that is not yet registered by appending or
        incrementing a zero-padded '.NNN' suffix.

        Fixes the original behavior which re-emitted the same suffix for
        names already carrying one (e.g. 'foo.000' -> 'foo.000'), causing
        new_project to silently overwrite the existing project. The dead
        'len(split) == 0' branch was removed (str.split never returns an
        empty list).

        :param name: [str] requested project name

        :return: [str] unique project name
        """
        split = name.split(".")
        try:
            # existing numeric suffix: continue counting from it
            number = int(split[-1]) + 1
            base = ".".join(split[:-1])
        except ValueError:
            number = 0
            base = name
        candidate = "{}.{}".format(base, str(number).zfill(3))
        while candidate in self._projects:
            number += 1
            candidate = "{}.{}".format(base, str(number).zfill(3))
        return candidate

    def get_current(self):
        """
        Return the current project, creating a default one if necessary.

        :return: [HyppopyProject] current project
        """
        if self._current_project is None:
            self.new_project()
        return self._current_project

    def get_project(self, name):
        """
        Return the project registered under name, making it current; if no
        such project exists a new one is created under that name.

        :param name: [str] project name

        :return: [HyppopyProject] the requested (or newly created) project
        """
        if name in self._projects.keys():
            self._current_project = self._projects[name]
            return self.get_current()
        return self.new_project(name)

    def get_projectnames(self):
        """
        Return the names of all registered projects.

        :return: [dict_keys] registered project names
        """
        return self._projects.keys()
# # See LICENSE +__all__ = ['SolverPool'] + from .Singleton import * import os import logging from hyppopy.HyppopyProject import HyppopyProject from hyppopy.solvers.OptunaSolver import OptunaSolver from hyppopy.solvers.HyperoptSolver import HyperoptSolver from hyppopy.solvers.OptunitySolver import OptunitySolver from hyppopy.solvers.GridsearchSolver import GridsearchSolver from hyppopy.solvers.RandomsearchSolver import RandomsearchSolver from hyppopy.solvers.QuasiRandomsearchSolver import QuasiRandomsearchSolver from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) @singleton_object class SolverPool(metaclass=Singleton): + """ + The SolverPool is a helper singleton class to get the desired solver either by name and a HyppopyProject instance or + by a HyppopyProject instance only, if it defines a setting field called solver. + """ def __init__(self): self._solver_list = ["hyperopt", "optunity", "optuna", "randomsearch", "quasirandomsearch", "gridsearch"] def get_solver_names(self): + """ + Returns a list of available solvers + + :return: [list] solver list + """ return self._solver_list def get(self, solver_name=None, project=None): + """ + Get the configured solver instance + + :param solver_name: [str] solver name, if None, the project must have an attribute solver keeping the solver name, default=None + :param project: [HyppopyProject] HyppopyProject instance + + :return: [HyppopySolver] the configured solver instance + """ if solver_name is not None: assert isinstance(solver_name, str), "precondition violation, solver_name type str expected, got {} instead!".format(type(solver_name)) if project is not None: assert isinstance(project, HyppopyProject), "precondition violation, project type HyppopyProject expected, got {} instead!".format(type(project)) if "solver" in project.__dict__: solver_name = project.solver if solver_name not in self._solver_list: raise AssertionError("Solver named [{}] not 
implemented!".format(solver_name)) if solver_name == "hyperopt": if project is not None: return HyperoptSolver(project) return HyperoptSolver() elif solver_name == "optunity": if project is not None: return OptunitySolver(project) return OptunitySolver() elif solver_name == "optuna": if project is not None: return OptunaSolver(project) return OptunaSolver() elif solver_name == "gridsearch": if project is not None: return GridsearchSolver(project) return GridsearchSolver() elif solver_name == "randomsearch": if project is not None: return RandomsearchSolver(project) return RandomsearchSolver() elif solver_name == "quasirandomsearch": if project is not None: return QuasiRandomsearchSolver(project) return QuasiRandomsearchSolver() diff --git a/hyppopy/VisdomViewer.py b/hyppopy/VisdomViewer.py index 95d37d2..d21ee51 100644 --- a/hyppopy/VisdomViewer.py +++ b/hyppopy/VisdomViewer.py @@ -1,126 +1,160 @@ # Hyppopy - A Hyper-Parameter Optimization Toolbox # # Copyright (c) German Cancer Research Center, # Division of Medical Image Computing. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE +__all__ = ['VisdomViewer'] + import warnings import numpy as np from visdom import Visdom -import matplotlib.pyplot as plt def time_formatter(time_s): + """ + Formats time in seconds input to more intuitive form h, min, s or ms, depending on magnitude + :param time_s: [float] time in seconds + :return: + """ if time_s < 0.01: return int(time_s * 1000.0 * 1000) / 1000.0, "ms" elif 100 < time_s < 3600: return int(time_s / 60 * 1000) / 1000.0, "min" elif time_s >= 3600: return int(time_s / 3600 * 1000) / 1000.0, "h" else: return int(time_s * 1000) / 1000.0, "s" class VisdomViewer(object): - + """ + The VisdomViewer class implements the live viewer plots via visdom. When extending implement your plot as methos and + call it in update. 
Using this class make it necessary starting a visdom server beforehand $ python -m visdom.server + """ def __init__(self, project, port=8097, server="http://localhost"): self._viz = Visdom(port=port, server=server) self._enabled = self._viz.check_connection(timeout_seconds=3) if not self._enabled: warnings.warn("No connection to visdom server established. Visualization cannot be displayed!") self._project = project self._best_win = None self._best_loss = None self._loss_iter_plot = None self._status_report = None self._axis_tags = None self._axis_plots = None def plot_losshistory(self, input_data): + """ + This function plots the loss history loss over iteration + + :param input_data: [dict] trail infos + """ loss = np.array([input_data["loss"]]) iter = np.array([input_data["iterations"]]) if self._loss_iter_plot is None: self._loss_iter_plot = self._viz.line(loss, X=iter, opts=dict( markers=True, markersize=5, dash=np.array(['dashdot']), title="Loss History", xlabel='iteration', ylabel='loss' )) else: self._viz.line(loss, X=iter, win=self._loss_iter_plot, update='append') def plot_hyperparameter(self, input_data): + """ + This function plots each hyperparameter axis + + :param input_data: [dict] trail infos + """ if self._axis_plots is None: self._axis_tags = [] self._axis_plots = {} for item in input_data.keys(): if item == "refresh_time" or item == "book_time" or item == "iterations" or item == "status" or item == "loss": continue self._axis_tags.append(item) for axis in self._axis_tags: xlabel = "value" if isinstance(input_data[axis], str): if self._project.hyperparameter[axis]["domain"] == "categorical": xlabel = '-'.join(self._project.hyperparameter[axis]["data"]) input_data[axis] = self._project.hyperparameter[axis]["data"].index(input_data[axis]) axis_loss = np.array([input_data[axis], input_data["loss"]]).reshape(1, -1) self._axis_plots[axis] = self._viz.scatter(axis_loss, opts=dict( markersize=5, title=axis, xlabel=xlabel, ylabel='loss')) else: for axis 
in self._axis_tags: if isinstance(input_data[axis], str): if self._project.hyperparameter[axis]["domain"] == "categorical": input_data[axis] = self._project.hyperparameter[axis]["data"].index(input_data[axis]) axis_loss = np.array([input_data[axis], input_data["loss"]]).reshape(1, -1) self._viz.scatter(axis_loss, win=self._axis_plots[axis], update='append') def show_statusreport(self, input_data): + """ + This function prints status report per iteration + + :param input_data: [dict] trail infos + """ duration = input_data['refresh_time'] - input_data['book_time'] duration, time_format = time_formatter(duration.total_seconds()) report = "Iteration {}: {}{} -> {}\n".format(input_data["iterations"], duration, time_format, input_data["status"]) if self._status_report is None: self._status_report = self._viz.text(report) else: self._viz.text(report, win=self._status_report, append=True) def show_best(self, input_data): + """ + Shows best parameter set + + :param input_data: [dict] trail infos + """ if self._best_win is None: self._best_loss = input_data["loss"] txt = "Best Parameter Set: