diff --git a/examples/performance_test.py b/examples/performance_test.py index 26900c0..5041119 100644 --- a/examples/performance_test.py +++ b/examples/performance_test.py @@ -1,350 +1,529 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os import shutil import tempfile import numpy as np import pandas as pd import seaborn as sns from sklearn.svm import SVC import matplotlib.pyplot as plt from sklearn.metrics import accuracy_score from sklearn.ensemble import AdaBoostClassifier from sklearn.datasets import load_breast_cancer from sklearn.neighbors import KNeighborsClassifier from sklearn.ensemble import RandomForestClassifier from sklearn.model_selection import train_test_split +from sklearn.ensemble import GradientBoostingClassifier from hyppopy.projectmanager import ProjectManager from hyppopy.workflows.svc_usecase.svc_usecase import svc_usecase from hyppopy.workflows.knc_usecase.knc_usecase import knc_usecase from hyppopy.workflows.adaboost_usecase.adaboost_usecase import adaboost_usecase from hyppopy.workflows.randomforest_usecase.randomforest_usecase import randomforest_usecase +from hyppopy.workflows.gradientboost_usecase.gradientboost_usecase import gradientboost_usecase sns.set(style="ticks") sns.set(style="darkgrid") class PerformanceTest(object): - def __init__(self): - self.root = os.path.join(tempfile.gettempdir(), 'test_data') + def __init__(self, root=None): + if root is None: + self.root = os.path.join(tempfile.gettempdir(), 'test_data') + else: + self.root = os.path.join(root, 'test_data') if not os.path.isdir(self.root): os.makedirs(self.root) self.test = None self.train = None self.config = None - 
self.iter_sequence = [5, 10, 25, 50, 100, 150, 300, 500] + self.iter_sequence = [5, 10, 25, 50, 100, 150, 300, 500, 800, 1200] + self.iter_sequence = [25] def run(self): self.set_up() #self.run_svc_usecase() + #self.run_gradientboost_usecase() self.run_randomforest_usecase() - self.run_adaboost_usecase() - self.run_knc_usecase() + #self.run_adaboost_usecase() + #self.run_knc_usecase() #self.clean_up() def set_hyperparameter(self, params): self.config["hyperparameter"] = params def set_iterations(self, value): self.config["settings"]["solver_plugin"]["max_iterations"] = value - def plot(self, df, name=""): + def find_loss_and_time(self, solver_output, results): + min_idx = solver_output["losses"].idxmin() + results["losses"].append(solver_output["losses"][min_idx]) + results["duration"].append(solver_output["duration"][min_idx]) + + def plot_matrix(self, df, name=""): sns_plot = sns.pairplot(df, height=1.8, aspect=1.8) fig = sns_plot.fig fig.subplots_adjust(top=0.93, wspace=0.3) t = fig.suptitle(name, fontsize=14) plt.show() return sns_plot + def plot(self, df, x, y, name="", save=None, show=True): + fig, axs = plt.subplots(nrows=len(y), ncols=len(x), figsize=(12.0, len(y)*3)) + fig.subplots_adjust(left=0.08, right=0.98, wspace=0.3) + + argmin = df["losses"].idxmin() + + for nx, _x in enumerate(x): + for ny, _y in enumerate(y): + ax = axs[ny, nx] + ax.plot(df[_x].values, df[_y].values, 'o') + ax.plot(df[_x].values[argmin], df[_y].values[argmin], 'ro') + ax.grid(True) + if nx == 0: + ax.set_ylabel(_y) + if ny == len(y)-1: + ax.set_xlabel(_x) + fig.suptitle(name, fontsize=16) + if save is not None: + if not os.path.isdir(os.path.dirname(save)): + os.makedirs(os.path.dirname(save)) + plt.savefig(save) + if show: + plt.show() + + def set_up(self): breast_cancer_data = load_breast_cancer() x = breast_cancer_data.data y = breast_cancer_data.target x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15, random_state=23) x_train_fname = 
os.path.join(self.root, 'x_train.npy') y_train_fname = os.path.join(self.root, 'y_train.npy') if not os.path.isfile(x_train_fname): np.save(x_train_fname, x_train) if not os.path.isfile(y_train_fname): np.save(y_train_fname, y_train) self.train = [x_train, y_train] self.test = [x_test, y_test] self.config = { "hyperparameter": {}, "settings": { "solver_plugin": { - "max_iterations": 50, + "max_iterations": 1, "use_plugin": "hyperopt", "output_dir": os.path.join(self.root, 'test_results') }, "custom": { "data_path": self.root, "data_name": "x_train.npy", "labels_name": "y_train.npy" } }} def run_svc_usecase(self): print("\n") print("*" * 30) print("SVC Classifier") print("*" * 30) print("\n") hp = { "C": { "domain": "uniform", "data": [0.0001, 300.0], "type": "float" }, "kernel": { "domain": "categorical", "data": ["linear", "poly", "rbf"], "type": "str" } } self.set_hyperparameter(hp) - results = {"iterations": [], "C": [], "kernel": [], "accuracy": []} + results = {"iterations": [], "C": [], "kernel": [], "accuracy": [], "losses": [], "duration": []} for n in self.iter_sequence: self.set_iterations(n) ProjectManager.set_config(self.config) uc = svc_usecase() uc.run(save=False) res, best = uc.get_results() clf = SVC(C=best['n_estimators'], kernel=hp['kernel']['data'][best['kernel']]) clf.fit(self.train[0], self.train[1]) train_predictions = clf.predict(self.test[0]) acc = accuracy_score(self.test[1], train_predictions) results['accuracy'].append(acc) results['iterations'].append(n) results['kernel'].append(best['kernel']) results['C'].append(best['C']) + self.find_loss_and_time(res, results) + print("=" * 30) print("Number of iterations: {}".format(n)) print("Classifier: {}".format(clf.__class__.__name__)) print("=" * 30) print("=" * 30) for p in best.items(): print(p[0], ":", p[1]) print("=" * 30) print("Accuracy: {:.4%}".format(acc)) print("=" * 30) print("\n") df = pd.DataFrame.from_dict(results) df.to_csv(os.path.join(self.root, 
"final_{}.csv".format(clf.__class__.__name__))) - sns_plot = self.plot(df, name="Classifier: {}".format(clf.__class__.__name__)) - sns_plot.savefig(os.path.join(self.root, "final_{}.png".format(clf.__class__.__name__))) + self.plot(df, x=["iterations", "losses"], + y=['accuracy', 'duration'], + name="Classifier: {}".format(clf.__class__.__name__), + save=os.path.join(self.root, "final1_{}.png".format(clf.__class__.__name__)), + show=False) + self.plot(df, x=["iterations", "losses"], + y=['n_estimators', 'max_depth', 'max_features'], + name="Classifier: {}".format(clf.__class__.__name__), + save=os.path.join(self.root, "final2_{}.png".format(clf.__class__.__name__)), + show=False) def run_randomforest_usecase(self): print("\n") print("*" * 30) print("RandomForest Classifier") print("*" * 30) print("\n") hp = { "n_estimators": { "domain": "uniform", "data": [3, 500], "type": "int" }, "max_depth": { "domain": "uniform", "data": [1, 50], "type": "int" }, "max_features": { "domain": "categorical", "data": ["auto", "sqrt", "log2"], "type": "str" } } self.set_hyperparameter(hp) - results = {"iterations": [], "n_estimators": [], "max_depth": [], "max_features": [], "accuracy": []} + results = {"iterations": [], "n_estimators": [], "max_depth": [], "max_features": [], "accuracy": [], "losses": [], "duration": []} for n in self.iter_sequence: - self.set_iterations(n) - ProjectManager.set_config(self.config) - uc = randomforest_usecase() - uc.run(save=False) - res, best = uc.get_results() - clf = RandomForestClassifier(n_estimators=best['n_estimators'], - max_depth=best['max_depth'], - max_features=hp['max_features']['data'][best['max_features']]) - clf.fit(self.train[0], self.train[1]) - train_predictions = clf.predict(self.test[0]) - acc = accuracy_score(self.test[1], train_predictions) - - results['accuracy'].append(acc) - results['iterations'].append(n) - results['n_estimators'].append(best['n_estimators']) - results['max_depth'].append(best['max_depth']) - 
results['max_features'].append(best['max_features']) - - print("=" * 30) - print("Number of iterations: {}".format(n)) - print("Classifier: {}".format(clf.__class__.__name__)) - print("=" * 30) - print("=" * 30) - for p in best.items(): - print(p[0], ":", p[1]) - print("=" * 30) - print("Accuracy: {:.4%}".format(acc)) - print("=" * 30) - print("\n") + try: + self.set_iterations(n) + ProjectManager.set_config(self.config) + uc = randomforest_usecase() + uc.run(save=False) + res, best = uc.get_results() + clf = RandomForestClassifier(n_estimators=best['n_estimators'], + max_depth=best['max_depth'], + max_features=hp['max_features']['data'][best['max_features']]) + clf.fit(self.train[0], self.train[1]) + train_predictions = clf.predict(self.test[0]) + acc = accuracy_score(self.test[1], train_predictions) + + results['accuracy'].append(acc) + results['iterations'].append(n) + results['n_estimators'].append(best['n_estimators']) + results['max_depth'].append(best['max_depth']) + results['max_features'].append(best['max_features']) + + self.find_loss_and_time(res, results) + + print("=" * 30) + print("Number of iterations: {}".format(n)) + print("Classifier: {}".format(clf.__class__.__name__)) + print("=" * 30) + print("=" * 30) + for p in best.items(): + print(p[0], ":", p[1]) + print("=" * 30) + print("Accuracy: {:.4%}".format(acc)) + print("=" * 30) + print("\n") + except Exception as e: + print("Failed at iteration step {}, reason: {}".format(n, e)) df = pd.DataFrame.from_dict(results) df.to_csv(os.path.join(self.root, "final_{}.csv".format(clf.__class__.__name__))) - sns_plot = self.plot(df, name="Classifier: {}".format(clf.__class__.__name__)) - sns_plot.savefig(os.path.join(self.root, "final_{}.png".format(clf.__class__.__name__))) + self.plot(df, x=["iterations", "losses"], + y=['accuracy', 'duration'], + name="Classifier: {}".format(clf.__class__.__name__), + save=os.path.join(self.root, "final1_{}.png".format(clf.__class__.__name__)), + show=False) + 
self.plot(df, x=["iterations", "losses"], + y=['n_estimators', 'max_depth', 'max_features'], + name="Classifier: {}".format(clf.__class__.__name__), + save=os.path.join(self.root, "final2_{}.png".format(clf.__class__.__name__)), + show=False) def run_adaboost_usecase(self): print("\n") print("*"*30) print("AdaBoost Classifier") print("*"*30) print("\n") hp = { "n_estimators": { "domain": "uniform", "data": [1, 500], "type": "int" }, "learning_rate": { "domain": "uniform", "data": [0.001, 10], "type": "float" } } self.set_hyperparameter(hp) - results = {"iterations": [], "n_estimators": [], "learning_rate": [], "accuracy": []} + results = {"iterations": [], "n_estimators": [], "learning_rate": [], "accuracy": [], "losses": [], "duration": []} for n in self.iter_sequence: - self.set_iterations(n) - ProjectManager.set_config(self.config) - uc = adaboost_usecase() - uc.run(save=False) - res, best = uc.get_results() - clf = AdaBoostClassifier(n_estimators=best['n_estimators'], - learning_rate=best['learning_rate']) - clf.fit(self.train[0], self.train[1]) - train_predictions = clf.predict(self.test[0]) - acc = accuracy_score(self.test[1], train_predictions) - - results['accuracy'].append(acc) - results['iterations'].append(n) - results['n_estimators'].append(best['n_estimators']) - results['learning_rate'].append(best['learning_rate']) - - print("=" * 30) - print("Number of iterations: {}".format(n)) - print("Classifier: {}".format(clf.__class__.__name__)) - print("=" * 30) - print("=" * 30) - for p in best.items(): - print(p[0], ":", p[1]) - print("=" * 30) - print("Accuracy: {:.4%}".format(acc)) - print("=" * 30) - print("\n") + try: + self.set_iterations(n) + ProjectManager.set_config(self.config) + uc = adaboost_usecase() + uc.run(save=False) + res, best = uc.get_results() + clf = AdaBoostClassifier(n_estimators=best['n_estimators'], + learning_rate=best['learning_rate']) + clf.fit(self.train[0], self.train[1]) + train_predictions = clf.predict(self.test[0]) + acc = 
accuracy_score(self.test[1], train_predictions) + + results['accuracy'].append(acc) + results['iterations'].append(n) + results['n_estimators'].append(best['n_estimators']) + results['learning_rate'].append(best['learning_rate']) + + self.find_loss_and_time(res, results) + + print("=" * 30) + print("Number of iterations: {}".format(n)) + print("Classifier: {}".format(clf.__class__.__name__)) + print("=" * 30) + print("=" * 30) + for p in best.items(): + print(p[0], ":", p[1]) + print("=" * 30) + print("Accuracy: {:.4%}".format(acc)) + print("=" * 30) + print("\n") + except Exception as e: + print("Failed at iteration step {}, reason: {}".format(n, e)) df = pd.DataFrame.from_dict(results) df.to_csv(os.path.join(self.root, "final_{}.csv".format(clf.__class__.__name__))) - sns_plot = self.plot(df, name="Classifier: {}".format(clf.__class__.__name__)) - sns_plot.savefig(os.path.join(self.root, "final_{}.png".format(clf.__class__.__name__))) + self.plot(df, x=["iterations", "losses"], + y=['accuracy', 'duration'], + name="Classifier: {}".format(clf.__class__.__name__), + save=os.path.join(self.root, "final1_{}.png".format(clf.__class__.__name__)), + show=False) + self.plot(df, x=["iterations", "losses"], + y=['n_estimators', 'learning_rate'], + name="Classifier: {}".format(clf.__class__.__name__), + save=os.path.join(self.root, "final2_{}.png".format(clf.__class__.__name__)), + show=False) def run_knc_usecase(self): print("\n") print("*" * 30) print("KN Classifier") print("*" * 30) print("\n") hp = { "n_neighbors": { "domain": "uniform", "data": [1, 100], "type": "int" }, "weights": { "domain": "categorical", "data": ["uniform", "distance"], "type": "str" }, "algorithm": { "domain": "categorical", "data": ["auto", "ball_tree", "kd_tree", "brute"], "type": "str" } } self.set_hyperparameter(hp) - results = {"iterations": [], "n_neighbors": [], "weights": [], "algorithm": [], "accuracy": []} + results = {"iterations": [], "n_neighbors": [], "weights": [], "algorithm": [], 
"accuracy": [], "losses": [], "duration": []} for n in self.iter_sequence: - self.set_iterations(n) - ProjectManager.set_config(self.config) - uc = knc_usecase() - uc.run(save=False) - res, best = uc.get_results() - clf = KNeighborsClassifier(n_neighbors=best['n_neighbors'], - weights=hp["weights"]["data"][best['weights']], - algorithm=hp["algorithm"]["data"][best['algorithm']]) - clf.fit(self.train[0], self.train[1]) - train_predictions = clf.predict(self.test[0]) - acc = accuracy_score(self.test[1], train_predictions) + try: + self.set_iterations(n) + ProjectManager.set_config(self.config) + uc = knc_usecase() + uc.run(save=False) + res, best = uc.get_results() + clf = KNeighborsClassifier(n_neighbors=best['n_neighbors'], + weights=hp["weights"]["data"][best['weights']], + algorithm=hp["algorithm"]["data"][best['algorithm']]) + clf.fit(self.train[0], self.train[1]) + train_predictions = clf.predict(self.test[0]) + acc = accuracy_score(self.test[1], train_predictions) + + results['accuracy'].append(acc) + results['iterations'].append(n) + results['n_neighbors'].append(best['n_neighbors']) + results['weights'].append(best['weights']) + results['algorithm'].append(best['algorithm']) + + self.find_loss_and_time(res, results) + + print("=" * 30) + print("Number of iterations: {}".format(n)) + print("Classifier: {}".format(clf.__class__.__name__)) + print("=" * 30) + print("=" * 30) + for p in best.items(): + print(p[0], ":", p[1]) + print("=" * 30) + print("Accuracy: {:.4%}".format(acc)) + print("=" * 30) + print("\n") + except Exception as e: + print("Failed at iteration step {}, reason: {}".format(n, e)) - results['accuracy'].append(acc) - results['iterations'].append(n) - results['n_neighbors'].append(best['n_neighbors']) - results['weights'].append(best['weights']) - results['algorithm'].append(best['algorithm']) + df = pd.DataFrame.from_dict(results) + df.to_csv(os.path.join(self.root, "final_{}.csv".format(clf.__class__.__name__))) + self.plot(df, 
x=["iterations", "losses"], + y=['accuracy', 'duration'], + name="Classifier: {}".format(clf.__class__.__name__), + save=os.path.join(self.root, "final1_{}.png".format(clf.__class__.__name__)), + show=False) + self.plot(df, x=["iterations", "losses"], + y=['n_neighbors', 'weights', 'algorithm'], + name="Classifier: {}".format(clf.__class__.__name__), + save=os.path.join(self.root, "final2_{}.png".format(clf.__class__.__name__)), + show=False) + + def run_gradientboost_usecase(self): + print("\n") + print("*" * 30) + print("GradientBoost Classifier") + print("*" * 30) + print("\n") + hp = { + "n_estimators": { + "domain": "uniform", + "data": [3, 500], + "type": "int" + }, + "learning_rate": { + "domain": "uniform", + "data": [0.001, 10], + "type": "float" + }, + "min_samples_split": { + "domain": "uniform", + "data": [0.0, 1.0], + "type": "float" + }, + "min_samples_leaf": { + "domain": "uniform", + "data": [0.00001, 0.5], + "type": "float" + }, + "max_depth": { + "domain": "uniform", + "data": [1, 50], + "type": "int" + } + } - print("=" * 30) - print("Number of iterations: {}".format(n)) - print("Classifier: {}".format(clf.__class__.__name__)) - print("=" * 30) - print("=" * 30) - for p in best.items(): - print(p[0], ":", p[1]) - print("=" * 30) - print("Accuracy: {:.4%}".format(acc)) - print("=" * 30) - print("\n") + self.set_hyperparameter(hp) + + results = {"iterations": [], "n_estimators": [], "max_depth": [], + "learning_rate": [], "min_samples_split": [], "min_samples_leaf": [], + "accuracy": [], "losses": [], "duration": []} + for n in self.iter_sequence: + try: + self.set_iterations(n) + ProjectManager.set_config(self.config) + uc = gradientboost_usecase() + uc.run(save=False) + res, best = uc.get_results() + clf = GradientBoostingClassifier(n_estimators=best['n_estimators'], + max_depth=best['max_depth'], + learning_rate=best['learning_rate'], + min_samples_split=best['min_samples_split'], + min_samples_leaf=best['min_samples_leaf']) + 
clf.fit(self.train[0], self.train[1]) + train_predictions = clf.predict(self.test[0]) + acc = accuracy_score(self.test[1], train_predictions) + + results['accuracy'].append(acc) + results['iterations'].append(n) + results['n_estimators'].append(best['n_estimators']) + results['max_depth'].append(best['max_depth']) + results['learning_rate'].append(best['learning_rate']) + results['min_samples_split'].append(best['min_samples_split']) + results['min_samples_leaf'].append(best['min_samples_leaf']) + + self.find_loss_and_time(res, results) + + print("=" * 30) + print("Number of iterations: {}".format(n)) + print("Classifier: {}".format(clf.__class__.__name__)) + print("=" * 30) + print("=" * 30) + for p in best.items(): + print(p[0], ":", p[1]) + print("=" * 30) + print("Accuracy: {:.4%}".format(acc)) + print("=" * 30) + print("\n") + except Exception as e: + print("Failed at iteration step {}, reason: {}".format(n, e)) df = pd.DataFrame.from_dict(results) df.to_csv(os.path.join(self.root, "final_{}.csv".format(clf.__class__.__name__))) - sns_plot = self.plot(df, name="Classifier: {}".format(clf.__class__.__name__)) - sns_plot.savefig(os.path.join(self.root, "final_{}.png".format(clf.__class__.__name__))) + self.plot(df, x=["iterations", "losses"], + y=['accuracy', 'duration'], + name="Classifier: {}".format(clf.__class__.__name__), + save=os.path.join(self.root, "final1_{}.png".format(clf.__class__.__name__)), + show=False) + self.plot(df, x=["iterations", "losses"], + y=['n_estimators', 'max_depth', 'learning_rate', 'min_samples_split', 'min_samples_leaf'], + name="Classifier: {}".format(clf.__class__.__name__), + save=os.path.join(self.root, "final2_{}.png".format(clf.__class__.__name__)), + show=False) + def clean_up(self): if os.path.isdir(self.root): shutil.rmtree(self.root) if __name__ == "__main__": - performance_test = PerformanceTest() + performance_test = PerformanceTest(root="C:/Users/s635r/Desktop") performance_test.run() diff --git 
a/hyppopy/plugins/gridsearch_settings_plugin.py b/hyppopy/plugins/gridsearch_settings_plugin.py new file mode 100644 index 0000000..6fb7062 --- /dev/null +++ b/hyppopy/plugins/gridsearch_settings_plugin.py @@ -0,0 +1,143 @@ +# DKFZ +# +# +# Copyright (c) German Cancer Research Center, +# Division of Medical and Biological Informatics. +# All rights reserved. +# +# This software is distributed WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# See LICENSE.txt or http://www.mitk.org for details. +# +# Author: Sven Wanner (s.wanner@dkfz.de) + +import os +import logging +import numpy as np +from pprint import pformat +from hyppopy.globals import DEBUGLEVEL +LOG = logging.getLogger(os.path.basename(__file__)) +LOG.setLevel(DEBUGLEVEL) + +from yapsy.IPlugin import IPlugin + + +from hyppopy.settingspluginbase import SettingsPluginBase +from hyppopy.settingsparticle import split_categorical +from hyppopy.settingsparticle import SettingsParticle + + +def gaussian(x, mu, sigma): + return 1.0/(sigma * np.sqrt(2*np.pi))*np.exp(-(x-mu)**2/(2*sigma**2)) + + +def gaussian_axis_sampling(a, b, N): + center = a + (b - a) / 2.0 + delta = (b - a) / N + bn = b - center + xn = np.arange(0, bn, delta) + dn = [] + for x in xn: + dn.append(1/gaussian(x, 0, bn/2.5)) + dn = np.array(dn) + dn /= np.sum(dn) + dn *= bn + + axis = [0] + for x in dn: + axis.append(x+axis[-1]) + axis.insert(0, -axis[-1]) + axis = np.array(axis) + axis += center + return axis + + +def log_axis_sampling(a, b, N): + delta = (b - a) / N + logrange = np.arange(a, b + delta, delta) + for n in range(logrange.shape[0]): + logrange[n] = np.exp(logrange[n]) + return logrange + + +def sample(start, stop, count, ftype="uniform"): + assert stop > start, "Precondition Violation, stop <= start not allowed!" + assert count > 0, "Precondition Violation, N <= 0 not allowed!" 
+ if ftype == 'uniform': + delta = (stop - start)/count + return np.arange(start, stop + delta, delta) + elif ftype == 'loguniform': + return log_axis_sampling(start, stop, count) + elif ftype == 'normal': + return gaussian_axis_sampling(start, stop, count) + raise IOError("Precondition Violation, unknown sampling function type!") + + +class gridsearch_Settings(SettingsPluginBase, IPlugin): + + def __init__(self): + SettingsPluginBase.__init__(self) + LOG.debug("initialized") + + def convert_parameter(self, input_dict): + LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(input_dict))) + + solution_space = {} + # split input in categorical and non-categorical data + cat, uni = split_categorical(input_dict) + # build up dictionary keeping all non-categorical data + uniforms = {} + for name, content in uni.items(): + particle = gridsearch_SettingsParticle(name=name) + for key, value in content.items(): + if key == 'domain': + particle.domain = value + elif key == 'data': + particle.data = value + elif key == 'type': + particle.dtype = value + uniforms[name] = particle.get() + + # build nested categorical structure + inner_level = uniforms + for key, value in cat.items(): + tmp = {} + tmp2 = {} + for key2, value2 in value.items(): + if key2 == 'data': + for elem in value2: + tmp[elem] = inner_level + tmp2[key] = tmp + inner_level = tmp2 + solution_space = tmp2 + return solution_space + + +class gridsearch_SettingsParticle(SettingsParticle): + + def __init__(self, name=None, domain=None, dtype=None, data=None): + SettingsParticle.__init__(self, name, domain, dtype, data) + + def convert(self): + assert isinstance(self.data, list), "Precondition Violation, invalid input type for data!" + if self.domain == "categorical": + return self.data + else: + assert len(self.data) >= 2, "Precondition Violation, invalid input data!" 
+ if len(self.data) < 3: + self.data.append(10) + LOG.warning("Grid sampling has set number of samples automatically to 10!") + print("WARNING: Grid sampling has set number of samples automatically to 10!") + + samples = sample(start=self.data[0], stop=self.data[1], count=self.data[2], ftype=self.domain) + if self.dtype == "int": + data = [] + for s in samples: + val = int(np.round(s)) + if len(data) > 0: + if val == data[-1]: continue + data.append(val) + return data + return list(samples) diff --git a/hyppopy/plugins/gridsearch_settings_plugin.yapsy-plugin b/hyppopy/plugins/gridsearch_settings_plugin.yapsy-plugin new file mode 100644 index 0000000..9981474 --- /dev/null +++ b/hyppopy/plugins/gridsearch_settings_plugin.yapsy-plugin @@ -0,0 +1,9 @@ +[Core] +Name = gridsearch +Module = gridsearch_settings_plugin + +[Documentation] +Author = Sven Wanner +Version = 0.1 +Website = +Description = GridSearch Settings Plugin \ No newline at end of file diff --git a/hyppopy/plugins/gridsearch_solver_plugin.py b/hyppopy/plugins/gridsearch_solver_plugin.py new file mode 100644 index 0000000..91dda89 --- /dev/null +++ b/hyppopy/plugins/gridsearch_solver_plugin.py @@ -0,0 +1,84 @@ +# DKFZ +# +# +# Copyright (c) German Cancer Research Center, +# Division of Medical and Biological Informatics. +# All rights reserved. +# +# This software is distributed WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# See LICENSE.txt or http://www.mitk.org for details. 
+# +# Author: Sven Wanner (s.wanner@dkfz.de) + +import os +import logging +from hyppopy.globals import DEBUGLEVEL +LOG = logging.getLogger(os.path.basename(__file__)) +LOG.setLevel(DEBUGLEVEL) + +from pprint import pformat +from yapsy.IPlugin import IPlugin +from sklearn.model_selection import GridSearchCV + +from hyppopy.projectmanager import ProjectManager +from hyppopy.solverpluginbase import SolverPluginBase + + +class gridsearch_Solver(SolverPluginBase, IPlugin): + trials = None + best = None + + def __init__(self): + SolverPluginBase.__init__(self) + LOG.debug("initialized") + + def blackbox_function(self, params): + pass + # status = STATUS_FAIL + # try: + # loss = self.blackbox_function_template(self.data, params) + # if loss is not None: + # status = STATUS_OK + # except Exception as e: + # LOG.error("execution of self.loss(self.data, params) failed due to:\n {}".format(e)) + # status = STATUS_FAIL + # return {'loss': loss, 'status': status} + + def execute_solver(self, parameter): + pass + # LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(parameter))) + # self.trials = Trials() + # + # try: + # self.best = fmin(fn=self.blackbox_function, + # space=parameter, + # algo=tpe.suggest, + # max_evals=ProjectManager.max_iterations, + # trials=self.trials) + # except Exception as e: + # msg = "internal error in hyperopt.fmin occured. {}".format(e) + # LOG.error(msg) + # raise BrokenPipeError(msg) + + def convert_results(self): + pass + # currently converting results in a way that this function returns a dict + # keeping all useful parameter as key/list item. 
This will be automatically + # converted to a pandas dataframe in the solver class + # results = {'duration': [], 'losses': []} + # pset = self.trials.trials[0]['misc']['vals'] + # for p in pset.keys(): + # results[p] = [] + # + # for n, trial in enumerate(self.trials.trials): + # t1 = trial['book_time'] + # t2 = trial['refresh_time'] + # results['duration'].append((t2 - t1).microseconds/1000.0) + # results['losses'].append(trial['result']['loss']) + # pset = trial['misc']['vals'] + # for p in pset.items(): + # results[p[0]].append(p[1][0]) + # return results, self.best diff --git a/hyppopy/plugins/gridsearch_solver_plugin.yapsy-plugin b/hyppopy/plugins/gridsearch_solver_plugin.yapsy-plugin new file mode 100644 index 0000000..efef3f4 --- /dev/null +++ b/hyppopy/plugins/gridsearch_solver_plugin.yapsy-plugin @@ -0,0 +1,9 @@ +[Core] +Name = gridsearch +Module = gridsearch_solver_plugin + +[Documentation] +Author = Sven Wanner +Version = 0.1 +Website = +Description = GridSearch Solver Plugin \ No newline at end of file diff --git a/hyppopy/plugins/optunity_settings_plugin.py b/hyppopy/plugins/optunity_settings_plugin.py index 4a9a9d6..6cce92b 100644 --- a/hyppopy/plugins/optunity_settings_plugin.py +++ b/hyppopy/plugins/optunity_settings_plugin.py @@ -1,117 +1,104 @@ # -*- coding: utf-8 -*- # # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. 
# # Author: Sven Wanner (s.wanner@dkfz.de) import os import logging from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) from pprint import pformat try: import optunity from yapsy.IPlugin import IPlugin except: LOG.warning("optunity package not installed, will ignore this plugin!") print("optunity package not installed, will ignore this plugin!") from hyppopy.settingspluginbase import SettingsPluginBase -from hyppopy.settingsparticle import SettingsParticle +from hyppopy.settingsparticle import split_categorical class optunity_Settings(SettingsPluginBase, IPlugin): def __init__(self): SettingsPluginBase.__init__(self) LOG.debug("initialized") def convert_parameter(self, input_dict): LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(input_dict))) - # define function spliting input dict - # into categorical and non-categorical - def split_categorical(pdict): - categorical = {} - uniform = {} - for name, pset in pdict.items(): - for key, value in pset.items(): - if key == 'domain' and value == 'categorical': - categorical[name] = pset - elif key == 'domain': - uniform[name] = pset - return categorical, uniform - solution_space = {} # split input in categorical and non-categorical data cat, uni = split_categorical(input_dict) # build up dictionary keeping all non-categorical data uniforms = {} for key, value in uni.items(): for key2, value2 in value.items(): if key2 == 'data': uniforms[key] = value2 # build nested categorical structure inner_level = uniforms for key, value in cat.items(): tmp = {} tmp2 = {} for key2, value2 in value.items(): if key2 == 'data': for elem in value2: tmp[elem] = inner_level tmp2[key] = tmp inner_level = tmp2 solution_space = tmp2 return solution_space # class optunity_SettingsParticle(SettingsParticle): # # def __init__(self, name=None, domain=None, dtype=None, data=None): # SettingsParticle.__init__(self, name, domain, dtype, data) # # def convert(self): # if 
self.domain == "uniform": # if self.dtype == "float" or self.dtype == "double": # pass # elif self.dtype == "int": # pass # else: # msg = f"cannot convert the type {self.dtype} in domain {self.domain}" # LOG.error(msg) # raise LookupError(msg) # elif self.domain == "loguniform": # if self.dtype == "float" or self.dtype == "double": # pass # else: # msg = f"cannot convert the type {self.dtype} in domain {self.domain}" # LOG.error(msg) # raise LookupError(msg) # elif self.domain == "normal": # if self.dtype == "float" or self.dtype == "double": # pass # else: # msg = f"cannot convert the type {self.dtype} in domain {self.domain}" # LOG.error(msg) # raise LookupError(msg) # elif self.domain == "categorical": # if self.dtype == 'str': # pass # elif self.dtype == 'bool': # pass diff --git a/hyppopy/plugins/randomsearch_settings_plugin.py b/hyppopy/plugins/randomsearch_settings_plugin.py new file mode 100644 index 0000000..782552b --- /dev/null +++ b/hyppopy/plugins/randomsearch_settings_plugin.py @@ -0,0 +1,100 @@ +# DKFZ +# +# +# Copyright (c) German Cancer Research Center, +# Division of Medical and Biological Informatics. +# All rights reserved. +# +# This software is distributed WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# See LICENSE.txt or http://www.mitk.org for details. 
+# +# Author: Sven Wanner (s.wanner@dkfz.de) + +import os +import logging +import numpy as np +from hyppopy.globals import DEBUGLEVEL +LOG = logging.getLogger(os.path.basename(__file__)) +LOG.setLevel(DEBUGLEVEL) + +from pprint import pformat +from yapsy.IPlugin import IPlugin + + +from hyppopy.settingspluginbase import SettingsPluginBase +from hyppopy.settingsparticle import SettingsParticle + + +class randomsearch_Settings(SettingsPluginBase, IPlugin): + + def __init__(self): + SettingsPluginBase.__init__(self) + LOG.debug("initialized") + + def convert_parameter(self, input_dict): + pass + # LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(input_dict))) + # + # solution_space = {} + # for name, content in input_dict.items(): + # particle = hyperopt_SettingsParticle(name=name) + # for key, value in content.items(): + # if key == 'domain': + # particle.domain = value + # elif key == 'data': + # particle.data = value + # elif key == 'type': + # particle.dtype = value + # solution_space[name] = particle.get() + # return solution_space + + +class randomsearch_SettingsParticle(SettingsParticle): + + def __init__(self, name=None, domain=None, dtype=None, data=None): + SettingsParticle.__init__(self, name, domain, dtype, data) + + def convert(self): + pass + # if self.domain == "uniform": + # if self.dtype == "float" or self.dtype == "double": + # return hp.uniform(self.name, self.data[0], self.data[1]) + # elif self.dtype == "int": + # data = list(np.arange(int(self.data[0]), int(self.data[1]+1))) + # return hp.choice(self.name, data) + # else: + # msg = "cannot convert the type {} in domain {}".format(self.dtype, self.domain) + # LOG.error(msg) + # raise LookupError(msg) + # elif self.domain == "loguniform": + # if self.dtype == "float" or self.dtype == "double": + # return hp.loguniform(self.name, self.data[0], self.data[1]) + # else: + # msg = "cannot convert the type {} in domain {}".format(self.dtype, self.domain) + # LOG.error(msg) + # raise 
LookupError(msg) + # elif self.domain == "normal": + # if self.dtype == "float" or self.dtype == "double": + # return hp.normal(self.name, self.data[0], self.data[1]) + # else: + # msg = "cannot convert the type {} in domain {}".format(self.dtype, self.domain) + # LOG.error(msg) + # raise LookupError(msg) + # elif self.domain == "categorical": + # if self.dtype == 'str': + # return hp.choice(self.name, self.data) + # elif self.dtype == 'bool': + # data = [] + # for elem in self.data: + # if elem == "true" or elem == "True" or elem == 1 or elem == "1": + # data .append(True) + # elif elem == "false" or elem == "False" or elem == 0 or elem == "0": + # data .append(False) + # else: + # msg = "cannot convert the type {} in domain {}, unknown bool type value".format(self.dtype, self.domain) + # LOG.error(msg) + # raise LookupError(msg) + # return hp.choice(self.name, data) diff --git a/hyppopy/plugins/randomsearch_settings_plugin.yapsy-plugin b/hyppopy/plugins/randomsearch_settings_plugin.yapsy-plugin new file mode 100644 index 0000000..27d25fd --- /dev/null +++ b/hyppopy/plugins/randomsearch_settings_plugin.yapsy-plugin @@ -0,0 +1,9 @@ +[Core] +Name = randomsearch +Module = randomsearch_settings_plugin + +[Documentation] +Author = Sven Wanner +Version = 0.1 +Website = https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html +Description = RandomSearch Settings Plugin \ No newline at end of file diff --git a/hyppopy/plugins/randomsearch_solver_plugin.py b/hyppopy/plugins/randomsearch_solver_plugin.py new file mode 100644 index 0000000..03b3f41 --- /dev/null +++ b/hyppopy/plugins/randomsearch_solver_plugin.py @@ -0,0 +1,84 @@ +# DKFZ +# +# +# Copyright (c) German Cancer Research Center, +# Division of Medical and Biological Informatics. +# All rights reserved. +# +# This software is distributed WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. 
# --- hyppopy/plugins/randomsearch_solver_plugin.py ---


class randomsearch_Solver(SolverPluginBase, IPlugin):
    """Solver plugin stub for random search.

    All three solver hooks are placeholders that return None. The drafts
    commented out in the original were taken from the hyperopt solver
    (``Trials``, ``fmin``, ``tpe.suggest``) and are not active.
    """

    # class level defaults; nothing in this stub assigns them yet
    trials = None
    best = None

    def __init__(self):
        SolverPluginBase.__init__(self)
        LOG.debug("initialized")

    def blackbox_function(self, params):
        """Placeholder: ignores ``params`` and returns None."""
        # TODO: evaluate the blackbox function and report the loss
        return None

    def execute_solver(self, parameter):
        """Placeholder: ignores ``parameter`` and returns None."""
        # TODO: run the random search over the given solution space
        return None

    def convert_results(self):
        """Placeholder: returns None instead of a results dict."""
        # TODO: return the per-trial results and the best parameter set
        return None
This will be automatically + # converted to a pandas dataframe in the solver class + # results = {'duration': [], 'losses': []} + # pset = self.trials.trials[0]['misc']['vals'] + # for p in pset.keys(): + # results[p] = [] + # + # for n, trial in enumerate(self.trials.trials): + # t1 = trial['book_time'] + # t2 = trial['refresh_time'] + # results['duration'].append((t2 - t1).microseconds/1000.0) + # results['losses'].append(trial['result']['loss']) + # pset = trial['misc']['vals'] + # for p in pset.items(): + # results[p[0]].append(p[1][0]) + # return results, self.best diff --git a/hyppopy/plugins/randomsearch_solver_plugin.yapsy-plugin b/hyppopy/plugins/randomsearch_solver_plugin.yapsy-plugin new file mode 100644 index 0000000..e465d93 --- /dev/null +++ b/hyppopy/plugins/randomsearch_solver_plugin.yapsy-plugin @@ -0,0 +1,9 @@ +[Core] +Name = randomsearch +Module = randomsearch_solver_plugin + +[Documentation] +Author = Sven Wanner +Version = 0.1 +Website = https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html +Description = RandomSearch Solver Plugin \ No newline at end of file diff --git a/hyppopy/settingsparticle.py b/hyppopy/settingsparticle.py index fc4c5cf..6d82b48 100644 --- a/hyppopy/settingsparticle.py +++ b/hyppopy/settingsparticle.py @@ -1,90 +1,104 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. 
# --- hyppopy/settingsparticle.py ---


def split_categorical(pdict):
    """Split a hyperparameter dict into categorical and non-categorical parts.

    Every value of ``pdict`` is expected to be a parameter description
    (a dict). Descriptions whose ``'domain'`` entry equals ``'categorical'``
    end up in the first result, all other descriptions that carry a
    ``'domain'`` entry in the second. Descriptions without a ``'domain'``
    key appear in neither result.

    :param pdict: mapping parameter name -> parameter description
    :return: tuple ``(categorical, uniform)``; both dicts reference the
        original description objects (no copies are made)
    """
    categorical = {}
    uniform = {}
    for name, pset in pdict.items():
        if 'domain' not in pset:
            continue
        if pset['domain'] == 'categorical':
            categorical[name] = pset
        else:
            uniform[name] = pset
    return categorical, uniform


class SettingsParticle(object):
    """Base class describing one hyperparameter (name, domain, dtype, data).

    Subclasses implement :meth:`convert`, which turns the description into
    whatever representation their solver needs; :meth:`get` validates that
    the description is complete before delegating to it.
    """

    # domain names accepted by the ``domain`` setter
    domains = ["uniform", "loguniform", "normal", "categorical"]
    _name = None
    _domain = None
    _dtype = None
    _data = None

    def __init__(self, name=None, domain=None, dtype=None, data=None):
        # go through the property setters so that the domain gets validated
        if name is not None:
            self.name = name
        if domain is not None:
            self.domain = domain
        if dtype is not None:
            self.dtype = dtype
        if data is not None:
            self.data = data

    @abc.abstractmethod
    def convert(self):
        """Convert the particle; has to be provided by the subclass."""
        raise NotImplementedError("the user has to implement this function")

    def get(self):
        """Return ``self.convert()`` after checking that no field is missing.

        :raises LookupError: if name, domain, dtype or data is None. When
            several fields are missing, the message of the last failing
            check is reported.
        """
        msg = None
        if self.name is None:
            msg = "cannot convert unnamed parameter"
        if self.domain is None:
            msg = "cannot convert parameter of empty domain"
        if self.dtype is None:
            msg = "cannot convert parameter with unknown dtype"
        if self.data is None:
            msg = "cannot convert parameter having no data"
        if msg is not None:
            LOG.error(msg)
            raise LookupError(msg)
        return self.convert()

    @property
    def name(self):
        return self._name

    @name.setter
    def name(self, value):
        self._name = value

    @property
    def domain(self):
        return self._domain

    @domain.setter
    def domain(self, value):
        # only the names listed in ``domains`` are accepted
        if value not in self.domains:
            msg = "domain named {} not available, check your domain name or implement new domain!".format(value)
            LOG.error(msg)
            raise LookupError(msg)
        self._domain = value

    @property
    def dtype(self):
        return self._dtype

    @dtype.setter
    def dtype(self, value):
        self._dtype = value

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, value):
        self._data = value


# --- hyppopy/settingspluginbase.py ---


class SettingsPluginBase(object):
    """Base class of the settings plugins.

    Holds the hyperparameter description as a dict and hands it to the
    subclass specific :meth:`convert_parameter`.
    """

    _data = None
    _name = None

    def __init__(self):
        self._data = {}

    @abc.abstractmethod
    def convert_parameter(self, input_dict):
        """Convert ``input_dict``; has to be provided by the subclass."""
        raise NotImplementedError('users must define convert_parameter to use this base class')

    def get_hyperparameter(self):
        """Return the stored hyperparameters converted by the subclass."""
        return self.convert_parameter(self.data)

    def set_hyperparameter(self, input_data):
        """Store a deep copy of ``input_data`` as the new hyperparameter set.

        The previously stored dict is replaced, not cleared in place: it may
        be a dict the caller assigned by reference (the ``data`` setter does
        not copy), or even ``input_data`` itself, and clearing it first
        would wipe the caller's data before it is copied.

        :raises IOError: if ``input_data`` is neither a dict nor a DeepDict
        """
        self.data = copy.deepcopy(input_data)

    def _lookup(self, name, field):
        """Return ``self.data[name][field]`` or raise LookupError for an unknown name."""
        if name not in self.data:
            msg = "hyperparameter named {} not found!".format(name)
            LOG.error(msg)
            raise LookupError(msg)
        return self.data[name][field]

    def get_type_of(self, name):
        """Return the 'type' entry of the hyperparameter ``name``."""
        return self._lookup(name, "type")

    def get_domain_of(self, name):
        """Return the 'domain' entry of the hyperparameter ``name``."""
        return self._lookup(name, "domain")

    def get_data_of(self, name):
        """Return the 'data' entry of the hyperparameter ``name``."""
        return self._lookup(name, "data")

    def read(self, fname):
        # NOTE(review): the data setter always stores a plain dict, which has
        # no from_file(); this looks like it expects a DeepDict - confirm.
        self.data.clear()
        self.data.from_file(fname)

    def write(self, fname):
        # NOTE(review): same concern as in read() for to_file() - confirm.
        self.data.to_file(fname)

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, value):
        # a dict is stored by reference, a DeepDict is unwrapped to its .data
        if isinstance(value, dict):
            self._data = value
        elif isinstance(value, DeepDict):
            self._data = value.data
        else:
            raise IOError("unexpected input type({}) for data, needs to be of type dict or DeepDict!".format(type(value)))

    @property
    def name(self):
        return self._name

    @name.setter
    def name(self, value):
        if not isinstance(value, str):
            msg = "Invalid input, str type expected for value, got {} instead".format(type(value))
            LOG.error(msg)
            raise IOError(msg)
        self._name = value
# --- hyppopy/tests/test_settings_plugins.py ---


class ProjectManagerTestSuite(unittest.TestCase):
    """Tests for the gridsearch settings plugin and its settings particle."""

    def setUp(self):
        # hyperparameter descriptions; 'data' holds the range bounds followed
        # by a third value for the non-categorical domains
        # (presumably the number of grid steps - confirm against the plugin)
        self.hp = {
            'UniformFloat': {
                'domain': 'uniform',
                'data': [0, 1, 10],
                'type': 'float',
            },
            'UniformInt': {
                'domain': 'uniform',
                'data': [0, 7, 10],
                'type': 'int',
            },
            'NormalFloat': {
                'domain': 'normal',
                'data': [0, 1, 10],
                'type': 'float',
            },
            'NormalInt': {
                'domain': 'normal',
                'data': [0, 10, 10],
                'type': 'int',
            },
            # the original listed this identical 'LogFloat' entry twice
            'LogFloat': {
                'domain': 'loguniform',
                'data': [-5, 5, 10],
                'type': 'float',
            },
            'LogInt': {
                'domain': 'loguniform',
                'data': [0, 6, 10],
                'type': 'int',
            },
            'CategoricalStr': {
                'domain': 'categorical',
                'data': ['a', 'b'],
                'type': 'str',
            },
            'CategoricalInt': {
                'domain': 'categorical',
                'data': [0, 1],
                'type': 'int',
            },
        }

        # expected particle output per hyperparameter
        self.truth = {
            'UniformFloat': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
            'UniformInt': [0, 1, 2, 3, 4, 5, 6, 7, 8],
            'NormalFloat': [0.0, 0.2592443381276233, 0.3673134565097225, 0.4251586871937128, 0.4649150940720099, 0.5,
                            0.5350849059279901, 0.5748413128062873, 0.6326865434902775, 0.7407556618723767, 1.0],
            'NormalInt': [0, 3, 4, 5, 6, 7, 10],
            'LogFloat': [0.006737946999085467, 0.01831563888873418, 0.049787068367863944, 0.1353352832366127, 0.36787944117144233,
                         1.0, 2.718281828459045, 7.38905609893065, 20.085536923187668, 54.598150033144236, 148.4131591025766],
            'LogInt': [1, 2, 3, 6, 11, 20, 37, 67, 122, 221, 403],
            'CategoricalStr': ['a', 'b'],
            'CategoricalInt': [0, 1],
        }

    def test_gridsearch_settings(self):
        gss = gridsearch_Settings()
        gss.set_hyperparameter(self.hp)
        res = gss.get_hyperparameter()
        # TODO check the content of res; for now this only verifies that the
        # conversion runs without raising

    def test_gridsearch_particle(self):
        for name, spec in self.hp.items():
            with self.subTest(name=name):
                # the fixture stores the dtype under the key 'type'
                gsp = gridsearch_SettingsParticle(name=name,
                                                  domain=spec['domain'],
                                                  dtype=spec['type'],
                                                  data=spec['data'])
                values = gsp.get()
                # index into values on purpose: a result shorter than the
                # expectation fails instead of being silently truncated
                for n, expected in enumerate(self.truth[name]):
                    self.assertAlmostEqual(values[n], expected)

    def tearDown(self):
        pass


if __name__ == '__main__':
    unittest.main()


# --- hyppopy/workflows/gradientboost_usecase/gradientboost_usecase.py ---


class gradientboost_usecase(WorkflowBase):
    """Workflow optimizing the hyperparameters of a GradientBoostingClassifier."""

    def setup(self, **kwargs):
        """Load data and labels named in the ProjectManager and pass them to the solver."""
        dl = SimpleDataLoader()
        dl.start(path=ProjectManager.data_path,
                 data_name=ProjectManager.data_name,
                 labels_name=ProjectManager.labels_name)
        self.solver.set_data(dl.data)

    def blackbox_function(self, data, params):
        """Return the loss of a classifier built from ``params``.

        :param data: indexable with the samples at position 0 and the labels
            at position 1
        :param params: keyword arguments for GradientBoostingClassifier
        :return: negated mean of the 3-fold cross validation scores, so that
            a better score yields a smaller loss
        """
        clf = GradientBoostingClassifier(**params)
        return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean()