diff --git a/__main__.py b/__main__.py
index 552b78d..dae4064 100644
--- a/__main__.py
+++ b/__main__.py
@@ -1,95 +1,95 @@
#!/usr/bin/env python
#
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
import os
import sys
ROOT = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
sys.path.append(ROOT)
from hyppopy.projectmanager import ProjectManager
from hyppopy.workflows.svc_usecase.svc_usecase import svc_usecase
from hyppopy.workflows.knc_usecase.knc_usecase import knc_usecase
-from hyppopy.workflows.lda_usecase.lda_usecase import lda_usecase
+# The former lda_usecase package/class is renamed to adaboost_usecase in this change;
+# the old name is kept as an alias so the "lda_usecase" workflow entry below still resolves.
+from hyppopy.workflows.adaboost_usecase.adaboost_usecase import adaboost_usecase as lda_usecase
from hyppopy.workflows.unet_usecase.unet_usecase import unet_usecase
from hyppopy.workflows.randomforest_usecase.randomforest_usecase import randomforest_usecase
from hyppopy.workflows.imageregistration_usecase.imageregistration_usecase import imageregistration_usecase
import os
import sys
import time
import argparse
def print_warning(msg):
    """Print a warning banner followed by *msg*, then terminate the program.

    The process is ended through ``sys.exit()`` without an explicit exit code.
    """
    print("\n!!!!! WARNING !!!!!", msg, sep="\n")
    sys.exit()
def args_check(args):
    """Validate the parsed command line arguments.

    Each failed check is reported through ``print_warning``, which ends the
    program; the function returns ``None`` when a workflow is given and the
    config file exists.
    """
    workflow_given = bool(args.workflow)
    if not workflow_given:
        print_warning("No workflow specified, check --help")

    config_given = bool(args.config)
    if not config_given:
        print_warning("Missing config parameter, check --help")

    config_found = os.path.isfile(args.config)
    if not config_found:
        print_warning(f"Couldn't find configfile ({args.config}), please check your input --config")
if __name__ == "__main__":
    # Command line entry point: parse the arguments, load the config, run the
    # selected workflow and print the best parameter set that was found.
    parser = argparse.ArgumentParser(description='UNet Hyppopy UseCase Example Optimization.')
    parser.add_argument('-w', '--workflow', type=str, help='workflow to be executed')
    parser.add_argument('-o', '--output', type=str, default=None, help='output path to store result')
    # Adjacent string literals are concatenated verbatim, so every fragment
    # carries its own trailing space to keep the help text readable.
    parser.add_argument('-c', '--config', type=str, help='config filename, .xml or .json formats are supported. '
                                                         'pass a full path filename or the filename only if the '
                                                         'configfile is in the data folder')

    args = parser.parse_args()
    args_check(args)

    ProjectManager.read_config(args.config)
    if args.output is not None:
        ProjectManager.register_member("output_dir", args.output)

    # Maps the --workflow value to the use case class that implements it.
    workflows = {
        "svc_usecase": svc_usecase,
        "randomforest_usecase": randomforest_usecase,
        "knc_usecase": knc_usecase,
        "lda_usecase": lda_usecase,
        "unet_usecase": unet_usecase,
        "imageregistration_usecase": imageregistration_usecase,
    }
    if args.workflow not in workflows:
        print("No workflow called {} found!".format(args.workflow))
        sys.exit()
    uc = workflows[args.workflow]()

    print("\nStart optimization...")
    # process_time() reports the CPU time consumed by this process.
    start = time.process_time()
    uc.run(save=True)
    end = time.process_time()

    print("Finished optimization!\n")
    print("Total Time: {}s\n".format(end-start))
    res, best = uc.get_results()
    print("---- Optimal Parameter -----\n")
    for key, value in best.items():
        print(" - {}\t:\t{}".format(key, value))
diff --git a/examples/performance_test.py b/examples/performance_test.py
new file mode 100644
index 0000000..26900c0
--- /dev/null
+++ b/examples/performance_test.py
@@ -0,0 +1,350 @@
+# DKFZ
+#
+#
+# Copyright (c) German Cancer Research Center,
+# Division of Medical and Biological Informatics.
+# All rights reserved.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.
+#
+# See LICENSE.txt or http://www.mitk.org for details.
+#
+# Author: Sven Wanner (s.wanner@dkfz.de)
+
+import os
+import shutil
+import tempfile
+import numpy as np
+import pandas as pd
+import seaborn as sns
+from sklearn.svm import SVC
+import matplotlib.pyplot as plt
+from sklearn.metrics import accuracy_score
+from sklearn.ensemble import AdaBoostClassifier
+from sklearn.datasets import load_breast_cancer
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import train_test_split
+
+from hyppopy.projectmanager import ProjectManager
+from hyppopy.workflows.svc_usecase.svc_usecase import svc_usecase
+from hyppopy.workflows.knc_usecase.knc_usecase import knc_usecase
+from hyppopy.workflows.adaboost_usecase.adaboost_usecase import adaboost_usecase
+from hyppopy.workflows.randomforest_usecase.randomforest_usecase import randomforest_usecase
+
+sns.set(style="ticks")
+sns.set(style="darkgrid")
+
+
+class PerformanceTest(object):
+    """Benchmark hyppopy use cases on the scikit-learn breast cancer dataset.
+
+    For every iteration budget in ``iter_sequence`` a use case is optimized,
+    a scikit-learn classifier is trained with the best parameters found and
+    scored on a held-out test split. Per classifier the collected results are
+    written into ``root`` as a CSV table and as a pair plot image.
+    """
+
+    def __init__(self):
+        # Data, result tables and figures all live in one folder below the temp dir.
+        self.root = os.path.join(tempfile.gettempdir(), 'test_data')
+        if not os.path.isdir(self.root):
+            os.makedirs(self.root)
+        self.test = None
+        self.train = None
+        self.config = None
+        # Values written to "max_iterations", benchmarked one after another.
+        self.iter_sequence = [5, 10, 25, 50, 100, 150, 300, 500]
+
+    def run(self):
+        """Prepare the data and execute the enabled benchmarks."""
+        self.set_up()
+        #self.run_svc_usecase()
+        self.run_randomforest_usecase()
+        self.run_adaboost_usecase()
+        self.run_knc_usecase()
+        #self.clean_up()
+
+    def set_hyperparameter(self, params):
+        """Store *params* as the ``hyperparameter`` section of the config."""
+        self.config["hyperparameter"] = params
+
+    def set_iterations(self, value):
+        """Set the solver plugin's ``max_iterations`` entry to *value*."""
+        self.config["settings"]["solver_plugin"]["max_iterations"] = value
+
+    def plot(self, df, name=""):
+        """Show a seaborn pair plot of *df* titled *name* and return the grid."""
+        sns_plot = sns.pairplot(df, height=1.8, aspect=1.8)
+
+        fig = sns_plot.fig
+        fig.subplots_adjust(top=0.93, wspace=0.3)
+        fig.suptitle(name, fontsize=14)
+        plt.show()
+        return sns_plot
+
+    def set_up(self):
+        """Split the dataset, persist the training part and build the base config."""
+        breast_cancer_data = load_breast_cancer()
+        x = breast_cancer_data.data
+        y = breast_cancer_data.target
+        # Fixed random_state keeps the split identical between runs.
+        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15, random_state=23)
+
+        # The config below points at these files through data_path/data_name/labels_name.
+        x_train_fname = os.path.join(self.root, 'x_train.npy')
+        y_train_fname = os.path.join(self.root, 'y_train.npy')
+        if not os.path.isfile(x_train_fname):
+            np.save(x_train_fname, x_train)
+        if not os.path.isfile(y_train_fname):
+            np.save(y_train_fname, y_train)
+
+        self.train = [x_train, y_train]
+        self.test = [x_test, y_test]
+        self.config = {
+            "hyperparameter": {},
+            "settings": {
+                "solver_plugin": {
+                    "max_iterations": 50,
+                    "use_plugin": "hyperopt",
+                    "output_dir": os.path.join(self.root, 'test_results')
+                },
+                "custom": {
+                    "data_path": self.root,
+                    "data_name": "x_train.npy",
+                    "labels_name": "y_train.npy"
+                }
+            }}
+
+    @staticmethod
+    def _decode_best(hp, best):
+        """Turn the optimizer's best parameters into classifier keyword arguments.
+
+        For entries whose domain is ``categorical`` the value in *best* is used
+        as an index into the entry's ``data`` list; all other values are passed
+        through unchanged. Only parameters named in *hp* are returned.
+        """
+        kwargs = {}
+        for name, spec in hp.items():
+            value = best[name]
+            if spec["domain"] == "categorical":
+                value = spec["data"][value]
+            kwargs[name] = value
+        return kwargs
+
+    def _benchmark(self, title, usecase_cls, classifier_cls, hp):
+        """Run one use case for every iteration budget and store the results.
+
+        :param title: headline printed before the benchmark starts
+        :param usecase_cls: hyppopy use case class performing the optimization
+        :param classifier_cls: scikit-learn classifier evaluated with the best parameters
+        :param hp: hyperparameter search space handed to the use case
+        """
+        print("\n")
+        print("*" * 30)
+        print(title)
+        print("*" * 30)
+        print("\n")
+
+        self.set_hyperparameter(hp)
+        clf_name = classifier_cls.__name__
+
+        # Column order of the result table: iterations, the parameters, accuracy.
+        results = {"iterations": []}
+        for name in hp:
+            results[name] = []
+        results["accuracy"] = []
+
+        for n in self.iter_sequence:
+            self.set_iterations(n)
+            ProjectManager.set_config(self.config)
+            uc = usecase_cls()
+            uc.run(save=False)
+            res, best = uc.get_results()
+            clf = classifier_cls(**self._decode_best(hp, best))
+            clf.fit(self.train[0], self.train[1])
+            test_predictions = clf.predict(self.test[0])
+            acc = accuracy_score(self.test[1], test_predictions)
+
+            results['accuracy'].append(acc)
+            results['iterations'].append(n)
+            # The table keeps the raw optimizer output (indices for categoricals).
+            for name in hp:
+                results[name].append(best[name])
+
+            print("=" * 30)
+            print("Number of iterations: {}".format(n))
+            print("Classifier: {}".format(clf_name))
+            print("=" * 30)
+            print("=" * 30)
+            for key, value in best.items():
+                print(key, ":", value)
+            print("=" * 30)
+            print("Accuracy: {:.4%}".format(acc))
+            print("=" * 30)
+            print("\n")
+
+        df = pd.DataFrame.from_dict(results)
+        df.to_csv(os.path.join(self.root, "final_{}.csv".format(clf_name)))
+        sns_plot = self.plot(df, name="Classifier: {}".format(clf_name))
+        sns_plot.savefig(os.path.join(self.root, "final_{}.png".format(clf_name)))
+
+    def run_svc_usecase(self):
+        """Benchmark the SVC use case (parameters ``C`` and ``kernel``)."""
+        hp = {
+            "C": {
+                "domain": "uniform",
+                "data": [0.0001, 300.0],
+                "type": "float"
+            },
+            "kernel": {
+                "domain": "categorical",
+                "data": ["linear", "poly", "rbf"],
+                "type": "str"
+            }
+        }
+        self._benchmark("SVC Classifier", svc_usecase, SVC, hp)
+
+    def run_randomforest_usecase(self):
+        """Benchmark the random forest use case."""
+        hp = {
+            "n_estimators": {
+                "domain": "uniform",
+                "data": [3, 500],
+                "type": "int"
+            },
+            "max_depth": {
+                "domain": "uniform",
+                "data": [1, 50],
+                "type": "int"
+            },
+            "max_features": {
+                "domain": "categorical",
+                "data": ["auto", "sqrt", "log2"],
+                "type": "str"
+            }
+        }
+        self._benchmark("RandomForest Classifier", randomforest_usecase, RandomForestClassifier, hp)
+
+    def run_adaboost_usecase(self):
+        """Benchmark the AdaBoost use case."""
+        hp = {
+            "n_estimators": {
+                "domain": "uniform",
+                "data": [1, 500],
+                "type": "int"
+            },
+            "learning_rate": {
+                "domain": "uniform",
+                "data": [0.001, 10],
+                "type": "float"
+            }
+        }
+        self._benchmark("AdaBoost Classifier", adaboost_usecase, AdaBoostClassifier, hp)
+
+    def run_knc_usecase(self):
+        """Benchmark the k-nearest-neighbors use case."""
+        hp = {
+            "n_neighbors": {
+                "domain": "uniform",
+                "data": [1, 100],
+                "type": "int"
+            },
+            "weights": {
+                "domain": "categorical",
+                "data": ["uniform", "distance"],
+                "type": "str"
+            },
+            "algorithm": {
+                "domain": "categorical",
+                "data": ["auto", "ball_tree", "kd_tree", "brute"],
+                "type": "str"
+            }
+        }
+        self._benchmark("KN Classifier", knc_usecase, KNeighborsClassifier, hp)
+
+    def clean_up(self):
+        """Delete the working folder together with everything stored in it."""
+        if os.path.isdir(self.root):
+            shutil.rmtree(self.root)
+
+
+if __name__ == "__main__":
+ # Script entry point: build the benchmark harness and run the use cases enabled in run().
+ performance_test = PerformanceTest()
+ performance_test.run()
diff --git a/hyppopy/plugins/optunity_solver_plugin.py b/hyppopy/plugins/optunity_solver_plugin.py
index 1039874..82f4215 100644
--- a/hyppopy/plugins/optunity_solver_plugin.py
+++ b/hyppopy/plugins/optunity_solver_plugin.py
@@ -1,67 +1,68 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
import os
import logging
from hyppopy.globals import DEBUGLEVEL
LOG = logging.getLogger(os.path.basename(__file__))
LOG.setLevel(DEBUGLEVEL)
from pprint import pformat
import optunity
from yapsy.IPlugin import IPlugin
from hyppopy.projectmanager import ProjectManager
from hyppopy.solverpluginbase import SolverPluginBase
class optunity_Solver(SolverPluginBase, IPlugin):
+
    """Solver plugin that minimizes the blackbox function via optunity.minimize_structured."""

    # Values returned by the last optunity.minimize_structured call.
    solver_info = None
    trials = None
    best = None
    # One 'ok'/'fail' entry per blackbox evaluation; reset by execute_solver().
    status = None

    def __init__(self):
        SolverPluginBase.__init__(self)
        LOG.debug("initialized")

    def blackbox_function(self, **params):
        """Evaluate the loss for one parameter candidate.

        Parameters whose configured type is ``'int'`` are rounded and cast to
        ``int`` before the blackbox template is called. Any exception raised
        while computing the loss is logged, recorded as ``'fail'`` and turned
        into the penalty value ``1e9`` so the optimization can continue.

        :param params: candidate values keyed by parameter name
        :return: the computed loss, or ``1e9`` on failure
        """
        try:
            for key in params:
                if self.settings.get_type_of(key) == 'int':
                    params[key] = int(round(params[key]))
            loss = self.blackbox_function_template(self.data, params)
            self.status.append('ok')
            return loss
        except Exception as e:
            LOG.error("computing loss failed due to:\n {}".format(e))
            self.status.append('fail')
            return 1e9

    def execute_solver(self, parameter):
        """Run optunity on the search space *parameter* and store its results.

        :param parameter: search space passed to ``optunity.minimize_structured``
        :raises BrokenPipeError: when optunity fails; the original exception is
            attached as the cause
        """
        LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(parameter)))
        self.status = []
        try:
            self.best, self.trials, self.solver_info = optunity.minimize_structured(f=self.blackbox_function,
                                                                                    num_evals=ProjectManager.max_iterations,
                                                                                    search_space=parameter)
        except Exception as e:
            msg = "internal error in optunity.minimize_structured occured. {}".format(e)
            LOG.error(msg)
            # Chain explicitly so the optunity traceback is kept as the direct cause.
            raise BrokenPipeError(msg) from e

    def convert_results(self):
        """Return the evaluated arguments with their losses, plus the best parameters.

        Note: the ``'losses'`` entry is added to the ``call_log['args']`` object
        of the stored trials itself, not to a copy.

        :return: tuple ``(results, best)``
        """
        results = self.trials.call_log['args']
        results['losses'] = self.trials.call_log['values']
        return results, self.best
diff --git a/hyppopy/resultviewer.py b/hyppopy/resultviewer.py
index 52da45a..d39c640 100644
--- a/hyppopy/resultviewer.py
+++ b/hyppopy/resultviewer.py
@@ -1,83 +1,87 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import logging
from hyppopy.globals import DEBUGLEVEL
LOG = logging.getLogger(os.path.basename(__file__))
LOG.setLevel(DEBUGLEVEL)
sns.set(style="darkgrid")
class ResultViewer(object):
    """Load an optimization result table from CSV and render summary plots."""

    def __init__(self, fname=None, save_only=False):
        """
        :param fname: optional CSV file that is read immediately
        :param save_only: when True, show() does not open plot windows
        """
        self.df = None
        self.has_duration = False
        self.hyperparameter = None
        self.save_only = save_only
        self.path = None
        self.appendix = None
        if fname is not None:
            self.read(fname)

+    def close_all(self):
+        """Close all open matplotlib figures."""
+        plt.close('all')
+
    def read(self, fname):
        """Read the result table *fname* and derive output path and file appendix.

        The appendix is the last ``_``-separated token of the file name with
        the extension removed; it is reused in the names of the saved figures.
        """
        self.path = os.path.dirname(fname)
        # splitext removes the extension whatever its length is.
        stem = os.path.splitext(os.path.basename(fname))[0]
        self.appendix = stem.split("_")[-1]
        self.df = pd.read_csv(fname, index_col=0)
        # Every column except these two is treated as a hyperparameter.
        const_data = ["duration", "losses"]
        hyperparameter_columns = [item for item in self.df.columns if item not in const_data]
        self.hyperparameter = pd.DataFrame()
        for key in hyperparameter_columns:
            self.hyperparameter[key] = self.df[key]
        self.has_duration = "duration" in self.df.columns

    def _save_plot(self, sns_plot, prefix):
        """Save *sns_plot* as ``<prefix><appendix>.png`` next to the result file.

        :raises IOError: when saving fails; the original exception is the cause
        """
        save_name = os.path.join(self.path, prefix + self.appendix + ".png")
        try:
            sns_plot.savefig(save_name)
        except Exception as e:
            msg = "failed to save file {}, reason {}".format(save_name, e)
            LOG.error(msg)
            raise IOError(msg) from e

    def show(self, save=True):
        """Render the duration-vs-loss plot (if durations exist) and the pair plot.

        :param save: when True, each figure is also written as PNG
        :raises IOError: when a figure cannot be saved
        """
        if self.has_duration:
            sns_plot = sns.jointplot(y="duration", x="losses", data=self.df, kind="kde")
            if not self.save_only:
                plt.show()
            if save:
                self._save_plot(sns_plot, "t_vs_loss_")

        sns_plot = sns.pairplot(self.df, height=1.8, aspect=1.8,
                                plot_kws=dict(edgecolor="k", linewidth=0.5),
                                diag_kind="kde", diag_kws=dict(shade=True))

        fig = sns_plot.fig
        fig.subplots_adjust(top=0.93, wspace=0.3)
        fig.suptitle('Pairwise Plots', fontsize=14)
        if not self.save_only:
            plt.show()
        if save:
            self._save_plot(sns_plot, "matrixview_")
+
diff --git a/hyppopy/tests/data/Iris/rf_config.json b/hyppopy/tests/data/Iris/rf_config.json
index baa11c3..92a7b99 100644
--- a/hyppopy/tests/data/Iris/rf_config.json
+++ b/hyppopy/tests/data/Iris/rf_config.json
@@ -1,44 +1,29 @@
{"hyperparameter": {
"n_estimators": {
"domain": "uniform",
"data": "[3,500]",
"type": "int"
},
"criterion": {
"domain": "categorical",
"data": "[gini,entropy]",
"type": "str"
},
"max_depth": {
"domain": "uniform",
"data": "[3, 50]",
"type": "int"
- },
- "min_samples_split": {
- "domain": "uniform",
- "data": "[0.0001,1]",
- "type": "float"
- },
- "min_samples_leaf": {
- "domain": "uniform",
- "data": "[0.0001,0.5]",
- "type": "float"
- },
- "max_features": {
- "domain": "categorical",
- "data": "[auto,sqrt,log2]",
- "type": "str"
}
},
"settings": {
"solver_plugin": {
"max_iterations": "3",
"use_plugin" : "optunity",
"output_dir": "D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris"
},
"custom": {
"data_path": "D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris",
"data_name": "train_data.npy",
"labels_name": "train_labels.npy"
}
}}
\ No newline at end of file
diff --git a/hyppopy/tests/data/Iris/rf_config.xml b/hyppopy/tests/data/Iris/rf_config.xml
index 23c7747..8646864 100644
--- a/hyppopy/tests/data/Iris/rf_config.xml
+++ b/hyppopy/tests/data/Iris/rf_config.xml
@@ -1,46 +1,31 @@
uniform
[3,200]
int
categorical
[gini,entropy]
str
uniform
[3, 50]
int
-
- uniform
- [0.0001,1]
- float
-
-
- uniform
- [0.0001,0.5]
- float
-
-
- categorical
- [auto,sqrt,log2]
- str
-
3
optunity
D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris
D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris
train_data.npy
train_labels.npy
\ No newline at end of file
diff --git a/hyppopy/tests/data/Iris/svc_config.json b/hyppopy/tests/data/Iris/svc_config.json
index 02c4fd4..1a19aab 100644
--- a/hyppopy/tests/data/Iris/svc_config.json
+++ b/hyppopy/tests/data/Iris/svc_config.json
@@ -1,34 +1,29 @@
{"hyperparameter": {
"C": {
"domain": "uniform",
"data": "[0,20]",
"type": "float"
},
"gamma": {
"domain": "uniform",
"data": "[0.0001,20.0]",
"type": "float"
},
"kernel": {
"domain": "categorical",
- "data": "[linear, sigmoid, poly, rbf]",
- "type": "str"
- },
- "decision_function_shape": {
- "domain": "categorical",
- "data": "[ovo,ovr]",
+ "data": "[linear, poly, rbf]",
"type": "str"
}
},
"settings": {
"solver_plugin": {
"max_iterations": "3",
"use_plugin" : "optunity",
"output_dir": "D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris"
},
"custom": {
"data_path": "D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris",
"data_name": "train_data.npy",
"labels_name": "train_labels.npy"
}
}}
\ No newline at end of file
diff --git a/hyppopy/tests/data/Iris/svc_config.xml b/hyppopy/tests/data/Iris/svc_config.xml
index cc3bbca..0f018e6 100644
--- a/hyppopy/tests/data/Iris/svc_config.xml
+++ b/hyppopy/tests/data/Iris/svc_config.xml
@@ -1,36 +1,31 @@
uniform
[0,20]
float
uniform
[0.0001,20.0]
float
categorical
- [linear,sigmoid,poly,rbf]
+ [linear,poly,rbf]
str
-
- categorical
- [ovo,ovr]
- str
-
3
hyperopt
D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris
D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris
train_data.npy
train_labels.npy
\ No newline at end of file
diff --git a/hyppopy/tests/data/Titanic/svc_config.json b/hyppopy/tests/data/Titanic/svc_config.json
index 947ed37..67d9eac 100644
--- a/hyppopy/tests/data/Titanic/svc_config.json
+++ b/hyppopy/tests/data/Titanic/svc_config.json
@@ -1,34 +1,29 @@
{"hyperparameter": {
"C": {
"domain": "uniform",
"data": "[0,20]",
"type": "float"
},
"gamma": {
"domain": "uniform",
"data": "[0.0001,20.0]",
"type": "float"
},
"kernel": {
"domain": "categorical",
- "data": "[linear, sigmoid, poly, rbf]",
- "type": "str"
- },
- "decision_function_shape": {
- "domain": "categorical",
- "data": "[ovo,ovr]",
+ "data": "[linear, poly, rbf]",
"type": "str"
}
},
"settings": {
"solver_plugin": {
"max_iterations": "3",
"use_plugin" : "hyperopt",
"output_dir": "D:/Projects/Python/hyppopy/hyppopy/tests/data/Titanic"
},
"custom": {
"data_path": "D:/Projects/Python/hyppopy/hyppopy/tests/data/Titanic",
"data_name": "train_cleaned.csv",
"labels_name": "Survived"
}
}}
\ No newline at end of file
diff --git a/hyppopy/tests/data/Titanic/svc_config.xml b/hyppopy/tests/data/Titanic/svc_config.xml
index 094fcd1..1d491a3 100644
--- a/hyppopy/tests/data/Titanic/svc_config.xml
+++ b/hyppopy/tests/data/Titanic/svc_config.xml
@@ -1,36 +1,31 @@
uniform
[0,20]
float
uniform
[0.0001,20.0]
float
categorical
- [linear,sigmoid,poly,rbf]
+ [linear,poly,rbf]
str
-
- categorical
- [ovo,ovr]
- str
-
3
optunity
D:/Projects/Python/hyppopy/hyppopy/tests/data/Titanic
D:/Projects/Python/hyppopy/hyppopy/tests/data/Titanic
train_cleaned.csv
Survived
\ No newline at end of file
diff --git a/hyppopy/tests/test_usecases.py b/hyppopy/tests/test_usecases.py
index 0a80e6b..4846a4b 100644
--- a/hyppopy/tests/test_usecases.py
+++ b/hyppopy/tests/test_usecases.py
@@ -1,166 +1,181 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
import os
import shutil
import unittest
import tempfile
import numpy as np
+from sklearn.svm import SVC
+from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from hyppopy.projectmanager import ProjectManager
from hyppopy.workflows.svc_usecase.svc_usecase import svc_usecase
from hyppopy.workflows.knc_usecase.knc_usecase import knc_usecase
-from hyppopy.workflows.lda_usecase.lda_usecase import lda_usecase
+# The former lda_usecase package/class is renamed to adaboost_usecase in this change;
+# the old name is kept as an alias so the tests referring to lda_usecase still import.
+from hyppopy.workflows.adaboost_usecase.adaboost_usecase import adaboost_usecase as lda_usecase
from hyppopy.workflows.randomforest_usecase.randomforest_usecase import randomforest_usecase
DATA_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
class ProjectManagerTestSuite(unittest.TestCase):
def setUp(self):
breast_cancer_data = load_breast_cancer()
x = breast_cancer_data.data
y = breast_cancer_data.target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15, random_state=23)
self.root = os.path.join(tempfile.gettempdir(), 'test_data')
if not os.path.isdir(self.root):
os.makedirs(self.root)
x_train_fname = os.path.join(self.root, 'x_train.npy')
y_train_fname = os.path.join(self.root, 'y_train.npy')
np.save(x_train_fname, x_train)
np.save(y_train_fname, y_train)
self.test = [x_test, y_test]
self.config = {
"hyperparameter": {},
"settings": {
"solver_plugin": {
- "max_iterations": 5,
+ "max_iterations": 50,
"use_plugin": "hyperopt",
"output_dir": os.path.join(self.root, 'test_results')
},
"custom": {
"data_path": self.root,
"data_name": "x_train.npy",
"labels_name": "y_train.npy"
}
}}
- # def test_svc_usecase(self):
- # hyperparameter = {
- # "C": {
- # "domain": "uniform",
- # "data": [0.0001, 300.0],
- # "type": "float"
- # }
- # }
- #
- # self.config["hyperparameter"] = hyperparameter
- # ProjectManager.set_config(self.config)
- # uc = svc_usecase()
- # uc.run(save=True)
- # res, best = uc.get_results()
- # print(best)
+    def test_svc_usecase(self):
+        """Optimize the SVC use case and score the best parameters on the held-out split."""
+        hyperparameter = {
+            "C": {
+                "domain": "uniform",
+                "data": [0.0001, 300.0],
+                "type": "float"
+            },
+            "kernel": {
+                "domain": "categorical",
+                "data": ["linear", "poly", "rbf"],
+                "type": "str"
+            }
+        }
+
+        self.config["hyperparameter"] = hyperparameter
+        ProjectManager.set_config(self.config)
+        uc = svc_usecase()
+        uc.run(save=True)
+        res, best = uc.get_results()
+        print("="*30)
+        print(best)
+        print("=" * 30)
+        # A categorical result that is not already a string is treated as an
+        # index into the list of choices.
+        kernel = best['kernel']
+        if not isinstance(kernel, str):
+            kernel = hyperparameter['kernel']['data'][kernel]
+        clf = SVC(C=best['C'], kernel=kernel)
+        # predict() requires a fitted estimator; setUp stored the training
+        # split as .npy files below self.root.
+        x_train = np.load(os.path.join(self.root, 'x_train.npy'))
+        y_train = np.load(os.path.join(self.root, 'y_train.npy'))
+        clf.fit(x_train, y_train)
+        test_predictions = clf.predict(self.test[0])
+        acc = accuracy_score(self.test[1], test_predictions)
+        print("Accuracy: {:.4%}".format(acc))
+        print("=" * 30)
def test_randomforest_usecase(self):
hyperparameter = {
"n_estimators": {
"domain": "uniform",
"data": [1, 500],
"type": "int"
},
"criterion": {
"domain": "categorical",
"data": ["gini", "entropy"],
"type": "str"
},
"max_depth": {
"domain": "uniform",
"data": [1, 50],
"type": "int"
},
"max_features": {
"domain": "categorical",
"data": ["auto", "sqrt", "log2"],
"type": "str"
}
}
self.config["hyperparameter"] = hyperparameter
ProjectManager.set_config(self.config)
uc = randomforest_usecase()
uc.run(save=True)
res, best = uc.get_results()
print(best)
- # def test_lda_usecase(self):
- # hyperparameter = {
- # "solver": {
- # "domain": "categorical",
- # "data": ["svd", "lsqr", "eigen"],
- # "type": "str"
- # },
- # "tol": {
- # "domain": "uniform",
- # "data": [0.00000001, 1.0],
- # "type": "float"
- # }
- # }
- #
- # self.config["hyperparameter"] = hyperparameter
- # ProjectManager.set_config(self.config)
- # uc = lda_usecase()
- # uc.run(save=True)
- # res, best = uc.get_results()
- # print(best)
+ def test_lda_usecase(self):
+ # Runs the workflow imported as lda_usecase with a two-parameter search space and prints the best result.
+ # NOTE(review): "solver" and "tol" look like LinearDiscriminantAnalysis parameters, while the
+ # workflow module is switched to AdaBoostClassifier in this change - confirm the search space still applies.
+ hyperparameter = {
+ "solver": {
+ "domain": "categorical",
+ "data": ["svd", "lsqr", "eigen"],
+ "type": "str"
+ },
+ "tol": {
+ "domain": "uniform",
+ "data": [0.00000001, 1.0],
+ "type": "float"
+ }
+ }
+
+ self.config["hyperparameter"] = hyperparameter
+ ProjectManager.set_config(self.config)
+ uc = lda_usecase()
+ uc.run(save=True)
+ res, best = uc.get_results()
+ print(best)
def test_knc_usecase(self):
hyperparameter = {
"n_neighbors": {
"domain": "uniform",
"data": [1, 100],
"type": "int"
},
"weights": {
"domain": "categorical",
"data": ["uniform", "distance"],
"type": "str"
},
"algorithm": {
"domain": "categorical",
"data": ["auto", "ball_tree", "kd_tree", "brute"],
"type": "str"
}
}
self.config["hyperparameter"] = hyperparameter
ProjectManager.set_config(self.config)
uc = knc_usecase()
uc.run(save=True)
res, best = uc.get_results()
print(best)
def tearDown(self):
- if os.path.isdir(self.root):
- shutil.rmtree(self.root)
+ pass
+ # if os.path.isdir(self.root):
+ # shutil.rmtree(self.root)
if __name__ == '__main__':
unittest.main()
diff --git a/hyppopy/tests/test_workflows.py b/hyppopy/tests/test_workflows.py
index 508e946..f8783d6 100644
--- a/hyppopy/tests/test_workflows.py
+++ b/hyppopy/tests/test_workflows.py
@@ -1,82 +1,120 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
import os
import unittest
from hyppopy.globals import TESTDATA_DIR
IRIS_DATA = os.path.join(TESTDATA_DIR, 'Iris')
TITANIC_DATA = os.path.join(TESTDATA_DIR, 'Titanic')
from hyppopy.projectmanager import ProjectManager
from hyppopy.workflows.svc_usecase.svc_usecase import svc_usecase
from hyppopy.workflows.randomforest_usecase.randomforest_usecase import randomforest_usecase
class WorkflowTestSuite(unittest.TestCase):
def setUp(self):
self.results = []
def test_workflow_svc_on_iris_from_xml(self):
ProjectManager.read_config(os.path.join(IRIS_DATA, 'svc_config.xml'))
uc = svc_usecase()
uc.run(False)
res, best = uc.get_results()
- self.assertEqual(len(best.keys()), 4)
+ self.assertTrue('C' in res.columns)
+ self.assertTrue('gamma' in res.columns)
+ self.assertTrue('kernel' in res.columns)
+ self.assertEqual(len(best.keys()), 3)
- def test_workflow_rf_on_iris_from_xml(self):
- ProjectManager.read_config(os.path.join(IRIS_DATA, 'rf_config.xml'))
+ def test_workflow_svc_on_iris_from_json(self):
+ ProjectManager.read_config(os.path.join(IRIS_DATA, 'svc_config.json'))
uc = svc_usecase()
uc.run(False)
res, best = uc.get_results()
- self.assertEqual(len(best.keys()), 6)
+ self.assertTrue('C' in res.columns)
+ self.assertTrue('gamma' in res.columns)
+ self.assertTrue('kernel' in res.columns)
+ self.assertEqual(len(best.keys()), 3)
- def test_workflow_svc_on_iris_from_json(self):
- ProjectManager.read_config(os.path.join(IRIS_DATA, 'svc_config.json'))
- uc = svc_usecase()
+ def test_workflow_rf_on_iris_from_xml(self):
+ ProjectManager.read_config(os.path.join(IRIS_DATA, 'rf_config.xml'))
+ uc = randomforest_usecase()
uc.run(False)
res, best = uc.get_results()
- self.assertEqual(len(best.keys()), 4)
+ self.assertTrue('n_estimators' in res.columns)
+ self.assertTrue('criterion' in res.columns)
+ self.assertTrue('max_depth' in res.columns)
+ self.assertEqual(len(best.keys()), 3)
def test_workflow_rf_on_iris_from_json(self):
ProjectManager.read_config(os.path.join(IRIS_DATA, 'rf_config.json'))
uc = randomforest_usecase()
uc.run(False)
res, best = uc.get_results()
- self.assertEqual(len(best.keys()), 6)
+ self.assertTrue('n_estimators' in res.columns)
+ self.assertTrue('criterion' in res.columns)
+ self.assertTrue('max_depth' in res.columns)
+ self.assertEqual(len(best.keys()), 3)
+
+ # def test_workflow_svc_on_titanic_from_xml(self):
+ # ProjectManager.read_config(os.path.join(TITANIC_DATA, 'svc_config.xml'))
+ # uc = svc_usecase()
+ # uc.run(False)
+ # res, best = uc.get_results()
+ # self.assertTrue('C' in res.columns)
+ # self.assertTrue('gamma' in res.columns)
+ # self.assertTrue('kernel' in res.columns)
+ # self.assertEqual(len(best.keys()), 3)
+ #
+ # def test_workflow_svc_on_titanic_from_json(self):
+ # ProjectManager.read_config(os.path.join(TITANIC_DATA, 'svc_config.json'))
+ # uc = svc_usecase()
+ # uc.run(False)
+ # res, best = uc.get_results()
+ # self.assertTrue('C' in res.columns)
+ # self.assertTrue('gamma' in res.columns)
+ # self.assertTrue('kernel' in res.columns)
+ # self.assertEqual(len(best.keys()), 3)
def test_workflow_rf_on_titanic_from_xml(self):
ProjectManager.read_config(os.path.join(TITANIC_DATA, 'rf_config.xml'))
uc = randomforest_usecase()
uc.run(False)
res, best = uc.get_results()
+ self.assertTrue('n_estimators' in res.columns)
+ self.assertTrue('criterion' in res.columns)
+ self.assertTrue('max_depth' in res.columns)
self.assertEqual(len(best.keys()), 3)
def test_workflow_rf_on_titanic_from_json(self):
ProjectManager.read_config(os.path.join(TITANIC_DATA, 'rf_config.json'))
uc = randomforest_usecase()
uc.run(False)
res, best = uc.get_results()
+ self.assertTrue('n_estimators' in res.columns)
+ self.assertTrue('criterion' in res.columns)
+ self.assertTrue('max_depth' in res.columns)
self.assertEqual(len(best.keys()), 3)
def tearDown(self):
print("")
for r in self.results:
print(r)
if __name__ == '__main__':
unittest.main()
diff --git a/hyppopy/workflows/lda_usecase/__init__.py b/hyppopy/workflows/adaboost_usecase/__init__.py
similarity index 100%
rename from hyppopy/workflows/lda_usecase/__init__.py
rename to hyppopy/workflows/adaboost_usecase/__init__.py
diff --git a/hyppopy/workflows/lda_usecase/lda_usecase.py b/hyppopy/workflows/adaboost_usecase/adaboost_usecase.py
similarity index 81%
rename from hyppopy/workflows/lda_usecase/lda_usecase.py
rename to hyppopy/workflows/adaboost_usecase/adaboost_usecase.py
index 2d8e646..5b904ca 100644
--- a/hyppopy/workflows/lda_usecase/lda_usecase.py
+++ b/hyppopy/workflows/adaboost_usecase/adaboost_usecase.py
@@ -1,35 +1,36 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
-from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
+from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import cross_val_score
from hyppopy.projectmanager import ProjectManager
from hyppopy.workflows.workflowbase import WorkflowBase
from hyppopy.workflows.dataloader.simpleloader import SimpleDataLoader
-class lda_usecase(WorkflowBase):
+class adaboost_usecase(WorkflowBase):
def setup(self, **kwargs):
dl = SimpleDataLoader()
dl.start(path=ProjectManager.data_path,
data_name=ProjectManager.data_name,
labels_name=ProjectManager.labels_name)
self.solver.set_data(dl.data)
def blackbox_function(self, data, params):
- clf = LinearDiscriminantAnalysis(**params)
+ clf = AdaBoostClassifier(n_estimators=params['n_estimators'],
+ learning_rate=params['learning_rate'])
return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean()
diff --git a/hyppopy/workflows/svc_usecase/svc_usecase.py b/hyppopy/workflows/svc_usecase/svc_usecase.py
index 0e63f3f..f1ba78a 100644
--- a/hyppopy/workflows/svc_usecase/svc_usecase.py
+++ b/hyppopy/workflows/svc_usecase/svc_usecase.py
@@ -1,35 +1,60 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from hyppopy.projectmanager import ProjectManager
from hyppopy.workflows.workflowbase import WorkflowBase
from hyppopy.workflows.dataloader.simpleloader import SimpleDataLoader
class svc_usecase(WorkflowBase):
def setup(self, **kwargs):
dl = SimpleDataLoader()
dl.start(path=ProjectManager.data_path,
data_name=ProjectManager.data_name,
labels_name=ProjectManager.labels_name)
self.solver.set_data(dl.data)
def blackbox_function(self, data, params):
- clf = SVC(**params)
+ if 'C' not in params.keys():
+ print("Warning: missing parameter C, use default value 1.0!")
+ params['C'] = 1.0
+ if 'kernel' not in params.keys():
+ print("Warning: missing parameter kernel, use default value linear!")
+ params['kernel'] = 'linear'
+
+ if params['kernel'] == 'linear':
+ clf = SVC(kernel='linear', C=params['C'])
+ elif params['kernel'] == 'poly':
+ if 'degree' not in params.keys():
+ print("Warning: missing parameter degree, use default value 3!")
+ params['degree'] = 3
+ if 'coef0' not in params.keys():
+ print("Warning: missing parameter coef0, use default value 0.0!")
+ params['coef0'] = 0.0
+ clf = SVC(kernel='poly', C=params['C'], degree=params['degree'], coef0=params['coef0'])
+ elif params['kernel'] == 'rbf':
+ if 'gamma' not in params.keys():
+ print("Warning: missing parameter gamma, use default value 'scale'!")
+ params['gamma'] = 'scale'
+ clf = SVC(kernel='rbf', C=params['C'], gamma=params['gamma'])
+ else:
+ raise IOError("Unknown kernel function: {}".format(params['kernel']))
+
return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean()
+
diff --git a/requirements.txt b/requirements.txt
index 444d9f3..d5e39f9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,13 +1,15 @@
dicttoxml>=1.7.4
xmltodict>=0.11.0
hyperopt>=0.1.1
Optunity>=1.1.1
numpy>=1.16.0
matplotlib>=3.0.2
scikit-learn>=0.20.2
scipy>=1.2.0
Sphinx>=1.8.3
xmlrunner>=1.7.7
Yapsy>=1.11.223
pandas>=0.24.1
-seaborn>=0.9.0
\ No newline at end of file
+seaborn>=0.9.0
+deap>=1.2.2
+bayesian-optimization>=1.0.1
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 59b01a5..36f0629 100644
--- a/setup.py
+++ b/setup.py
@@ -1,61 +1,63 @@
# -*- coding: utf-8 -*-
import os
from setuptools import setup, find_packages
with open('README.rst') as f:
readme = f.read()
with open('LICENSE') as f:
license = f.read()
VERSION = "0.1.2dev"
ROOT = os.path.dirname(os.path.realpath(__file__))
new_init = []
with open(os.path.join(ROOT, *("hyppopy", "__init__.py")), "r") as infile:
for line in infile:
new_init.append(line)
for n in range(len(new_init)):
if new_init[n].startswith("__version__"):
split = line.split("=")
new_init[n] = "__version__ = '" + VERSION + "'\n"
with open(os.path.join(ROOT, *("hyppopy", "__init__.py")), "w") as outfile:
outfile.writelines(new_init)
setup(
name='hyppopy',
version=VERSION,
description='Hyper-Parameter Optimization Toolbox for Blackboxfunction Optimization',
long_description=readme,
# if you want, put your own name here
# (this would likely result in people sending you emails)
author='Sven Wanner',
author_email='s.wanner@dkfz.de',
url='',
license=license,
packages=find_packages(exclude=('*test*', 'doc')),
package_data={
'hyppopy.plugins': ['*.yapsy-plugin']
},
# the requirements to install this project.
# Since this one is so simple this is empty.
install_requires=[
'dicttoxml>=1.7.4',
'xmltodict>=0.11.0',
'hyperopt>=0.1.1',
'Optunity>=1.1.1',
'numpy>=1.16.0',
'matplotlib>=3.0.2',
'scikit-learn>=0.20.2',
'scipy>=1.2.0',
'Sphinx>=1.8.3',
'xmlrunner>=1.7.7',
'Yapsy>=1.11.223',
'pandas>=0.24.1',
- 'seaborn>=0.9.0'
+ 'seaborn>=0.9.0',
+ 'deap>=1.2.2',
+ 'bayesian-optimization>=1.0.1'
],
)