diff --git a/__main__.py b/__main__.py
index dae4064..f1d65ba 100644
--- a/__main__.py
+++ b/__main__.py
@@ -1,95 +1,91 @@
#!/usr/bin/env python
#
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
import os
import sys
+import time
+import argparse
ROOT = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
sys.path.append(ROOT)
from hyppopy.projectmanager import ProjectManager
from hyppopy.workflows.svc_usecase.svc_usecase import svc_usecase
from hyppopy.workflows.knc_usecase.knc_usecase import knc_usecase
-from hyppopy.workflows.lda_usecase.adaboost_usecase import lda_usecase
+from hyppopy.workflows.adaboost_usecase.adaboost_usecase import adaboost_usecase
from hyppopy.workflows.unet_usecase.unet_usecase import unet_usecase
from hyppopy.workflows.randomforest_usecase.randomforest_usecase import randomforest_usecase
from hyppopy.workflows.imageregistration_usecase.imageregistration_usecase import imageregistration_usecase
-import os
-import sys
-import time
-import argparse
-
-
def print_warning(msg):
print("\n!!!!! WARNING !!!!!")
print(msg)
sys.exit()
def args_check(args):
if not args.workflow:
print_warning("No workflow specified, check --help")
if not args.config:
print_warning("Missing config parameter, check --help")
if not os.path.isfile(args.config):
print_warning(f"Couldn't find configfile ({args.config}), please check your input --config")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='UNet Hyppopy UseCase Example Optimization.')
parser.add_argument('-w', '--workflow', type=str, help='workflow to be executed')
parser.add_argument('-o', '--output', type=str, default=None, help='output path to store result')
parser.add_argument('-c', '--config', type=str, help='config filename, .xml or .json formats are supported.'
'pass a full path filename or the filename only if the'
'configfile is in the data folder')
args = parser.parse_args()
args_check(args)
ProjectManager.read_config(args.config)
if args.output is not None:
ProjectManager.register_member("output_dir", args.output)
if args.workflow == "svc_usecase":
uc = svc_usecase()
elif args.workflow == "randomforest_usecase":
uc = randomforest_usecase()
elif args.workflow == "knc_usecase":
uc = knc_usecase()
- elif args.workflow == "lda_usecase":
- uc = lda_usecase()
+ elif args.workflow == "adaboost_usecase":
+ uc = adaboost_usecase()
elif args.workflow == "unet_usecase":
uc = unet_usecase()
elif args.workflow == "imageregistration_usecase":
uc = imageregistration_usecase()
else:
print("No workflow called {} found!".format(args.workflow))
sys.exit()
print("\nStart optimization...")
start = time.process_time()
uc.run(save=True)
end = time.process_time()
print("Finished optimization!\n")
print("Total Time: {}s\n".format(end-start))
res, best = uc.get_results()
print("---- Optimal Parameter -----\n")
for p in best.items():
print(" - {}\t:\t{}".format(p[0], p[1]))
diff --git a/hyppopy/helpers.py b/hyppopy/helpers.py
new file mode 100644
index 0000000..3fc9cf0
--- /dev/null
+++ b/hyppopy/helpers.py
@@ -0,0 +1,99 @@
+import copy
+import itertools
+from collections import OrderedDict, abc
+
+
+class NestedDictUnfolder(object):
+    """Unfold a nested parameter dictionary into a flat list of parameter sets.
+
+    Keys whose value is a mapping are recorded as categories, every other
+    key is recorded as a named list of candidate values. ``unfold`` builds the
+    cartesian product of all value lists and then adds, level by level, one
+    ``parent category -> chosen child`` entry to each combination.
+    """
+
+    def __init__(self, nested_dict):
+        """Store ``nested_dict`` and scan it once to collect categories and values."""
+        self._nested_dict = nested_dict
+        # category names in depth-first visiting order (duplicates possible)
+        self._categories = []
+        # value lists keyed by parameter name, a repeated name overwrites the earlier entry
+        self._values = OrderedDict()
+        # categories that directly contain value entries
+        self._tree_leafs = []
+
+        NestedDictUnfolder.nested_dict_iter(self._nested_dict, self)
+
+    @staticmethod
+    def nested_dict_iter(nested, unfolder):
+        """Walk ``nested`` depth-first and register its content on ``unfolder``.
+
+        :param nested: mapping to traverse
+        :param unfolder: NestedDictUnfolder receiving the categories/values
+        """
+        for key, value in nested.items():
+            if isinstance(value, abc.Mapping):
+                unfolder.add_category(key)
+                NestedDictUnfolder.nested_dict_iter(value, unfolder)
+            else:
+                unfolder.add_values(key, value)
+                unfolder.mark_leaf()
+
+    def find_parent_nodes(self, nested, node, last_node=""):
+        """Append to ``_tree_leafs`` the key visited right before each hit of ``node``.
+
+        :param nested: mapping to search
+        :param node: key to look for
+        :param last_node: key visited before entering ``nested``; the empty
+            string marks that ``node`` was found at the top level
+        """
+        for key, value in nested.items():
+            if key == node:
+                self._tree_leafs.append(last_node)
+                return
+            last_node = key
+            if not isinstance(value, abc.Mapping):
+                # a plain value ends the search on this level
+                return
+            self.find_parent_nodes(value, node, last_node)
+
+    def find_parent_node(self, leaf_names):
+        """Return the category stored directly in front of one of ``leaf_names``.
+
+        :param leaf_names: a single name or a list of names
+        :return: the first preceding category that is not itself part of
+            ``leaf_names``, or None if there is no such category
+        """
+        if not isinstance(leaf_names, list):
+            leaf_names = [leaf_names]
+        for ln in leaf_names:
+            try:
+                pos = self._categories.index(ln) - 1
+            except ValueError:
+                # the name is not (or no longer) registered as a category
+                continue
+            # NOTE: for a name stored at position 0 this wraps around to the
+            # last category (pos == -1), exactly as the lookup always did
+            candidate = self._categories[pos]
+            if candidate not in leaf_names:
+                return candidate
+        return None
+
+    def add_category(self, name):
+        """Register ``name`` as a category."""
+        self._categories.append(name)
+
+    def add_values(self, name, values):
+        """Register the candidate ``values`` of the parameter ``name``."""
+        self._values[name] = values
+
+    def mark_leaf(self):
+        """Remember the most recently added category as a leaf category."""
+        if self._categories and self._categories[-1] not in self._tree_leafs:
+            self._tree_leafs.append(self._categories[-1])
+
+    def permutate_values(self):
+        """Return one dict per element of the cartesian product of all value lists."""
+        okeys = list(self._values.keys())
+        return [dict(zip(okeys, ps)) for ps in itertools.product(*self._values.values())]
+
+    def add_categories(self, values_permutated):
+        """Extend every parameter set by the category choices, innermost level first.
+
+        :param values_permutated: list of flat parameter dicts
+        :return: list of parameter dicts including one entry per category
+            level, or None when no parent category can be determined
+        """
+        while True:
+            parent = self.find_parent_node(self._tree_leafs)
+            if parent is None:
+                return None
+            result = []
+            for tl in self._tree_leafs:
+                for elem in values_permutated:
+                    new = copy.deepcopy(elem)
+                    new[parent] = tl
+                    result.append(new)
+                # this level is resolved, drop every occurrence of the leaf
+                while tl in self._categories:
+                    self._categories.remove(tl)
+            while parent in self._categories:
+                self._categories.remove(parent)
+            # the parents of the level just handled become the next leafs
+            self._tree_leafs = []
+            self.find_parent_nodes(self._nested_dict, parent)
+            if len(self._tree_leafs) == 1 and self._tree_leafs[0] == "":
+                # the handled category was the root of the nested dict
+                break
+            values_permutated = copy.deepcopy(result)
+        return result
+
+    def unfold(self):
+        """Return the list of all parameter sets described by the nested dict."""
+        values_permutated = self.permutate_values()
+        if len(self._categories) > 0:
+            return self.add_categories(values_permutated)
+        return values_permutated
diff --git a/hyppopy/plugins/gridsearch_settings_plugin.py b/hyppopy/plugins/gridsearch_settings_plugin.py
index 6fb7062..94e51e2 100644
--- a/hyppopy/plugins/gridsearch_settings_plugin.py
+++ b/hyppopy/plugins/gridsearch_settings_plugin.py
@@ -1,143 +1,146 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
import os
import logging
import numpy as np
from pprint import pformat
from hyppopy.globals import DEBUGLEVEL
LOG = logging.getLogger(os.path.basename(__file__))
LOG.setLevel(DEBUGLEVEL)
from yapsy.IPlugin import IPlugin
from hyppopy.settingspluginbase import SettingsPluginBase
from hyppopy.settingsparticle import split_categorical
from hyppopy.settingsparticle import SettingsParticle
def gaussian(x, mu, sigma):
return 1.0/(sigma * np.sqrt(2*np.pi))*np.exp(-(x-mu)**2/(2*sigma**2))
def gaussian_axis_sampling(a, b, N):
center = a + (b - a) / 2.0
delta = (b - a) / N
bn = b - center
xn = np.arange(0, bn, delta)
dn = []
for x in xn:
dn.append(1/gaussian(x, 0, bn/2.5))
dn = np.array(dn)
dn /= np.sum(dn)
dn *= bn
axis = [0]
for x in dn:
axis.append(x+axis[-1])
axis.insert(0, -axis[-1])
axis = np.array(axis)
axis += center
return axis
def log_axis_sampling(a, b, N):
delta = (b - a) / N
logrange = np.arange(a, b + delta, delta)
for n in range(logrange.shape[0]):
logrange[n] = np.exp(logrange[n])
return logrange
def sample(start, stop, count, ftype="uniform"):
assert stop > start, "Precondition Violation, stop <= start not allowed!"
assert count > 0, "Precondition Violation, N <= 0 not allowed!"
if ftype == 'uniform':
delta = (stop - start)/count
return np.arange(start, stop + delta, delta)
elif ftype == 'loguniform':
return log_axis_sampling(start, stop, count)
elif ftype == 'normal':
return gaussian_axis_sampling(start, stop, count)
raise IOError("Precondition Violation, unknown sampling function type!")
class gridsearch_Settings(SettingsPluginBase, IPlugin):
def __init__(self):
SettingsPluginBase.__init__(self)
LOG.debug("initialized")
def convert_parameter(self, input_dict):
LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(input_dict)))
solution_space = {}
# split input in categorical and non-categorical data
cat, uni = split_categorical(input_dict)
# build up dictionary keeping all non-categorical data
uniforms = {}
for name, content in uni.items():
particle = gridsearch_SettingsParticle(name=name)
for key, value in content.items():
if key == 'domain':
particle.domain = value
elif key == 'data':
particle.data = value
elif key == 'type':
particle.dtype = value
uniforms[name] = particle.get()
# build nested categorical structure
inner_level = uniforms
for key, value in cat.items():
tmp = {}
tmp2 = {}
for key2, value2 in value.items():
if key2 == 'data':
for elem in value2:
tmp[elem] = inner_level
tmp2[key] = tmp
inner_level = tmp2
- solution_space = tmp2
+        # Without categorical parameters the nesting loop above never runs, so
+        # inner_level still refers to the flat dict of non-categorical
+        # parameters, which is then the complete solution space.
+        if len(cat) > 0:
+            solution_space = tmp2
+        else:
+            solution_space = inner_level
return solution_space
class gridsearch_SettingsParticle(SettingsParticle):
def __init__(self, name=None, domain=None, dtype=None, data=None):
SettingsParticle.__init__(self, name, domain, dtype, data)
def convert(self):
assert isinstance(self.data, list), "Precondition Violation, invalid input type for data!"
if self.domain == "categorical":
return self.data
else:
assert len(self.data) >= 2, "Precondition Violation, invalid input data!"
if len(self.data) < 3:
self.data.append(10)
LOG.warning("Grid sampling has set number of samples automatically to 10!")
print("WARNING: Grid sampling has set number of samples automatically to 10!")
samples = sample(start=self.data[0], stop=self.data[1], count=self.data[2], ftype=self.domain)
if self.dtype == "int":
data = []
for s in samples:
val = int(np.round(s))
if len(data) > 0:
if val == data[-1]: continue
data.append(val)
return data
return list(samples)
diff --git a/hyppopy/plugins/gridsearch_solver_plugin.py b/hyppopy/plugins/gridsearch_solver_plugin.py
index 91dda89..66e474d 100644
--- a/hyppopy/plugins/gridsearch_solver_plugin.py
+++ b/hyppopy/plugins/gridsearch_solver_plugin.py
@@ -1,84 +1,140 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
import os
+import time
import logging
+from numpy import argmin, argmax, unique
from hyppopy.globals import DEBUGLEVEL
LOG = logging.getLogger(os.path.basename(__file__))
LOG.setLevel(DEBUGLEVEL)
from pprint import pformat
from yapsy.IPlugin import IPlugin
-from sklearn.model_selection import GridSearchCV
-from hyppopy.projectmanager import ProjectManager
+from hyppopy.helpers import NestedDictUnfolder
from hyppopy.solverpluginbase import SolverPluginBase
+class Trials(object):
+    """Record loss, duration, status and parameters of every solver iteration."""
+
+    def __init__(self):
+        # the four lists are filled in lockstep, one entry per iteration
+        self.loss = []
+        self.duration = []
+        self.status = []
+        self.parameter = []
+        self.best = None
+        # process time at the start of the running iteration, None while idle
+        self._tick = None
+
+    def start_iteration(self):
+        """Start the timer of an iteration."""
+        self._tick = time.process_time()
+
+    def stop_iteration(self):
+        """Stop the timer and store the elapsed process time.
+
+        Does nothing when no iteration is running.
+        """
+        if self._tick is None:
+            return
+        self.duration.append(time.process_time()-self._tick)
+        self._tick = None
+
+    def set_status(self, status=True):
+        """Store whether the iteration succeeded."""
+        self.status.append(status)
+
+    def set_parameter(self, params):
+        """Store the parameter set of the iteration."""
+        self.parameter.append(params)
+
+    def set_loss(self, value):
+        """Store the loss of the iteration."""
+        self.loss.append(value)
+
+    def get(self):
+        """Return all recorded results together with the best parameter set.
+
+        The loss of every failed iteration is replaced in place by the worst
+        loss of the successful ones. Parameters whose value is a string in the
+        first parameter set are encoded as the index of the value within the
+        sorted unique values of that parameter.
+
+        :return: tuple ``(res, best)``; ``res`` maps 'losses', 'duration' and
+            every parameter name to a list with one entry per iteration,
+            ``best`` is the parameter set with the smallest loss
+        :raises Exception: if nothing was recorded, the recorded lists differ
+            in length, or no iteration succeeded
+        """
+        if len(self.loss) <= 0:
+            raise Exception("Empty solver results!")
+        if len(self.loss) != len(self.duration) or len(self.loss) != len(self.parameter) or len(self.loss) != len(self.status):
+            raise Exception("Inconsistent results in gridsearch solver!")
+        # A failed iteration has no comparable loss (it may be None), so best
+        # and worst loss must be searched among the successful iterations only.
+        valid = [n for n, ok in enumerate(self.status) if ok and self.loss[n] is not None]
+        if not valid:
+            raise Exception("No successful iteration in gridsearch solver results!")
+        best_index = min(valid, key=lambda n: self.loss[n])
+        best = self.parameter[best_index]
+        worst_loss = max(self.loss[n] for n in valid)
+        for n in set(range(len(self.loss))) - set(valid):
+            self.loss[n] = worst_loss
+
+        res = {
+            'losses': self.loss,
+            'duration': self.duration
+        }
+        is_string = []
+        for key, value in self.parameter[0].items():
+            res[key] = []
+            if isinstance(value, str):
+                is_string.append(key)
+
+        for p in self.parameter:
+            for key, value in p.items():
+                res[key].append(value)
+
+        # replace string values by a numeric code
+        for key in is_string:
+            uniques = unique(res[key])
+            lookup = {}
+            for n, p in enumerate(uniques):
+                lookup[p] = n
+            for n in range(len(res[key])):
+                res[key][n] = lookup[res[key][n]]
+
+        return res, best
+
+
class gridsearch_Solver(SolverPluginBase, IPlugin):
trials = None
best = None
def __init__(self):
SolverPluginBase.__init__(self)
LOG.debug("initialized")
def blackbox_function(self, params):
- pass
- # status = STATUS_FAIL
- # try:
- # loss = self.blackbox_function_template(self.data, params)
- # if loss is not None:
- # status = STATUS_OK
- # except Exception as e:
- # LOG.error("execution of self.loss(self.data, params) failed due to:\n {}".format(e))
- # status = STATUS_FAIL
- # return {'loss': loss, 'status': status}
+        """Evaluate one parameter set and record the outcome in ``self.trials``.
+
+        Exactly one parameter, duration, status and loss entry is recorded per
+        call, so the lists kept by ``self.trials`` stay equally long. An
+        iteration counts as failed when the blackbox function raises or
+        returns None; its loss is then recorded as None.
+
+        :param params: parameter set handed to the blackbox function
+        """
+        loss = None
+        status = False
+        self.trials.set_parameter(params)
+        self.trials.start_iteration()
+        try:
+            loss = self.blackbox_function_template(self.data, params)
+            status = loss is not None
+        except Exception as e:
+            LOG.error("execution of self.loss(self.data, params) failed due to:\n {}".format(e))
+        finally:
+            # stop the timer on success and on failure alike
+            self.trials.stop_iteration()
+        self.trials.set_status(status)
+        self.trials.set_loss(loss)
+        return
def execute_solver(self, parameter):
- pass
- # LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(parameter)))
- # self.trials = Trials()
- #
- # try:
- # self.best = fmin(fn=self.blackbox_function,
- # space=parameter,
- # algo=tpe.suggest,
- # max_evals=ProjectManager.max_iterations,
- # trials=self.trials)
- # except Exception as e:
- # msg = "internal error in hyperopt.fmin occured. {}".format(e)
- # LOG.error(msg)
- # raise BrokenPipeError(msg)
+        """Evaluate the blackbox function on every parameter set of the grid.
+
+        The nested solution space is unfolded into a flat list of parameter
+        sets, each of them is evaluated once and the progress is printed.
+
+        :param parameter: nested solution space dictionary
+        :raises BrokenPipeError: if iterating the parameter sets fails
+        """
+        LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(parameter)))
+
+        self.trials = Trials()
+        unfolder = NestedDictUnfolder(parameter)
+        parameter_set = unfolder.unfold()
+        N = len(parameter_set)
+        print("")
+        try:
+            # count from 1 so the output reflects finished iterations and ends at 100%
+            for n, params in enumerate(parameter_set, start=1):
+                self.blackbox_function(params)
+                print("\r{}% done".format(int(round(100.0/N*n))), end="")
+        except Exception as e:
+            msg = "internal error in gridsearch execute_solver occured. {}".format(e)
+            LOG.error(msg)
+            raise BrokenPipeError(msg) from e
+        print("")
def convert_results(self):
- pass
- # currently converting results in a way that this function returns a dict
- # keeping all useful parameter as key/list item. This will be automatically
- # converted to a pandas dataframe in the solver class
- # results = {'duration': [], 'losses': []}
- # pset = self.trials.trials[0]['misc']['vals']
- # for p in pset.keys():
- # results[p] = []
- #
- # for n, trial in enumerate(self.trials.trials):
- # t1 = trial['book_time']
- # t2 = trial['refresh_time']
- # results['duration'].append((t2 - t1).microseconds/1000.0)
- # results['losses'].append(trial['result']['loss'])
- # pset = trial['misc']['vals']
- # for p in pset.items():
- # results[p[0]].append(p[1][0])
- # return results, self.best
+        """Return the ``(results, best)`` pair assembled by ``self.trials.get()``."""
+        return self.trials.get()
diff --git a/hyppopy/plugins/randomsearch_settings_plugin.py b/hyppopy/plugins/randomsearch_settings_plugin.py
deleted file mode 100644
index 782552b..0000000
--- a/hyppopy/plugins/randomsearch_settings_plugin.py
+++ /dev/null
@@ -1,100 +0,0 @@
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical and Biological Informatics.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
-#
-# Author: Sven Wanner (s.wanner@dkfz.de)
-
-import os
-import logging
-import numpy as np
-from hyppopy.globals import DEBUGLEVEL
-LOG = logging.getLogger(os.path.basename(__file__))
-LOG.setLevel(DEBUGLEVEL)
-
-from pprint import pformat
-from yapsy.IPlugin import IPlugin
-
-
-from hyppopy.settingspluginbase import SettingsPluginBase
-from hyppopy.settingsparticle import SettingsParticle
-
-
-class randomsearch_Settings(SettingsPluginBase, IPlugin):
-
- def __init__(self):
- SettingsPluginBase.__init__(self)
- LOG.debug("initialized")
-
- def convert_parameter(self, input_dict):
- pass
- # LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(input_dict)))
- #
- # solution_space = {}
- # for name, content in input_dict.items():
- # particle = hyperopt_SettingsParticle(name=name)
- # for key, value in content.items():
- # if key == 'domain':
- # particle.domain = value
- # elif key == 'data':
- # particle.data = value
- # elif key == 'type':
- # particle.dtype = value
- # solution_space[name] = particle.get()
- # return solution_space
-
-
-class randomsearch_SettingsParticle(SettingsParticle):
-
- def __init__(self, name=None, domain=None, dtype=None, data=None):
- SettingsParticle.__init__(self, name, domain, dtype, data)
-
- def convert(self):
- pass
- # if self.domain == "uniform":
- # if self.dtype == "float" or self.dtype == "double":
- # return hp.uniform(self.name, self.data[0], self.data[1])
- # elif self.dtype == "int":
- # data = list(np.arange(int(self.data[0]), int(self.data[1]+1)))
- # return hp.choice(self.name, data)
- # else:
- # msg = "cannot convert the type {} in domain {}".format(self.dtype, self.domain)
- # LOG.error(msg)
- # raise LookupError(msg)
- # elif self.domain == "loguniform":
- # if self.dtype == "float" or self.dtype == "double":
- # return hp.loguniform(self.name, self.data[0], self.data[1])
- # else:
- # msg = "cannot convert the type {} in domain {}".format(self.dtype, self.domain)
- # LOG.error(msg)
- # raise LookupError(msg)
- # elif self.domain == "normal":
- # if self.dtype == "float" or self.dtype == "double":
- # return hp.normal(self.name, self.data[0], self.data[1])
- # else:
- # msg = "cannot convert the type {} in domain {}".format(self.dtype, self.domain)
- # LOG.error(msg)
- # raise LookupError(msg)
- # elif self.domain == "categorical":
- # if self.dtype == 'str':
- # return hp.choice(self.name, self.data)
- # elif self.dtype == 'bool':
- # data = []
- # for elem in self.data:
- # if elem == "true" or elem == "True" or elem == 1 or elem == "1":
- # data .append(True)
- # elif elem == "false" or elem == "False" or elem == 0 or elem == "0":
- # data .append(False)
- # else:
- # msg = "cannot convert the type {} in domain {}, unknown bool type value".format(self.dtype, self.domain)
- # LOG.error(msg)
- # raise LookupError(msg)
- # return hp.choice(self.name, data)
diff --git a/hyppopy/plugins/randomsearch_settings_plugin.yapsy-plugin b/hyppopy/plugins/randomsearch_settings_plugin.yapsy-plugin
deleted file mode 100644
index 27d25fd..0000000
--- a/hyppopy/plugins/randomsearch_settings_plugin.yapsy-plugin
+++ /dev/null
@@ -1,9 +0,0 @@
-[Core]
-Name = randomsearch
-Module = randomsearch_settings_plugin
-
-[Documentation]
-Author = Sven Wanner
-Version = 0.1
-Website = https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html
-Description = RandomSearch Settings Plugin
\ No newline at end of file
diff --git a/hyppopy/plugins/randomsearch_solver_plugin.py b/hyppopy/plugins/randomsearch_solver_plugin.py
deleted file mode 100644
index 03b3f41..0000000
--- a/hyppopy/plugins/randomsearch_solver_plugin.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# DKFZ
-#
-#
-# Copyright (c) German Cancer Research Center,
-# Division of Medical and Biological Informatics.
-# All rights reserved.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.
-#
-# See LICENSE.txt or http://www.mitk.org for details.
-#
-# Author: Sven Wanner (s.wanner@dkfz.de)
-
-import os
-import logging
-from hyppopy.globals import DEBUGLEVEL
-LOG = logging.getLogger(os.path.basename(__file__))
-LOG.setLevel(DEBUGLEVEL)
-
-from pprint import pformat
-from yapsy.IPlugin import IPlugin
-
-
-from hyppopy.projectmanager import ProjectManager
-from hyppopy.solverpluginbase import SolverPluginBase
-
-
-class randomsearch_Solver(SolverPluginBase, IPlugin):
- trials = None
- best = None
-
- def __init__(self):
- SolverPluginBase.__init__(self)
- LOG.debug("initialized")
-
- def blackbox_function(self, params):
- pass
- # status = STATUS_FAIL
- # try:
- # loss = self.blackbox_function_template(self.data, params)
- # if loss is not None:
- # status = STATUS_OK
- # except Exception as e:
- # LOG.error("execution of self.loss(self.data, params) failed due to:\n {}".format(e))
- # status = STATUS_FAIL
- # return {'loss': loss, 'status': status}
-
- def execute_solver(self, parameter):
- pass
- # LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(parameter)))
- # self.trials = Trials()
- #
- # try:
- # self.best = fmin(fn=self.blackbox_function,
- # space=parameter,
- # algo=tpe.suggest,
- # max_evals=ProjectManager.max_iterations,
- # trials=self.trials)
- # except Exception as e:
- # msg = "internal error in hyperopt.fmin occured. {}".format(e)
- # LOG.error(msg)
- # raise BrokenPipeError(msg)
-
- def convert_results(self):
- pass
- # currently converting results in a way that this function returns a dict
- # keeping all useful parameter as key/list item. This will be automatically
- # converted to a pandas dataframe in the solver class
- # results = {'duration': [], 'losses': []}
- # pset = self.trials.trials[0]['misc']['vals']
- # for p in pset.keys():
- # results[p] = []
- #
- # for n, trial in enumerate(self.trials.trials):
- # t1 = trial['book_time']
- # t2 = trial['refresh_time']
- # results['duration'].append((t2 - t1).microseconds/1000.0)
- # results['losses'].append(trial['result']['loss'])
- # pset = trial['misc']['vals']
- # for p in pset.items():
- # results[p[0]].append(p[1][0])
- # return results, self.best
diff --git a/hyppopy/plugins/randomsearch_solver_plugin.yapsy-plugin b/hyppopy/plugins/randomsearch_solver_plugin.yapsy-plugin
deleted file mode 100644
index e465d93..0000000
--- a/hyppopy/plugins/randomsearch_solver_plugin.yapsy-plugin
+++ /dev/null
@@ -1,9 +0,0 @@
-[Core]
-Name = randomsearch
-Module = randomsearch_solver_plugin
-
-[Documentation]
-Author = Sven Wanner
-Version = 0.1
-Website = https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html
-Description = RandomSearch Solver Plugin
\ No newline at end of file
diff --git a/hyppopy/resultviewer.py b/hyppopy/resultviewer.py
index d39c640..a718c52 100644
--- a/hyppopy/resultviewer.py
+++ b/hyppopy/resultviewer.py
@@ -1,87 +1,174 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
import os
+import copy
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import logging
from hyppopy.globals import DEBUGLEVEL
LOG = logging.getLogger(os.path.basename(__file__))
LOG.setLevel(DEBUGLEVEL)
sns.set(style="darkgrid")
class ResultViewer(object):
def __init__(self, fname=None, save_only=False):
self.df = None
self.has_duration = False
self.hyperparameter = None
self.save_only = save_only
self.path = None
self.appendix = None
if fname is not None:
self.read(fname)
def close_all(self):
plt.close('all')
def read(self, fname):
self.path = os.path.dirname(fname)
split = os.path.basename(fname).split("_")
self.appendix = split[-1]
self.appendix = self.appendix[:-4]
self.df = pd.read_csv(fname, index_col=0)
const_data = ["duration", "losses"]
hyperparameter_columns = [item for item in self.df.columns if item not in const_data]
self.hyperparameter = pd.DataFrame()
for key in hyperparameter_columns:
self.hyperparameter[key] = self.df[key]
self.has_duration = "duration" in self.df.columns
- def show(self, save=True):
- if self.has_duration:
- sns_plot = sns.jointplot(y="duration", x="losses", data=self.df, kind="kde")
- if not self.save_only:
- plt.show()
+    def plot_XYGrid(self, df, x, y, name="", save=None, show=True):
+        """Plot every column named in ``y`` against every column named in ``x``.
+
+        One axes is drawn per (x, y) pair; the row with the smallest value in
+        the ``losses`` column is highlighted in red.
+
+        :param df: DataFrame holding a ``losses`` column and all columns named in ``x`` and ``y``
+        :param x: list of column names used as x axes (one grid column each)
+        :param y: list of column names used as y axes (one grid row each)
+        :param name: figure title
+        :param save: file name to store the figure, None to skip saving
+        :param show: if True the figure is displayed
+        """
+        if not x or not y:
+            # a grid with zero rows or columns cannot be created
+            LOG.warning("plot_XYGrid skipped, no columns to plot for '{}'".format(name))
+            return
+        # .values is indexed by position while idxmin() returns an index label,
+        # dropping the index first makes both coincide
+        argmin = df["losses"].reset_index(drop=True).idxmin()
+        grid = [len(x), len(y)]
+        if grid[0] == 1 and grid[1] == 1:
+            plt.figure(figsize=(10.0, 8))
+            plt.plot(df[x[0]].values, df[y[0]].values, 'o')
+            plt.plot(df[x[0]].values[argmin], df[y[0]].values[argmin], 'ro')
+            plt.grid(True)
+            plt.ylabel(y[0])
+            plt.xlabel(x[0])
+            plt.title(name, fontsize=16)
+        else:
+            # squeeze=False always returns a 2D array of axes, so single-row
+            # and single-column grids need no special indexing
+            fig, axs = plt.subplots(nrows=grid[1], ncols=grid[0], squeeze=False,
+                                    figsize=(10.0, grid[1] * 3.5))
+            fig.subplots_adjust(left=0.08, right=0.98, wspace=0.3)
+
+            for nx, _x in enumerate(x):
+                for ny, _y in enumerate(y):
+                    ax = axs[ny, nx]
+                    ax.plot(df[_x].values, df[_y].values, 'o')
+                    ax.plot(df[_x].values[argmin], df[_y].values[argmin], 'ro')
+                    ax.grid(True)
+                    # label only the outer axes of the grid
+                    if nx == 0:
+                        ax.set_ylabel(_y)
+                    if ny == len(y)-1:
+                        ax.set_xlabel(_x)
+            fig.suptitle(name, fontsize=16)
+        if save is not None:
+            target_dir = os.path.dirname(save)
+            # a bare file name has no directory part that could be created
+            if target_dir and not os.path.isdir(target_dir):
+                os.makedirs(target_dir)
+            plt.savefig(save)
+        if show:
+            plt.show()
+
+    def plot_performance_and_feature_grids(self, save=True):
+        """Plot the performance columns and all feature columns against the x axes.
+
+        The columns 'losses' and 'iterations' (when present) serve as x axes.
+        'accuracy' and 'duration' (when present) form the performance grid,
+        all remaining columns are plotted in grids of at most three features.
+
+        :param save: if True every grid is stored as a png file in ``self.path``
+        """
+        x_axis = [col for col in ('losses', 'iterations') if col in self.df.columns]
+        y_axis_performance = [col for col in ('accuracy', 'duration') if col in self.df.columns]
+        features = [cit for cit in self.df.columns
+                    if cit not in x_axis and cit not in y_axis_performance]
+
+        # without a performance column there is nothing to put on the y axes
+        if y_axis_performance:
+            save_name = None
+            if save:
+                save_name = os.path.join(self.path, "performance_" + self.appendix + ".png")
+            self.plot_XYGrid(self.df, x=x_axis,
+                             y=y_axis_performance,
+                             name="Performance",
+                             save=save_name,
+                             show=not self.save_only)
+
+        # at most three feature rows per figure
+        chunks = [features[x:x + 3] for x in range(0, len(features), 3)]
+        for n, chunk in enumerate(chunks):
+            save_name = None
             if save:
-                save_name = os.path.join(self.path, "t_vs_loss_"+self.appendix+".png")
-                try:
-                    sns_plot.savefig(save_name)
-                except Exception as e:
-                    msg = "failed to save file {}, reason {}".format(save_name, e)
-                    LOG.error(msg)
-                    raise IOError(msg)
+                save_name = os.path.join(self.path, "features_{}_".format(str(n).zfill(3)) + self.appendix + ".png")
+            self.plot_XYGrid(self.df, x=x_axis,
+                             y=chunk,
+                             name="Feature set {}".format(n+1),
+                             save=save_name,
+                             show=not self.save_only)
+
+    def plot_feature_matrix(self, save=True):
+        """Draw a seaborn pair plot of all columns of ``self.df``.
+
+        The figure is displayed unless ``self.save_only`` is set.
+
+        :param save: if True the figure is stored as ``matrixview_<appendix>.png`` in ``self.path``
+        :raises IOError: if storing the figure fails
+        """
sns_plot = sns.pairplot(self.df, height=1.8, aspect=1.8,
plot_kws=dict(edgecolor="k", linewidth=0.5),
diag_kind="kde", diag_kws=dict(shade=True))
fig = sns_plot.fig
fig.subplots_adjust(top=0.93, wspace=0.3)
t = fig.suptitle('Pairwise Plots', fontsize=14)
if not self.save_only:
plt.show()
if save:
save_name = os.path.join(self.path, "matrixview_"+self.appendix+".png")
try:
sns_plot.savefig(save_name)
except Exception as e:
msg = "failed to save file {}, reason {}".format(save_name, e)
LOG.error(msg)
raise IOError(msg)
+    def plot_duration(self, save=True):
+        """Draw a kde joint plot of duration over losses.
+
+        Nothing happens when ``self.df`` has no 'duration' column. The figure
+        is displayed unless ``self.save_only`` is set.
+
+        :param save: if True the figure is stored as ``t_vs_loss_<appendix>.png`` in ``self.path``
+        :raises IOError: if storing the figure fails
+        """
+        if "duration" not in self.df.columns:
+            return
+        joint = sns.jointplot(y="duration", x="losses", data=self.df, kind="kde")
+        if not self.save_only:
+            plt.show()
+        if not save:
+            return
+        target = os.path.join(self.path, "t_vs_loss_" + self.appendix + ".png")
+        try:
+            joint.savefig(target)
+        except Exception as e:
+            msg = "failed to save file {}, reason {}".format(target, e)
+            LOG.error(msg)
+            raise IOError(msg)
+
+    def show(self, save=True):
+        """Create all available plots, one after the other.
+
+        :param save: forwarded unchanged to every plot method
+        """
+        plotters = (self.plot_duration,
+                    self.plot_feature_matrix,
+                    self.plot_performance_and_feature_grids)
+        for plot in plotters:
+            plot(save)
+
diff --git a/hyppopy/tests/data/Titanic/lda_config.xml b/hyppopy/tests/data/Iris/adaboost_config.xml
similarity index 73%
copy from hyppopy/tests/data/Titanic/lda_config.xml
copy to hyppopy/tests/data/Iris/adaboost_config.xml
index 556ff45..48b7b88 100644
--- a/hyppopy/tests/data/Titanic/lda_config.xml
+++ b/hyppopy/tests/data/Iris/adaboost_config.xml
@@ -1,26 +1,26 @@
-
- categorical
- [svd,lsqr,eigen]
- str
-
-
+
uniform
- [0.0,1.0]
+ [1, 100]
+ int
+
+
+ loguniform
+ [-10,3]
float
-
+
3
hyperopt
D:/Projects/Python/hyppopy/hyppopy/tests/data/Titanic
D:/Projects/Python/hyppopy/hyppopy/tests/data/Titanic
train_cleaned.csv
Survived
\ No newline at end of file
diff --git a/hyppopy/tests/data/Iris/knc_config.xml b/hyppopy/tests/data/Iris/knc_config.xml
index eb253bb..8407a90 100644
--- a/hyppopy/tests/data/Iris/knc_config.xml
+++ b/hyppopy/tests/data/Iris/knc_config.xml
@@ -1,36 +1,36 @@
uniform
[1,50]
int
-
+
uniform
[1,100]
int
-
+
categorical
[uniform,distance]
str
-
+
categorical
[auto,ball_tree,kd_tree,brute]
str
-
+
3
hyperopt
D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris
D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris
train_data.npy
train_labels.npy
\ No newline at end of file
diff --git a/hyppopy/tests/data/Iris/lda_config.xml b/hyppopy/tests/data/Iris/lda_config.xml
deleted file mode 100644
index b6b2fc4..0000000
--- a/hyppopy/tests/data/Iris/lda_config.xml
+++ /dev/null
@@ -1,26 +0,0 @@
-
-
-
- categorical
- [svd,lsqr,eigen]
- str
-
-
- uniform
- [0.0,1.0]
- float
-
-
-
-
- 3
- hyperopt
- D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris
-
-
- D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris
- train_cleaned.csv
- Survived
-
-
-
\ No newline at end of file
diff --git a/hyppopy/tests/data/Iris/knc_config.xml b/hyppopy/tests/data/Iris/rf_grid_config.xml
similarity index 57%
copy from hyppopy/tests/data/Iris/knc_config.xml
copy to hyppopy/tests/data/Iris/rf_grid_config.xml
index eb253bb..2d5c55f 100644
--- a/hyppopy/tests/data/Iris/knc_config.xml
+++ b/hyppopy/tests/data/Iris/rf_grid_config.xml
@@ -1,36 +1,30 @@
-
+
uniform
- [1,50]
+ [1,300,10]
int
-
-
- uniform
- [1,100]
- int
-
-
- categorical
- [uniform,distance]
- str
-
-
+
+
categorical
- [auto,ball_tree,kd_tree,brute]
+ [gini,entropy]
str
-
+
+
+ uniform
+ [1,50,10]
+ int
+
- 3
- hyperopt
+ gridsearch
D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris
D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris
train_data.npy
train_labels.npy
\ No newline at end of file
diff --git a/hyppopy/tests/data/Titanic/lda_config.xml b/hyppopy/tests/data/Titanic/adaboost_config.xml
similarity index 73%
rename from hyppopy/tests/data/Titanic/lda_config.xml
rename to hyppopy/tests/data/Titanic/adaboost_config.xml
index 556ff45..48b7b88 100644
--- a/hyppopy/tests/data/Titanic/lda_config.xml
+++ b/hyppopy/tests/data/Titanic/adaboost_config.xml
@@ -1,26 +1,26 @@
-
- categorical
- [svd,lsqr,eigen]
- str
-
-
+
uniform
- [0.0,1.0]
+ [1, 100]
+ int
+
+
+ loguniform
+ [-10,3]
float
-
+
3
hyperopt
D:/Projects/Python/hyppopy/hyppopy/tests/data/Titanic
D:/Projects/Python/hyppopy/hyppopy/tests/data/Titanic
train_cleaned.csv
Survived
\ No newline at end of file
diff --git a/hyppopy/tests/data/Titanic/knc_config.xml b/hyppopy/tests/data/Titanic/knc_config.xml
index 641beb6..91c3d59 100644
--- a/hyppopy/tests/data/Titanic/knc_config.xml
+++ b/hyppopy/tests/data/Titanic/knc_config.xml
@@ -1,36 +1,36 @@
uniform
[1,50]
int
uniform
[1,100]
int
categorical
[uniform,distance]
str
-
+
categorical
[auto,ball_tree,kd_tree,brute]
str
-
+
3
hyperopt
D:/Projects/Python/hyppopy/hyppopy/tests/data/Titanic
D:/Projects/Python/hyppopy/hyppopy/tests/data/Titanic
train_cleaned.csv
Survived
\ No newline at end of file
diff --git a/hyppopy/tests/test_helpers.py b/hyppopy/tests/test_helpers.py
new file mode 100644
index 0000000..e1071b3
--- /dev/null
+++ b/hyppopy/tests/test_helpers.py
@@ -0,0 +1,87 @@
+# -*- coding: utf-8 -*-
+#
+# DKFZ
+#
+#
+# Copyright (c) German Cancer Research Center,
+# Division of Medical and Biological Informatics.
+# All rights reserved.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.
+#
+# See LICENSE.txt or http://www.mitk.org for details.
+#
+# Author: Sven Wanner (s.wanner@dkfz.de)
+
+import unittest
+
+from hyppopy.helpers import NestedDictUnfolder
+
+
+class SolverFactoryTestSuite(unittest.TestCase):
+
+ def setUp(self):
+ self.p1 = {"uni1": [1, 2], "uni2": [11, 12]}
+ self.p2 = {"cat": {"a": {"uni1": [1, 2], "uni2": [11, 12]}, "b": {"uni1": [1, 2], "uni2": [11, 12]}}}
+ self.p3 = {"cat1": {
+ "a1": {"cat2": {"a2": {"uni1": [1, 2], "uni2": [11, 12]}, "b2": {"uni1": [1, 2], "uni2": [11, 12]}}},
+ "b1": {"cat2": {"a2": {"uni1": [1, 2], "uni2": [11, 12]}, "b2": {"uni1": [1, 2], "uni2": [11, 12]}}}}}
+ # p1 is flat, p2 nests one categorical level, p3 nests two; expected result for p3 (16 combinations):
+ self.output_p3 = [{'cat1': 'a1', 'cat2': 'a2', 'uni1': 1, 'uni2': 11},
+ {'cat1': 'a1', 'cat2': 'a2', 'uni1': 1, 'uni2': 12},
+ {'cat1': 'a1', 'cat2': 'a2', 'uni1': 2, 'uni2': 11},
+ {'cat1': 'a1', 'cat2': 'a2', 'uni1': 2, 'uni2': 12},
+ {'cat1': 'a1', 'cat2': 'b2', 'uni1': 1, 'uni2': 11},
+ {'cat1': 'a1', 'cat2': 'b2', 'uni1': 1, 'uni2': 12},
+ {'cat1': 'a1', 'cat2': 'b2', 'uni1': 2, 'uni2': 11},
+ {'cat1': 'a1', 'cat2': 'b2', 'uni1': 2, 'uni2': 12},
+ {'cat1': 'b1', 'cat2': 'a2', 'uni1': 1, 'uni2': 11},
+ {'cat1': 'b1', 'cat2': 'a2', 'uni1': 1, 'uni2': 12},
+ {'cat1': 'b1', 'cat2': 'a2', 'uni1': 2, 'uni2': 11},
+ {'cat1': 'b1', 'cat2': 'a2', 'uni1': 2, 'uni2': 12},
+ {'cat1': 'b1', 'cat2': 'b2', 'uni1': 1, 'uni2': 11},
+ {'cat1': 'b1', 'cat2': 'b2', 'uni1': 1, 'uni2': 12},
+ {'cat1': 'b1', 'cat2': 'b2', 'uni1': 2, 'uni2': 11},
+ {'cat1': 'b1', 'cat2': 'b2', 'uni1': 2, 'uni2': 12}]
+ # Expected result for p2: each choice of 'cat' paired with every uni1/uni2 combination.
+ self.output_p2 = [{'cat': 'a', 'uni1': 1, 'uni2': 11},
+ {'cat': 'a', 'uni1': 1, 'uni2': 12},
+ {'cat': 'a', 'uni1': 2, 'uni2': 11},
+ {'cat': 'a', 'uni1': 2, 'uni2': 12},
+ {'cat': 'b', 'uni1': 1, 'uni2': 11},
+ {'cat': 'b', 'uni1': 1, 'uni2': 12},
+ {'cat': 'b', 'uni1': 2, 'uni2': 11},
+ {'cat': 'b', 'uni1': 2, 'uni2': 12}]
+ # Expected result for p1: every uni1/uni2 combination.
+ self.output_p1 = [{'uni1': 1, 'uni2': 11},
+ {'uni1': 1, 'uni2': 12},
+ {'uni1': 2, 'uni2': 11},
+ {'uni1': 2, 'uni2': 12}]
+
+ def test_nested_dict_unfolder_p1(self):
+ # Flat input: all four uni1/uni2 combinations are expected.
+ unfolded = NestedDictUnfolder(self.p1).unfold()
+ # Compare whole lists: zip() stops at the shorter sequence, so a
+ # truncated or empty result would otherwise pass unnoticed.
+ self.assertEqual(list(unfolded), self.output_p1)
+
+ def test_nested_dict_unfolder_p2(self):
+ # One categorical level: eight combinations are expected.
+ unfolded = NestedDictUnfolder(self.p2).unfold()
+ # Compare whole lists: zip() stops at the shorter sequence, so a
+ # truncated or empty result would otherwise pass unnoticed.
+ self.assertEqual(list(unfolded), self.output_p2)
+
+ def test_nested_dict_unfolder_p3(self):
+ # Two nested categorical levels: sixteen combinations are expected.
+ unfolded = NestedDictUnfolder(self.p3).unfold()
+ # Whole-list comparison; zip() would hide missing trailing entries.
+ self.assertEqual(list(unfolded), self.output_p3)
+
+
+
+if __name__ == '__main__':
+ unittest.main()
+
diff --git a/hyppopy/tests/test_settings_plugins.py b/hyppopy/tests/test_settings_plugins.py
index a4f2ba4..984c7c5 100644
--- a/hyppopy/tests/test_settings_plugins.py
+++ b/hyppopy/tests/test_settings_plugins.py
@@ -1,110 +1,129 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
-import os
import unittest
from hyppopy.plugins.gridsearch_settings_plugin import gridsearch_SettingsParticle
from hyppopy.plugins.gridsearch_settings_plugin import gridsearch_Settings
+
class ProjectManagerTestSuite(unittest.TestCase):
def setUp(self):
self.hp = {
'UniformFloat': {
'domain': 'uniform',
'data': [0, 1, 10],
'type': 'float',
},
'UniformInt': {
'domain': 'uniform',
'data': [0, 7, 10],
'type': 'int',
},
'NormalFloat': {
'domain': 'normal',
'data': [0, 1, 10],
'type': 'float',
},
'NormalInt': {
'domain': 'normal',
'data': [0, 10, 10],
'type': 'int',
},
'LogFloat': {
'domain': 'loguniform',
'data': [-5, 5, 10],
'type': 'float',
},
'LogFloat': {
'domain': 'loguniform',
'data': [-5, 5, 10],
'type': 'float',
},
'LogInt': {
'domain': 'loguniform',
'data': [0, 6, 10],
'type': 'int',
},
'CategoricalStr': {
'domain': 'categorical',
'data': ['a', 'b'],
'type': 'str',
},
'CategoricalInt': {
'domain': 'categorical',
'data': [0, 1],
'type': 'int',
}
}
self.truth = {
'UniformFloat': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
'UniformInt': [0, 1, 2, 3, 4, 5, 6, 7, 8],
'NormalFloat': [0.0, 0.2592443381276233, 0.3673134565097225, 0.4251586871937128, 0.4649150940720099, 0.5,
0.5350849059279901, 0.5748413128062873, 0.6326865434902775, 0.7407556618723767, 1.0],
'NormalInt': [0, 3, 4, 5, 6, 7, 10],
'LogFloat': [0.006737946999085467, 0.01831563888873418, 0.049787068367863944, 0.1353352832366127, 0.36787944117144233,
1.0, 2.718281828459045, 7.38905609893065, 20.085536923187668, 54.598150033144236, 148.4131591025766],
'LogInt': [1, 2, 3, 6, 11, 20, 37, 67, 122, 221, 403],
'CategoricalStr': ['a', 'b'],
'CategoricalInt': [0, 1]
}
-
def test_gridsearch_settings(self):
gss = gridsearch_Settings()
gss.set_hyperparameter(self.hp)
res = gss.get_hyperparameter()
- # TODO check...
+ # The result is expected to be nested by the categorical parameters:
+ # the two CategoricalInt choices on top, the two CategoricalStr choices below each.
+ self.assertTrue('CategoricalInt' in res.keys())
+ self.assertTrue(len(res) == 1)
+ self.assertTrue(0 in res['CategoricalInt'].keys())
+ self.assertTrue(1 in res['CategoricalInt'].keys())
+ self.assertTrue(len(res['CategoricalInt']) == 2)
+ self.assertTrue('a' in res['CategoricalInt'][0]['CategoricalStr'].keys())
+ self.assertTrue('b' in res['CategoricalInt'][0]['CategoricalStr'].keys())
+ self.assertTrue(len(res['CategoricalInt'][0]['CategoricalStr']) == 2)
+ self.assertTrue('a' in res['CategoricalInt'][1]['CategoricalStr'].keys())
+ self.assertTrue('b' in res['CategoricalInt'][1]['CategoricalStr'].keys())
+ self.assertTrue(len(res['CategoricalInt'][1]['CategoricalStr']) == 2)
+ # Helper: compares every non-categorical entry of self.truth, value by value,
+ # with the list stored under the same key in input_dict.
+ def check_truth(input_dict):
+ for key, value in self.truth.items():
+ if not key.startswith('Categorical'):
+ self.assertTrue(key in input_dict.keys())
+ for n, v in enumerate(self.truth[key]):
+ self.assertAlmostEqual(v, input_dict[key][n])
+
+ # Every one of the four categorical combinations is checked against the same truth values.
+ check_truth(res['CategoricalInt'][0]['CategoricalStr']['a'])
+ check_truth(res['CategoricalInt'][1]['CategoricalStr']['a'])
+ check_truth(res['CategoricalInt'][0]['CategoricalStr']['b'])
+ check_truth(res['CategoricalInt'][1]['CategoricalStr']['b'])
def test_gridsearch_particle(self):
for name, data in self.hp.items():
gsp = gridsearch_SettingsParticle(name=name,
domain=data['domain'],
- dtype=data['dtype'],
+ dtype=data['type'],
data=data['data'])
data = gsp.get()
for n in range(len(self.truth[name])):
self.assertAlmostEqual(data[n], self.truth[name][n])
-
def tearDown(self):
pass
if __name__ == '__main__':
unittest.main()
diff --git a/hyppopy/tests/test_usecases.py b/hyppopy/tests/test_usecases.py
index 4846a4b..bdc7da2 100644
--- a/hyppopy/tests/test_usecases.py
+++ b/hyppopy/tests/test_usecases.py
@@ -1,181 +1,216 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
import os
import shutil
import unittest
import tempfile
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
+from sklearn.ensemble import AdaBoostClassifier
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from hyppopy.projectmanager import ProjectManager
from hyppopy.workflows.svc_usecase.svc_usecase import svc_usecase
from hyppopy.workflows.knc_usecase.knc_usecase import knc_usecase
-from hyppopy.workflows.lda_usecase.adaboost_usecase import lda_usecase
+from hyppopy.workflows.adaboost_usecase.adaboost_usecase import adaboost_usecase
from hyppopy.workflows.randomforest_usecase.randomforest_usecase import randomforest_usecase
DATA_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
class ProjectManagerTestSuite(unittest.TestCase):
def setUp(self):
breast_cancer_data = load_breast_cancer()
x = breast_cancer_data.data
y = breast_cancer_data.target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15, random_state=23)
self.root = os.path.join(tempfile.gettempdir(), 'test_data')
- if not os.path.isdir(self.root):
- os.makedirs(self.root)
+ # Remove a directory left over from a previous run, then create it anew.
+ if os.path.isdir(self.root):
+ shutil.rmtree(self.root)
+ os.makedirs(self.root)
+
x_train_fname = os.path.join(self.root, 'x_train.npy')
y_train_fname = os.path.join(self.root, 'y_train.npy')
np.save(x_train_fname, x_train)
np.save(y_train_fname, y_train)
+ # Keep the training split in memory as well; the usecase tests refit classifiers on it.
+ self.train = [x_train, y_train]
self.test = [x_test, y_test]
self.config = {
"hyperparameter": {},
"settings": {
"solver_plugin": {
- "max_iterations": 50,
+ "max_iterations": 3,
"use_plugin": "hyperopt",
"output_dir": os.path.join(self.root, 'test_results')
},
"custom": {
"data_path": self.root,
"data_name": "x_train.npy",
"labels_name": "y_train.npy"
}
}}
- def test_svc_usecase(self):
- hyperparameter = {
- "C": {
- "domain": "uniform",
- "data": [0.0001, 300.0],
- "type": "float"
- },
- "kernel": {
- "domain": "categorical",
- "data": ["linear", "poly", "rbf"],
- "type": "str"
- }
- }
-
- self.config["hyperparameter"] = hyperparameter
- ProjectManager.set_config(self.config)
- uc = svc_usecase()
- uc.run(save=True)
- res, best = uc.get_results()
- print("="*30)
- print(best)
- print("=" * 30)
- clf = SVC(**best)
- train_predictions = clf.predict(self.test[0])
- acc = accuracy_score(self.test[1], train_predictions)
- print("Accuracy: {:.4%}".format(acc))
- print("=" * 30)
+ # def test_svc_usecase(self):
+ # hyperparameter = {
+ # "C": {
+ # "domain": "uniform",
+ # "data": [0.0001, 300.0],
+ # "type": "float"
+ # },
+ # "kernel": {
+ # "domain": "categorical",
+ # "data": ["linear", "poly", "rbf"],
+ # "type": "str"
+ # }
+ # }
+ #
+ # self.config["hyperparameter"] = hyperparameter
+ # ProjectManager.set_config(self.config)
+ # uc = svc_usecase()
+ # uc.run(save=True)
+ # res, best = uc.get_results()
+ # print("="*30)
+ # print(best)
+ # print("=" * 30)
+ # clf = SVC(C=best['C'], kernel=hyperparameter['kernel']['data'][best['kernel']])
+ # clf.fit(self.train[0], self.train[1])
+ # train_predictions = clf.predict(self.test[0])
+ # acc = accuracy_score(self.test[1], train_predictions)
+ # print("Accuracy: {:.4%}".format(acc))
+ # print("=" * 30)
def test_randomforest_usecase(self):
hyperparameter = {
"n_estimators": {
"domain": "uniform",
"data": [1, 500],
"type": "int"
},
"criterion": {
"domain": "categorical",
"data": ["gini", "entropy"],
"type": "str"
},
"max_depth": {
"domain": "uniform",
"data": [1, 50],
"type": "int"
},
"max_features": {
"domain": "categorical",
"data": ["auto", "sqrt", "log2"],
"type": "str"
}
}
self.config["hyperparameter"] = hyperparameter
ProjectManager.set_config(self.config)
uc = randomforest_usecase()
- uc.run(save=True)
+ uc.run(save=False)
res, best = uc.get_results()
+ print("=" * 30)
print(best)
+ print("=" * 30)
+ # criterion and max_features are both categorical: best is used as an index
+ # into the configured 'data' list to recover the option name for each of them.
+ clf = RandomForestClassifier(n_estimators=best['n_estimators'],
+ criterion=hyperparameter['criterion']['data'][best['criterion']],
+ max_depth=best['max_depth'],
+ max_features=hyperparameter['max_features']['data'][best['max_features']])
+ clf.fit(self.train[0], self.train[1])
+ print("feature importance:\n", clf.feature_importances_)
+ # NOTE: despite its name, train_predictions holds predictions for the held-out test split.
+ train_predictions = clf.predict(self.test[0])
+ acc = accuracy_score(self.test[1], train_predictions)
+ print("Accuracy: {:.4%}".format(acc))
+ print("=" * 30)
- def test_lda_usecase(self):
+ def test_adaboost_usecase(self):
+ # Smoke test: runs the optimisation and refits a classifier with the best
+ # parameters; the accuracy is only printed, nothing is asserted.
hyperparameter = {
- "solver": {
- "domain": "categorical",
- "data": ["svd", "lsqr", "eigen"],
- "type": "str"
- },
- "tol": {
+ "n_estimators": {
"domain": "uniform",
- "data": [0.00000001, 1.0],
+ "data": [1, 300],
+ "type": "int"
+ },
+ "learning_rate": {
+ "domain": "loguniform",
+ "data": [-10, 3],
"type": "float"
}
}
self.config["hyperparameter"] = hyperparameter
ProjectManager.set_config(self.config)
- uc = lda_usecase()
+ uc = adaboost_usecase()
uc.run(save=True)
res, best = uc.get_results()
+ print("=" * 30)
print(best)
+ print("=" * 30)
+ clf = AdaBoostClassifier(n_estimators=best['n_estimators'], learning_rate=best['learning_rate'])
+ clf.fit(self.train[0], self.train[1])
+ # NOTE: despite its name, train_predictions holds predictions for the held-out test split.
+ train_predictions = clf.predict(self.test[0])
+ acc = accuracy_score(self.test[1], train_predictions)
+ print("Accuracy: {:.4%}".format(acc))
+ print("=" * 30)
def test_knc_usecase(self):
hyperparameter = {
"n_neighbors": {
"domain": "uniform",
"data": [1, 100],
"type": "int"
},
"weights": {
"domain": "categorical",
"data": ["uniform", "distance"],
"type": "str"
},
"algorithm": {
"domain": "categorical",
"data": ["auto", "ball_tree", "kd_tree", "brute"],
"type": "str"
}
}
self.config["hyperparameter"] = hyperparameter
ProjectManager.set_config(self.config)
uc = knc_usecase()
uc.run(save=True)
res, best = uc.get_results()
+ print("=" * 30)
print(best)
+ print("=" * 30)
+ # The categorical entries of best are used as indices into the configured
+ # 'data' lists to recover the option names.
+ clf = KNeighborsClassifier(n_neighbors=best['n_neighbors'],
+ weights=hyperparameter['weights']['data'][best['weights']],
+ algorithm=hyperparameter['algorithm']['data'][best['algorithm']])
+ clf.fit(self.train[0], self.train[1])
+ # NOTE: despite its name, train_predictions holds predictions for the held-out test split.
+ # The accuracy is only printed; nothing is asserted.
+ train_predictions = clf.predict(self.test[0])
+ acc = accuracy_score(self.test[1], train_predictions)
+ print("Accuracy: {:.4%}".format(acc))
+ print("=" * 30)
def tearDown(self):
pass
- # if os.path.isdir(self.root):
- # shutil.rmtree(self.root)
if __name__ == '__main__':
unittest.main()
diff --git a/hyppopy/tests/test_workflows.py b/hyppopy/tests/test_workflows.py
index f8783d6..2866495 100644
--- a/hyppopy/tests/test_workflows.py
+++ b/hyppopy/tests/test_workflows.py
@@ -1,120 +1,151 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
import os
import unittest
from hyppopy.globals import TESTDATA_DIR
IRIS_DATA = os.path.join(TESTDATA_DIR, 'Iris')
TITANIC_DATA = os.path.join(TESTDATA_DIR, 'Titanic')
from hyppopy.projectmanager import ProjectManager
from hyppopy.workflows.svc_usecase.svc_usecase import svc_usecase
+from hyppopy.workflows.knc_usecase.knc_usecase import knc_usecase
+from hyppopy.workflows.adaboost_usecase.adaboost_usecase import adaboost_usecase
from hyppopy.workflows.randomforest_usecase.randomforest_usecase import randomforest_usecase
class WorkflowTestSuite(unittest.TestCase):
def setUp(self):
self.results = []
def test_workflow_svc_on_iris_from_xml(self):
ProjectManager.read_config(os.path.join(IRIS_DATA, 'svc_config.xml'))
uc = svc_usecase()
uc.run(False)
res, best = uc.get_results()
self.assertTrue('C' in res.columns)
self.assertTrue('gamma' in res.columns)
self.assertTrue('kernel' in res.columns)
self.assertEqual(len(best.keys()), 3)
def test_workflow_svc_on_iris_from_json(self):
ProjectManager.read_config(os.path.join(IRIS_DATA, 'svc_config.json'))
uc = svc_usecase()
uc.run(False)
res, best = uc.get_results()
self.assertTrue('C' in res.columns)
self.assertTrue('gamma' in res.columns)
self.assertTrue('kernel' in res.columns)
self.assertEqual(len(best.keys()), 3)
def test_workflow_rf_on_iris_from_xml(self):
ProjectManager.read_config(os.path.join(IRIS_DATA, 'rf_config.xml'))
uc = randomforest_usecase()
uc.run(False)
res, best = uc.get_results()
self.assertTrue('n_estimators' in res.columns)
self.assertTrue('criterion' in res.columns)
self.assertTrue('max_depth' in res.columns)
self.assertEqual(len(best.keys()), 3)
def test_workflow_rf_on_iris_from_json(self):
ProjectManager.read_config(os.path.join(IRIS_DATA, 'rf_config.json'))
uc = randomforest_usecase()
uc.run(False)
res, best = uc.get_results()
self.assertTrue('n_estimators' in res.columns)
self.assertTrue('criterion' in res.columns)
self.assertTrue('max_depth' in res.columns)
self.assertEqual(len(best.keys()), 3)
+ def test_workflow_rf_on_iris_from_grid_xml(self):
+ # Same checks as the other random-forest workflow tests, driven by rf_grid_config.xml:
+ # the three hyperparameters must be result columns and best must hold exactly three entries.
+ ProjectManager.read_config(os.path.join(IRIS_DATA, 'rf_grid_config.xml'))
+ uc = randomforest_usecase()
+ uc.run(False)
+ res, best = uc.get_results()
+ self.assertTrue('n_estimators' in res.columns)
+ self.assertTrue('criterion' in res.columns)
+ self.assertTrue('max_depth' in res.columns)
+ self.assertEqual(len(best.keys()), 3)
+
# def test_workflow_svc_on_titanic_from_xml(self):
# ProjectManager.read_config(os.path.join(TITANIC_DATA, 'svc_config.xml'))
# uc = svc_usecase()
# uc.run(False)
# res, best = uc.get_results()
# self.assertTrue('C' in res.columns)
# self.assertTrue('gamma' in res.columns)
# self.assertTrue('kernel' in res.columns)
# self.assertEqual(len(best.keys()), 3)
#
# def test_workflow_svc_on_titanic_from_json(self):
# ProjectManager.read_config(os.path.join(TITANIC_DATA, 'svc_config.json'))
# uc = svc_usecase()
# uc.run(False)
# res, best = uc.get_results()
# self.assertTrue('C' in res.columns)
# self.assertTrue('gamma' in res.columns)
# self.assertTrue('kernel' in res.columns)
# self.assertEqual(len(best.keys()), 3)
def test_workflow_rf_on_titanic_from_xml(self):
ProjectManager.read_config(os.path.join(TITANIC_DATA, 'rf_config.xml'))
uc = randomforest_usecase()
uc.run(False)
res, best = uc.get_results()
self.assertTrue('n_estimators' in res.columns)
self.assertTrue('criterion' in res.columns)
self.assertTrue('max_depth' in res.columns)
self.assertEqual(len(best.keys()), 3)
def test_workflow_rf_on_titanic_from_json(self):
ProjectManager.read_config(os.path.join(TITANIC_DATA, 'rf_config.json'))
uc = randomforest_usecase()
uc.run(False)
res, best = uc.get_results()
self.assertTrue('n_estimators' in res.columns)
self.assertTrue('criterion' in res.columns)
self.assertTrue('max_depth' in res.columns)
self.assertEqual(len(best.keys()), 3)
+ def test_workflow_adaboost_on_titanic_from_xml(self):
+ # Runs the AdaBoost use case from the Titanic XML config: both hyperparameters
+ # must be result columns and best must hold exactly two entries.
+ ProjectManager.read_config(os.path.join(TITANIC_DATA, 'adaboost_config.xml'))
+ uc = adaboost_usecase()
+ uc.run(False)
+ res, best = uc.get_results()
+ self.assertTrue('n_estimators' in res.columns)
+ self.assertTrue('learning_rate' in res.columns)
+ self.assertEqual(len(best.keys()), 2)
+
+ def test_workflow_knc_on_titanic_from_xml(self):
+ # Runs the k-nearest-neighbours use case from the Titanic XML config.
+ # NOTE(review): best is expected to hold four entries, but only three result
+ # columns are asserted; the fourth (presumably 'algorithm') is unchecked - confirm and add.
+ ProjectManager.read_config(os.path.join(TITANIC_DATA, 'knc_config.xml'))
+ uc = knc_usecase()
+ uc.run(False)
+ res, best = uc.get_results()
+ self.assertTrue('n_neighbors' in res.columns)
+ self.assertTrue('leaf_size' in res.columns)
+ self.assertTrue('weights' in res.columns)
+ self.assertEqual(len(best.keys()), 4)
+
def tearDown(self):
print("")
for r in self.results:
print(r)
if __name__ == '__main__':
unittest.main()
diff --git a/setup.py b/setup.py
index 36f0629..885c37d 100644
--- a/setup.py
+++ b/setup.py
@@ -1,63 +1,63 @@
# -*- coding: utf-8 -*-
import os
from setuptools import setup, find_packages
with open('README.rst') as f:
readme = f.read()
with open('LICENSE') as f:
license = f.read()
-VERSION = "0.1.2dev"
+VERSION = "0.2.0"
ROOT = os.path.dirname(os.path.realpath(__file__))
new_init = []
with open(os.path.join(ROOT, *("hyppopy", "__init__.py")), "r") as infile:
for line in infile:
new_init.append(line)
for n in range(len(new_init)):
if new_init[n].startswith("__version__"):
split = line.split("=")
new_init[n] = "__version__ = '" + VERSION + "'\n"
with open(os.path.join(ROOT, *("hyppopy", "__init__.py")), "w") as outfile:
outfile.writelines(new_init)
setup(
name='hyppopy',
version=VERSION,
description='Hyper-Parameter Optimization Toolbox for Blackboxfunction Optimization',
long_description=readme,
# if you want, put your own name here
# (this would likely result in people sending you emails)
author='Sven Wanner',
author_email='s.wanner@dkfz.de',
url='',
license=license,
packages=find_packages(exclude=('*test*', 'doc')),
package_data={
'hyppopy.plugins': ['*.yapsy-plugin']
},
# the requirements to install this project.
# Since this one is so simple this is empty.
install_requires=[
'dicttoxml>=1.7.4',
'xmltodict>=0.11.0',
'hyperopt>=0.1.1',
'Optunity>=1.1.1',
'numpy>=1.16.0',
'matplotlib>=3.0.2',
'scikit-learn>=0.20.2',
'scipy>=1.2.0',
'Sphinx>=1.8.3',
'xmlrunner>=1.7.7',
'Yapsy>=1.11.223',
'pandas>=0.24.1',
'seaborn>=0.9.0',
'deap>=1.2.2',
'bayesian-optimization>=1.0.1'
],
)