diff --git a/hyppopy/helpers.py b/hyppopy/helpers.py
index 83cbdff..db68299 100644
--- a/hyppopy/helpers.py
+++ b/hyppopy/helpers.py
@@ -1,212 +1,222 @@
import copy
import time
import itertools
import numpy as np
from numpy import argmin, argmax, unique
from collections import OrderedDict, abc
def gaussian(x, mu, sigma):
    """Evaluate the normal probability density at ``x``.

    Args:
        x: position(s) at which the density is evaluated.
        mu: mean of the distribution.
        sigma: standard deviation of the distribution.

    Returns:
        The density value, computed with numpy so array input for ``x``
        yields an array of the same shape.
    """
    normalisation = sigma * np.sqrt(2 * np.pi)
    exponent = -(x - mu) ** 2 / (2 * sigma ** 2)
    return 1.0 / normalisation * np.exp(exponent)
def gaussian_axis_sampling(a, b, N):
    """Sample the interval [a, b] symmetrically around its centre.

    Step widths are proportional to the inverse of a Gaussian (sigma is the
    half-width divided by 2.5), so the steps are smallest at the centre and
    grow towards both borders.

    Args:
        a: left border of the interval.
        b: right border of the interval.
        N: number of steps used to derive the base step width ``(b - a) / N``.

    Returns:
        numpy array holding the mirrored left half, the centre and the right
        half of the axis, shifted so that it is centred on the interval.
    """
    center = a + (b - a) / 2.0
    half_width = b - center
    positions = np.arange(0, half_width, (b - a) / N)

    # Inverse Gaussian weights, rescaled so that they add up to the half-width.
    steps = np.array([1 / gaussian(pos, 0, half_width / 2.5) for pos in positions])
    steps /= np.sum(steps)
    steps *= half_width

    # Accumulate the right half once, then mirror it around zero.
    right_half = []
    running = 0
    for step in steps:
        running = step + running
        right_half.append(running)
    left_half = [-offset for offset in reversed(right_half)]

    axis = np.array(left_half + [0] + right_half)
    axis += center
    return axis
def log_axis_sampling(a, b, N):
    """Return ``N + 1`` points in [a, b] that are equidistant on a log scale.

    Args:
        a: left border; must be >= 0. A value of exactly 0 is replaced by
            1e-23 because log(0) is undefined.
        b: right border; must be positive and larger than ``a``.
        N: number of intervals; the result holds ``N + 1`` points.

    Returns:
        numpy array running from ``a`` to ``b`` with logarithmic spacing.

    Raises:
        AssertionError: if a precondition on ``a``, ``b`` or ``N`` is violated.
    """
    if a == 0:
        a += 1e-23
    assert N > 0, "Precondition Violation, N <= 0 not allowed!"
    assert a > 0, "Precondition Violation, a < 0!"
    assert a < b, "Precondition Violation, a > b!"
    assert b > 0, "Precondition Violation, b < 0!"
    lexp = np.log(a)
    rexp = np.log(b)
    # An identity comparison against np.nan never fails; test the values.
    assert np.isfinite(lexp), "Precondition violation, left bound input error, results in nan!"
    assert np.isfinite(rexp), "Precondition violation, right bound input error, results in nan!"

    # linspace fixes the number of points; arange with a float step may emit
    # one point more or less depending on rounding of the step width.
    return np.exp(np.linspace(lexp, rexp, int(N) + 1))
def sample_domain(start, stop, count, ftype="uniform"):
    """Sample the interval [start, stop] with the requested spacing.

    Args:
        start: left border of the interval.
        stop: right border of the interval; must be larger than ``start``.
        count: number of intervals; must be positive.
        ftype: 'uniform', 'loguniform' or 'normal'.

    Returns:
        numpy array of sample positions.

    Raises:
        AssertionError: if ``stop <= start`` or ``count <= 0``.
        IOError: if ``ftype`` is not one of the known sampling types.
    """
    assert stop > start, "Precondition Violation, stop <= start not allowed!"
    assert count > 0, "Precondition Violation, N <= 0 not allowed!"
    if ftype == 'loguniform':
        return log_axis_sampling(start, stop, count)
    if ftype == 'normal':
        return gaussian_axis_sampling(start, stop, count)
    if ftype != 'uniform':
        raise IOError("Precondition Violation, unknown sampling function type!")
    # NOTE(review): arange with a float step can overshoot `stop` by one
    # element because of rounding; kept because existing expectations rely on it.
    step = (stop - start) / count
    return np.arange(start, stop + step, step)
class Trials(object):
def __init__(self):
self.loss = []
self.duration = []
self.status = []
self.parameter = []
self.best = None
self._tick = None
def start_iteration(self):
self._tick = time.process_time()
def stop_iteration(self):
if self._tick is None:
return
self.duration.append(time.process_time()-self._tick)
self._tick = None
def set_status(self, status=True):
self.status.append(status)
def set_parameter(self, params):
self.parameter.append(params)
def set_loss(self, value):
self.loss.append(value)
def get(self):
if len(self.loss) <= 0:
raise Exception("Empty solver results!")
if len(self.loss) != len(self.duration) or len(self.loss) != len(self.parameter) or len(self.loss) != len(self.status):
raise Exception("Inconsistent results in gridsearch solver!")
best_index = argmin(self.loss)
best = self.parameter[best_index]
worst_loss = self.loss[argmax(self.loss)]
for n in range(len(self.status)):
if not self.status[n]:
self.loss[n] = worst_loss
res = {
'losses': self.loss,
'duration': self.duration
}
is_string = []
for key, value in self.parameter[0].items():
res[key] = []
if isinstance(value, str):
is_string.append(key)
for p in self.parameter:
for key, value in p.items():
res[key].append(value)
for key in is_string:
uniques = unique(res[key])
lookup = {}
for n, p in enumerate(uniques):
lookup[p] = n
for n in range(len(res[key])):
res[key][n] = lookup[res[key][n]]
return res, best
# Walks a nested mapping, records the mapping-valued keys as categories and
# the remaining key/value pairs as values, and offers `unfold` to expand the
# recorded values into a list of flat dicts (one per value combination).
class NestedDictUnfolder(object):
# Stores the input, creates the empty bookkeeping containers and immediately
# walks the input through nested_dict_iter with this instance as collector.
def __init__(self, nested_dict):
self._nested_dict = nested_dict
self._categories = []
self._values = OrderedDict()
self._tree_leafs = []
NestedDictUnfolder.nested_dict_iter(self._nested_dict, self)
# Recursive walk: a mapping value registers its key as category and is
# descended into; any other value is registered through add_values.
# NOTE(review): mark_leaf presumably belongs to the non-mapping branch;
# the nesting is not visible here - confirm.
@staticmethod
def nested_dict_iter(nested, unfolder):
for key, value in nested.items():
if isinstance(value, abc.Mapping):
unfolder.add_category(key)
NestedDictUnfolder.nested_dict_iter(value, unfolder)
else:
unfolder.add_values(key, value)
unfolder.mark_leaf()
# Searches `nested` for a key equal to `node`; when found, the most recently
# visited key (`last_node`, "" if none) is appended to self._tree_leafs.
# Mapping values are searched recursively.
def find_parent_nodes(self, nested, node, last_node=""):
for key, value in nested.items():
if key == node:
self._tree_leafs.append(last_node)
return
else:
last_node = key
if isinstance(value, abc.Mapping):
self.find_parent_nodes(value, node, last_node)
else:
return
# For each given leaf name, looks at the entry stored directly before it in
# self._categories and returns the first such entry that is not itself one
# of the leaf names; returns None when no candidate is found.
# A single name is wrapped into a list first.
def find_parent_node(self, leaf_names):
if not isinstance(leaf_names, list):
leaf_names = [leaf_names]
for ln in leaf_names:
try:
# index 0 gives pos == -1, which addresses the last category
pos = self._categories.index(ln) - 1
candidate = self._categories[pos]
if candidate not in leaf_names:
return candidate
# NOTE(review): bare except hides every error, not only the ValueError
# that list.index raises for an unknown name - consider narrowing.
except:
pass
return None
# Appends a category name to the ordered list of categories.
def add_category(self, name):
self._categories.append(name)
# Stores the values under `name`; an existing entry of that name is replaced.
def add_values(self, name, values):
self._values[name] = values
# Records the most recently added category as tree leaf unless it is
# already recorded; does nothing while no category exists.
def mark_leaf(self):
if len(self._categories) > 0:
if not self._categories[-1] in self._tree_leafs:
self._tree_leafs.append(self._categories[-1])
# Builds the cartesian product of all stored value collections and returns
# one dict per combination, keyed by the stored value names in insertion order.
def permutate_values(self):
pset = list(self._values.values())
pset = list(itertools.product(*pset))
permutations = []
okeys = list(self._values.keys())
for ps in pset:
permutations.append({})
for i in range(len(okeys)):
permutations[-1][okeys[i]] = ps[i]
return permutations
# Repeatedly looks up the parent category of the current tree leafs and
# extends deep copies of the given dicts with an entry parent -> leaf name.
# Processed names are removed from self._categories and the tree leafs are
# recomputed through find_parent_nodes. Returns None when no parent exists;
# otherwise returns the accumulated list once the recomputed leafs consist
# of the single empty string.
# NOTE(review): the exact loop nesting is not visible here - confirm before
# restructuring.
def add_categories(self, values_permutated):
while True:
parent = self.find_parent_node(self._tree_leafs)
if parent is None:
return
result = []
for tl in self._tree_leafs:
for elem in values_permutated:
new = copy.deepcopy(elem)
new[parent] = tl
result.append(new)
while tl in self._categories:
self._categories.remove(tl)
while parent in self._categories:
self._categories.remove(parent)
self._tree_leafs = []
self.find_parent_nodes(self._nested_dict, parent)
if len(self._tree_leafs) == 1 and self._tree_leafs[0] == "":
break
values_permutated = copy.deepcopy(result)
return result
# Entry point: permutes the stored values and, when categories were
# recorded, passes the permutations through add_categories.
def unfold(self):
values_permutated = self.permutate_values()
if len(self._categories) > 0:
return self.add_categories(values_permutated)
return values_permutated
diff --git a/hyppopy/plugins/hyperopt_settings_plugin.py b/hyppopy/plugins/hyperopt_settings_plugin.py
index 9aad0ac..6ceafa6 100644
--- a/hyppopy/plugins/hyperopt_settings_plugin.py
+++ b/hyppopy/plugins/hyperopt_settings_plugin.py
@@ -1,105 +1,115 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
import os
import logging
import numpy as np
from hyppopy.globals import DEBUGLEVEL
LOG = logging.getLogger(os.path.basename(__file__))
LOG.setLevel(DEBUGLEVEL)
from pprint import pformat
try:
from hyperopt import hp
from yapsy.IPlugin import IPlugin
except:
LOG.warning("hyperopt package not installed, will ignore this plugin!")
print("hyperopt package not installed, will ignore this plugin!")
from hyppopy.settingspluginbase import SettingsPluginBase
from hyppopy.settingsparticle import SettingsParticle
class hyperopt_Settings(SettingsPluginBase, IPlugin):
    """Settings plugin translating the hyperparameter config for hyperopt."""

    def __init__(self):
        SettingsPluginBase.__init__(self)
        LOG.debug("initialized")

    def convert_parameter(self, input_dict):
        """Convert every hyperparameter description into its particle result.

        Args:
            input_dict: maps a parameter name to a dict that may carry the
                entries 'domain', 'data' and 'type'; other entries are ignored.

        Returns:
            dict mapping each parameter name to the value returned by the
            ``get()`` call of its ``hyperopt_SettingsParticle``.
        """
        LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(input_dict)))

        # config entry -> particle attribute receiving its value
        attribute_for = {'domain': 'domain', 'data': 'data', 'type': 'dtype'}

        solution_space = {}
        for name, content in input_dict.items():
            particle = hyperopt_SettingsParticle(name=name)
            for key, value in content.items():
                attribute = attribute_for.get(key)
                if attribute is not None:
                    setattr(particle, attribute, value)
            solution_space[name] = particle.get()
        return solution_space
class hyperopt_SettingsParticle(SettingsParticle):
    """Translate one hyperparameter description into a hyperopt expression."""

    def __init__(self, name=None, domain=None, dtype=None, data=None):
        SettingsParticle.__init__(self, name, domain, dtype, data)

    def _conversion_error(self, detail=""):
        """Log and return the LookupError for an unsupported dtype/domain pair."""
        msg = "cannot convert the type {} in domain {}{}".format(self.dtype, self.domain, detail)
        LOG.error(msg)
        return LookupError(msg)

    def convert(self):
        """Build the hyperopt search-space expression for this parameter.

        Returns:
            The ``hp.uniform``/``hp.choice``/``hp.loguniform``/``hp.normal``
            expression matching ``self.domain`` and ``self.dtype``.

        Raises:
            LookupError: if the dtype is not supported within the domain, the
                domain is unknown, or a bool entry cannot be interpreted.
            AssertionError: if the loguniform bounds are not
                ``0 <= data[0] < data[1]``.
        """
        is_float = self.dtype == "float" or self.dtype == "double"

        if self.domain == "uniform":
            if is_float:
                return hp.uniform(self.name, self.data[0], self.data[1])
            if self.dtype == "int":
                data = list(np.arange(int(self.data[0]), int(self.data[1] + 1)))
                return hp.choice(self.name, data)
            raise self._conversion_error()

        if self.domain == "loguniform":
            if not is_float:
                raise self._conversion_error()
            # Work on local copies so repeated calls never alter self.data.
            low, high = self.data[0], self.data[1]
            if low == 0:
                # log(0) is undefined; use a tiny positive value instead.
                low = 1e-23
            assert low > 0, "Precondition Violation, a < 0!"
            assert low < high, "Precondition Violation, a > b!"
            assert high > 0, "Precondition Violation, b < 0!"
            lexp = np.log(low)
            rexp = np.log(high)
            # An identity comparison against np.nan never fails; test the values.
            assert np.isfinite(lexp), "Precondition violation, left bound input error, results in nan!"
            assert np.isfinite(rexp), "Precondition violation, right bound input error, results in nan!"
            # hp.loguniform expects the bounds of the exponent.
            return hp.loguniform(self.name, lexp, rexp)

        if self.domain == "normal":
            if not is_float:
                raise self._conversion_error()
            half_range = (self.data[1] - self.data[0]) / 2.0
            # The mean is the centre of the interval, not its half-width.
            mu = self.data[0] + half_range
            sigma = half_range / 3
            return hp.normal(self.name, mu, sigma)

        if self.domain == "categorical":
            if self.dtype == 'str':
                return hp.choice(self.name, self.data)
            if self.dtype == 'bool':
                data = []
                for elem in self.data:
                    if elem in ("true", "True", 1, "1"):
                        data.append(True)
                    elif elem in ("false", "False", 0, "0"):
                        data.append(False)
                    else:
                        raise self._conversion_error(", unknown bool type value")
                return hp.choice(self.name, data)

        # Previously unsupported combinations fell through and returned None,
        # which only surfaced later inside the solver.
        raise self._conversion_error()
diff --git a/hyppopy/plugins/randomsearch_settings_plugin.py b/hyppopy/plugins/randomsearch_settings_plugin.py
index 75c5350..8aa5827 100644
--- a/hyppopy/plugins/randomsearch_settings_plugin.py
+++ b/hyppopy/plugins/randomsearch_settings_plugin.py
@@ -1,95 +1,35 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
import os
-import random
import logging
-import numpy as np
from pprint import pformat
from hyppopy.globals import DEBUGLEVEL
LOG = logging.getLogger(os.path.basename(__file__))
LOG.setLevel(DEBUGLEVEL)
from yapsy.IPlugin import IPlugin
-
-from hyppopy.helpers import sample_domain
-from hyppopy.projectmanager import ProjectManager
-from hyppopy.settingsparticle import SettingsParticle
from hyppopy.settingspluginbase import SettingsPluginBase
-from hyppopy.globals import RANDOMSAMPLES, DEFAULTITERATIONS
class randomsearch_Settings(SettingsPluginBase, IPlugin):
    """Settings plugin of the random search solver.

    The hyperparameter description is handed on unchanged.
    """

    def __init__(self):
        SettingsPluginBase.__init__(self)
        LOG.debug("initialized")

    def convert_parameter(self, input_dict):
        """Log the hyperparameter description and return it unmodified."""
        readable = pformat(input_dict)
        LOG.debug("convert input parameter\n\n\t{}\n".format(readable))
        return input_dict
diff --git a/hyppopy/plugins/randomsearch_solver_plugin.py b/hyppopy/plugins/randomsearch_solver_plugin.py
index ea8d579..9b389ce 100644
--- a/hyppopy/plugins/randomsearch_solver_plugin.py
+++ b/hyppopy/plugins/randomsearch_solver_plugin.py
@@ -1,75 +1,134 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
import os
+import copy
+import random
import logging
+import numpy as np
from hyppopy.globals import DEBUGLEVEL
LOG = logging.getLogger(os.path.basename(__file__))
LOG.setLevel(DEBUGLEVEL)
from pprint import pformat
from yapsy.IPlugin import IPlugin
from hyppopy.helpers import Trials
+from hyppopy.globals import DEFAULTITERATIONS
from hyppopy.projectmanager import ProjectManager
from hyppopy.solverpluginbase import SolverPluginBase
def drawUniformSample(param):
    """Draw one uniformly distributed sample from ``[data[0], data[1]]``.

    Args:
        param: dict with the entries 'type' (any value except 'str') and
            'data' (sequence whose first two entries are the lower and the
            upper bound, lower < upper).

    Returns:
        A float from the interval, or for ``type == 'int'`` a Python int for
        which every integer inside the interval is equally likely.

    Raises:
        AssertionError: if the type is 'str' or the bounds are not ascending.
    """
    assert param['type'] != 'str', "Cannot sample a string list uniformly!"
    low, high = param['data'][0], param['data'][1]
    assert low < high, "Precondition violation: data[0] > data[1]!"

    if param['type'] == 'int':
        # Rounding a continuous draw gives both end values only half the
        # probability of the inner ones; draw the integer directly instead.
        first = int(np.ceil(low))
        last = int(np.floor(high))
        if first > last:
            # No integer lies inside the interval; fall back to the
            # truncated lower bound.
            return int(low)
        return random.randint(first, last)

    return random.uniform(low, high)
+
+
def drawNormalSample(param):
    """Draw one normally distributed sample centred on the data interval.

    The mean is the centre of ``[data[0], data[1]]`` and sigma is one third
    of the half-width of the interval.

    Args:
        param: dict with the entry 'data' (sequence whose first two entries
            are the lower and the upper bound) and optionally 'type'.

    Returns:
        The sample; rounded to a Python int when ``param['type'] == 'int'``.
    """
    low, high = param['data'][0], param['data'][1]
    half_range = (high - low) / 2
    # The mean is the centre of the interval, not its half-width.
    mu = low + half_range
    sigma = half_range / 3
    s = np.random.normal(loc=mu, scale=sigma)
    if param.get('type') == 'int':
        s = int(np.round(s))
    return s
+
+
def drawLoguniformSample(param):
    """Draw one sample that is uniformly distributed on a log scale.

    Args:
        param: dict with the entries 'type' (any value except 'str') and
            'data' (sequence whose first two entries are the bounds,
            ``0 <= data[0] < data[1]``). A lower bound of exactly 0 is
            replaced by 1e-23 because log(0) is undefined.

    Returns:
        A float from the interval, or for ``type == 'int'`` the sample rounded
        to a Python int and clamped to the integers inside the interval.

    Raises:
        AssertionError: if the type is 'str' or the bounds are invalid.
    """
    assert param['type'] != 'str', "Cannot sample a string list uniformly!"
    low, high = param['data'][0], param['data'][1]
    if low == 0:
        low = 1e-23
    assert low > 0, "Precondition Violation, a < 0!"
    assert low < high, "Precondition Violation, a > b!"
    lexp = np.log(low)
    rexp = np.log(high)
    # An identity comparison against np.nan never fails; test the values.
    assert np.isfinite(lexp), "Precondition violation, left bound input error, results in nan!"
    assert np.isfinite(rexp), "Precondition violation, right bound input error, results in nan!"

    # Sample the exponent as a float and convert afterwards; rounding the
    # exponent itself would restrict the result to powers of e.
    s = np.exp(random.uniform(lexp, rexp))
    if param['type'] == 'int':
        smallest = int(np.ceil(param['data'][0]))
        largest = int(np.floor(high))
        s = max(smallest, min(int(np.round(s)), largest))
    return s
+
+
def drawCategoricalSample(param):
    """Return one randomly picked entry of ``param['data']``."""
    (picked,) = random.sample(param['data'], 1)
    return picked
+
+
def drawSample(param):
    """Draw one sample with the sampler matching ``param['domain']``.

    Args:
        param: hyperparameter description; its 'domain' entry selects the
            sampler ('uniform', 'normal', 'loguniform' or 'categorical').

    Returns:
        The value returned by the selected sampler.

    Raises:
        LookupError: if the domain is not one of the known names.
    """
    samplers = (
        ("uniform", drawUniformSample),
        ("normal", drawNormalSample),
        ("loguniform", drawLoguniformSample),
        ("categorical", drawCategoricalSample),
    )
    for domain, sampler in samplers:
        if param['domain'] == domain:
            return sampler(param)
    raise LookupError("Unknown domain {}".format(param['domain']))
+
+
class randomsearch_Solver(SolverPluginBase, IPlugin):
    """Solver plugin that evaluates randomly drawn parameter sets."""

    trials = None
    best = None

    def __init__(self):
        SolverPluginBase.__init__(self)
        LOG.debug("initialized")

    def blackbox_function(self, params):
        """Evaluate one parameter set and record the outcome in ``self.trials``.

        Exactly one parameter, duration, status and loss entry is recorded
        per call. The status is False when the evaluation raised or returned
        None; recording a second status in that case made the bookkeeping
        lists differ in length.

        Args:
            params: dict mapping parameter names to the sampled values.
        """
        loss = None
        self.trials.set_parameter(params)
        try:
            self.trials.start_iteration()
            loss = self.blackbox_function_template(self.data, params)
        except Exception as e:
            LOG.error("execution of self.loss(self.data, params) failed due to:\n {}".format(e))
        finally:
            self.trials.stop_iteration()
        self.trials.set_status(loss is not None)
        self.trials.set_loss(loss)
        return

    def execute_solver(self, parameter):
        """Run the random search.

        The number of iterations is read from ``ProjectManager.max_iterations``;
        when that entry is missing it is set to ``DEFAULTITERATIONS`` and a
        warning is emitted.

        Args:
            parameter: dict mapping parameter names to their descriptions as
                understood by ``drawSample``.

        Raises:
            BrokenPipeError: if sampling or evaluating an iteration fails.
        """
        LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(parameter)))
        self.trials = Trials()
        if 'max_iterations' not in ProjectManager.__dict__:
            msg = "Missing max_iteration entry in config, used default {}!".format(DEFAULTITERATIONS)
            LOG.warning(msg)
            print("WARNING: {}".format(msg))
            setattr(ProjectManager, 'max_iterations', DEFAULTITERATIONS)
        N = ProjectManager.max_iterations
        print("")
        try:
            for n in range(N):
                # A fresh sample is drawn for every parameter in each iteration.
                params = {name: drawSample(p) for name, p in parameter.items()}
                self.blackbox_function(params)
                print("\r{}% done".format(int(round(100.0 / N * n))), end="")
        except Exception as e:
            msg = "internal error in randomsearch execute_solver occured. {}".format(e)
            LOG.error(msg)
            raise BrokenPipeError(msg) from e
        print("\r{}% done".format(100), end="")
        print("")

    def convert_results(self):
        """Return the ``(results, best)`` tuple built by ``Trials.get``."""
        return self.trials.get()
diff --git a/hyppopy/tests/data/Iris/rf_grid_config.xml b/hyppopy/tests/data/Iris/rf_grid_config.xml
index 2d5c55f..c0e8ed0 100644
--- a/hyppopy/tests/data/Iris/rf_grid_config.xml
+++ b/hyppopy/tests/data/Iris/rf_grid_config.xml
@@ -1,30 +1,30 @@
uniform
- [1,300,10]
+ [1,300,3]
int
categorical
[gini,entropy]
str
uniform
- [1,50,10]
+ [1,50,3]
int
gridsearch
D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris
D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris
train_data.npy
train_labels.npy
\ No newline at end of file
diff --git a/hyppopy/tests/data/Titanic/adaboost_config.xml b/hyppopy/tests/data/Titanic/adaboost_config.xml
index 48b7b88..5840926 100644
--- a/hyppopy/tests/data/Titanic/adaboost_config.xml
+++ b/hyppopy/tests/data/Titanic/adaboost_config.xml
@@ -1,26 +1,26 @@
uniform
[1, 100]
int
loguniform
- [-10,3]
+ [0.0001,100]
float
3
hyperopt
D:/Projects/Python/hyppopy/hyppopy/tests/data/Titanic
D:/Projects/Python/hyppopy/hyppopy/tests/data/Titanic
train_cleaned.csv
Survived
\ No newline at end of file
diff --git a/hyppopy/tests/test_settings_plugins.py b/hyppopy/tests/test_settings_plugins.py
index 984c7c5..8aec1c7 100644
--- a/hyppopy/tests/test_settings_plugins.py
+++ b/hyppopy/tests/test_settings_plugins.py
@@ -1,129 +1,136 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
import unittest
+import numpy as np
from hyppopy.plugins.gridsearch_settings_plugin import gridsearch_SettingsParticle
from hyppopy.plugins.gridsearch_settings_plugin import gridsearch_Settings
class ProjectManagerTestSuite(unittest.TestCase):
    """Checks the grid sampling produced by the gridsearch settings plugin."""

    def setUp(self):
        # Hyperparameter descriptions; data holds [start, stop, count] for
        # sampled domains and the list of choices for categorical ones.
        self.hp = {
            'UniformFloat': {
                'domain': 'uniform',
                'data': [0, 1, 10],
                'type': 'float',
            },
            'UniformInt': {
                'domain': 'uniform',
                'data': [0, 7, 10],
                'type': 'int',
            },
            'NormalFloat': {
                'domain': 'normal',
                'data': [0, 1, 10],
                'type': 'float',
            },
            'NormalInt': {
                'domain': 'normal',
                'data': [0, 10, 10],
                'type': 'int',
            },
            # The source listed this entry twice with identical content; a
            # dict literal keeps only the last one, so a single entry remains.
            'LogFloat': {
                'domain': 'loguniform',
                'data': [0.01, np.e, 10],
                'type': 'float',
            },
            'LogInt': {
                'domain': 'loguniform',
                'data': [0, 1000000, 10],
                'type': 'int',
            },
            'CategoricalStr': {
                'domain': 'categorical',
                'data': ['a', 'b'],
                'type': 'str',
            },
            'CategoricalInt': {
                'domain': 'categorical',
                'data': [0, 1],
                'type': 'int',
            }
        }

        # Expected sampling per hyperparameter.
        self.truth = {
            'UniformFloat': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
            'UniformInt': [0, 1, 2, 3, 4, 5, 6, 7, 8],
            'NormalFloat': [0.0, 0.2592443381276233, 0.3673134565097225, 0.4251586871937128, 0.4649150940720099, 0.5,
                            0.5350849059279901, 0.5748413128062873, 0.6326865434902775, 0.7407556618723767, 1.0],
            'NormalInt': [0, 3, 4, 5, 6, 7, 10],
            'LogFloat': [0.010000000000000004, 0.017515778645640943, 0.030680250156309114, 0.053738847053080116,
                         0.0941277749653705, 0.16487212707001322, 0.28878636825943366, 0.5058318102310787,
                         0.8860038019931427, 1.551904647490817, 2.7182818284590575],
            'LogInt': [0, 2, 1259, 1000000],
            'CategoricalStr': ['a', 'b'],
            'CategoricalInt': [0, 1]
        }

    def test_gridsearch_settings(self):
        gss = gridsearch_Settings()
        gss.set_hyperparameter(self.hp)
        res = gss.get_hyperparameter()

        # Categorical parameters span the outer levels of the result tree.
        self.assertIn('CategoricalInt', res)
        self.assertEqual(len(res), 1)
        self.assertIn(0, res['CategoricalInt'])
        self.assertIn(1, res['CategoricalInt'])
        self.assertEqual(len(res['CategoricalInt']), 2)
        for int_choice in (0, 1):
            str_level = res['CategoricalInt'][int_choice]['CategoricalStr']
            self.assertIn('a', str_level)
            self.assertIn('b', str_level)
            self.assertEqual(len(str_level), 2)

        def check_truth(input_dict):
            # Every non-categorical parameter must start with its truth values.
            for key, expected in self.truth.items():
                if key.startswith('Categorical'):
                    continue
                self.assertIn(key, input_dict)
                for n, v in enumerate(expected):
                    self.assertAlmostEqual(v, input_dict[key][n])

        check_truth(res['CategoricalInt'][0]['CategoricalStr']['a'])
        check_truth(res['CategoricalInt'][1]['CategoricalStr']['a'])
        check_truth(res['CategoricalInt'][0]['CategoricalStr']['b'])
        check_truth(res['CategoricalInt'][1]['CategoricalStr']['b'])

    def test_gridsearch_particle(self):
        for name, data in self.hp.items():
            gsp = gridsearch_SettingsParticle(name=name,
                                              domain=data['domain'],
                                              dtype=data['type'],
                                              data=data['data'])
            sampled = gsp.get()
            for n, expected in enumerate(self.truth[name]):
                self.assertAlmostEqual(sampled[n], expected)

    def tearDown(self):
        pass
if __name__ == '__main__':
unittest.main()
diff --git a/hyppopy/tests/test_usecases.py b/hyppopy/tests/test_usecases.py
index bdc7da2..cc727a4 100644
--- a/hyppopy/tests/test_usecases.py
+++ b/hyppopy/tests/test_usecases.py
@@ -1,216 +1,217 @@
# DKFZ
#
#
# Copyright (c) German Cancer Research Center,
# Division of Medical and Biological Informatics.
# All rights reserved.
#
# This software is distributed WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.
#
# See LICENSE.txt or http://www.mitk.org for details.
#
# Author: Sven Wanner (s.wanner@dkfz.de)
import os
import shutil
import unittest
import tempfile
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from hyppopy.projectmanager import ProjectManager
from hyppopy.workflows.svc_usecase.svc_usecase import svc_usecase
from hyppopy.workflows.knc_usecase.knc_usecase import knc_usecase
from hyppopy.workflows.adaboost_usecase.adaboost_usecase import adaboost_usecase
from hyppopy.workflows.randomforest_usecase.randomforest_usecase import randomforest_usecase
DATA_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
class ProjectManagerTestSuite(unittest.TestCase):
    """Runs the bundled use cases end to end on the breast cancer dataset."""

    def setUp(self):
        breast_cancer_data = load_breast_cancer()
        x = breast_cancer_data.data
        y = breast_cancer_data.target
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15, random_state=23)

        # The directory is reused between runs; it is created on demand only.
        self.root = os.path.join(tempfile.gettempdir(), 'test_data')
        os.makedirs(self.root, exist_ok=True)
        x_train_fname = os.path.join(self.root, 'x_train.npy')
        y_train_fname = os.path.join(self.root, 'y_train.npy')
        np.save(x_train_fname, x_train)
        np.save(y_train_fname, y_train)

        self.train = [x_train, y_train]
        self.test = [x_test, y_test]
        self.config = {
            "hyperparameter": {},
            "settings": {
                "solver_plugin": {
                    "max_iterations": 3,
                    "use_plugin": "hyperopt",
                    "output_dir": os.path.join(self.root, 'test_results')
                },
                "custom": {
                    "data_path": self.root,
                    "data_name": "x_train.npy",
                    "labels_name": "y_train.npy"
                }
            }}

    def _report_accuracy(self, clf):
        """Fit ``clf`` on the training split and print its test accuracy."""
        clf.fit(self.train[0], self.train[1])
        train_predictions = clf.predict(self.test[0])
        acc = accuracy_score(self.test[1], train_predictions)
        print("Accuracy: {:.4%}".format(acc))
        print("=" * 30)

    @unittest.skip("disabled in the source (was commented out)")
    def test_svc_usecase(self):
        hyperparameter = {
            "C": {
                "domain": "uniform",
                "data": [0.0001, 300.0],
                "type": "float"
            },
            "kernel": {
                "domain": "categorical",
                "data": ["linear", "poly", "rbf"],
                "type": "str"
            }
        }

        self.config["hyperparameter"] = hyperparameter
        ProjectManager.set_config(self.config)
        uc = svc_usecase()
        uc.run(save=True)
        res, best = uc.get_results()
        print("=" * 30)
        print(best)
        print("=" * 30)
        clf = SVC(C=best['C'], kernel=hyperparameter['kernel']['data'][best['kernel']])
        self._report_accuracy(clf)

    def test_randomforest_usecase(self):
        hyperparameter = {
            "n_estimators": {
                "domain": "uniform",
                "data": [1, 500],
                "type": "int"
            },
            "criterion": {
                "domain": "categorical",
                "data": ["gini", "entropy"],
                "type": "str"
            },
            "max_depth": {
                "domain": "uniform",
                "data": [1, 50],
                "type": "int"
            },
            "max_features": {
                "domain": "categorical",
                "data": ["auto", "sqrt", "log2"],
                "type": "str"
            }
        }

        self.config["hyperparameter"] = hyperparameter
        ProjectManager.set_config(self.config)
        uc = randomforest_usecase()
        uc.run(save=False)
        res, best = uc.get_results()
        print("=" * 30)
        print(best)
        print("=" * 30)
        # Categorical string parameters are mapped back through their data
        # list, the same way criterion and the other use cases handle them.
        clf = RandomForestClassifier(n_estimators=best['n_estimators'],
                                     criterion=hyperparameter['criterion']['data'][best['criterion']],
                                     max_depth=best['max_depth'],
                                     max_features=hyperparameter['max_features']['data'][best['max_features']])
        clf.fit(self.train[0], self.train[1])
        print("feature importance:\n", clf.feature_importances_)
        train_predictions = clf.predict(self.test[0])
        acc = accuracy_score(self.test[1], train_predictions)
        print("Accuracy: {:.4%}".format(acc))
        print("=" * 30)

    def test_adaboost_usecase(self):
        hyperparameter = {
            "n_estimators": {
                "domain": "uniform",
                "data": [1, 300],
                "type": "int"
            },
            "learning_rate": {
                "domain": "loguniform",
                "data": [0.01, 100],
                "type": "float"
            }
        }

        self.config["hyperparameter"] = hyperparameter
        ProjectManager.set_config(self.config)
        uc = adaboost_usecase()
        uc.run(save=True)
        res, best = uc.get_results()
        print("=" * 30)
        print(best)
        print("=" * 30)
        clf = AdaBoostClassifier(n_estimators=best['n_estimators'], learning_rate=best['learning_rate'])
        self._report_accuracy(clf)

    def test_knc_usecase(self):
        hyperparameter = {
            "n_neighbors": {
                "domain": "uniform",
                "data": [1, 100],
                "type": "int"
            },
            "weights": {
                "domain": "categorical",
                "data": ["uniform", "distance"],
                "type": "str"
            },
            "algorithm": {
                "domain": "categorical",
                "data": ["auto", "ball_tree", "kd_tree", "brute"],
                "type": "str"
            }
        }

        self.config["hyperparameter"] = hyperparameter
        ProjectManager.set_config(self.config)
        uc = knc_usecase()
        uc.run(save=True)
        res, best = uc.get_results()
        print("=" * 30)
        print(best)
        print("=" * 30)
        clf = KNeighborsClassifier(n_neighbors=best['n_neighbors'],
                                   weights=hyperparameter['weights']['data'][best['weights']],
                                   algorithm=hyperparameter['algorithm']['data'][best['algorithm']])
        self._report_accuracy(clf)

    def tearDown(self):
        pass
if __name__ == '__main__':
unittest.main()