diff --git a/hyppopy/helpers.py b/hyppopy/helpers.py index 83cbdff..db68299 100644 --- a/hyppopy/helpers.py +++ b/hyppopy/helpers.py @@ -1,212 +1,222 @@ import copy import time import itertools import numpy as np from numpy import argmin, argmax, unique from collections import OrderedDict, abc def gaussian(x, mu, sigma): return 1.0/(sigma * np.sqrt(2*np.pi))*np.exp(-(x-mu)**2/(2*sigma**2)) def gaussian_axis_sampling(a, b, N): center = a + (b - a) / 2.0 delta = (b - a) / N bn = b - center xn = np.arange(0, bn, delta) dn = [] for x in xn: dn.append(1/gaussian(x, 0, bn/2.5)) dn = np.array(dn) dn /= np.sum(dn) dn *= bn axis = [0] for x in dn: axis.append(x+axis[-1]) axis.insert(0, -axis[-1]) axis = np.array(axis) axis += center return axis def log_axis_sampling(a, b, N): - delta = (b - a) / N - logrange = np.arange(a, b + delta, delta) + if a == 0: + a += 1e-23 + assert a > 0, "Precondition Violation, a < 0!" + assert a < b, "Precondition Violation, a > b!" + assert b > 0, "Precondition Violation, b < 0!" + lexp = np.log(a) + rexp = np.log(b) + assert lexp is not np.nan, "Precondition violation, left bound input error, results in nan!" + assert rexp is not np.nan, "Precondition violation, right bound input error, results in nan!" + + delta = (rexp - lexp) / N + logrange = np.arange(lexp, rexp + delta, delta) for n in range(logrange.shape[0]): logrange[n] = np.exp(logrange[n]) return logrange def sample_domain(start, stop, count, ftype="uniform"): assert stop > start, "Precondition Violation, stop <= start not allowed!" assert count > 0, "Precondition Violation, N <= 0 not allowed!" if ftype == 'uniform': delta = (stop - start)/count return np.arange(start, stop + delta, delta) elif ftype == 'loguniform': return log_axis_sampling(start, stop, count) elif ftype == 'normal': return gaussian_axis_sampling(start, stop, count) raise IOError("Precondition Violation, unknown sampling function type!") class Trials(object): def __init__(self): self.loss = [] self.duration = [] self.status = [] self.parameter = [] self.best = None self._tick = None def start_iteration(self): self._tick = time.process_time() def stop_iteration(self): if self._tick is None: return self.duration.append(time.process_time()-self._tick) self._tick = None def set_status(self, status=True): self.status.append(status) def set_parameter(self, params): self.parameter.append(params) def set_loss(self, value): self.loss.append(value) def get(self): if len(self.loss) <= 0: raise Exception("Empty solver results!") if len(self.loss) != len(self.duration) or len(self.loss) != len(self.parameter) or len(self.loss) != len(self.status): raise Exception("Inconsistent results in gridsearch solver!") best_index = argmin(self.loss) best = self.parameter[best_index] worst_loss = self.loss[argmax(self.loss)] for n in range(len(self.status)): if not self.status[n]: self.loss[n] = worst_loss res = { 'losses': self.loss, 'duration': self.duration } is_string = [] for key, value in self.parameter[0].items(): res[key] = [] if isinstance(value, str): is_string.append(key) for p in self.parameter: for key, value in p.items(): res[key].append(value) for key in is_string: uniques = unique(res[key]) lookup = {} for n, p in enumerate(uniques): lookup[p] = n for n in range(len(res[key])): res[key][n] = lookup[res[key][n]] return res, best class NestedDictUnfolder(object): def __init__(self, nested_dict): self._nested_dict = nested_dict self._categories = [] self._values = OrderedDict() self._tree_leafs = [] NestedDictUnfolder.nested_dict_iter(self._nested_dict, self) @staticmethod def nested_dict_iter(nested, unfolder): for key, value in nested.items(): if isinstance(value, abc.Mapping): unfolder.add_category(key) NestedDictUnfolder.nested_dict_iter(value, unfolder) else: unfolder.add_values(key, value) unfolder.mark_leaf() def find_parent_nodes(self, nested, node, last_node=""): for key, value in nested.items(): if key == node: self._tree_leafs.append(last_node) return else: last_node = key if isinstance(value, abc.Mapping): self.find_parent_nodes(value, node, last_node) else: return def find_parent_node(self, leaf_names): if not isinstance(leaf_names, list): leaf_names = [leaf_names] for ln in leaf_names: try: pos = self._categories.index(ln) - 1 candidate = self._categories[pos] if candidate not in leaf_names: return candidate except: pass return None def add_category(self, name): self._categories.append(name) def add_values(self, name, values): self._values[name] = values def mark_leaf(self): if len(self._categories) > 0: if not self._categories[-1] in self._tree_leafs: self._tree_leafs.append(self._categories[-1]) def permutate_values(self): pset = list(self._values.values()) pset = list(itertools.product(*pset)) permutations = [] okeys = list(self._values.keys()) for ps in pset: permutations.append({}) for i in range(len(okeys)): permutations[-1][okeys[i]] = ps[i] return permutations def add_categories(self, values_permutated): while True: parent = self.find_parent_node(self._tree_leafs) if parent is None: return result = [] for tl in self._tree_leafs: for elem in values_permutated: new = copy.deepcopy(elem) new[parent] = tl result.append(new) while tl in self._categories: self._categories.remove(tl) while parent in self._categories: self._categories.remove(parent) self._tree_leafs = [] self.find_parent_nodes(self._nested_dict, parent) if len(self._tree_leafs) == 1 and self._tree_leafs[0] == "": break values_permutated = copy.deepcopy(result) return result def unfold(self): values_permutated = self.permutate_values() if len(self._categories) > 0: return self.add_categories(values_permutated) return values_permutated diff --git a/hyppopy/plugins/hyperopt_settings_plugin.py b/hyppopy/plugins/hyperopt_settings_plugin.py index 9aad0ac..6ceafa6 100644 --- a/hyppopy/plugins/hyperopt_settings_plugin.py +++ b/hyppopy/plugins/hyperopt_settings_plugin.py @@ -1,105 +1,115 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os import logging import numpy as np from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) from pprint import pformat try: from hyperopt import hp from yapsy.IPlugin import IPlugin except: LOG.warning("hyperopt package not installed, will ignore this plugin!") print("hyperopt package not installed, will ignore this plugin!") from hyppopy.settingspluginbase import SettingsPluginBase from hyppopy.settingsparticle import SettingsParticle class hyperopt_Settings(SettingsPluginBase, IPlugin): def __init__(self): SettingsPluginBase.__init__(self) LOG.debug("initialized") def convert_parameter(self, input_dict): LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(input_dict))) solution_space = {} for name, content in input_dict.items(): particle = hyperopt_SettingsParticle(name=name) for key, value in content.items(): if key == 'domain': particle.domain = value elif key == 'data': particle.data = value elif key == 'type': particle.dtype = value solution_space[name] = particle.get() return solution_space class hyperopt_SettingsParticle(SettingsParticle): def __init__(self, name=None, domain=None, dtype=None, data=None): SettingsParticle.__init__(self, name, domain, dtype, data) def convert(self): if self.domain == "uniform": if self.dtype == "float" or self.dtype == "double": return hp.uniform(self.name, self.data[0], self.data[1]) elif self.dtype == "int": data = list(np.arange(int(self.data[0]), int(self.data[1]+1))) return hp.choice(self.name, data) else: msg = "cannot convert the type {} in domain {}".format(self.dtype, self.domain) LOG.error(msg) raise LookupError(msg) elif self.domain == "loguniform": if self.dtype == "float" or self.dtype == "double": - return hp.loguniform(self.name, self.data[0], self.data[1]) + if self.data[0] == 0: + self.data[0] += 1e-23 + assert self.data[0] > 0, "Precondition Violation, a < 0!" + assert self.data[0] < self.data[1], "Precondition Violation, a > b!" + assert self.data[1] > 0, "Precondition Violation, b < 0!" + lexp = np.log(self.data[0]) + rexp = np.log(self.data[1]) + assert lexp is not np.nan, "Precondition violation, left bound input error, results in nan!" + assert rexp is not np.nan, "Precondition violation, right bound input error, results in nan!" + + return hp.loguniform(self.name, lexp, rexp) else: msg = "cannot convert the type {} in domain {}".format(self.dtype, self.domain) LOG.error(msg) raise LookupError(msg) elif self.domain == "normal": if self.dtype == "float" or self.dtype == "double": mu = (self.data[1] - self.data[0])/2.0 sigma = mu/3 return hp.normal(self.name, mu, sigma) else: msg = "cannot convert the type {} in domain {}".format(self.dtype, self.domain) LOG.error(msg) raise LookupError(msg) elif self.domain == "categorical": if self.dtype == 'str': return hp.choice(self.name, self.data) elif self.dtype == 'bool': data = [] for elem in self.data: if elem == "true" or elem == "True" or elem == 1 or elem == "1": data .append(True) elif elem == "false" or elem == "False" or elem == 0 or elem == "0": data .append(False) else: msg = "cannot convert the type {} in domain {}, unknown bool type value".format(self.dtype, self.domain) LOG.error(msg) raise LookupError(msg) return hp.choice(self.name, data) diff --git a/hyppopy/plugins/randomsearch_settings_plugin.py b/hyppopy/plugins/randomsearch_settings_plugin.py index 75c5350..8aa5827 100644 --- a/hyppopy/plugins/randomsearch_settings_plugin.py +++ b/hyppopy/plugins/randomsearch_settings_plugin.py @@ -1,95 +1,35 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os -import random import logging -import numpy as np from pprint import pformat from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) from yapsy.IPlugin import IPlugin - -from hyppopy.helpers import sample_domain -from hyppopy.projectmanager import ProjectManager -from hyppopy.settingsparticle import SettingsParticle from hyppopy.settingspluginbase import SettingsPluginBase -from hyppopy.globals import RANDOMSAMPLES, DEFAULTITERATIONS class randomsearch_Settings(SettingsPluginBase, IPlugin): def __init__(self): SettingsPluginBase.__init__(self) LOG.debug("initialized") def convert_parameter(self, input_dict): LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(input_dict))) - - solution_space = {} - for name, content in input_dict.items(): - particle = randomsearch_SettingsParticle(name=name) - for key, value in content.items(): - if key == 'domain': - particle.domain = value - elif key == 'data': - particle.data = value - elif key == 'type': - particle.dtype = value - solution_space[name] = particle.get() - return solution_space - - -class randomsearch_SettingsParticle(SettingsParticle): - - def __init__(self, name=None, domain=None, dtype=None, data=None): - SettingsParticle.__init__(self, name, domain, dtype, data) - - def convert(self): - assert isinstance(self.data, list), "Precondition Violation, invalid input type for data!" - N = DEFAULTITERATIONS - if "max_iterations" in ProjectManager.__dict__.keys(): - N = ProjectManager.max_iterations - else: - setattr(ProjectManager, 'max_iterations', N) - ProjectManager.max_iterations - msg = "No max_iterrations set, set it to default [{}]".format(DEFAULTITERATIONS) - LOG.warning(msg) - print("WARNING: {}".format(msg)) - - if self.domain == "categorical": - samples = [] - for n in range(N): - samples.append(random.sample(self.data, 1)[0]) - return samples - else: - assert len(self.data) >= 2, "Precondition Violation, invalid input data!" - - full_range = list(sample_domain(start=self.data[0], stop=self.data[1], count=RANDOMSAMPLES, ftype=self.domain)) - if self.dtype == "int": - data = [] - for s in full_range: - val = int(np.round(s)) - if len(data) > 0: - if val == data[-1]: - continue - data.append(val) - full_range = data - samples = [] - for n in range(N): - samples.append(random.sample(full_range, 1)[0]) - return samples + return input_dict diff --git a/hyppopy/plugins/randomsearch_solver_plugin.py b/hyppopy/plugins/randomsearch_solver_plugin.py index ea8d579..9b389ce 100644 --- a/hyppopy/plugins/randomsearch_solver_plugin.py +++ b/hyppopy/plugins/randomsearch_solver_plugin.py @@ -1,75 +1,134 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os +import copy +import random import logging +import numpy as np from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) from pprint import pformat from yapsy.IPlugin import IPlugin from hyppopy.helpers import Trials +from hyppopy.globals import DEFAULTITERATIONS from hyppopy.projectmanager import ProjectManager from hyppopy.solverpluginbase import SolverPluginBase +def drawUniformSample(param): + assert param['type'] != 'str', "Cannot sample a string list uniformly!" + assert param['data'][0] < param['data'][1], "Precondition violation: data[0] > data[1]!" + s = random.random() + s *= np.abs(param['data'][1]-param['data'][0]) + s += param['data'][0] + if param['type'] == 'int': + s = int(np.round(s)) + if s < param['data'][0]: + s = int(param['data'][0]) + if s > param['data'][1]: + s = int(param['data'][1]) + return s + + +def drawNormalSample(param): + mu = (param['data'][1]-param['data'][0])/2 + sigma = mu/3 + s = np.random.normal(loc=mu, scale=sigma) + return s + + +def drawLoguniformSample(param): + p = copy.deepcopy(param) + p['data'][0] = np.log(param['data'][0]) + p['data'][1] = np.log(param['data'][1]) + assert p['data'][0] is not np.nan, "Precondition violation, left bound input error, results in nan!" + assert p['data'][1] is not np.nan, "Precondition violation, right bound input error, results in nan!" + x = drawUniformSample(p) + s = np.exp(x) + return s + + +def drawCategoricalSample(param): + return random.sample(param['data'], 1)[0] + + +def drawSample(param): + if param['domain'] == "uniform": + return drawUniformSample(param) + elif param['domain'] == "normal": + return drawNormalSample(param) + elif param['domain'] == "loguniform": + return drawLoguniformSample(param) + elif param['domain'] == "categorical": + return drawCategoricalSample(param) + else: + raise LookupError("Unknown domain {}".format(param['domain'])) + + class randomsearch_Solver(SolverPluginBase, IPlugin): trials = None best = None def __init__(self): SolverPluginBase.__init__(self) LOG.debug("initialized") def blackbox_function(self, params): loss = None self.trials.set_parameter(params) try: self.trials.start_iteration() loss = self.blackbox_function_template(self.data, params) self.trials.stop_iteration() if loss is None: self.trials.set_status(False) except Exception as e: LOG.error("execution of self.loss(self.data, params) failed due to:\n {}".format(e)) self.trials.set_status(False) self.trials.stop_iteration() self.trials.set_status(True) self.trials.set_loss(loss) return def execute_solver(self, parameter): LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(parameter))) self.trials = Trials() + if 'max_iterations' not in ProjectManager.__dict__: + msg = "Missing max_iteration entry in config, used default {}!".format(DEFAULTITERATIONS) + LOG.warning(msg) + print("WARNING: {}".format(msg)) + setattr(ProjectManager, 'max_iterations', DEFAULTITERATIONS) N = ProjectManager.max_iterations print("") try: for n in range(N): params = {} - for key, value in parameter.items(): - params[key] = value[n] + for name, p in parameter.items(): + params[name] = drawSample(p) self.blackbox_function(params) print("\r{}% done".format(int(round(100.0 / N * n))), end="") except Exception as e: msg = "internal error in randomsearch execute_solver occured. {}".format(e) LOG.error(msg) raise BrokenPipeError(msg) print("\r{}% done".format(100), end="") print("") def convert_results(self): return self.trials.get() diff --git a/hyppopy/tests/data/Iris/rf_grid_config.xml b/hyppopy/tests/data/Iris/rf_grid_config.xml index 2d5c55f..c0e8ed0 100644 --- a/hyppopy/tests/data/Iris/rf_grid_config.xml +++ b/hyppopy/tests/data/Iris/rf_grid_config.xml @@ -1,30 +1,30 @@ uniform - [1,300,10] + [1,300,3] int categorical [gini,entropy] str uniform - [1,50,10] + [1,50,3] int gridsearch D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris D:/Projects/Python/hyppopy/hyppopy/tests/data/Iris train_data.npy train_labels.npy \ No newline at end of file diff --git a/hyppopy/tests/data/Titanic/adaboost_config.xml b/hyppopy/tests/data/Titanic/adaboost_config.xml index 48b7b88..5840926 100644 --- a/hyppopy/tests/data/Titanic/adaboost_config.xml +++ b/hyppopy/tests/data/Titanic/adaboost_config.xml @@ -1,26 +1,26 @@ uniform [1, 100] int loguniform - [-10,3] + [0.0001,100] float 3 hyperopt D:/Projects/Python/hyppopy/hyppopy/tests/data/Titanic D:/Projects/Python/hyppopy/hyppopy/tests/data/Titanic train_cleaned.csv Survived \ No newline at end of file diff --git a/hyppopy/tests/test_settings_plugins.py b/hyppopy/tests/test_settings_plugins.py index 984c7c5..8aec1c7 100644 --- a/hyppopy/tests/test_settings_plugins.py +++ b/hyppopy/tests/test_settings_plugins.py @@ -1,129 +1,136 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import unittest +import numpy as np from hyppopy.plugins.gridsearch_settings_plugin import gridsearch_SettingsParticle from hyppopy.plugins.gridsearch_settings_plugin import gridsearch_Settings class ProjectManagerTestSuite(unittest.TestCase): def setUp(self): self.hp = { 'UniformFloat': { 'domain': 'uniform', 'data': [0, 1, 10], 'type': 'float', }, 'UniformInt': { 'domain': 'uniform', 'data': [0, 7, 10], 'type': 'int', }, 'NormalFloat': { 'domain': 'normal', 'data': [0, 1, 10], 'type': 'float', }, 'NormalInt': { 'domain': 'normal', 'data': [0, 10, 10], 'type': 'int', }, 'LogFloat': { 'domain': 'loguniform', - 'data': [-5, 5, 10], + 'data': [0.01, np.e, 10], 'type': 'float', }, 'LogFloat': { 'domain': 'loguniform', - 'data': [-5, 5, 10], + 'data': [0.01, np.e, 10], 'type': 'float', }, 'LogInt': { 'domain': 'loguniform', - 'data': [0, 6, 10], + 'data': [0, 1000000, 10], 'type': 'int', }, 'CategoricalStr': { 'domain': 'categorical', 'data': ['a', 'b'], 'type': 'str', }, 'CategoricalInt': { 'domain': 'categorical', 'data': [0, 1], 'type': 'int', } } + self.truth = { 'UniformFloat': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], 'UniformInt': [0, 1, 2, 3, 4, 5, 6, 7, 8], 'NormalFloat': [0.0, 0.2592443381276233, 0.3673134565097225, 0.4251586871937128, 0.4649150940720099, 0.5, 0.5350849059279901, 0.5748413128062873, 0.6326865434902775, 0.7407556618723767, 1.0], 'NormalInt': [0, 3, 4, 5, 6, 7, 10], - 'LogFloat': [0.006737946999085467, 0.01831563888873418, 0.049787068367863944, 0.1353352832366127, 0.36787944117144233, - 1.0, 2.718281828459045, 7.38905609893065, 20.085536923187668, 54.598150033144236, 148.4131591025766], - 'LogInt': [1, 2, 3, 6, 11, 20, 37, 67, 122, 221, 403], + 'LogFloat': [0.010000000000000004, 0.017515778645640943, 0.030680250156309114, 0.053738847053080116, + 0.0941277749653705, 0.16487212707001322, 0.28878636825943366, 0.5058318102310787, + 0.8860038019931427, 1.551904647490817, 2.7182818284590575], + 'LogInt': [0, 2, 1259, 1000000], 'CategoricalStr': ['a', 'b'], 'CategoricalInt': [0, 1] } def test_gridsearch_settings(self): gss = gridsearch_Settings() gss.set_hyperparameter(self.hp) res = gss.get_hyperparameter() self.assertTrue('CategoricalInt' in res.keys()) self.assertTrue(len(res) == 1) self.assertTrue(0 in res['CategoricalInt'].keys()) self.assertTrue(1 in res['CategoricalInt'].keys()) self.assertTrue(len(res['CategoricalInt']) == 2) self.assertTrue('a' in res['CategoricalInt'][0]['CategoricalStr'].keys()) self.assertTrue('b' in res['CategoricalInt'][0]['CategoricalStr'].keys()) self.assertTrue(len(res['CategoricalInt'][0]['CategoricalStr']) == 2) self.assertTrue('a' in res['CategoricalInt'][1]['CategoricalStr'].keys()) self.assertTrue('b' in res['CategoricalInt'][1]['CategoricalStr'].keys()) self.assertTrue(len(res['CategoricalInt'][1]['CategoricalStr']) == 2) def check_truth(input_dict): for key, value in self.truth.items(): if not key.startswith('Categorical'): self.assertTrue(key in input_dict.keys()) + if key == 'LogFloat': + a=0 + if key == 'LogInt': + a=0 for n, v in enumerate(self.truth[key]): self.assertAlmostEqual(v, input_dict[key][n]) check_truth(res['CategoricalInt'][0]['CategoricalStr']['a']) check_truth(res['CategoricalInt'][1]['CategoricalStr']['a']) check_truth(res['CategoricalInt'][0]['CategoricalStr']['b']) check_truth(res['CategoricalInt'][1]['CategoricalStr']['b']) def test_gridsearch_particle(self): for name, data in self.hp.items(): gsp = gridsearch_SettingsParticle(name=name, domain=data['domain'], dtype=data['type'], data=data['data']) data = gsp.get() for n in range(len(self.truth[name])): self.assertAlmostEqual(data[n], self.truth[name][n]) def tearDown(self): pass if __name__ == '__main__': unittest.main() diff --git a/hyppopy/tests/test_usecases.py b/hyppopy/tests/test_usecases.py index bdc7da2..cc727a4 100644 --- a/hyppopy/tests/test_usecases.py +++ b/hyppopy/tests/test_usecases.py @@ -1,216 +1,217 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os import shutil import unittest import tempfile import numpy as np from sklearn.svm import SVC from sklearn.metrics import accuracy_score from sklearn.datasets import load_breast_cancer from sklearn.ensemble import AdaBoostClassifier from sklearn.neighbors import KNeighborsClassifier from sklearn.ensemble import RandomForestClassifier from sklearn.model_selection import train_test_split from hyppopy.projectmanager import ProjectManager from hyppopy.workflows.svc_usecase.svc_usecase import svc_usecase from hyppopy.workflows.knc_usecase.knc_usecase import knc_usecase from hyppopy.workflows.adaboost_usecase.adaboost_usecase import adaboost_usecase from hyppopy.workflows.randomforest_usecase.randomforest_usecase import randomforest_usecase DATA_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") class ProjectManagerTestSuite(unittest.TestCase): def setUp(self): breast_cancer_data = load_breast_cancer() x = breast_cancer_data.data y = breast_cancer_data.target x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15, random_state=23) self.root = os.path.join(tempfile.gettempdir(), 'test_data') - if os.path.isdir(self.root): - shutil.rmtree(self.root) - os.makedirs(self.root) + #if os.path.isdir(self.root): + #shutil.rmtree(self.root) + if not os.path.isdir(self.root): + os.makedirs(self.root) x_train_fname = os.path.join(self.root, 'x_train.npy') y_train_fname = os.path.join(self.root, 'y_train.npy') np.save(x_train_fname, x_train) np.save(y_train_fname, y_train) self.train = [x_train, y_train] self.test = [x_test, y_test] self.config = { "hyperparameter": {}, "settings": { "solver_plugin": { "max_iterations": 3, "use_plugin": "hyperopt", "output_dir": os.path.join(self.root, 'test_results') }, "custom": { "data_path": self.root, "data_name": "x_train.npy", "labels_name": "y_train.npy" } }} # def test_svc_usecase(self): # hyperparameter = { # "C": { # "domain": "uniform", # "data": [0.0001, 300.0], # "type": "float" # }, # "kernel": { # "domain": "categorical", # "data": ["linear", "poly", "rbf"], # "type": "str" # } # } # # self.config["hyperparameter"] = hyperparameter # ProjectManager.set_config(self.config) # uc = svc_usecase() # uc.run(save=True) # res, best = uc.get_results() # print("="*30) # print(best) # print("=" * 30) # clf = SVC(C=best['C'], kernel=hyperparameter['kernel']['data'][best['kernel']]) # clf.fit(self.train[0], self.train[1]) # train_predictions = clf.predict(self.test[0]) # acc = accuracy_score(self.test[1], train_predictions) # print("Accuracy: {:.4%}".format(acc)) # print("=" * 30) def test_randomforest_usecase(self): hyperparameter = { "n_estimators": { "domain": "uniform", "data": [1, 500], "type": "int" }, "criterion": { "domain": "categorical", "data": ["gini", "entropy"], "type": "str" }, "max_depth": { "domain": "uniform", "data": [1, 50], "type": "int" }, "max_features": { "domain": "categorical", "data": ["auto", "sqrt", "log2"], "type": "str" } } self.config["hyperparameter"] = hyperparameter ProjectManager.set_config(self.config) uc = randomforest_usecase() uc.run(save=False) res, best = uc.get_results() print("=" * 30) print(best) print("=" * 30) clf = RandomForestClassifier(n_estimators=best['n_estimators'], criterion=hyperparameter['criterion']['data'][best['criterion']], max_depth=best['max_depth'], max_features=best['max_features']) clf.fit(self.train[0], self.train[1]) print("feature importance:\n", clf.feature_importances_) train_predictions = clf.predict(self.test[0]) acc = accuracy_score(self.test[1], train_predictions) print("Accuracy: {:.4%}".format(acc)) print("=" * 30) def test_adaboost_usecase(self): hyperparameter = { "n_estimators": { "domain": "uniform", "data": [1, 300], "type": "int" }, "learning_rate": { "domain": "loguniform", - "data": [-10, 3], + "data": [0.01, 100], "type": "float" } } self.config["hyperparameter"] = hyperparameter ProjectManager.set_config(self.config) uc = adaboost_usecase() uc.run(save=True) res, best = uc.get_results() print("=" * 30) print(best) print("=" * 30) clf = AdaBoostClassifier(n_estimators=best['n_estimators'], learning_rate=best['learning_rate']) clf.fit(self.train[0], self.train[1]) train_predictions = clf.predict(self.test[0]) acc = accuracy_score(self.test[1], train_predictions) print("Accuracy: {:.4%}".format(acc)) print("=" * 30) def test_knc_usecase(self): hyperparameter = { "n_neighbors": { "domain": "uniform", "data": [1, 100], "type": "int" }, "weights": { "domain": "categorical", "data": ["uniform", "distance"], "type": "str" }, "algorithm": { "domain": "categorical", "data": ["auto", "ball_tree", "kd_tree", "brute"], "type": "str" } } self.config["hyperparameter"] = hyperparameter ProjectManager.set_config(self.config) uc = knc_usecase() uc.run(save=True) res, best = uc.get_results() print("=" * 30) print(best) print("=" * 30) clf = KNeighborsClassifier(n_neighbors=best['n_neighbors'], weights=hyperparameter['weights']['data'][best['weights']], algorithm=hyperparameter['algorithm']['data'][best['algorithm']]) clf.fit(self.train[0], self.train[1]) train_predictions = clf.predict(self.test[0]) acc = accuracy_score(self.test[1], train_predictions) print("Accuracy: {:.4%}".format(acc)) print("=" * 30) def tearDown(self): pass if __name__ == '__main__': unittest.main()