class hyperopt_Settings(SettingsPluginBase, IPlugin):
    """Settings plugin that turns a parameter dictionary into a hyperopt search space."""

    def __init__(self):
        SettingsPluginBase.__init__(self)
        LOG.debug("initialized")

    def convert_parameter(self, input_dict):
        """Convert every parameter description into a hyperopt expression.

        :param input_dict: mapping ``name -> description`` where each
            description is a mapping that may contain the keys ``'domain'``,
            ``'data'`` and ``'type'``; any other key is ignored.
        :return: dict mapping each parameter name to the object returned by
            ``hyperopt_SettingsParticle.get()``.
        :raises LookupError: if a description is incomplete or cannot be
            converted (raised by the particle).
        """
        LOG.debug(f"convert input parameter\n\n\t{pformat(input_dict)}\n")
        solution_space = {}
        for name, content in input_dict.items():
            particle = hyperopt_SettingsParticle(name=name)
            for key, value in content.items():
                if key == 'domain':
                    particle.domain = value
                elif key == 'data':
                    particle.data = value
                elif key == 'type':
                    particle.dtype = value
            solution_space[name] = particle.get()
        return solution_space


class hyperopt_SettingsParticle(SettingsParticle):
    """Single parameter description convertible into a hyperopt ``hp.*`` expression."""

    def __init__(self, name=None, domain=None, dtype=None, data=None):
        SettingsParticle.__init__(self, name, domain, dtype, data)

    def _fail(self, detail=""):
        """Log and raise the common 'cannot convert' error for this particle."""
        msg = f"cannot convert the type {self.dtype} in domain {self.domain}{detail}"
        LOG.error(msg)
        raise LookupError(msg)

    def _to_bool(self, elem):
        """Map one textual/numeric truth value to ``True``/``False``."""
        if elem == "true" or elem == "True" or elem == 1 or elem == "1":
            return True
        if elem == "false" or elem == "False" or elem == 0 or elem == "0":
            return False
        self._fail(", unknown bool type value")

    def convert(self):
        """Translate (domain, dtype, data) into a hyperopt search-space expression.

        Supported combinations:

        * ``uniform``     + ``float``/``double`` -> ``hp.uniform(name, data[0], data[1])``
        * ``uniform``     + ``int``              -> ``hp.choice`` over the integers
          ``int(data[0]) .. int(data[1] + 1) - 1``
        * ``loguniform``  + ``float``/``double`` -> ``hp.loguniform(name, data[0], data[1])``
        * ``normal``      + ``float``/``double`` -> ``hp.normal(name, data[0], data[1])``
        * ``categorical`` + ``str``              -> ``hp.choice(name, data)``
        * ``categorical`` + ``bool``             -> ``hp.choice`` over the parsed booleans

        :return: the hyperopt expression for this parameter.
        :raises LookupError: for every other domain/dtype combination and for
            boolean values that cannot be parsed.  Unknown domains and
            unsupported categorical dtypes used to fall through and return
            ``None``; they are rejected explicitly now.
        """
        is_float = self.dtype == "float" or self.dtype == "double"

        if self.domain == "uniform":
            if is_float:
                return hp.uniform(self.name, self.data[0], self.data[1])
            if self.dtype == "int":
                # plain Python ints (not numpy scalars) so sampled values can be
                # handed on to code that expects the builtin type
                choices = list(range(int(self.data[0]), int(self.data[1] + 1)))
                return hp.choice(self.name, choices)
        elif self.domain == "loguniform":
            if is_float:
                # NOTE(review): hyperopt documents hp.loguniform(label, low, high)
                # as exp(uniform(low, high)), i.e. the bounds are in log space.
                # The data is passed through unchanged here - confirm that the
                # configuration supplies log-space bounds.
                return hp.loguniform(self.name, self.data[0], self.data[1])
        elif self.domain == "normal":
            if is_float:
                # hyperopt's signature is hp.normal(label, mu, sigma), so data is
                # presumably [mean, standard deviation] - verify against the config
                return hp.normal(self.name, self.data[0], self.data[1])
        elif self.domain == "categorical":
            if self.dtype == 'str':
                return hp.choice(self.name, self.data)
            if self.dtype == 'bool':
                return hp.choice(self.name, [self._to_bool(elem) for elem in self.data])

        # unknown domain or a dtype the domain does not support
        self._fail()
class optunity_Settings(SettingsPluginBase, IPlugin):
    """Settings plugin that builds the nested search space passed to optunity."""

    def __init__(self):
        SettingsPluginBase.__init__(self)
        LOG.debug("initialized")

    def convert_parameter(self, input_dict):
        """Build a nested search-space dictionary from the parameter descriptions.

        Parameters whose ``'domain'`` is ``'categorical'`` become nesting
        levels; every other parameter that has a ``'domain'`` is placed, with
        its ``'data'`` value, in the innermost dictionary, which is shared by
        all categorical choices.  Descriptions without a ``'domain'`` key are
        ignored, and the ``'type'`` key is not evaluated.

        Example::

            {"C": {"domain": "uniform", "data": [0, 1]},
             "kernel": {"domain": "categorical", "data": ["linear", "rbf"]}}

        becomes::

            {"kernel": {"linear": {"C": [0, 1]}, "rbf": {"C": [0, 1]}}}

        :param input_dict: mapping ``name -> description`` (a mapping).
        :return: the nested dictionary; when there is no categorical parameter
            this is just the flat ``name -> data`` dictionary.  That case used
            to fail with a ``NameError``.
        """
        LOG.debug(f"convert input parameter\n\n\t{pformat(input_dict)}\n")

        # define function splitting input dict
        # into categorical and non-categorical
        def split_categorical(pdict):
            categorical = {}
            uniform = {}
            for name, pset in pdict.items():
                if 'domain' not in pset:
                    continue
                if pset['domain'] == 'categorical':
                    categorical[name] = pset
                else:
                    uniform[name] = pset
            return categorical, uniform

        # split input in categorical and non-categorical data
        cat, uni = split_categorical(input_dict)

        # build up dictionary keeping all non-categorical data
        uniforms = {name: pset['data'] for name, pset in uni.items() if 'data' in pset}

        # build nested categorical structure; each categorical parameter wraps
        # everything built so far, so the last one ends up outermost
        inner_level = uniforms
        for name, pset in cat.items():
            if 'data' in pset:
                choices = {elem: inner_level for elem in pset['data']}
            else:
                choices = {}
            inner_level = {name: choices}

        # equals the outermost categorical level, or the plain uniforms
        # dictionary when no categorical parameter exists
        return inner_level


# NOTE(review): unfinished optunity counterpart of the hyperopt particle,
# kept commented out exactly as a placeholder.
#
# class optunity_SettingsParticle(SettingsParticle):
#
#     def __init__(self, name=None, domain=None, dtype=None, data=None):
#         SettingsParticle.__init__(self, name, domain, dtype, data)
#
#     def convert(self):
#         if self.domain == "uniform":
#             if self.dtype == "float" or self.dtype == "double":
#                 pass
#             elif self.dtype == "int":
#                 pass
#             else:
#                 msg = f"cannot convert the type {self.dtype} in domain {self.domain}"
#                 LOG.error(msg)
#                 raise LookupError(msg)
#         elif self.domain == "loguniform":
#             if self.dtype == "float" or self.dtype == "double":
#                 pass
#             else:
#                 msg = f"cannot convert the type {self.dtype} in domain {self.domain}"
#                 LOG.error(msg)
#                 raise LookupError(msg)
#         elif self.domain == "normal":
#             if self.dtype == "float" or self.dtype == "double":
#                 pass
#             else:
#                 msg = f"cannot convert the type {self.dtype} in domain {self.domain}"
#                 LOG.error(msg)
#                 raise LookupError(msg)
#         elif self.domain == "categorical":
#             if self.dtype == 'str':
#                 pass
#             elif self.dtype == 'bool':
#                 pass
class optunity_Solver(SolverPluginBase, IPlugin):
    """Solver plugin running ``optunity.minimize_structured`` on a search space.

    ``self.loss``, ``self.data`` and ``self.settings`` are not defined in this
    class; presumably they are provided by ``SolverPluginBase`` - verify there.
    """
    # results of the last optunity run, filled by execute_solver
    solver_info = None
    trials = None
    best = None
    # one 'ok'/'fail' entry per loss evaluation of the current run
    status = None

    def __init__(self):
        SolverPluginBase.__init__(self)
        LOG.debug("initialized")

    def loss_function(self, **params):
        """Evaluate the loss for one parameter set and record the outcome.

        :param params: keyword arguments forwarded as a dict to ``self.loss``.
        :return: the loss value, or ``1e9`` when the evaluation raised.
        """
        try:
            value = self.loss(self.data, params)
            self.status.append('ok')
        except Exception as e:
            # a failing evaluation must not abort the whole optimization,
            # so it is logged and reported as a very large loss instead
            LOG.error(f"computing loss failed due to:\n {e}")
            self.status.append('fail')
            value = 1e9
        return value

    def execute_solver(self, parameter):
        """Run the optimization and store best, trials and solver_info.

        :param parameter: search space handed to optunity as ``search_space``.
        :raises BrokenPipeError: when ``optunity.minimize_structured`` fails.
        """
        LOG.debug(f"execute_solver using solution space:\n\n\t{pformat(parameter)}\n")
        self.status = []
        try:
            outcome = optunity.minimize_structured(
                f=self.loss_function,
                num_evals=self.settings.max_iterations,
                search_space=parameter,
            )
            self.best, self.trials, self.solver_info = outcome
        except Exception as e:
            msg = f"internal error in optunity.minimize_structured occured. {e}"
            LOG.error(msg)
            raise BrokenPipeError(msg)

    def convert_results(self):
        """Print the best parameters and the run statistics to stdout."""
        # entries whose value is None are left out of the report
        solution = {key: val for key, val in self.best.items() if val is not None}
        print('Solution\n========')
        rows = ["%s \t %s" % (key, str(val)) for key, val in solution.items()]
        print("\n".join(rows))
        print(f"Solver used: {self.solver_info['solver_name']}")
        print(f"Optimum: {self.trials.optimum}")
        print(f"Iterations used: {self.trials.stats['num_evals']}")
        print(f"Duration: {self.trials.stats['time']} s")
class SettingsParticle(object):
    """Holds one parameter description: name, domain, dtype and data.

    Subclasses implement ``convert`` to turn the stored description into
    whatever a concrete optimizer library expects; ``get`` checks that all
    four fields are set before delegating to ``convert``.

    NOTE(review): ``abc.abstractmethod`` is only enforced on classes whose
    metaclass is ``abc.ABCMeta``.  This class derives from ``object``, so it
    can still be instantiated and ``convert`` then raises
    ``NotImplementedError`` at call time.
    """
    # domain names; this list is not referenced elsewhere in the class
    domains = ["uniform", "loguniform", "normal", "categorical"]
    # class-level defaults so an unset field reads as None
    _name = None
    _domain = None
    _dtype = None
    _data = None

    def __init__(self, name=None, domain=None, dtype=None, data=None):
        # only assign what was actually passed; everything goes through the
        # property setters, in the order name, domain, dtype, data
        given = (("name", name), ("domain", domain), ("dtype", dtype), ("data", data))
        for attr, value in given:
            if value is not None:
                setattr(self, attr, value)

    @abc.abstractmethod
    def convert(self):
        """Return the library specific representation; must be overridden."""
        raise NotImplementedError("the user has to implement this function")

    def get(self):
        """Validate the particle and return ``self.convert()``.

        :raises LookupError: if name, domain, dtype or data is ``None``.  When
            several fields are unset, the message of the last one in that
            order is reported.
        """
        checks = (
            (self.name, "cannot convert unnamed parameter"),
            (self.domain, "cannot convert parameter of empty domain"),
            (self.dtype, "cannot convert parameter with unknown dtype"),
            (self.data, "cannot convert parameter having no data"),
        )
        problems = [text for field, text in checks if field is None]
        if problems:
            msg = problems[-1]
            LOG.error(msg)
            raise LookupError(msg)
        return self.convert()

    @property
    def name(self):
        """Parameter name."""
        return self._name

    @name.setter
    def name(self, value):
        self._name = value

    @property
    def domain(self):
        """Domain the parameter is drawn from."""
        return self._domain

    @domain.setter
    def domain(self, value):
        self._domain = value

    @property
    def dtype(self):
        """Type name of the parameter values."""
        return self._dtype

    @dtype.setter
    def dtype(self, value):
        self._dtype = value

    @property
    def data(self):
        """Data describing the parameter (e.g. bounds or choices)."""
        return self._data

    @data.setter
    def data(self, value):
        self._data = value
def data_loader(path, data_name, labels_name):
    """Load features and labels for the random forest use case.

    Two layouts are supported:

    * ``data_name`` and ``labels_name`` both name ``.npy`` files in ``path``.
    * ``data_name`` names a ``.csv`` file in ``path`` and ``labels_name`` is
      the name of the label column; all remaining columns are the features.

    :param path: directory containing the file(s).
    :param data_name: file name of the data (``.npy`` or ``.csv``).
    :param labels_name: label file name (``.npy``) or label column name (csv).
    :return: list ``[X, y]``.
    :raises IOError: if ``.npy`` data is combined with a non-``.npy`` label
        name, or if the csv file cannot be read / lacks the label column.
        The csv failure used to be printed only and then surfaced as an
        ``UnboundLocalError`` on return.
    :raises NotImplementedError: for any other ``data_name`` extension.
    """
    if data_name.endswith(".npy"):
        if not labels_name.endswith(".npy"):
            raise IOError("Expect both data_name and labels_name being of type .npy!")
        return [np.load(os.path.join(path, data_name)),
                np.load(os.path.join(path, labels_name))]

    if data_name.endswith(".csv"):
        try:
            dataset = pd.read_csv(os.path.join(path, data_name))
            y = dataset[labels_name].values
            X = dataset.drop([labels_name], axis=1).values
        except Exception as e:
            # fail loudly with the real cause attached instead of continuing
            # without any data
            raise IOError("Precondition violation, this usage case expects as data_name a "
                          "csv file and as label_name a name of a column in this csv table!") from e
        return [X, y]

    raise NotImplementedError("This combination of data_name and labels_name "
                              "does not yet exist, feel free to add it")


def randomforest_usecase(args):
    """Optimize ``RandomForestClassifier`` hyperparameters with the chosen solver.

    :param args: object providing ``plugin`` (solver name), ``config``
        (passed to ``solver.read_parameter``) and ``data`` (data directory).
    """
    print("Execute Random Forest UseCase...")
    factory = sfac.SolverFactory.instance()
    solver = factory.get_solver(args.plugin)
    solver.read_parameter(args.config)
    data = data_loader(args.data, solver.settings.data_name, solver.settings.labels_name)
    solver.set_data(data)

    def rf_loss(data, params):
        # the value is rounded and cast because a solver may propose a
        # non-integer n_estimators (presumably when sampling a continuous range)
        if "n_estimators" in params:
            params["n_estimators"] = int(round(params["n_estimators"]))
        clf = RandomForestClassifier(**params)
        # negated mean 3-fold cross-validation score, so minimizing the loss
        # maximizes the score
        return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean()

    solver.set_loss_function(rf_loss)
    solver.run()
    solver.get_results()