LOG = logging.getLogger('hyppopy')


def convert_ordered2std_dict(obj):
    """
    Helper function converting nested OrderedDict values into standard lib dicts.

    The conversion happens in place. Only the values are converted, the container
    passed in keeps its own type, so the caller has to convert the top level itself.

    :param obj: [dict] or [OrderedDict], container whose nested values are converted
    """
    for key, value in obj.items():
        if isinstance(value, OrderedDict):
            # replacing the value of an existing key does not resize the dict,
            # which makes it safe while iterating over items()
            value = dict(value)
            obj[key] = value
        if isinstance(value, dict):
            convert_ordered2std_dict(value)


def check_dir_existance(dirname):
    """
    Helper function to check if a directory exists, creating it if not.

    An empty dirname (a filename without directory part) refers to the current
    working directory and is ignored. Missing parent directories are created too.

    :param dirname: [str] full path of the directory to check
    """
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname, exist_ok=True)


class DeepDict(object):
    """
    The DeepDict class represents a nested dictionary with additional functionality
    compared to a standard lib dict. The data can be accessed and changed via a
    pathlike access and dumped or read to .json/.xml files.

    Initializing instances using defaults creates an empty DeepDict. Using in_data
    enables to initialize the object instance with data, where in_data can be a dict,
    or a filepath to a json or xml file. Using path_sep the appearance of path passing
    can be changed, a default data access via path would look like
    my_dd['target/section/path'], with path_sep='.' like so my_dd['target.section.path']

    :param in_data: [dict] or [str], input dict or filename
    :param path_sep: [str] path separator character
    """
    _data = None
    _sep = "/"

    def __init__(self, in_data=None, path_sep="/"):
        self.clear()
        self._sep = path_sep
        LOG.debug(f"path separator is: {self._sep}")
        if in_data is not None:
            if isinstance(in_data, str):
                self.from_file(in_data)
            elif isinstance(in_data, dict):
                self.data = in_data

    def __str__(self):
        """
        Enables print output for class instances, printing the instance data dict using pretty print

        :return: [str]
        """
        return pprint.pformat(self.data)

    def __eq__(self, other):
        """
        Overloads the == operator comparing the instance data dictionaries for equality.
        A plain dict on the right hand side is compared against the instance data, any
        other type is reported as not comparable instead of raising.

        :param other: [DeepDict] or [dict] rhs
        :return: [bool]
        """
        if isinstance(other, DeepDict):
            return self.data == other.data
        if isinstance(other, dict):
            return self.data == other
        return NotImplemented

    def __getitem__(self, path):
        """
        Overloads the return of the [] operator for data access. This enables access the DeepDict
        instance like so: my_dd['target/section/path'] or my_dd[['target','section','path']]

        :param path: [str] or [list(str)], the path to the target data structure level/content
        :return: [object]
        """
        return DeepDict.get_from_path(self.data, path, self.sep)

    def __setitem__(self, path, value=None):
        """
        Overloads the setter for the [] operator for data assignment.

        Missing sections along the path are created. Assigning None creates an empty
        section if the target does not exist yet and leaves an existing target untouched.
        Any other value is stored at the target, replacing what was there before.

        :param path: [str] or [list(str)], the path to the target data structure level/content
        :param value: [object] rhs assignment object
        :raises IOError: on an invalid path or if the path runs through a non-dict value
        """
        # _split_path returns a copy, the list passed in by the caller is never modified
        keys = DeepDict._split_path(path, self.sep)
        if len(keys) < 1:
            raise IOError("Input Error, missing section strings")

        node = self._data
        for key in keys[:-1]:
            if key not in node:
                node[key] = {}
            elif not isinstance(node[key], dict):
                LOG.error(f"Input Error, section {key} is not a dict, cannot descend into it")
                raise IOError(f"Input Error, section {key} is not a dict, cannot descend into it")
            node = node[key]

        leaf = keys[-1]
        if value is not None:
            node[leaf] = value
        elif leaf not in node:
            node[leaf] = {}

    def __len__(self):
        """
        :return: [int] number of top level entries
        """
        return len(self._data)

    def clear(self):
        """
        clears the instance data
        """
        LOG.debug("clear()")
        self._data = {}

    def from_file(self, fname):
        """
        Loads data from file. Currently implemented .json and .xml file reader.

        :param fname: [str] filename
        """
        if not isinstance(fname, str):
            raise IOError("Input Error, expect str type for fname")
        if fname.endswith(".json"):
            self.read_json(fname)
        elif fname.endswith(".xml"):
            self.read_xml(fname)
        else:
            LOG.error("Unknown filetype, expect [.json, .xml]")
            raise NotImplementedError("Unknown filetype, expect [.json, .xml]")

    def read_json(self, fname):
        """
        Read json file, all values are type converted using parse_type.

        :param fname: [str] input filename
        :raises IOError: if the file is missing, unreadable or type conversion fails
        """
        if not isinstance(fname, str):
            raise IOError("Input Error, expect str type for fname")
        if not os.path.isfile(fname):
            raise IOError(f"File {fname} not found!")
        LOG.debug(f"read_json({fname})")
        try:
            with open(fname, "r") as read_file:
                self._data = json.load(read_file)
            DeepDict.value_traverse(self.data, callback=DeepDict.parse_type)
        except Exception as e:
            LOG.error(f"Error while reading json file {fname} or while converting types")
            raise IOError(f"Error while reading json file {fname} or while converting types") from e

    def read_xml(self, fname):
        """
        Read xml file, all values are type converted using parse_type.

        :param fname: [str] input filename
        :raises IOError: if the file is missing, unreadable or type conversion fails
        """
        if not isinstance(fname, str):
            raise IOError("Input Error, expect str type for fname")
        if not os.path.isfile(fname):
            raise IOError(f"File {fname} not found!")
        LOG.debug(f"read_xml({fname})")
        try:
            with open(fname, "r") as read_file:
                xml = read_file.read()
            self._data = xmltodict.parse(xml, attr_prefix='')
            DeepDict.value_traverse(self.data, callback=DeepDict.parse_type)
        except Exception as e:
            LOG.error(f"Error while reading xml file {fname} or while converting types")
            raise IOError(f"Error while reading xml file {fname} or while converting types") from e

        # if written with DeepDict, the xml contains a root node which should be
        # removed for consistency reasons. Files written before the root name became
        # configurable use the legacy root name 'deepdict', which is still accepted.
        for root in (DEEPDICT_XML_ROOT, "deepdict"):
            if root in self._data:
                self._data = self._data[root]
                break

        if self._data is None:
            # an empty root element is parsed to None
            self._data = {}
        if not isinstance(self._data, dict):
            LOG.error(f"Error while reading xml file {fname}, root node contains no sections")
            raise IOError(f"Error while reading xml file {fname}, root node contains no sections")
        self._data = dict(self.data)
        # convert the ordered dict structure to a default dict for consistency reasons
        convert_ordered2std_dict(self.data)

    def to_file(self, fname):
        """
        Write to file, type is determined by checking the filename ending.
        Currently implemented is writing to json and to xml.

        :param fname: [str] filename
        """
        if not isinstance(fname, str):
            raise IOError("Input Error, expect str type for fname")
        if fname.endswith(".json"):
            self.write_json(fname)
        elif fname.endswith(".xml"):
            self.write_xml(fname)
        else:
            LOG.error(f"Unknown filetype, expect [.json, .xml]")
            raise NotImplementedError("Unknown filetype, expect [.json, .xml]")

    def write_json(self, fname):
        """
        Dump data to json file.

        :param fname: [str] filename
        """
        if not isinstance(fname, str):
            raise IOError("Input Error, expect str type for fname")
        check_dir_existance(os.path.dirname(fname))
        try:
            LOG.debug(f"write_json({fname})")
            with open(fname, "w") as write_file:
                json.dump(self.data, write_file)
        except Exception:
            LOG.error(f"Failed dumping to json file: {fname}")
            raise

    def write_xml(self, fname):
        """
        Dump data to xml file.

        The data is wrapped into the root node DEEPDICT_XML_ROOT, the same node
        read_xml removes again, so that writing and reading round-trips.

        :param fname: [str] filename
        """
        if not isinstance(fname, str):
            raise IOError("Input Error, expect str type for fname")
        check_dir_existance(os.path.dirname(fname))
        xml = dicttoxml(self.data, custom_root=DEEPDICT_XML_ROOT, attr_type=False)
        LOG.debug(f"write_xml({fname})")
        try:
            with open(fname, "w") as write_file:
                write_file.write(xml.decode("utf-8"))
        except Exception:
            LOG.error(f"Failed dumping to xml file: {fname}")
            raise

    def has_section(self, section):
        """
        Checks if the instance data has a key called section on any nesting level.

        :param section: [str] key string to search for
        :return: [bool] section found
        """
        return DeepDict.has_key(self.data, section)

    @staticmethod
    def _split_path(path, sep):
        """
        Converts a path given as string, list or tuple into a new list of keys.

        :param path: [str], [list(str)] or [tuple(str)] path to split
        :param sep: [str] path separator used to split string paths
        :return: [list(str)] a new list, modifying it does not affect the input
        :raises IOError: if path has an unsupported type
        """
        if isinstance(path, str):
            return path.split(sep)
        if isinstance(path, (list, tuple)):
            return list(path)
        LOG.error(f"Input Error, expect list[str] type for path: {path}")
        raise IOError("Input Error, expect list[str] type for path")

    @staticmethod
    def get_from_path(data, path, sep="/"):
        """
        Implements a nested dict access via a path like string like so path='target/section/path'
        which is equivalent to my_dict['target']['section']['path'].

        :param data: [dict] input dictionary
        :param path: [str] pathlike string
        :param sep: [str] path separator, default='/'
        :return: [object]
        :raises IOError: on invalid input or if the last path element is no key on any level
        :raises LookupError: if walking along the path fails
        """
        if not isinstance(data, dict):
            LOG.error("Input Error, expect dict type for data")
            raise IOError("Input Error, expect dict type for data")
        keys = DeepDict._split_path(path, sep)
        if not keys:
            LOG.error("Input Error, missing section strings")
            raise IOError("Input Error, missing section strings")
        if not DeepDict.has_key(data, keys[-1]):
            LOG.error(f"Input Error, section {keys[-1]} does not exist in dictionary")
            raise IOError(f"Input Error, section {keys[-1]} does not exist in dictionary")
        try:
            for k in keys:
                data = data[k]
        except Exception as e:
            LOG.error(f"Failed retrieving data from path {keys} due to {e}")
            raise LookupError(f"Failed retrieving data from path {keys} due to {e}")
        return data

    @staticmethod
    def has_key(data, section, already_found=False):
        """
        Checks if input dictionary has a key called section on any nesting level.
        The already_found parameter is for internal recursion checks.

        :param data: [dict] input dictionary
        :param section: [str] key string to search for
        :param already_found: recursion criteria check
        :return: [bool] section found
        """
        if not isinstance(data, dict):
            LOG.error("Input Error, expect dict type for obj")
            raise IOError("Input Error, expect dict type for obj")
        if not isinstance(section, str):
            LOG.error(f"Input Error, expect str type for section, got {section}")
            raise IOError(f"Input Error, expect str type for section, got {section}")
        if already_found:
            return True
        for key, value in data.items():
            if key == section:
                return True
            if isinstance(value, dict) and DeepDict.has_key(value, section):
                return True
        return False

    @staticmethod
    def value_traverse(data, callback=None):
        """
        Dictionary filter function, walks through the input dict (obj) calling the callback
        function for each value. The callback function return is assigned to the
        corresponding dict value.

        :param data: [dict] input dictionary
        :param callback: [callable] called with each non-dict value
        """
        if not isinstance(data, dict):
            LOG.error("Input Error, expect dict type for obj")
            raise IOError("Input Error, expect dict type for obj")
        if not callable(callback):
            LOG.error("Input Error, expect function type for callback")
            raise IOError("Input Error, expect function type for callback")
        for key, value in data.items():
            if isinstance(value, dict):
                DeepDict.value_traverse(value, callback)
            else:
                data[key] = callback(value)

    @staticmethod
    def parse_type(string):
        """
        Type convert input string to float, int, list, tuple or string.

        Values that are neither str nor a number (e.g. None, bool or a list coming
        from an already typed json file) are returned unchanged. List and tuple
        elements are separated by ',' and stripped from surrounding whitespace.

        :param string: [str] input string
        :return: [T] converted output
        """
        if isinstance(string, bool) or not isinstance(string, (str, int, float)):
            return string

        try:
            as_float = float(string)
        except ValueError:
            pass
        else:
            try:
                as_int = int(string)
            except (ValueError, OverflowError):
                # e.g. "1.5", "1e3", "nan" or an infinite float
                return as_float
            return as_int if as_float == as_int else as_float

        # float() only fails with ValueError for str input, so string is a str here
        for opening, closing, container in (("[", "]", list), ("(", ")", tuple)):
            if string.startswith(opening) and string.endswith(closing):
                inner = string[1:-1].strip()
                if not inner:
                    return container()
                return container(DeepDict.parse_type(e.strip()) for e in inner.split(","))
        return string

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, value):
        if not isinstance(value, dict):
            LOG.error(f"Input Error, expect dict type for value, but got {type(value)}")
            raise IOError(f"Input Error, expect dict type for value, but got {type(value)}")
        self.clear()
        self._data = value

    @property
    def sep(self):
        return self._sep

    @sep.setter
    def sep(self, value):
        if not isinstance(value, str):
            LOG.error(f"Input Error, expect str type for value, but got {type(value)}")
            raise IOError(f"Input Error, expect str type for value, but got {type(value)}")
        self._sep = value
# -*- coding: utf-8 -*-

import os
import sys
import logging

# Repository root: the parent of the directory containing this file. It is put
# first on the import path so the package resolves from a source checkout.
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, ROOT)

# Locations inside the package tree.
PLUGIN_DEFAULT_DIR = os.path.join(ROOT, "hyppopy", "plugins")
TESTDATA_DIR = os.path.join(ROOT, "hyppopy", "tests", "data")

# Path of the solver settings section inside a parameter file.
SETTINGSPATH = "settings/solver"
# Name of the root node of xml parameter files.
DEEPDICT_XML_ROOT = "hyppopy"

# Logging setup: the logfile is written next to the package and is
# overwritten (filemode 'w') each time this module is imported first.
LOGFILENAME = os.path.join(ROOT, 'logfile.log')
DEBUGLEVEL = logging.DEBUG
logging.basicConfig(
    filename=LOGFILENAME,
    filemode='w',
    format='%(levelname)s: %(name)s - %(message)s',
)
class hyperopt_Solver(SolverPluginBase, IPlugin):
    """
    Solver plugin running the optimization with hyperopt's fmin using the
    tpe.suggest algorithm.
    """
    # hyperopt Trials object of the last execute_solver call
    trials = None
    # return value of the last fmin call
    best = None

    def __init__(self):
        SolverPluginBase.__init__(self)
        LOG.debug("initialized")

    def loss_function(self, params):
        """
        Objective passed to fmin, wraps the user loss function.

        A failing user loss function must not abort the whole optimization, so the
        exception is logged and the evaluation is reported to hyperopt as failed.

        :param params: parameter set to evaluate, as handed over by hyperopt
        :return: [dict] with the keys 'loss' and 'status', loss is None on failure
        """
        # loss must be bound on the failure path too, otherwise building the
        # result dict raises an UnboundLocalError
        loss = None
        try:
            loss = self.loss(self.data, params)
            status = STATUS_OK
        except Exception as e:
            LOG.error(f"loss function evaluation failed: {e}")
            status = STATUS_FAIL
        return {'loss': loss, 'status': status}

    def execute_solver(self, parameter):
        """
        Runs fmin on the given solution space and stores the outcome in
        self.best and self.trials.

        The number of evaluations is read from the attribute max_iterations, which
        is expected to be set from the settings section. If it is missing, 50
        evaluations are used, the value that was hard coded before.

        :param parameter: solution space passed to fmin as space
        :raises BrokenPipeError: if fmin fails
        """
        LOG.debug(f"execute_solver using solution space:\n\n\t{pformat(parameter)}\n")
        self.trials = Trials()
        # resolved outside the try block, a configuration problem should not be
        # reported as an internal hyperopt error
        max_evals = int(getattr(self, "max_iterations", 50))
        try:
            self.best = fmin(fn=self.loss_function,
                             space=parameter,
                             algo=tpe.suggest,
                             max_evals=max_evals,
                             trials=self.trials)
        except Exception as e:
            LOG.error(f"internal error in hyperopt.fmin occured. {e}")
            raise BrokenPipeError(f"internal error in hyperopt.fmin occured. {e}") from e

    def convert_results(self):
        """
        Prints the entries of self.best that are not None.
        """
        solution = {k: v for k, v in self.best.items() if v is not None}
        print('Solution\n========')
        print("\n".join("%s \t %s" % (k, str(v)) for k, v in solution.items()))
# All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os import logging from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) from pprint import pformat -try: - import optunity - from yapsy.IPlugin import IPlugin -except: - LOG.warning("optunity package not installed, will ignore this plugin!") - print("optunity package not installed, will ignore this plugin!") - +import optunity +from yapsy.IPlugin import IPlugin from hyppopy.solverpluginbase import SolverPluginBase class optunity_Solver(SolverPluginBase, IPlugin): solver_info = None trials = None best = None status = None def __init__(self): SolverPluginBase.__init__(self) LOG.debug("initialized") def loss_function(self, **params): try: loss = self.loss(self.data, params) self.status.append('ok') return loss except Exception as e: self.status.append('fail') return 1e9 def execute_solver(self, parameter): LOG.debug(f"execute_solver using solution space:\n\n\t{pformat(parameter)}\n") self.status = [] try: self.best, self.trials, self.solver_info = optunity.minimize_structured(f=self.loss_function, - num_evals=50, + num_evals=self.max_iterations, search_space=parameter) except Exception as e: LOG.error(f"internal error in optunity.minimize_structured occured. {e}") raise BrokenPipeError(f"internal error in optunity.minimize_structured occured. 
class SettingsPluginBase(object):
    """
    Base class of the settings plugins. Keeps the parameter data in a DeepDict and
    leaves the conversion of the hyperparameter section into the solver specific
    format to the subclasses (convert_parameter).
    """
    # DeepDict holding the parameter data, created in __init__
    _data = None
    # name of the plugin, see the name property
    _name = None

    def __init__(self):
        self._data = DeepDict()

    @abc.abstractmethod
    def convert_parameter(self, params=None):
        """
        Converts the hyperparameter section into the solver specific format,
        has to be implemented by subclasses.

        The base signature accepts the section because get_hyperparameter
        passes it along.

        :param params: [dict] content of the 'hyperparameter' section
        """
        raise NotImplementedError('users must define convert_parameter to use this base class')

    def get_hyperparameter(self):
        """
        :return: the 'hyperparameter' section converted by convert_parameter
        """
        return self.convert_parameter(self.data["hyperparameter"])

    def set(self, data):
        """
        Replaces the parameter data.

        :param data: [dict] new parameter data
        """
        self.data.clear()
        self.data.data = data

    def read(self, fname):
        """
        Replaces the parameter data with the content of a file.

        :param fname: [str] filename, passed to DeepDict.from_file
        """
        self.data.clear()
        self.data.from_file(fname)

    def write(self, fname):
        """
        Writes the parameter data to a file.

        :param fname: [str] filename, passed to DeepDict.to_file
        """
        self.data.to_file(fname)

    def set_attributes(self, cls):
        """
        Sets each entry of the settings section SETTINGSPATH as attribute on the
        given object.

        The settings section is optional. If it is missing or not a section, a
        warning is logged and the object is left untouched.

        :param cls: [object] target the attributes are set on; despite the name
                    the callers visible here pass an instance, not a class
        """
        try:
            attrs_sec = self.data[SETTINGSPATH]
        except (IOError, LookupError) as e:
            LOG.warning(f"no settings section {SETTINGSPATH} found, no attributes set: {e}")
            return
        if not isinstance(attrs_sec, dict):
            LOG.warning(f"settings section {SETTINGSPATH} is not a section, no attributes set")
            return
        for key, value in attrs_sec.items():
            setattr(cls, key, value)

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, value):
        # a plain dict is wrapped so the other methods, which rely on the DeepDict
        # interface, keep working; any other object is stored unchanged and is
        # expected to be a DeepDict
        if isinstance(value, dict):
            value = DeepDict(value)
        self._data = value

    @property
    def name(self):
        return self._name

    @name.setter
    def name(self, value):
        if not isinstance(value, str):
            LOG.error(f"Invalid input, str type expected for value, got {type(value)} instead")
            raise IOError(f"Invalid input, str type expected for value, got {type(value)} instead")
        self._name = value
def name(self): return self._name @name.setter def name(self, value): if not isinstance(value, str): LOG.error(f"Invalid input, str type expected for value, got {type(value)} instead") raise IOError(f"Invalid input, str type expected for value, got {type(value)} instead") self._name = value diff --git a/hyppopy/tests/data/iris_svc_parameter.json b/hyppopy/tests/data/iris_svc_parameter.json index eb37b87..550a498 100644 --- a/hyppopy/tests/data/iris_svc_parameter.json +++ b/hyppopy/tests/data/iris_svc_parameter.json @@ -1,17 +1,22 @@ {"hyperparameter": { "C": { "domain": "uniform", "data": "[0,20]", "type": "float" }, "gamma": { "domain": "uniform", "data": "[0.0001,20.0]", "type": "float" }, "kernel": { "domain": "categorical", "data": "[linear, sigmoid, poly, rbf]", "type": "str" } -}} \ No newline at end of file +}, +"settings": { + "solver": { + "max_iterations": "300" + } +}} \ No newline at end of file diff --git a/hyppopy/tests/data/iris_svc_parameter.xml b/hyppopy/tests/data/iris_svc_parameter.xml new file mode 100644 index 0000000..1d3217c --- /dev/null +++ b/hyppopy/tests/data/iris_svc_parameter.xml @@ -0,0 +1,24 @@ + + + + uniform + [0,20] + float + + + uniform + [0.0001,20.0] + float + + + categorical + [linear,sigmoid,poly,rbf] + str + + + + + 50 + + + \ No newline at end of file diff --git a/hyppopy/tests/test_solver_factory.py b/hyppopy/tests/test_solver_factory.py index 5527935..c5dbe2c 100644 --- a/hyppopy/tests/test_solver_factory.py +++ b/hyppopy/tests/test_solver_factory.py @@ -1,78 +1,73 @@ # -*- coding: utf-8 -*- # # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. 
class SolverFactoryTestSuite(unittest.TestCase):
    """
    Tests loading the solver plugins through the SolverFactory and running them
    on the iris dataset with the parameter file TESTPARAMFILE.
    """

    def setUp(self):
        pass

    def test_solver_loading(self):
        # both plugins have to be listed by the factory
        names = SolverFactory.instance().list_solver()
        for expected in ("hyperopt", "optunity"):
            self.assertTrue(expected in names)

    def test_iris_solver_execution(self):
        iris = datasets.load_iris()
        X, X_test, y, y_test = train_test_split(iris.data, iris.target, test_size=0.1, random_state=42)
        my_IRIS_dta = [X, y]

        def my_SVC_loss_func(data, params):
            # negated mean cross validation score, the solvers minimize the loss
            clf = SVC(**params)
            return -cross_val_score(clf, data[0], data[1], cv=3).mean()

        factory = SolverFactory.instance()

        # the same sequence is executed for each solver, optunity first
        for solver_name in ('optunity', 'hyperopt'):
            solver = factory.get_solver(solver_name)
            solver.set_data(my_IRIS_dta)
            solver.read_parameter(TESTPARAMFILE)
            solver.set_loss_function(my_SVC_loss_func)
            solver.run()
            solver.get_results()


if __name__ == '__main__':
    unittest.main()