diff --git a/__main__.py b/__main__.py index f1d65ba..3643368 100644 --- a/__main__.py +++ b/__main__.py @@ -1,91 +1,91 @@ #!/usr/bin/env python # # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os import sys import time import argparse ROOT = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) sys.path.append(ROOT) from hyppopy.projectmanager import ProjectManager from hyppopy.workflows.svc_usecase.svc_usecase import svc_usecase from hyppopy.workflows.knc_usecase.knc_usecase import knc_usecase from hyppopy.workflows.adaboost_usecase.adaboost_usecase import adaboost_usecase from hyppopy.workflows.unet_usecase.unet_usecase import unet_usecase from hyppopy.workflows.randomforest_usecase.randomforest_usecase import randomforest_usecase from hyppopy.workflows.imageregistration_usecase.imageregistration_usecase import imageregistration_usecase def print_warning(msg): print("\n!!!!! 
WARNING !!!!!") print(msg) sys.exit() def args_check(args): if not args.workflow: print_warning("No workflow specified, check --help") if not args.config: print_warning("Missing config parameter, check --help") if not os.path.isfile(args.config): print_warning(f"Couldn't find configfile ({args.config}), please check your input --config") if __name__ == "__main__": - parser = argparse.ArgumentParser(description='UNet Hyppopy UseCase Example Optimization.') + parser = argparse.ArgumentParser(description='Hyppopy UseCase Examples Executable.') parser.add_argument('-w', '--workflow', type=str, help='workflow to be executed') parser.add_argument('-o', '--output', type=str, default=None, help='output path to store result') parser.add_argument('-c', '--config', type=str, help='config filename, .xml or .json formats are supported.' 'pass a full path filename or the filename only if the' 'configfile is in the data folder') args = parser.parse_args() args_check(args) ProjectManager.read_config(args.config) if args.output is not None: ProjectManager.register_member("output_dir", args.output) if args.workflow == "svc_usecase": uc = svc_usecase() elif args.workflow == "randomforest_usecase": uc = randomforest_usecase() elif args.workflow == "knc_usecase": uc = knc_usecase() elif args.workflow == "adaboost_usecase": uc = adaboost_usecase() elif args.workflow == "unet_usecase": uc = unet_usecase() elif args.workflow == "imageregistration_usecase": uc = imageregistration_usecase() else: print("No workflow called {} found!".format(args.workflow)) sys.exit() print("\nStart optimization...") start = time.process_time() uc.run(save=True) end = time.process_time() print("Finished optimization!\n") print("Total Time: {}s\n".format(end-start)) res, best = uc.get_results() print("---- Optimal Parameter -----\n") for p in best.items(): print(" - {}\t:\t{}".format(p[0], p[1])) diff --git a/examples/quality_tests.py b/examples/quality_tests.py new file mode 100644 index 0000000..d1085ca --- 
/dev/null +++ b/examples/quality_tests.py @@ -0,0 +1,345 @@ +# DKFZ +# +# +# Copyright (c) German Cancer Research Center, +# Division of Medical and Biological Informatics. +# All rights reserved. +# +# This software is distributed WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# See LICENSE.txt or http://www.mitk.org for details. +# +# Author: Sven Wanner (s.wanner@dkfz.de) + + + +import os +import sys +import time +import argparse +import tempfile +import numpy as np +import pandas as pd + + +try: + import hyppopy as hp + from hyppopy.globals import ROOT + from hyppopy.virtualfunction import VirtualFunction +except Exception as e: + sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) + import hyppopy as hp + from hyppopy.globals import ROOT + from hyppopy.virtualfunction import VirtualFunction + +TEMP = tempfile.gettempdir() +DATADIR = os.path.join(os.path.join(ROOT, os.path.join('hyppopy', 'virtualparameterspace')), "6D") + +vfunc = VirtualFunction() +vfunc.load_images(DATADIR) +minima = vfunc.minima() +# for i in range(6): +# mini = minima[i] +# vfunc.plot(i, title="axis_{} min_x={} min_loss={}".format(str(i).zfill(2), np.mean(mini[0]), mini[1])) + + +def blackboxfunction(data, params): + return sum(vfunc(*params.values())) + + +def getConfig(*args, **kwargs): + if 'output_dir' in kwargs.keys() and kwargs['output_dir'] is not None: + output_dir = kwargs['output_dir'] + else: + output_dir = TEMP + if 'plugin' in kwargs.keys(): + plugin = kwargs['plugin'] + else: + plugin = 'hyperopt' + + max_iterations = 0 + if 'max_iterations' in kwargs.keys(): + max_iterations = kwargs['max_iterations'] + + if len(args) < 6: + print("Missing hyperparameter abortion!") + sys.exit() + + config = { + "hyperparameter": { + "axis_0": { + "domain": "uniform", + "data": args[0], + "type": "float" + }, + "axis_1": { + "domain": "uniform", + "data": args[1], + "type": "float" + }, + 
"axis_2": { + "domain": "uniform", + "data": args[2], + "type": "float" + }, + "axis_3": { + "domain": "uniform", + "data": args[3], + "type": "float" + }, + "axis_4": { + "domain": "uniform", + "data": args[4], + "type": "float" + }, + "axis_5": { + "domain": "uniform", + "data": args[5], + "type": "float" + } + }, + "settings": { + "solver_plugin": { + "max_iterations": max_iterations, + "use_plugin": plugin, + "output_dir": output_dir + } + } + } + return config + + +def test_randomsearch(output_dir): + print("#" * 30) + print("# RANDOMSEARCH") + print("# output_dir={}".format(output_dir)) + print("#" * 30) + + ranges = [[0, 1], + [0, 800], + [-1, 1], + [0, 5], + [0, 10000], + [0, 10]] + args = {'plugin': 'randomsearch', 'output_dir': output_dir} + config = getConfig(*ranges, **args) + return config + + +def test_hyperopt(output_dir): + print("#" * 30) + print("# HYPEROPT") + print("# output_dir={}".format(output_dir)) + print("#" * 30) + + ranges = [[0, 1], + [0, 800], + [-1, 1], + [0, 5], + [0, 10000], + [0, 10]] + args = {'plugin': 'hyperopt', 'output_dir': output_dir} + config = getConfig(*ranges, **args) + return config + + +def test_optunity(output_dir): + print("#" * 30) + print("# OPTUNITY") + print("# output_dir={}".format(output_dir)) + print("#" * 30) + + ranges = [[0, 1], + [0, 800], + [-1, 1], + [0, 5], + [0, 10000], + [0, 10]] + args = {'plugin': 'optunity', 'output_dir': output_dir} + config = getConfig(*ranges, **args) + return config + + +def analyse_iteration_characteristics(configs): + N = 50 + num_of_iterations = [5, 10, 25, 50, 100, 250, 500, 750, 1000, 1500, 2000] + results = {'iteration': [], + 'time_overhead': [], + 'time_overhead_std': [], + 'accuracy': [], + 'accuracy_std': [], + 'plugin': []} + + accuracies = {} + time_overheads = {} + for plugin in configs.keys(): + accuracies[plugin] = [] + time_overheads[plugin] = [] + + for it in num_of_iterations: + for plugin, config in configs.items(): + print("\riteration loop: {} for plugin 
{}".format(it, plugin)) + for p, v in accuracies.items(): + v.clear() + for p, v in time_overheads.items(): + v.clear() + for n in range(N): + print("\rrepeat loop: {}".format(n), end="") + config["settings"]["solver_plugin"]["max_iterations"] = it + if not hp.ProjectManager.set_config(config): + print("Invalid config dict!") + sys.exit() + + solver = hp.SolverFactory.get_solver() + solver.set_loss_function(blackboxfunction) + solver.set_data(None) + + start = time.process_time() + solver.run() + end = time.process_time() + time_overheads[plugin].append(end-start) + res, best = solver.get_results() + best_loss = 0 + for i, p in enumerate(best.items()): + best_loss += minima[i][1] + reached_loss = np.min(res["losses"].values) + accuracies[plugin].append(100.0/best_loss*reached_loss) + + print("\r") + results['iteration'].append(it) + results['time_overhead'].append(np.mean(time_overheads[plugin])) + results['accuracy'].append(np.mean(accuracies[plugin])) + results['time_overhead_std'].append(np.std(time_overheads[plugin])) + results['accuracy_std'].append(np.std(accuracies[plugin])) + results['plugin'].append(plugin) + + return results + + +def analyse_random_normal_search(output_dir): + config = { + "hyperparameter": { + "axis_0": { + "domain": "normal", + "data": [0.0, 0.2], + "type": "float" + }, + "axis_1": { + "domain": "normal", + "data": [500, 700], + "type": "float" + }, + "axis_2": { + "domain": "normal", + "data": [-0.2, 0.9], + "type": "float" + }, + "axis_3": { + "domain": "normal", + "data": [0.0, 3.0], + "type": "float" + }, + "axis_4": { + "domain": "normal", + "data": [6000, 10000], + "type": "float" + }, + "axis_5": { + "domain": "normal", + "data": [3, 7], + "type": "float" + } + }, + "settings": { + "solver_plugin": { + "max_iterations": 0, + "use_plugin": 'randomsearch', + "output_dir": output_dir + } + } + } + + N = 50 + num_of_iterations = [5, 10, 25, 50, 100, 250, 500, 750, 1000, 1500, 2000] + + results = {'iteration': [], + 'time_overhead': 
[], + 'time_overhead_std': [], + 'accuracy': [], + 'accuracy_std': []} + + accuracies = [] + time_overheads = [] + for it in num_of_iterations: + config["settings"]["solver_plugin"]["max_iterations"] = it + print("\riteration loop: {}".format(it)) + accuracies.clear() + time_overheads.clear() + for n in range(N): + print("\rrepeat loop: {}".format(n), end="") + if not hp.ProjectManager.set_config(config): + print("Invalid config dict!") + sys.exit() + + solver = hp.SolverFactory.get_solver() + solver.set_loss_function(blackboxfunction) + solver.set_data(None) + + start = time.process_time() + solver.run() + end = time.process_time() + time_overheads.append(end - start) + res, best = solver.get_results() + best_loss = 0 + for i, p in enumerate(best.items()): + best_loss += minima[i][1] + reached_loss = np.min(res["losses"].values) + accuracies.append(100.0 / best_loss * reached_loss) + + print("\r") + results['iteration'].append(it) + results['time_overhead'].append(np.mean(time_overheads)) + results['accuracy'].append(np.mean(accuracies)) + results['time_overhead_std'].append(np.std(time_overheads)) + results['accuracy_std'].append(np.std(accuracies)) + + return results + + +if __name__ == "__main__": + print("") + parser = argparse.ArgumentParser(description='Hyppopy Quality Test Executable') + parser.add_argument('-o', '--output', type=str, default=None, help='output path to store result') + parser.add_argument('-p', '--plugin', type=str, default=None, help='if set analysis is only executed on this plugin') + args = parser.parse_args() + + do_analyse_iteration_characteristics = True + do_analyse_random_normal_search = False + + funcs = [x for x in locals().keys() if x.startswith("test_")] + configs = {} + for f in funcs: + if args.plugin is not None: + if not f.endswith(args.plugin): + continue + configs[f.split("_")[1]] = locals()[f](args.output) + + if do_analyse_iteration_characteristics: + start = time.process_time() + data = 
analyse_iteration_characteristics(configs) + end = time.process_time() + print("Total duration analyse_iteration_characteristics: {} min".format((end-start)/60)) + df = pd.DataFrame.from_dict(data) + fname = os.path.join(args.output, "analyse_iteration_characteristics.csv") + df.to_csv(fname, index=False) + + if do_analyse_random_normal_search: + start = time.process_time() + data = analyse_random_normal_search(args.output) + end = time.process_time() + print("Total duration analyse_random_normal_search: {} min".format((end - start) / 60)) + df = pd.DataFrame.from_dict(data) + fname = os.path.join(args.output, "analyse_random_normal_search.csv") + df.to_csv(fname, index=False) diff --git a/hyppopy/helpers.py b/hyppopy/helpers.py index db68299..3226c65 100644 --- a/hyppopy/helpers.py +++ b/hyppopy/helpers.py @@ -1,222 +1,246 @@ +# DKFZ +# +# +# Copyright (c) German Cancer Research Center, +# Division of Medical and Biological Informatics. +# All rights reserved. +# +# This software is distributed WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# See LICENSE.txt or http://www.mitk.org for details. +# +# Author: Sven Wanner (s.wanner@dkfz.de) + import copy import time import itertools import numpy as np from numpy import argmin, argmax, unique from collections import OrderedDict, abc def gaussian(x, mu, sigma): return 1.0/(sigma * np.sqrt(2*np.pi))*np.exp(-(x-mu)**2/(2*sigma**2)) def gaussian_axis_sampling(a, b, N): center = a + (b - a) / 2.0 delta = (b - a) / N bn = b - center xn = np.arange(0, bn, delta) dn = [] for x in xn: dn.append(1/gaussian(x, 0, bn/2.5)) dn = np.array(dn) dn /= np.sum(dn) dn *= bn axis = [0] for x in dn: axis.append(x+axis[-1]) axis.insert(0, -axis[-1]) axis = np.array(axis) axis += center return axis def log_axis_sampling(a, b, N): if a == 0: a += 1e-23 assert a > 0, "Precondition Violation, a < 0!" assert a < b, "Precondition Violation, a > b!" 
assert b > 0, "Precondition Violation, b < 0!" lexp = np.log(a) rexp = np.log(b) assert lexp is not np.nan, "Precondition violation, left bound input error, results in nan!" assert rexp is not np.nan, "Precondition violation, right bound input error, results in nan!" delta = (rexp - lexp) / N logrange = np.arange(lexp, rexp + delta, delta) for n in range(logrange.shape[0]): logrange[n] = np.exp(logrange[n]) return logrange def sample_domain(start, stop, count, ftype="uniform"): assert stop > start, "Precondition Violation, stop <= start not allowed!" assert count > 0, "Precondition Violation, N <= 0 not allowed!" if ftype == 'uniform': delta = (stop - start)/count return np.arange(start, stop + delta, delta) elif ftype == 'loguniform': return log_axis_sampling(start, stop, count) elif ftype == 'normal': return gaussian_axis_sampling(start, stop, count) raise IOError("Precondition Violation, unknown sampling function type!") + class Trials(object): def __init__(self): self.loss = [] self.duration = [] self.status = [] self.parameter = [] self.best = None self._tick = None def start_iteration(self): self._tick = time.process_time() def stop_iteration(self): if self._tick is None: return self.duration.append(time.process_time()-self._tick) self._tick = None def set_status(self, status=True): self.status.append(status) def set_parameter(self, params): self.parameter.append(params) def set_loss(self, value): self.loss.append(value) def get(self): + msg = None if len(self.loss) <= 0: - raise Exception("Empty solver results!") - if len(self.loss) != len(self.duration) or len(self.loss) != len(self.parameter) or len(self.loss) != len(self.status): - raise Exception("Inconsistent results in gridsearch solver!") + msg = "Empty solver results!" + if len(self.loss) != len(self.duration): + msg = "Inconsistent results! 
len(self.loss) != len(self.duration) -> {} != {}".format(len(self.loss), len(self.duration)) + if len(self.loss) != len(self.parameter): + msg = "Inconsistent results! len(self.loss) != len(self.parameter) -> {} != {}".format(len(self.loss), len(self.parameter)) + if len(self.loss) != len(self.status): + msg = "Inconsistent results! len(self.loss) != len(self.status) -> {} != {}".format(len(self.loss), len(self.status)) + if msg is not None: + raise Exception(msg) + best_index = argmin(self.loss) best = self.parameter[best_index] worst_loss = self.loss[argmax(self.loss)] for n in range(len(self.status)): if not self.status[n]: self.loss[n] = worst_loss res = { 'losses': self.loss, 'duration': self.duration } is_string = [] for key, value in self.parameter[0].items(): res[key] = [] if isinstance(value, str): is_string.append(key) for p in self.parameter: for key, value in p.items(): res[key].append(value) for key in is_string: uniques = unique(res[key]) lookup = {} for n, p in enumerate(uniques): lookup[p] = n for n in range(len(res[key])): res[key][n] = lookup[res[key][n]] return res, best class NestedDictUnfolder(object): def __init__(self, nested_dict): self._nested_dict = nested_dict self._categories = [] self._values = OrderedDict() self._tree_leafs = [] NestedDictUnfolder.nested_dict_iter(self._nested_dict, self) @staticmethod def nested_dict_iter(nested, unfolder): for key, value in nested.items(): if isinstance(value, abc.Mapping): unfolder.add_category(key) NestedDictUnfolder.nested_dict_iter(value, unfolder) else: unfolder.add_values(key, value) unfolder.mark_leaf() def find_parent_nodes(self, nested, node, last_node=""): for key, value in nested.items(): if key == node: self._tree_leafs.append(last_node) return else: last_node = key if isinstance(value, abc.Mapping): self.find_parent_nodes(value, node, last_node) else: return def find_parent_node(self, leaf_names): if not isinstance(leaf_names, list): leaf_names = [leaf_names] for ln in leaf_names: try: 
pos = self._categories.index(ln) - 1 candidate = self._categories[pos] if candidate not in leaf_names: return candidate except: pass return None def add_category(self, name): self._categories.append(name) def add_values(self, name, values): self._values[name] = values def mark_leaf(self): if len(self._categories) > 0: if not self._categories[-1] in self._tree_leafs: self._tree_leafs.append(self._categories[-1]) def permutate_values(self): pset = list(self._values.values()) pset = list(itertools.product(*pset)) permutations = [] okeys = list(self._values.keys()) for ps in pset: permutations.append({}) for i in range(len(okeys)): permutations[-1][okeys[i]] = ps[i] return permutations def add_categories(self, values_permutated): while True: parent = self.find_parent_node(self._tree_leafs) if parent is None: return result = [] for tl in self._tree_leafs: for elem in values_permutated: new = copy.deepcopy(elem) new[parent] = tl result.append(new) while tl in self._categories: self._categories.remove(tl) while parent in self._categories: self._categories.remove(parent) self._tree_leafs = [] self.find_parent_nodes(self._nested_dict, parent) if len(self._tree_leafs) == 1 and self._tree_leafs[0] == "": break values_permutated = copy.deepcopy(result) return result def unfold(self): values_permutated = self.permutate_values() if len(self._categories) > 0: return self.add_categories(values_permutated) return values_permutated diff --git a/hyppopy/projectmanager.py b/hyppopy/projectmanager.py index af987b9..6f135c7 100644 --- a/hyppopy/projectmanager.py +++ b/hyppopy/projectmanager.py @@ -1,150 +1,150 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. 
# # Author: Sven Wanner (s.wanner@dkfz.de) from hyppopy.singleton import * from hyppopy.deepdict import DeepDict from hyppopy.globals import SETTINGSCUSTOMPATH, SETTINGSSOLVERPATH import os import logging import datetime from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) @singleton_object class ProjectManager(metaclass=Singleton): def __init__(self): self.configfilename = None self.config = None self._extmembers = [] self._identifier = None def clear(self): self.configfilename = None self.config = None self.remove_externals() def is_ready(self): return self.config is not None def remove_externals(self): for added in self._extmembers: if added in self.__dict__.keys(): del self.__dict__[added] self._extmembers = [] def get_hyperparameter(self): return self.config["hyperparameter"] def test_config(self): if not isinstance(self.config, DeepDict): msg = "test_config failed, config is not of type DeepDict" LOG.error(msg) raise IOError(msg) sections = ["hyperparameter"] sections += [SETTINGSSOLVERPATH.split("/")[-1]] sections += [SETTINGSCUSTOMPATH.split("/")[-1]] sections_available = [True, True, True] for n, sec in enumerate(sections): if not self.config.has_section(sec): msg = "WARNING: config has no section {}".format(sec) LOG.warning(msg) sections_available[n] = False return sections_available def set_config(self, config): self.clear() if isinstance(config, dict): self.config = DeepDict() self.config.data = config elif isinstance(config, DeepDict): self.config = config else: msg = "unknown type ({}) for config passed, expected dict or DeepDict".format(type(config)) LOG.error(msg) raise IOError(msg) sections_available = self.test_config() if not sections_available[0]: msg = "Missing section {}".format("hyperparameter") LOG.error(msg) raise LookupError(msg) if not sections_available[1]: msg = "Missing section {}".format(SETTINGSSOLVERPATH) LOG.error(msg) raise LookupError(msg) else: try: - 
self._extmembers += self.config.transfer_attrs(self, SETTINGSCUSTOMPATH.split("/")[-1]) + self._extmembers += self.config.transfer_attrs(self, SETTINGSSOLVERPATH.split("/")[-1]) except Exception as e: msg = "transfering custom section as class attributes failed, " \ "is the config path to your custom section correct? {}. Exception {}".format(SETTINGSCUSTOMPATH, e) LOG.error(msg) raise LookupError(msg) if sections_available[2]: try: - self._extmembers += self.config.transfer_attrs(self, SETTINGSSOLVERPATH.split("/")[-1]) + self._extmembers += self.config.transfer_attrs(self, SETTINGSCUSTOMPATH.split("/")[-1]) except Exception as e: msg = "transfering custom section as class attributes failed, " \ "is the config path to your custom section correct? {}. Exception {}".format(SETTINGSCUSTOMPATH, e) LOG.error(msg) raise LookupError(msg) return True def read_config(self, configfile): self.clear() self.configfilename = configfile self.config = DeepDict(configfile) sections_available = self.test_config() if not sections_available[0]: msg = "Missing section {}".format("hyperparameter") LOG.error(msg) raise LookupError(msg) if not sections_available[1]: msg = "Missing section {}".format(SETTINGSSOLVERPATH) LOG.error(msg) raise LookupError(msg) else: try: self._extmembers += self.config.transfer_attrs(self, SETTINGSSOLVERPATH.split("/")[-1]) except Exception as e: msg = "transfering custom section as class attributes failed, " \ "is the config path to your custom section correct? {}. Exception {}".format(SETTINGSSOLVERPATH, e) LOG.error(msg) raise LookupError(msg) if sections_available[2]: try: self._extmembers += self.config.transfer_attrs(self, SETTINGSCUSTOMPATH.split("/")[-1]) except Exception as e: msg = "transfering custom section as class attributes failed, " \ "is the config path to your custom section correct? {}. 
Exception {}".format(SETTINGSCUSTOMPATH, e) LOG.error(msg) raise LookupError(msg) return True def identifier(self, force=False): if self._identifier is None or force: self._identifier = datetime.datetime.now().strftime("%Y.%m.%d.%H.%M.%S") return self._identifier def register_member(self, name, value): setattr(name, value) diff --git a/hyppopy/resultviewer.py b/hyppopy/resultviewer.py index 3a98034..1033328 100644 --- a/hyppopy/resultviewer.py +++ b/hyppopy/resultviewer.py @@ -1,175 +1,178 @@ # DKFZ # # # Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) import os import copy import pandas as pd import seaborn as sns import matplotlib.pyplot as plt import logging from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) sns.set(style="darkgrid") class ResultViewer(object): def __init__(self, fname=None, save_only=False): self.close_all() self.df = None self.has_duration = False self.hyperparameter = None self.save_only = save_only self.path = None self.appendix = None if fname is not None: self.read(fname) def close_all(self): plt.close('all') def read(self, fname): self.path = os.path.dirname(fname) split = os.path.basename(fname).split("_") self.appendix = split[-1] self.appendix = self.appendix[:-4] self.df = pd.read_csv(fname, index_col=0) const_data = ["duration", "losses"] hyperparameter_columns = [item for item in self.df.columns if item not in const_data] self.hyperparameter = pd.DataFrame() for key in hyperparameter_columns: self.hyperparameter[key] = self.df[key] self.has_duration = "duration" in self.df.columns def plot_XYGrid(self, df, x, y, name="", save=None, show=True): argmin = 
df["losses"].idxmin() grid = [len(x), len(y)] if grid[0] == 1 and grid[1] == 1: fig = plt.figure(figsize=(10.0, 8)) plt.plot(df[x[0]].values, df[y[0]].values, '.') plt.plot(df[x[0]].values[argmin], df[y[0]].values[argmin], 'ro') plt.grid(True) plt.ylabel(y[0]) plt.xlabel(x[0]) plt.title(name, fontsize=16) else: if grid[0] > 1 and grid[1] == 1: fig, axs = plt.subplots(ncols=grid[0], figsize=(10.0, grid[1] * 3.5)) elif grid[0] == 1 and grid[1] > 1: fig, axs = plt.subplots(nrows=grid[1], figsize=(10.0, grid[1] * 3.5)) else: fig, axs = plt.subplots(nrows=grid[1], ncols=grid[0], figsize=(10.0, grid[1] * 3.5)) fig.subplots_adjust(left=0.08, right=0.98, wspace=0.3) for nx, _x in enumerate(x): for ny, _y in enumerate(y): if grid[0] > 1 and grid[1] == 1: ax = axs[nx] elif grid[0] == 1 and grid[1] > 1: ax = axs[ny] else: ax = axs[ny, nx] ax.plot(df[_x].values, df[_y].values, '.') ax.plot(df[_x].values[argmin], df[_y].values[argmin], 'ro') ax.grid(True) if nx == 0: ax.set_ylabel(_y) if ny == len(y)-1: ax.set_xlabel(_x) fig.suptitle(name, fontsize=16) if save is not None: if not os.path.isdir(os.path.dirname(save)): os.makedirs(os.path.dirname(save)) plt.savefig(save) if show: plt.show() def plot_performance_and_feature_grids(self, save=True): x_axis = [] if 'losses' in self.df.columns: x_axis.append('losses') if 'iterations' in self.df.columns: x_axis.append('iterations') y_axis_performance = [] if 'accuracy' in self.df.columns: y_axis_performance.append('accuracy') if 'duration' in self.df.columns: y_axis_performance.append('duration') features = [] for cit in self.df.columns: if cit not in x_axis and cit not in y_axis_performance: features.append(cit) save_name = None if save: save_name = os.path.join(self.path, "performance" + self.appendix + ".png") self.plot_XYGrid(self.df, x=x_axis, y=y_axis_performance, name="Performance", save=save_name, show=not self.save_only) chunks = [features[x:x + 3] for x in range(0, len(features), 3)] for n, chunk in enumerate(chunks): 
save_name = None if save: save_name = os.path.join(self.path, "features_{}_".format(str(n).zfill(3)) + self.appendix + ".png") self.plot_XYGrid(self.df, x=x_axis, y=chunk, name="Feature set {}".format(n+1), save=save_name, show=not self.save_only) def plot_feature_matrix(self, save=True): sns_plot = sns.pairplot(self.df, height=1.8, aspect=1.8, plot_kws=dict(edgecolor="k", linewidth=0.5), diag_kind="kde", diag_kws=dict(shade=True)) fig = sns_plot.fig fig.subplots_adjust(top=0.93, wspace=0.3) t = fig.suptitle('Pairwise Plots', fontsize=14) if not self.save_only: plt.show() if save: save_name = os.path.join(self.path, "matrixview_"+self.appendix+".png") try: sns_plot.savefig(save_name) except Exception as e: msg = "failed to save file {}, reason {}".format(save_name, e) LOG.error(msg) raise IOError(msg) def plot_duration(self, save=True): - if "duration" in self.df.columns: - sns_plot = sns.jointplot(y="duration", x="losses", data=self.df, kind="kde") - if not self.save_only: - plt.show() - if save: - save_name = os.path.join(self.path, "t_vs_loss_" + self.appendix + ".png") - try: - sns_plot.savefig(save_name) - except Exception as e: - msg = "failed to save file {}, reason {}".format(save_name, e) - LOG.error(msg) - raise IOError(msg) + try: + if "duration" in self.df.columns: + sns_plot = sns.jointplot(y="duration", x="losses", data=self.df, kind="kde") + if not self.save_only: + plt.show() + if save: + save_name = os.path.join(self.path, "t_vs_loss_" + self.appendix + ".png") + try: + sns_plot.savefig(save_name) + except Exception as e: + msg = "failed to save file {}, reason {}".format(save_name, e) + LOG.error(msg) + raise IOError(msg) + except Exception as e: + print(e) def show(self, save=True): self.plot_duration(save) self.plot_feature_matrix(save) self.plot_performance_and_feature_grids(save) diff --git a/hyppopy/solver.py b/hyppopy/solver.py index 47d3762..73fba64 100644 --- a/hyppopy/solver.py +++ b/hyppopy/solver.py @@ -1,125 +1,127 @@ # DKFZ # # # 
Copyright (c) German Cancer Research Center, # Division of Medical and Biological Informatics. # All rights reserved. # # This software is distributed WITHOUT ANY WARRANTY; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. # # See LICENSE.txt or http://www.mitk.org for details. # # Author: Sven Wanner (s.wanner@dkfz.de) from hyppopy.projectmanager import ProjectManager -from hyppopy.resultviewer import ResultViewer +#from hyppopy.resultviewer import ResultViewer import os import logging import pandas as pd from hyppopy.globals import LIBNAME from hyppopy.globals import DEBUGLEVEL LOG = logging.getLogger(os.path.basename(__file__)) LOG.setLevel(DEBUGLEVEL) class Solver(object): _name = None _solver_plugin = None _settings_plugin = None def __init__(self): pass def set_data(self, data): self._solver_plugin.set_data(data) def set_hyperparameters(self, params): self.settings_plugin.set_hyperparameter(params) def set_loss_function(self, func): self._solver_plugin.set_blackbox_function(func) def run(self): if not ProjectManager.is_ready(): LOG.error("No config data found to initialize PluginSetting object") raise IOError("No config data found to initialize PluginSetting object") self.settings_plugin.set_hyperparameter(ProjectManager.get_hyperparameter()) self._solver_plugin.settings = self.settings_plugin self._solver_plugin.run() - def save_results(self, savedir=None, savename=None, show=False): + def save_results(self, savedir=None, savename=None, overwrite=True):#, show=False): df, best = self.get_results() dir = None if savename is None: savename = LIBNAME if savedir is None: if 'output_dir' in ProjectManager.__dict__.keys(): if not os.path.isdir(ProjectManager.output_dir): os.mkdir(ProjectManager.output_dir) dir = ProjectManager.output_dir else: print("WARNING: No solver option output_dir found, cannot save results!") LOG.warning("WARNING: No solver option output_dir found, cannot save results!") else: dir = savedir if not 
os.path.isdir(savedir): os.mkdir(savedir) - appendix = ProjectManager.identifier(True) - name = savename + "_all_" + appendix + ".csv" + appendix = "" + if not overwrite: + appendix = "_" + ProjectManager.identifier(True) + name = savename + "_all" + appendix + ".csv" fname_all = os.path.join(dir, name) df.to_csv(fname_all) - name = savename + "_best_" + appendix + ".txt" + name = savename + "_best" + appendix + ".txt" fname_best = os.path.join(dir, name) with open(fname_best, "w") as text_file: for item in best.items(): text_file.write("{}\t:\t{}\n".format(item[0], item[1])) - if show: - viewer = ResultViewer(fname_all) - viewer.show() - else: - viewer = ResultViewer(fname_all, save_only=True) - viewer.show() + # if show: + # viewer = ResultViewer(fname_all) + # viewer.show() + # else: + # viewer = ResultViewer(fname_all, save_only=True) + # viewer.show() def get_results(self): results, best = self._solver_plugin.get_results() df = pd.DataFrame.from_dict(results) return df, best @property def is_ready(self): return self._solver_plugin is not None and self.settings_plugin is not None @property def solver_plugin(self): return self._solver_plugin @solver_plugin.setter def solver_plugin(self, value): self._solver_plugin = value @property def settings_plugin(self): return self._settings_plugin @settings_plugin.setter def settings_plugin(self, value): self._settings_plugin = value @property def name(self): return self._name @name.setter def name(self, value): if not isinstance(value, str): msg = "Invalid input, str type expected for value, got {} instead".format(type(value)) LOG.error(msg) raise IOError(msg) self._name = value diff --git a/hyppopy/virtualfunction.py b/hyppopy/virtualfunction.py index 0842503..ce938d0 100644 --- a/hyppopy/virtualfunction.py +++ b/hyppopy/virtualfunction.py @@ -1,175 +1,190 @@ +# DKFZ +# +# +# Copyright (c) German Cancer Research Center, +# Division of Medical and Biological Informatics. +# All rights reserved. 
class VirtualFunction(object):
    """Sampled 1-D functions, one per dimension, evaluated by linear interpolation.

    ``data`` is a 2-D array with one row per dimension and one column per
    sample. ``axis[i]`` is the ``[min, max]`` position range of row ``i``;
    ``min`` maps to the first column and ``max`` to the last one.
    ``config`` holds the ConfigParser read by ``load_images`` (or None).
    """

    def __init__(self):
        self.config = None
        self.data = None
        self.axis = []

    def __call__(self, *args):
        """Evaluate every dimension at its position and return the values as an array.

        :param args: one position per dimension, each inside that dimension's axis range
        :return: array of length ``dims()`` holding the interpolated values
        :raises AssertionError: on a wrong argument count or an out-of-range position
        """
        assert len(args) == self.dims(), "wrong number of arguments!"
        for i, value in enumerate(args):
            assert self.axis[i][0] <= value <= self.axis[i][1], "out of range access on axis {}!".format(i)
        lpos, rpos, fracs = self.pos_to_indices(args)
        rows = list(range(self.dims()))
        weights = np.array(fracs)
        # Pick one sample per row: the left and the right neighbour of each position.
        fl = self.data[(rows, lpos)]
        fr = self.data[(rows, rpos)]
        return fl*weights + fr*(1-weights)

    def clear(self):
        """Drop all axes, the sample data and the config."""
        self.axis.clear()
        self.data = None
        self.config = None

    def dims(self):
        """Number of dimensions (rows of ``data``)."""
        return self.data.shape[0]

    def size(self):
        """Number of samples per dimension (columns of ``data``)."""
        return self.data.shape[1]

    def minima(self):
        """Return, per dimension, ``[positions, fmin]`` of the smallest sample value.

        ``positions`` lists the axis position of every sample equal to the
        row minimum ``fmin``.
        """
        # NOTE(review): positions are computed as index/size, while
        # pos_to_indices maps the axis onto index/(size-1) - confirm which
        # convention is intended before relying on exact positions.
        glob_mins = []
        for dim in range(self.dims()):
            lo, hi = self.axis[dim][0], self.axis[dim][1]
            row = self.data[dim, :]
            fmin = np.min(row)
            x = [_x/self.size()*(hi-lo)+lo for _x in range(self.size()) if row[_x] <= fmin]
            glob_mins.append([x, fmin])
        return glob_mins

    def pos_to_indices(self, positions):
        """Map axis positions to neighbouring sample indices and a left weight.

        :param positions: one position per dimension
        :return: ``(lpos, rpos, pfracs)`` - left index, right index and the
            weight of the left sample, each a list with one entry per dimension
        """
        lpos = []
        rpos = []
        pfracs = []
        last = self.data.shape[1]-1
        for n in range(self.dims()):
            lo, hi = self.axis[n][0], self.axis[n][1]
            # Built as a new value on purpose: in-place operators would modify
            # the caller's object when a position is a numpy array.
            pos = (positions[n] - lo) / np.abs(hi - lo) * last
            lp = max(int(np.floor(pos)), 0)
            rp = min(int(np.ceil(pos)), last)
            pfracs.append(1.0-(pos-np.floor(pos)))
            lpos.append(lp)
            rpos.append(rp)
        return lpos, rpos, pfracs

    def plot(self, dim=None, title=""):
        """Plot the samples of one dimension, or of all dimensions when ``dim`` is None."""
        if dim is None:
            dim = list(range(self.dims()))
        else:
            dim = [dim]
        plt.figure(figsize=(10, 8))
        for d in dim:
            # linspace always yields exactly size() x values; a float-step
            # arange can be one element off and then no longer match the data.
            ax = np.linspace(self.axis[d][0], self.axis[d][1], self.size(), endpoint=False)
            plt.plot(ax, self.data[d, :], '.', label='axis_{}'.format(str(d).zfill(2)))
        plt.legend()
        plt.grid()
        plt.title(title)
        plt.show()

    def add_dimension(self, data, x_range):
        """Append one row of samples together with its axis range.

        :param data: 1-D array of samples, or a 2-D array with a single row
        :param x_range: ``[min, max]`` axis range of the new dimension
        :raises AssertionError: if the sample count differs from the existing rows
        """
        if self.data is None:
            self.data = data
            if len(self.data.shape) == 1:
                self.data = self.data.reshape((1, self.data.shape[0]))
        else:
            if len(data.shape) == 1:
                data = data.reshape((1, data.shape[0]))
            assert self.data.shape[1] == data.shape[1], "shape mismatch while adding dimension!"
            dims = self.data.shape[0]
            size = self.data.shape[1]
            tmp = np.append(self.data, data)
            self.data = tmp.reshape((dims+1, size))
        self.axis.append(x_range)

    def load_images(self, path):
        """Rebuild the function from the ``.png`` images and the ``.cfg`` file in ``path``.

        The images are processed in sorted file-name order; image ``n`` is
        sampled into row ``n`` using config section ``axis_<nn>``. The process
        is terminated via ``sys.exit()`` when no config was read or when the
        number of config sections differs from the number of images.
        """
        self.clear()
        img_fnames = []
        for f in glob(os.path.join(path, "*")):
            if f.endswith(".png"):
                img_fnames.append(f)
            elif f.endswith(".cfg"):
                self.config = self.read_config(f)
            else:
                print("WARNING: files of type {} not supported, the file {} is ignored!".format(f.split(".")[-1], os.path.basename(f)))
        if self.config is None:
            print("Aborted, failed to read configfile!")
            sys.exit()

        sections = self.config.sections()
        if len(sections) != len(img_fnames):
            print("Aborted, inconsistent number of image tmplates and axis specifications!")
            sys.exit()

        img_fnames.sort()
        size_x = None
        size_y = None
        for n, fname in enumerate(img_fnames):
            img = mpimg.imread(fname)
            if len(img.shape) > 2:
                # multi-channel image: only the first channel is used
                img = img[:, :, 0]
            if size_x is None:
                # The first image fixes the expected shape; allocate the
                # sample array once so earlier rows are never wiped.
                size_y, size_x = img.shape[0], img.shape[1]
                self.data = np.zeros((len(img_fnames), size_x), dtype=np.float32)
            assert img.shape[0] == size_y, "Shape mismatch in dimension y {} is not {}".format(img.shape[0], size_y)
            assert img.shape[1] == size_x, "Shape mismatch in dimension x {} is not {}".format(img.shape[1], size_x)
            self.sample_image(img, n)

    def sample_image(self, img, dim):
        """Convert a 2-D image of a curve into the samples of row ``dim``.

        For every column the first row index with a value > 0 is taken as the
        curve position (top of the image = largest value) and scaled into the
        ``[min_y, max_y]`` range of config section ``axis_<dim>``; the
        section's ``[min_x, max_x]`` is appended to ``axis``.

        :raises AssertionError: if the section is missing or a column has no value > 0
        """
        sec_name = "axis_{}".format(str(dim).zfill(2))
        assert sec_name in self.config.sections(), "config section {} not found!".format(sec_name)
        settings = self.get_axis_settings(sec_name)
        self.axis.append([float(settings['min_x']), float(settings['max_x'])])
        y_range = [float(settings['min_y']), float(settings['max_y'])]
        mask = img > 0
        assert mask.any(axis=0).all(), "non function value in image detected, ensure each column has at least one value > 0!"
        # argmax on a boolean array returns the first True index per column.
        first_rows = mask.argmax(axis=0)
        self.data[dim, :img.shape[1]] = 1-first_rows/img.shape[0]
        self.data[dim, :] *= np.abs(y_range[1] - y_range[0])
        self.data[dim, :] += y_range[0]

    def read_config(self, fname):
        """Read ``fname`` with ConfigParser; return the parser, or None on a parse error."""
        try:
            config = configparser.ConfigParser()
            config.read(fname)
            return config
        except (configparser.Error, UnicodeDecodeError) as e:
            print(e)
            return None

    def get_axis_settings(self, section):
        """Return the options of ``section`` as a dict of strings.

        An option whose value cannot be retrieved is reported on stdout and
        stored as None.
        """
        dict1 = {}
        for option in self.config.options(section):
            try:
                dict1[option] = self.config.get(section, option)
            except configparser.Error:
                print("exception on %s!" % option)
                dict1[option] = None
        return dict1
similarity index 100% copy from hyppopy/virtualparameterspace/axis_07.png copy to hyppopy/virtualparameterspace/3D/axis_02.png diff --git a/hyppopy/virtualparameterspace/axis.cfg b/hyppopy/virtualparameterspace/6D/axis.cfg similarity index 61% rename from hyppopy/virtualparameterspace/axis.cfg rename to hyppopy/virtualparameterspace/6D/axis.cfg index c997e14..260d9d8 100644 --- a/hyppopy/virtualparameterspace/axis.cfg +++ b/hyppopy/virtualparameterspace/6D/axis.cfg @@ -1,47 +1,35 @@ [axis_00] min_x:0 max_x:1 min_y:-1 max_y:1 [axis_01] min_x:0 -max_x:8000 -min_y:-310 +max_x:800 +min_y:-110 max_y:20 [axis_02] -min_x:-20 -max_x:20 -min_y:0 -max_y:20 +min_x:-1 +max_x:1 +min_y:-1 +max_y:1 [axis_03] min_x:0 max_x:5 min_y:0 max_y:2 [axis_04] min_x:0 -max_x:10 -min_y:2 -max_y:4 - -[axis_05] -min_x:0 max_x:10000 min_y:0 max_y:1 -[axis_06] -min_x:-20000 -max_x:0 -min_y:0 -max_y:9000 - -[axis_07] -min_x:-1 -max_x:1 -min_y:0 -max_y:1 \ No newline at end of file +[axis_05] +min_x:0 +max_x:10 +min_y:2 +max_y:5 \ No newline at end of file diff --git a/hyppopy/virtualparameterspace/axis_00.png b/hyppopy/virtualparameterspace/6D/axis_00.png similarity index 100% rename from hyppopy/virtualparameterspace/axis_00.png rename to hyppopy/virtualparameterspace/6D/axis_00.png diff --git a/hyppopy/virtualparameterspace/axis_02.png b/hyppopy/virtualparameterspace/6D/axis_01.png similarity index 100% rename from hyppopy/virtualparameterspace/axis_02.png rename to hyppopy/virtualparameterspace/6D/axis_01.png diff --git a/hyppopy/virtualparameterspace/axis_03.png b/hyppopy/virtualparameterspace/6D/axis_02.png similarity index 100% rename from hyppopy/virtualparameterspace/axis_03.png rename to hyppopy/virtualparameterspace/6D/axis_02.png diff --git a/hyppopy/virtualparameterspace/axis_04.png b/hyppopy/virtualparameterspace/6D/axis_03.png similarity index 100% rename from hyppopy/virtualparameterspace/axis_04.png rename to hyppopy/virtualparameterspace/6D/axis_03.png diff --git 
a/hyppopy/virtualparameterspace/axis_05.png b/hyppopy/virtualparameterspace/6D/axis_04.png similarity index 100% rename from hyppopy/virtualparameterspace/axis_05.png rename to hyppopy/virtualparameterspace/6D/axis_04.png diff --git a/hyppopy/virtualparameterspace/axis_07.png b/hyppopy/virtualparameterspace/6D/axis_05.png similarity index 100% rename from hyppopy/virtualparameterspace/axis_07.png rename to hyppopy/virtualparameterspace/6D/axis_05.png diff --git a/hyppopy/virtualparameterspace/axis_01.png b/hyppopy/virtualparameterspace/axis_01.png deleted file mode 100644 index 86de66b..0000000 Binary files a/hyppopy/virtualparameterspace/axis_01.png and /dev/null differ diff --git a/hyppopy/virtualparameterspace/axis_06.png b/hyppopy/virtualparameterspace/axis_06.png deleted file mode 100644 index 4910a3c..0000000 Binary files a/hyppopy/virtualparameterspace/axis_06.png and /dev/null differ