diff --git a/hyppopy/globals.py b/hyppopy/globals.py
index 99491ce..a00d280 100644
--- a/hyppopy/globals.py
+++ b/hyppopy/globals.py
@@ -1,31 +1,33 @@
 # DKFZ
 #
 # Copyright (c) German Cancer Research Center,
 # Division of Medical and Biological Informatics.
 # All rights reserved.
 #
 # This software is distributed WITHOUT ANY WARRANTY; without
 # even the implied warranty of MERCHANTABILITY or FITNESS FOR
 # A PARTICULAR PURPOSE.
 #
 # See LICENSE.txt or http://www.mitk.org for details.

 import os
 import sys
 import logging

 ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 sys.path.insert(0, ROOT)

 LIBNAME = "hyppopy"
 PLUGIN_DEFAULT_DIR = os.path.join(ROOT, *(LIBNAME, "plugins"))
 TESTDATA_DIR = os.path.join(ROOT, *(LIBNAME, "tests", "data"))
 SETTINGSSOLVERPATH = "settings/solver_plugin"
 SETTINGSCUSTOMPATH = "settings/custom"
 DEEPDICT_XML_ROOT = LIBNAME
+RANDOMSAMPLES = 10000
+DEFAULTITERATIONS = 500

 LOGFILENAME = os.path.join(ROOT, '{}_log.log'.format(LIBNAME))
 DEBUGLEVEL = logging.DEBUG
 logging.basicConfig(filename=LOGFILENAME, filemode='w', format='%(levelname)s: %(name)s - %(message)s')
diff --git a/hyppopy/helpers.py b/hyppopy/helpers.py
index 3fc9cf0..83cbdff 100644
--- a/hyppopy/helpers.py
+++ b/hyppopy/helpers.py
@@ -1,99 +1,212 @@
 import copy
+import time
 import itertools
+import numpy as np
+from numpy import argmin, argmax, unique
 from collections import OrderedDict, abc


+def gaussian(x, mu, sigma):
+    return 1.0/(sigma * np.sqrt(2*np.pi))*np.exp(-(x-mu)**2/(2*sigma**2))
+
+
+def gaussian_axis_sampling(a, b, N):
+    center = a + (b - a) / 2.0
+    delta = (b - a) / N
+    bn = b - center
+    xn = np.arange(0, bn, delta)
+    dn = []
+    for x in xn:
+        dn.append(1/gaussian(x, 0, bn/2.5))
+    dn = np.array(dn)
+    dn /= np.sum(dn)
+    dn *= bn
+
+    axis = [0]
+    for x in dn:
+        axis.append(x+axis[-1])
+    axis.insert(0, -axis[-1])
+    axis = np.array(axis)
+    axis += center
+    return axis
+
+
+def log_axis_sampling(a, b, N):
+    delta = (b - a) / N
+    logrange = np.arange(a, b + delta, delta)
+    for n in range(logrange.shape[0]):
+        logrange[n] = np.exp(logrange[n])
+    return logrange
+
+
+def sample_domain(start, stop, count, ftype="uniform"):
+    assert stop > start, "Precondition Violation, stop <= start not allowed!"
+    assert count > 0, "Precondition Violation, N <= 0 not allowed!"
+    if ftype == 'uniform':
+        delta = (stop - start)/count
+        return np.arange(start, stop + delta, delta)
+    elif ftype == 'loguniform':
+        return log_axis_sampling(start, stop, count)
+    elif ftype == 'normal':
+        return gaussian_axis_sampling(start, stop, count)
+    raise IOError("Precondition Violation, unknown sampling function type!")
+
+
+class Trials(object):
+
+    def __init__(self):
+        self.loss = []
+        self.duration = []
+        self.status = []
+        self.parameter = []
+        self.best = None
+        self._tick = None
+
+    def start_iteration(self):
+        self._tick = time.process_time()
+
+    def stop_iteration(self):
+        if self._tick is None:
+            return
+        self.duration.append(time.process_time()-self._tick)
+        self._tick = None
+
+    def set_status(self, status=True):
+        self.status.append(status)
+
+    def set_parameter(self, params):
+        self.parameter.append(params)
+
+    def set_loss(self, value):
+        self.loss.append(value)
+
+    def get(self):
+        if len(self.loss) <= 0:
+            raise Exception("Empty solver results!")
+        if len(self.loss) != len(self.duration) or len(self.loss) != len(self.parameter) or len(self.loss) != len(self.status):
+            raise Exception("Inconsistent results in gridsearch solver!")
+        best_index = argmin(self.loss)
+        best = self.parameter[best_index]
+        worst_loss = self.loss[argmax(self.loss)]
+        for n in range(len(self.status)):
+            if not self.status[n]:
+                self.loss[n] = worst_loss
+
+        res = {
+            'losses': self.loss,
+            'duration': self.duration
+        }
+        is_string = []
+        for key, value in self.parameter[0].items():
+            res[key] = []
+            if isinstance(value, str):
+                is_string.append(key)
+
+        for p in self.parameter:
+            for key, value in p.items():
+                res[key].append(value)
+
+        for key in is_string:
+            uniques = unique(res[key])
+            lookup = {}
+            for n, p in enumerate(uniques):
+                lookup[p] = n
+            for n in range(len(res[key])):
+                res[key][n] = lookup[res[key][n]]
+
+        return res, best
+
+
 class NestedDictUnfolder(object):

     def __init__(self, nested_dict):
         self._nested_dict = nested_dict
         self._categories = []
         self._values = OrderedDict()
         self._tree_leafs = []

         NestedDictUnfolder.nested_dict_iter(self._nested_dict, self)

     @staticmethod
     def nested_dict_iter(nested, unfolder):
         for key, value in nested.items():
             if isinstance(value, abc.Mapping):
                 unfolder.add_category(key)
                 NestedDictUnfolder.nested_dict_iter(value, unfolder)
             else:
                 unfolder.add_values(key, value)
                 unfolder.mark_leaf()

     def find_parent_nodes(self, nested, node, last_node=""):
         for key, value in nested.items():
             if key == node:
                 self._tree_leafs.append(last_node)
                 return
             else:
                 last_node = key
             if isinstance(value, abc.Mapping):
                 self.find_parent_nodes(value, node, last_node)
             else:
                 return

     def find_parent_node(self, leaf_names):
         if not isinstance(leaf_names, list):
             leaf_names = [leaf_names]
         for ln in leaf_names:
             try:
                 pos = self._categories.index(ln) - 1
                 candidate = self._categories[pos]
                 if candidate not in leaf_names:
                     return candidate
             except:
                 pass
         return None

     def add_category(self, name):
         self._categories.append(name)

     def add_values(self, name, values):
         self._values[name] = values

     def mark_leaf(self):
         if len(self._categories) > 0:
             if not self._categories[-1] in self._tree_leafs:
                 self._tree_leafs.append(self._categories[-1])

     def permutate_values(self):
         pset = list(self._values.values())
         pset = list(itertools.product(*pset))
         permutations = []
         okeys = list(self._values.keys())
         for ps in pset:
             permutations.append({})
             for i in range(len(okeys)):
                 permutations[-1][okeys[i]] = ps[i]
         return permutations

     def add_categories(self, values_permutated):
         while True:
             parent = self.find_parent_node(self._tree_leafs)
             if parent is None:
                 return
             result = []
             for tl in self._tree_leafs:
                 for elem in values_permutated:
                     new = copy.deepcopy(elem)
                     new[parent] = tl
                     result.append(new)
                 while tl in self._categories:
                     self._categories.remove(tl)
             while parent in self._categories:
                 self._categories.remove(parent)
             self._tree_leafs = []
             self.find_parent_nodes(self._nested_dict, parent)
             if len(self._tree_leafs) == 1 and self._tree_leafs[0] == "":
                 break
             values_permutated = copy.deepcopy(result)
         return result

     def unfold(self):
         values_permutated = self.permutate_values()
         if len(self._categories) > 0:
             return self.add_categories(values_permutated)
         return values_permutated
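Reviewer note (not part of the diff): a minimal sketch of how the relocated helpers behave, assuming hyppopy is importable; the parameter name and values below are made up.

```python
import numpy as np
from hyppopy.helpers import sample_domain, Trials

# 'uniform' returns count + 1 evenly spaced grid points over [start, stop]
print(sample_domain(0, 10, 5, ftype="uniform"))      # [ 0.  2.  4.  6.  8. 10.]

# Trials collects one loss/duration/status/parameter entry per iteration
trials = Trials()
for lr in sample_domain(0.1, 1.0, 3):
    trials.start_iteration()
    trials.set_parameter({"lr": float(lr)})
    trials.set_loss((lr - 0.5) ** 2)                 # dummy loss
    trials.set_status(True)
    trials.stop_iteration()
results, best = trials.get()
print(best)                                          # parameter dict with the smallest loss
```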
diff --git a/hyppopy/plugins/gridsearch_settings_plugin.py b/hyppopy/plugins/gridsearch_settings_plugin.py
index 94e51e2..846c2c9 100644
--- a/hyppopy/plugins/gridsearch_settings_plugin.py
+++ b/hyppopy/plugins/gridsearch_settings_plugin.py
@@ -1,146 +1,101 @@
 # DKFZ
 #
 #
 # Copyright (c) German Cancer Research Center,
 # Division of Medical and Biological Informatics.
 # All rights reserved.
 #
 # This software is distributed WITHOUT ANY WARRANTY; without
 # even the implied warranty of MERCHANTABILITY or FITNESS FOR
 # A PARTICULAR PURPOSE.
 #
 # See LICENSE.txt or http://www.mitk.org for details.
 #
 # Author: Sven Wanner (s.wanner@dkfz.de)

 import os
 import logging
 import numpy as np
 from pprint import pformat
 from hyppopy.globals import DEBUGLEVEL
 LOG = logging.getLogger(os.path.basename(__file__))
 LOG.setLevel(DEBUGLEVEL)

 from yapsy.IPlugin import IPlugin

+from hyppopy.helpers import sample_domain
 from hyppopy.settingspluginbase import SettingsPluginBase
 from hyppopy.settingsparticle import split_categorical
 from hyppopy.settingsparticle import SettingsParticle


-def gaussian(x, mu, sigma):
-    return 1.0/(sigma * np.sqrt(2*np.pi))*np.exp(-(x-mu)**2/(2*sigma**2))
-
-
-def gaussian_axis_sampling(a, b, N):
-    center = a + (b - a) / 2.0
-    delta = (b - a) / N
-    bn = b - center
-    xn = np.arange(0, bn, delta)
-    dn = []
-    for x in xn:
-        dn.append(1/gaussian(x, 0, bn/2.5))
-    dn = np.array(dn)
-    dn /= np.sum(dn)
-    dn *= bn
-
-    axis = [0]
-    for x in dn:
-        axis.append(x+axis[-1])
-    axis.insert(0, -axis[-1])
-    axis = np.array(axis)
-    axis += center
-    return axis
-
-
-def log_axis_sampling(a, b, N):
-    delta = (b - a) / N
-    logrange = np.arange(a, b + delta, delta)
-    for n in range(logrange.shape[0]):
-        logrange[n] = np.exp(logrange[n])
-    return logrange
-
-
-def sample(start, stop, count, ftype="uniform"):
-    assert stop > start, "Precondition Violation, stop <= start not allowed!"
-    assert count > 0, "Precondition Violation, N <= 0 not allowed!"
-    if ftype == 'uniform':
-        delta = (stop - start)/count
-        return np.arange(start, stop + delta, delta)
-    elif ftype == 'loguniform':
-        return log_axis_sampling(start, stop, count)
-    elif ftype == 'normal':
-        return gaussian_axis_sampling(start, stop, count)
-    raise IOError("Precondition Violation, unknown sampling function type!")
-
-
 class gridsearch_Settings(SettingsPluginBase, IPlugin):

     def __init__(self):
         SettingsPluginBase.__init__(self)
         LOG.debug("initialized")

     def convert_parameter(self, input_dict):
         LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(input_dict)))

         solution_space = {}
         # split input in categorical and non-categorical data
         cat, uni = split_categorical(input_dict)
         # build up dictionary keeping all non-categorical data
         uniforms = {}
         for name, content in uni.items():
             particle = gridsearch_SettingsParticle(name=name)
             for key, value in content.items():
                 if key == 'domain':
                     particle.domain = value
                 elif key == 'data':
                     particle.data = value
                 elif key == 'type':
                     particle.dtype = value
             uniforms[name] = particle.get()

         # build nested categorical structure
         inner_level = uniforms
         for key, value in cat.items():
             tmp = {}
             tmp2 = {}
             for key2, value2 in value.items():
                 if key2 == 'data':
                     for elem in value2:
                         tmp[elem] = inner_level
             tmp2[key] = tmp
             inner_level = tmp2
         if len(cat) > 0:
             solution_space = tmp2
         else:
             solution_space = inner_level

         return solution_space


 class gridsearch_SettingsParticle(SettingsParticle):

     def __init__(self, name=None, domain=None, dtype=None, data=None):
         SettingsParticle.__init__(self, name, domain, dtype, data)

     def convert(self):
         assert isinstance(self.data, list), "Precondition Violation, invalid input type for data!"
         if self.domain == "categorical":
             return self.data
         else:
             assert len(self.data) >= 2, "Precondition Violation, invalid input data!"
             if len(self.data) < 3:
                 self.data.append(10)
                 LOG.warning("Grid sampling has set number of samples automatically to 10!")
                 print("WARNING: Grid sampling has set number of samples automatically to 10!")
-            samples = sample(start=self.data[0], stop=self.data[1], count=self.data[2], ftype=self.domain)
+            samples = sample_domain(start=self.data[0], stop=self.data[1], count=self.data[2], ftype=self.domain)
             if self.dtype == "int":
                 data = []
                 for s in samples:
                     val = int(np.round(s))
                     if len(data) > 0:
                         if val == data[-1]:
                             continue
                     data.append(val)
                 return data
             return list(samples)
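Reviewer note (not part of the diff): a rough sketch of the structure convert_parameter builds, assuming the usual hyppopy parameter description format; the names and numbers below are invented.

```python
# Hypothetical input: one categorical and one continuous hyperparameter.
input_dict = {
    "kernel": {"domain": "categorical", "data": ["rbf", "linear"], "type": "str"},
    "C":      {"domain": "uniform", "data": [0.1, 10.0, 4], "type": "float"},
}

# Non-categorical entries are gridded via sample_domain, then nested under
# every categorical value, giving roughly:
solution_space = {
    "kernel": {
        "rbf":    {"C": [0.1, 2.575, 5.05, 7.525, 10.0]},
        "linear": {"C": [0.1, 2.575, 5.05, 7.525, 10.0]},
    }
}
```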
diff --git a/hyppopy/plugins/gridsearch_solver_plugin.py b/hyppopy/plugins/gridsearch_solver_plugin.py
index 66e474d..83a1be0 100644
--- a/hyppopy/plugins/gridsearch_solver_plugin.py
+++ b/hyppopy/plugins/gridsearch_solver_plugin.py
@@ -1,140 +1,75 @@
 # DKFZ
 #
 #
 # Copyright (c) German Cancer Research Center,
 # Division of Medical and Biological Informatics.
 # All rights reserved.
 #
 # This software is distributed WITHOUT ANY WARRANTY; without
 # even the implied warranty of MERCHANTABILITY or FITNESS FOR
 # A PARTICULAR PURPOSE.
 #
 # See LICENSE.txt or http://www.mitk.org for details.
 #
 # Author: Sven Wanner (s.wanner@dkfz.de)

 import os
-import time
 import logging
-from numpy import argmin, argmax, unique
 from hyppopy.globals import DEBUGLEVEL
 LOG = logging.getLogger(os.path.basename(__file__))
 LOG.setLevel(DEBUGLEVEL)

 from pprint import pformat
 from yapsy.IPlugin import IPlugin

+from hyppopy.helpers import Trials
 from hyppopy.helpers import NestedDictUnfolder
 from hyppopy.solverpluginbase import SolverPluginBase


-class Trials(object):
-
-    def __init__(self):
-        self.loss = []
-        self.duration = []
-        self.status = []
-        self.parameter = []
-        self.best = None
-        self._tick = None
-
-    def start_iteration(self):
-        self._tick = time.process_time()
-
-    def stop_iteration(self):
-        if self._tick is None:
-            return
-        self.duration.append(time.process_time()-self._tick)
-        self._tick = None
-
-    def set_status(self, status=True):
-        self.status.append(status)
-
-    def set_parameter(self, params):
-        self.parameter.append(params)
-
-    def set_loss(self, value):
-        self.loss.append(value)
-
-    def get(self):
-        if len(self.loss) <= 0:
-            raise Exception("Empty solver results!")
-        if len(self.loss) != len(self.duration) or len(self.loss) != len(self.parameter) or len(self.loss) != len(self.status):
-            raise Exception("Inconsistent results in gridsearch solver!")
-        best_index = argmin(self.loss)
-        best = self.parameter[best_index]
-        worst_loss = self.loss[argmax(self.loss)]
-        for n in range(len(self.status)):
-            if not self.status[n]:
-                self.loss[n] = worst_loss
-
-        res = {
-            'losses': self.loss,
-            'duration': self.duration
-        }
-        is_string = []
-        for key, value in self.parameter[0].items():
-            res[key] = []
-            if isinstance(value, str):
-                is_string.append(key)
-
-        for p in self.parameter:
-            for key, value in p.items():
-                res[key].append(value)
-
-        for key in is_string:
-            uniques = unique(res[key])
-            lookup = {}
-            for n, p in enumerate(uniques):
-                lookup[p] = n
-            for n in range(len(res[key])):
-                res[key][n] = lookup[res[key][n]]
-
-        return res, best
-
-
 class gridsearch_Solver(SolverPluginBase, IPlugin):
     trials = None
     best = None

     def __init__(self):
         SolverPluginBase.__init__(self)
         LOG.debug("initialized")

     def blackbox_function(self, params):
         loss = None
         self.trials.set_parameter(params)
         try:
             self.trials.start_iteration()
             loss = self.blackbox_function_template(self.data, params)
             self.trials.stop_iteration()
             if loss is None:
                 self.trials.set_status(False)
         except Exception as e:
             LOG.error("execution of self.loss(self.data, params) failed due to:\n {}".format(e))
             self.trials.set_status(False)
             self.trials.stop_iteration()
         self.trials.set_status(True)
         self.trials.set_loss(loss)
         return

     def execute_solver(self, parameter):
         LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(parameter)))
         self.trials = Trials()
         unfolder = NestedDictUnfolder(parameter)
         parameter_set = unfolder.unfold()
         N = len(parameter_set)
         print("")
         try:
             for n, params in enumerate(parameter_set):
                 self.blackbox_function(params)
                 print("\r{}% done".format(int(round(100.0/N*n))), end="")
         except Exception as e:
             msg = "internal error in gridsearch execute_solver occured. {}".format(e)
             LOG.error(msg)
             raise BrokenPipeError(msg)
+        print("\r{}% done".format(100), end="")
         print("")

     def convert_results(self):
         return self.trials.get()
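Reviewer note (not part of the diff): the solver relies on NestedDictUnfolder to flatten a nested solution space into one dict per grid point; a small sketch with made-up values:

```python
from hyppopy.helpers import NestedDictUnfolder

space = {"kernel": {"rbf":    {"C": [0.1, 5.05, 10.0]},
                    "linear": {"C": [0.1, 5.05, 10.0]}}}
parameter_set = NestedDictUnfolder(space).unfold()
print(len(parameter_set))   # 6 grid points
print(parameter_set[0])     # e.g. {'C': 0.1, 'kernel': 'rbf'}
```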
diff --git a/hyppopy/plugins/randomsearch_settings_plugin.py b/hyppopy/plugins/randomsearch_settings_plugin.py
new file mode 100644
index 0000000..75c5350
--- /dev/null
+++ b/hyppopy/plugins/randomsearch_settings_plugin.py
@@ -0,0 +1,95 @@
+# DKFZ
+#
+#
+# Copyright (c) German Cancer Research Center,
+# Division of Medical and Biological Informatics.
+# All rights reserved.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.
+#
+# See LICENSE.txt or http://www.mitk.org for details.
+#
+# Author: Sven Wanner (s.wanner@dkfz.de)
+
+import os
+import random
+import logging
+import numpy as np
+from pprint import pformat
+from hyppopy.globals import DEBUGLEVEL
+LOG = logging.getLogger(os.path.basename(__file__))
+LOG.setLevel(DEBUGLEVEL)
+
+from yapsy.IPlugin import IPlugin
+
+from hyppopy.helpers import sample_domain
+from hyppopy.projectmanager import ProjectManager
+from hyppopy.settingsparticle import SettingsParticle
+from hyppopy.settingspluginbase import SettingsPluginBase
+from hyppopy.globals import RANDOMSAMPLES, DEFAULTITERATIONS
+
+
+class randomsearch_Settings(SettingsPluginBase, IPlugin):
+
+    def __init__(self):
+        SettingsPluginBase.__init__(self)
+        LOG.debug("initialized")
+
+    def convert_parameter(self, input_dict):
+        LOG.debug("convert input parameter\n\n\t{}\n".format(pformat(input_dict)))
+
+        solution_space = {}
+        for name, content in input_dict.items():
+            particle = randomsearch_SettingsParticle(name=name)
+            for key, value in content.items():
+                if key == 'domain':
+                    particle.domain = value
+                elif key == 'data':
+                    particle.data = value
+                elif key == 'type':
+                    particle.dtype = value
+            solution_space[name] = particle.get()
+        return solution_space
+
+
+class randomsearch_SettingsParticle(SettingsParticle):
+
+    def __init__(self, name=None, domain=None, dtype=None, data=None):
+        SettingsParticle.__init__(self, name, domain, dtype, data)
+
+    def convert(self):
+        assert isinstance(self.data, list), "Precondition Violation, invalid input type for data!"
+        N = DEFAULTITERATIONS
+        if "max_iterations" in ProjectManager.__dict__.keys():
+            N = ProjectManager.max_iterations
+        else:
+            setattr(ProjectManager, 'max_iterations', N)
+            msg = "No max_iterations set, using default [{}]".format(DEFAULTITERATIONS)
+            LOG.warning(msg)
+            print("WARNING: {}".format(msg))
+
+        if self.domain == "categorical":
+            samples = []
+            for n in range(N):
+                samples.append(random.sample(self.data, 1)[0])
+            return samples
+        else:
+            assert len(self.data) >= 2, "Precondition Violation, invalid input data!"
+
+            full_range = list(sample_domain(start=self.data[0], stop=self.data[1], count=RANDOMSAMPLES, ftype=self.domain))
+            if self.dtype == "int":
+                data = []
+                for s in full_range:
+                    val = int(np.round(s))
+                    if len(data) > 0:
+                        if val == data[-1]:
+                            continue
+                    data.append(val)
+                full_range = data
+            samples = []
+            for n in range(N):
+                samples.append(random.sample(full_range, 1)[0])
+            return samples
diff --git a/hyppopy/plugins/randomsearch_settings_plugin.yapsy-plugin b/hyppopy/plugins/randomsearch_settings_plugin.yapsy-plugin
new file mode 100644
index 0000000..27d25fd
--- /dev/null
+++ b/hyppopy/plugins/randomsearch_settings_plugin.yapsy-plugin
@@ -0,0 +1,9 @@
+[Core]
+Name = randomsearch
+Module = randomsearch_settings_plugin
+
+[Documentation]
+Author = Sven Wanner
+Version = 0.1
+Website = https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html
+Description = RandomSearch Settings Plugin
\ No newline at end of file
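Reviewer note (not part of the diff): what the new settings particle effectively does for a non-categorical parameter, written out as a standalone approximation; the bounds and iteration count below are made up.

```python
import random
from hyppopy.helpers import sample_domain
from hyppopy.globals import RANDOMSAMPLES   # 10000 grid points by default

max_iterations = 5    # normally read from ProjectManager.max_iterations
full_range = list(sample_domain(start=0.1, stop=10.0, count=RANDOMSAMPLES, ftype="uniform"))
samples = [random.sample(full_range, 1)[0] for _ in range(max_iterations)]
print(samples)        # five values drawn independently from the discretized axis
```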
diff --git a/hyppopy/plugins/randomsearch_solver_plugin.py b/hyppopy/plugins/randomsearch_solver_plugin.py
new file mode 100644
index 0000000..ea8d579
--- /dev/null
+++ b/hyppopy/plugins/randomsearch_solver_plugin.py
@@ -0,0 +1,75 @@
+# DKFZ
+#
+#
+# Copyright (c) German Cancer Research Center,
+# Division of Medical and Biological Informatics.
+# All rights reserved.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.
+#
+# See LICENSE.txt or http://www.mitk.org for details.
+#
+# Author: Sven Wanner (s.wanner@dkfz.de)
+
+import os
+import logging
+from hyppopy.globals import DEBUGLEVEL
+LOG = logging.getLogger(os.path.basename(__file__))
+LOG.setLevel(DEBUGLEVEL)
+
+from pprint import pformat
+from yapsy.IPlugin import IPlugin
+
+from hyppopy.helpers import Trials
+from hyppopy.projectmanager import ProjectManager
+from hyppopy.solverpluginbase import SolverPluginBase
+
+
+class randomsearch_Solver(SolverPluginBase, IPlugin):
+    trials = None
+    best = None
+
+    def __init__(self):
+        SolverPluginBase.__init__(self)
+        LOG.debug("initialized")
+
+    def blackbox_function(self, params):
+        loss = None
+        self.trials.set_parameter(params)
+        try:
+            self.trials.start_iteration()
+            loss = self.blackbox_function_template(self.data, params)
+            self.trials.stop_iteration()
+            if loss is None:
+                self.trials.set_status(False)
+        except Exception as e:
+            LOG.error("execution of self.loss(self.data, params) failed due to:\n {}".format(e))
+            self.trials.set_status(False)
+            self.trials.stop_iteration()
+        self.trials.set_status(True)
+        self.trials.set_loss(loss)
+        return
+
+    def execute_solver(self, parameter):
+        LOG.debug("execute_solver using solution space:\n\n\t{}\n".format(pformat(parameter)))
+        self.trials = Trials()
+        N = ProjectManager.max_iterations
+        print("")
+        try:
+            for n in range(N):
+                params = {}
+                for key, value in parameter.items():
+                    params[key] = value[n]
+                self.blackbox_function(params)
+                print("\r{}% done".format(int(round(100.0 / N * n))), end="")
+        except Exception as e:
+            msg = "internal error in randomsearch execute_solver occurred. {}".format(e)
+            LOG.error(msg)
+            raise BrokenPipeError(msg)
+        print("\r{}% done".format(100), end="")
+        print("")
+
+    def convert_results(self):
+        return self.trials.get()
diff --git a/hyppopy/plugins/randomsearch_solver_plugin.yapsy-plugin b/hyppopy/plugins/randomsearch_solver_plugin.yapsy-plugin
new file mode 100644
index 0000000..e465d93
--- /dev/null
+++ b/hyppopy/plugins/randomsearch_solver_plugin.yapsy-plugin
@@ -0,0 +1,9 @@
+[Core]
+Name = randomsearch
+Module = randomsearch_solver_plugin
+
+[Documentation]
+Author = Sven Wanner
+Version = 0.1
+Website = https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html
+Description = RandomSearch Solver Plugin
\ No newline at end of file
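Reviewer note (not part of the diff): the solver's core loop simply picks the n-th pre-sampled value of every parameter; a minimal sketch with invented data:

```python
parameter = {"C": [0.3, 7.1, 2.2], "kernel": ["rbf", "linear", "rbf"]}
for n in range(3):    # ProjectManager.max_iterations in the plugin
    params = {key: value[n] for key, value in parameter.items()}
    print(params)     # e.g. {'C': 0.3, 'kernel': 'rbf'}
```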
diff --git a/hyppopy/resultviewer.py b/hyppopy/resultviewer.py
index a718c52..c68a44d 100644
--- a/hyppopy/resultviewer.py
+++ b/hyppopy/resultviewer.py
@@ -1,174 +1,174 @@
 # DKFZ
 #
 #
 # Copyright (c) German Cancer Research Center,
 # Division of Medical and Biological Informatics.
 # All rights reserved.
 #
 # This software is distributed WITHOUT ANY WARRANTY; without
 # even the implied warranty of MERCHANTABILITY or FITNESS FOR
 # A PARTICULAR PURPOSE.
 #
 # See LICENSE.txt or http://www.mitk.org for details.
 #
 # Author: Sven Wanner (s.wanner@dkfz.de)

 import os
 import copy
 import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt
 import logging
 from hyppopy.globals import DEBUGLEVEL
 LOG = logging.getLogger(os.path.basename(__file__))
 LOG.setLevel(DEBUGLEVEL)

 sns.set(style="darkgrid")


 class ResultViewer(object):

     def __init__(self, fname=None, save_only=False):
         self.df = None
         self.has_duration = False
         self.hyperparameter = None
         self.save_only = save_only
         self.path = None
         self.appendix = None
         if fname is not None:
             self.read(fname)

     def close_all(self):
         plt.close('all')

     def read(self, fname):
         self.path = os.path.dirname(fname)
         split = os.path.basename(fname).split("_")
         self.appendix = split[-1]
         self.appendix = self.appendix[:-4]
         self.df = pd.read_csv(fname, index_col=0)
         const_data = ["duration", "losses"]
         hyperparameter_columns = [item for item in self.df.columns if item not in const_data]
         self.hyperparameter = pd.DataFrame()
         for key in hyperparameter_columns:
             self.hyperparameter[key] = self.df[key]
         self.has_duration = "duration" in self.df.columns

     def plot_XYGrid(self, df, x, y, name="", save=None, show=True):
         argmin = df["losses"].idxmin()
         grid = [len(x), len(y)]
         if grid[0] == 1 and grid[1] == 1:
             fig = plt.figure(figsize=(10.0, 8))
-            plt.plot(df[x[0]].values, df[y[0]].values, 'o')
+            plt.plot(df[x[0]].values, df[y[0]].values, '.')
             plt.plot(df[x[0]].values[argmin], df[y[0]].values[argmin], 'ro')
             plt.grid(True)
             plt.ylabel(y[0])
             plt.xlabel(x[0])
             plt.title(name, fontsize=16)
         else:
             if grid[0] > 1 and grid[1] == 1:
                 fig, axs = plt.subplots(ncols=grid[0], figsize=(10.0, grid[1] * 3.5))
             elif grid[0] == 1 and grid[1] > 1:
                 fig, axs = plt.subplots(nrows=grid[1], figsize=(10.0, grid[1] * 3.5))
             else:
                 fig, axs = plt.subplots(nrows=grid[1], ncols=grid[0], figsize=(10.0, grid[1] * 3.5))
             fig.subplots_adjust(left=0.08, right=0.98, wspace=0.3)
             for nx, _x in enumerate(x):
                 for ny, _y in enumerate(y):
                     if grid[0] > 1 and grid[1] == 1:
                         ax = axs[nx]
                     elif grid[0] == 1 and grid[1] > 1:
                         ax = axs[ny]
                     else:
                         ax = axs[ny, nx]
-                    ax.plot(df[_x].values, df[_y].values, 'o')
+                    ax.plot(df[_x].values, df[_y].values, '.')
                     ax.plot(df[_x].values[argmin], df[_y].values[argmin], 'ro')
                     ax.grid(True)
                     if nx == 0:
                         ax.set_ylabel(_y)
                     if ny == len(y)-1:
                         ax.set_xlabel(_x)
             fig.suptitle(name, fontsize=16)

         if save is not None:
             if not os.path.isdir(os.path.dirname(save)):
                 os.makedirs(os.path.dirname(save))
             plt.savefig(save)
         if show:
             plt.show()

     def plot_performance_and_feature_grids(self, save=True):
         x_axis = []
         if 'losses' in self.df.columns:
             x_axis.append('losses')
         if 'iterations' in self.df.columns:
             x_axis.append('iterations')
         y_axis_performance = []
         if 'accuracy' in self.df.columns:
             y_axis_performance.append('accuracy')
         if 'duration' in self.df.columns:
             y_axis_performance.append('duration')
         features = []
         for cit in self.df.columns:
             if cit not in x_axis and cit not in y_axis_performance:
                 features.append(cit)

         save_name = None
         if save:
             save_name = os.path.join(self.path, "performance" + self.appendix + ".png")
         self.plot_XYGrid(self.df, x=x_axis, y=y_axis_performance, name="Performance", save=save_name, show=not self.save_only)

         chunks = [features[x:x + 3] for x in range(0, len(features), 3)]
         for n, chunk in enumerate(chunks):
             save_name = None
             if save:
                 save_name = os.path.join(self.path, "features_{}_".format(str(n).zfill(3)) + self.appendix + ".png")
             self.plot_XYGrid(self.df, x=x_axis, y=chunk, name="Feature set {}".format(n+1), save=save_name, show=not self.save_only)

     def plot_feature_matrix(self, save=True):
         sns_plot = sns.pairplot(self.df, height=1.8, aspect=1.8, plot_kws=dict(edgecolor="k", linewidth=0.5), diag_kind="kde", diag_kws=dict(shade=True))
         fig = sns_plot.fig
         fig.subplots_adjust(top=0.93, wspace=0.3)
         t = fig.suptitle('Pairwise Plots', fontsize=14)
         if not self.save_only:
             plt.show()
         if save:
             save_name = os.path.join(self.path, "matrixview_"+self.appendix+".png")
             try:
                 sns_plot.savefig(save_name)
             except Exception as e:
                 msg = "failed to save file {}, reason {}".format(save_name, e)
                 LOG.error(msg)
                 raise IOError(msg)

     def plot_duration(self, save=True):
         if "duration" in self.df.columns:
             sns_plot = sns.jointplot(y="duration", x="losses", data=self.df, kind="kde")
             if not self.save_only:
                 plt.show()
             if save:
                 save_name = os.path.join(self.path, "t_vs_loss_" + self.appendix + ".png")
                 try:
                     sns_plot.savefig(save_name)
                 except Exception as e:
                     msg = "failed to save file {}, reason {}".format(save_name, e)
                     LOG.error(msg)
                     raise IOError(msg)

     def show(self, save=True):
         self.plot_duration(save)
         self.plot_feature_matrix(save)
         self.plot_performance_and_feature_grids(save)
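Reviewer note (not part of the diff): the marker change above affects all scatter-style grids produced by ResultViewer; a minimal usage sketch, assuming a results CSV written by a previous hyppopy run (the file name is made up):

```python
from hyppopy.resultviewer import ResultViewer

viewer = ResultViewer("results/report_gridsearch.csv", save_only=True)
viewer.show(save=True)   # writes performance*, features_*, matrixview_* and t_vs_loss_* PNGs
viewer.close_all()
```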