# Patch summary (free text ahead of the first "diff --git" header; patch tools skip it).
#
# * hyppopy/solver.py: removes the two leading "# -*- coding: utf-8 -*-" / "#" header
#   lines; the Solver class itself is untouched (context only).
# * hyppopy/workflows/datalaoder/: new package (empty __init__.py) containing
#     - dataloader.py:   DataLoader base class with read(), preprocess() and get();
#                        get() calls preprocess() and then returns self.data.
#     - simpleloader.py: SimpleDataLoader.read(path=, data_name=, labels_name=), which loads
#                        either a pair of .npy files or a .csv file plus a label column name.
# * randomforest_usecase.py / svc_usecase.py: the duplicated module-level data_loader()
#   helper is removed in favour of SimpleDataLoader, and Workflow is now imported from
#   hyppopy.workflows.workflowbase.
# * hyppopy/workflowbase.py is renamed to hyppopy/workflows/workflowbase.py (100% similarity).
#
# NOTE(review): DataLoader derives from plain `object`, so the @abc.abstractmethod
#   decorators are not enforced (enforcement needs abc.ABCMeta / abc.ABC). Note that
#   SimpleDataLoader does not override preprocess(), so making it truly abstract would
#   break instantiation.
# NOTE(review): the CSV branch of SimpleDataLoader.read() catches Exception, prints a
#   message and continues; self.data then keeps the None set in DataLoader.__init__ and
#   get() hands None to solver.set_data(). Consider raising instead.
# NOTE(review): svc_usecase.py keeps `import os`, `import numpy as np`, `import pandas as pd`
#   although data_loader() (their only visible user) is removed; randomforest_usecase.py
#   drops them.
# NOTE(review): the package directory is spelled "datalaoder" (sic) consistently in paths
#   and imports.
# NOTE(review): blank context lines were re-inserted to satisfy the hunk line counts. For
#   solver.py the position of one blank line (placed between the stdlib imports and the
#   hyppopy import) is an assumption - confirm against the repository before applying.

diff --git a/hyppopy/solver.py b/hyppopy/solver.py
index 35de7c8..78d3917 100644
--- a/hyppopy/solver.py
+++ b/hyppopy/solver.py
@@ -1,86 +1,84 @@
-# -*- coding: utf-8 -*-
-#
 # DKFZ
 #
 #
 # Copyright (c) German Cancer Research Center,
 # Division of Medical and Biological Informatics.
 # All rights reserved.
 #
 # This software is distributed WITHOUT ANY WARRANTY; without
 # even the implied warranty of MERCHANTABILITY or FITNESS FOR
 # A PARTICULAR PURPOSE.
 #
 # See LICENSE.txt or http://www.mitk.org for details.
 #
 # Author: Sven Wanner (s.wanner@dkfz.de)
 
 import os
 import logging
 
 from hyppopy.globals import DEBUGLEVEL
 LOG = logging.getLogger(os.path.basename(__file__))
 LOG.setLevel(DEBUGLEVEL)
 
 
 class Solver(object):
     _name = None
     _solver_plugin = None
     _settings_plugin = None
 
     def __init__(self):
         pass
 
     def set_data(self, data):
         self.solver.set_data(data)
 
     def set_parameters(self, params):
         self.settings.set(params)
         self.settings.set_attributes(self.solver)
         self.settings.set_attributes(self.settings)
 
     def read_parameter(self, fname):
         self.settings.read(fname)
         self.settings.set_attributes(self.solver)
         self.settings.set_attributes(self.settings)
 
     def set_loss_function(self, loss_func):
         self.solver.set_loss_function(loss_func)
 
     def run(self):
         self.solver.settings = self.settings
         self.solver.run()
 
     def get_results(self):
         return self.solver.get_results()
 
     @property
     def is_ready(self):
         return self.solver is not None and self.settings is not None
 
     @property
     def solver(self):
         return self._solver_plugin
 
     @solver.setter
     def solver(self, value):
         self._solver_plugin = value
 
     @property
     def settings(self):
         return self._settings_plugin
 
     @settings.setter
     def settings(self, value):
         self._settings_plugin = value
 
     @property
     def name(self):
         return self._name
 
     @name.setter
     def name(self, value):
         if not isinstance(value, str):
             LOG.error(f"Invalid input, str type expected for value, got {type(value)} instead")
             raise IOError(f"Invalid input, str type expected for value, got {type(value)} instead")
         self._name = value
diff --git a/hyppopy/workflows/datalaoder/__init__.py b/hyppopy/workflows/datalaoder/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/hyppopy/workflows/datalaoder/dataloader.py b/hyppopy/workflows/datalaoder/dataloader.py
new file mode 100644
index 0000000..3d86ac4
--- /dev/null
+++ b/hyppopy/workflows/datalaoder/dataloader.py
@@ -0,0 +1,34 @@
+# DKFZ
+#
+#
+# Copyright (c) German Cancer Research Center,
+# Division of Medical and Biological Informatics.
+# All rights reserved.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.
+#
+# See LICENSE.txt or http://www.mitk.org for details.
+#
+# Author: Sven Wanner (s.wanner@dkfz.de)
+
+import abc
+
+
+class DataLoader(object):
+
+    def __init__(self):
+        self.data = None
+
+    @abc.abstractmethod
+    def read(self, **kwargs):
+        raise NotImplementedError("the read method has to be implemented in classes derived from DataLoader")
+
+    @abc.abstractmethod
+    def preprocess(self):
+        pass
+
+    def get(self):
+        self.preprocess()
+        return self.data
diff --git a/hyppopy/workflows/datalaoder/simpleloader.py b/hyppopy/workflows/datalaoder/simpleloader.py
new file mode 100644
index 0000000..4a8c461
--- /dev/null
+++ b/hyppopy/workflows/datalaoder/simpleloader.py
@@ -0,0 +1,41 @@
+# DKFZ
+#
+#
+# Copyright (c) German Cancer Research Center,
+# Division of Medical and Biological Informatics.
+# All rights reserved.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.
+#
+# See LICENSE.txt or http://www.mitk.org for details.
+#
+# Author: Sven Wanner (s.wanner@dkfz.de)
+
+import os
+import numpy as np
+import pandas as pd
+
+from hyppopy.workflows.datalaoder.dataloader import DataLoader
+
+
+class SimpleDataLoader(DataLoader):
+
+    def read(self, **kwargs):
+        if kwargs['data_name'].endswith(".npy"):
+            if not kwargs['labels_name'].endswith(".npy"):
+                raise IOError("Expect both data_name and labels_name being of type .npy!")
+            self.data = [np.load(os.path.join(kwargs['path'], kwargs['data_name'])), np.load(os.path.join(kwargs['path'], kwargs['labels_name']))]
+        elif kwargs['data_name'].endswith(".csv"):
+            try:
+                dataset = pd.read_csv(os.path.join(kwargs['path'], kwargs['data_name']))
+                y = dataset[kwargs['labels_name']].values
+                X = dataset.drop([kwargs['labels_name']], axis=1).values
+                self.data = [X, y]
+            except Exception as e:
+                print("Precondition violation, this usage case expects as data_name a "
+                      "csv file and as label_name a name of a column in this csv table!")
+        else:
+            raise NotImplementedError("This combination of data_name and labels_name "
+                                      "does not yet exist, feel free to add it")
diff --git a/hyppopy/workflows/randomforest_usecase/randomforest_usecase.py b/hyppopy/workflows/randomforest_usecase/randomforest_usecase.py
index 18137ca..c7ca0bc 100644
--- a/hyppopy/workflows/randomforest_usecase/randomforest_usecase.py
+++ b/hyppopy/workflows/randomforest_usecase/randomforest_usecase.py
@@ -1,60 +1,38 @@
-# -*- coding: utf-8 -*-
-#
 # DKFZ
 #
 #
 # Copyright (c) German Cancer Research Center,
 # Division of Medical and Biological Informatics.
 # All rights reserved.
 #
 # This software is distributed WITHOUT ANY WARRANTY; without
 # even the implied warranty of MERCHANTABILITY or FITNESS FOR
 # A PARTICULAR PURPOSE.
 #
 # See LICENSE.txt or http://www.mitk.org for details.
 #
 # Author: Sven Wanner (s.wanner@dkfz.de)
 
-import os
-import numpy as np
-import pandas as pd
+
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import cross_val_score
 
-from hyppopy.workflowbase import Workflow
-
-
-def data_loader(path, data_name, labels_name):
-    if data_name.endswith(".npy"):
-        if not labels_name.endswith(".npy"):
-            raise IOError("Expect both data_name and labels_name being of type .npy!")
-        data = [np.load(os.path.join(path, data_name)), np.load(os.path.join(path, labels_name))]
-    elif data_name.endswith(".csv"):
-        try:
-            dataset = pd.read_csv(os.path.join(path, data_name))
-            y = dataset[labels_name].values
-            X = dataset.drop([labels_name], axis=1).values
-            data = [X, y]
-        except Exception as e:
-            print("Precondition violation, this usage case expects as data_name a "
-                  "csv file and as label_name a name of a column in this csv table!")
-    else:
-        raise NotImplementedError("This combination of data_name and labels_name "
-                                  "does not yet exist, feel free to add it")
-    return data
+from hyppopy.workflows.workflowbase import Workflow
+from hyppopy.workflows.datalaoder.simpleloader import SimpleDataLoader
 
 
 class randomforest_usecase(Workflow):
 
     def __init__(self, args):
         Workflow.__init__(self, args)
 
     def setup(self):
-        data = data_loader(self.args.data, self.solver.settings.data_name, self.solver.settings.labels_name)
-        self.solver.set_data(data)
+        dl = SimpleDataLoader()
+        dl.read(path=self.args.data, data_name=self.solver.settings.data_name, labels_name=self.solver.settings.labels_name)
+        self.solver.set_data(dl.get())
 
     def blackbox_function(self, data, params):
         if "n_estimators" in params.keys():
             params["n_estimators"] = int(round(params["n_estimators"]))
         clf = RandomForestClassifier(**params)
         return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean()
diff --git a/hyppopy/workflows/svc_usecase/svc_usecase.py b/hyppopy/workflows/svc_usecase/svc_usecase.py
index 60ae83d..18f93f1 100644
--- a/hyppopy/workflows/svc_usecase/svc_usecase.py
+++ b/hyppopy/workflows/svc_usecase/svc_usecase.py
@@ -1,58 +1,41 @@
 # -*- coding: utf-8 -*-
 #
 # DKFZ
 #
 #
 # Copyright (c) German Cancer Research Center,
 # Division of Medical and Biological Informatics.
 # All rights reserved.
 #
 # This software is distributed WITHOUT ANY WARRANTY; without
 # even the implied warranty of MERCHANTABILITY or FITNESS FOR
 # A PARTICULAR PURPOSE.
 #
 # See LICENSE.txt or http://www.mitk.org for details.
 #
 # Author: Sven Wanner (s.wanner@dkfz.de)
 
 import os
 import numpy as np
 import pandas as pd
 from sklearn.svm import SVC
 from sklearn.model_selection import cross_val_score
 
-from hyppopy.workflowbase import Workflow
-
-
-def data_loader(path, data_name, labels_name):
-    if data_name.endswith(".npy"):
-        if not labels_name.endswith(".npy"):
-            raise IOError("Expect both data_name and labels_name being of type .npy!")
-        data = [np.load(os.path.join(path, data_name)), np.load(os.path.join(path, labels_name))]
-    elif data_name.endswith(".csv"):
-        try:
-            dataset = pd.read_csv(os.path.join(path, data_name))
-            y = dataset[labels_name].values
-            X = dataset.drop([labels_name], axis=1).values
-            data = [X, y]
-        except Exception as e:
-            print("Precondition violation, this usage case expects as data_name a "
-                  "csv file and as label_name a name of a column in this csv table!")
-    else:
-        raise NotImplementedError("This combination of data_name and labels_name "
-                                  "does not yet exist, feel free to add it")
-    return data
+from hyppopy.workflows.workflowbase import Workflow
+from hyppopy.workflows.datalaoder.simpleloader import SimpleDataLoader
 
 
 class svc_usecase(Workflow):
 
     def __init__(self, args):
         Workflow.__init__(self, args)
 
     def setup(self):
-        data = data_loader(self.args.data, self.solver.settings.data_name, self.solver.settings.labels_name)
-        self.solver.set_data(data)
+        dl = SimpleDataLoader()
+        dl.read(path=self.args.data, data_name=self.solver.settings.data_name,
+                labels_name=self.solver.settings.labels_name)
+        self.solver.set_data(dl.get())
 
     def blackbox_function(self, data, params):
         clf = SVC(**params)
         return -cross_val_score(estimator=clf, X=data[0], y=data[1], cv=3).mean()
diff --git a/hyppopy/workflowbase.py b/hyppopy/workflows/workflowbase.py
similarity index 100%
rename from hyppopy/workflowbase.py
rename to hyppopy/workflows/workflowbase.py