diff --git a/CMakeExternals/Vigra.cmake b/CMakeExternals/Vigra.cmake index 1db5f47de4..c0f9517438 100644 --- a/CMakeExternals/Vigra.cmake +++ b/CMakeExternals/Vigra.cmake @@ -1,43 +1,43 @@ #----------------------------------------------------------------------------- # VIGRA #----------------------------------------------------------------------------- if(MITK_USE_Vigra) # Sanity checks if(DEFINED Vigra_DIR AND NOT EXISTS ${Vigra_DIR}) message(FATAL_ERROR "Vigra_DIR variable is defined but corresponds to non-existing directory") endif() if(NOT ${MITK_USE_HDF5}) message(FATAL_ERROR "HDF5 is required for Vigra. Please enable it.") endif() set(proj Vigra) set(proj_DEPENDENCIES HDF5) set(Vigra_DEPENDS ${proj}) if(NOT DEFINED Vigra_DIR) ExternalProject_Add(${proj} URL ${MITK_THIRDPARTY_DOWNLOAD_PREFIX_URL}/vigra-1.10.0-src.tar.gz URL_MD5 4f963f0be4fcb8b06271c2aa40baa9be - PATCH_COMMAND ${PATCH_COMMAND} -N -p1 -i ${CMAKE_CURRENT_LIST_DIR}/Vigra.patch + PATCH_COMMAND ${PATCH_COMMAND} -N -p1 -i ${CMAKE_CURRENT_LIST_DIR}/Vigra.patch CMAKE_GENERATOR ${gen} CMAKE_ARGS ${ep_common_args} -DAUTOEXEC_TESTS:BOOL=OFF -DWITH_VIGRANUMPY:BOOL=OFF -DHDF5_DIR:PATH=${HDF5_DIR} -DCMAKE_INSTALL_RPATH_USE_LINK_PATH:BOOL=TRUE -DCMAKE_INSTALL_PREFIX:PATH= DEPENDS ${proj_DEPENDENCIES} ) set(Vigra_DIR ${ep_prefix}) else() mitkMacroEmptyExternalProject(${proj} "${proj_DEPENDENCIES}") endif() endif(MITK_USE_Vigra) diff --git a/CMakeExternals/Vigra.patch b/CMakeExternals/Vigra.patch index 297e23a8a8..b462dacc91 100644 --- a/CMakeExternals/Vigra.patch +++ b/CMakeExternals/Vigra.patch @@ -1,1566 +1,291 @@ -diff -urNb vigra-Version-1-10-0/CMakeLists.txt Vigra-src/CMakeLists.txt +diff -urNb vigra-Version-1-10-0/CMakeLists.txt Vigra/CMakeLists.txt --- vigra-Version-1-10-0/CMakeLists.txt 2013-11-18 17:48:16.000000000 +0100 -+++ Vigra-src/CMakeLists.txt 2015-02-24 14:31:03.716528000 +0100 ++++ Vigra/CMakeLists.txt 2015-03-03 10:13:57.693725000 +0100 @@ -70,8 +70,11 @@ ENDIF() - + IF(WITH_HDF5) + FIND_PACKAGE(HDF5 PATHS ${HDF5_DIR} PATH_SUFFIXES hdf5 NO_DEFAULT_PATH NO_MODULE) +IF(NOT HDF5_FOUND) VIGRA_FIND_PACKAGE(HDF5) ENDIF() +ENDIF() - + IF(WITH_BOOST_GRAPH) IF(WITH_VIGRANUMPY) @@ -395,3 +398,4 @@ ENDIF() - + MESSAGE( STATUS "---------------------------------------------------------" ) + -diff -urNb vigra-Version-1-10-0/config/VigraConfig.cmake.in Vigra-src/config/VigraConfig.cmake.in +diff -urNb vigra-Version-1-10-0/config/VigraConfig.cmake.in Vigra/config/VigraConfig.cmake.in --- vigra-Version-1-10-0/config/VigraConfig.cmake.in 2013-11-18 17:48:16.000000000 +0100 -+++ Vigra-src/config/VigraConfig.cmake.in 2015-02-24 14:31:03.716528000 +0100 ++++ Vigra/config/VigraConfig.cmake.in 2015-03-03 10:13:57.693725000 +0100 @@ -1,7 +1,9 @@ get_filename_component(SELF_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) get_filename_component(Vigra_TOP_DIR "${SELF_DIR}/../../" ABSOLUTE) - + -include(${SELF_DIR}/vigra-targets.cmake) +if(NOT TARGET vigraimpex) + include(${SELF_DIR}/vigra-targets.cmake) +endif() get_target_property(VIGRA_TYPE vigraimpex TYPE) if(${VIGRA_TYPE} STREQUAL "STATIC_LIBRARY") SET(VIGRA_STATIC_LIB True) @@ -12,3 +14,4 @@ IF(EXISTS ${SELF_DIR}/../vigranumpy/VigranumpyConfig.cmake) INCLUDE(${SELF_DIR}/../vigranumpy/VigranumpyConfig.cmake) ENDIF() + -diff -urNb vigra-Version-1-10-0/include/vigra/random_forest/rf_decisionTree.hxx Vigra-src/include/vigra/random_forest/rf_decisionTree.hxx +diff -urNb vigra-Version-1-10-0/include/vigra/random_forest/rf_common.hxx Vigra/include/vigra/random_forest/rf_common.hxx +--- vigra-Version-1-10-0/include/vigra/random_forest/rf_common.hxx 2013-11-18 17:48:16.000000000 +0100 ++++ Vigra/include/vigra/random_forest/rf_common.hxx 2015-03-03 10:45:19.423056174 +0100 +@@ -558,6 +558,7 @@ + int is_weighted_; // class_weights_ are used + double precision_; // termination criterion for regression loss + int response_size_; ++ int max_tree_depth; + + template + void to_classlabel(int index, T & out) const +@@ -583,7 +584,8 @@ + EQUALS(class_weights_), + EQUALS(is_weighted_), + EQUALS(precision_), +- EQUALS(response_size_) ++ EQUALS(response_size_), ++ EQUALS(max_tree_depth) + { + std::back_insert_iterator > + iter(classes); +@@ -604,7 +606,8 @@ + EQUALS(class_weights_), + EQUALS(is_weighted_), + EQUALS(precision_), +- EQUALS(response_size_) ++ EQUALS(response_size_), ++ EQUALS(max_tree_depth) + { + std::back_insert_iterator > + iter(classes); +@@ -624,7 +627,8 @@ + EQUALS(used_); + EQUALS(is_weighted_); + EQUALS(precision_); +- EQUALS(response_size_) ++ EQUALS(response_size_); ++ EQUALS(max_tree_depth) + class_weights_.clear(); + std::back_insert_iterator > + iter2(class_weights_); +@@ -648,7 +652,8 @@ + EQUALS(used_); + EQUALS(is_weighted_); + EQUALS(precision_); +- EQUALS(response_size_) ++ EQUALS(response_size_); ++ EQUALS(max_tree_depth) + class_weights_.clear(); + std::back_insert_iterator > + iter2(class_weights_); +@@ -677,7 +682,8 @@ + COMPARE(used_); + COMPARE(class_weights_); + COMPARE(classes); +- COMPARE(response_size_) ++ COMPARE(response_size_); ++ COMPARE(max_tree_depth) + #undef COMPARE + return result; + } +@@ -715,6 +721,7 @@ + PULL(used_, int); + PULL(precision_, double); + PULL(response_size_, int); ++ PULL(max_tree_depth, int); + if(is_weighted_) + { + vigra_precondition(end - begin == 10 + 2*class_count_, +@@ -747,6 +754,7 @@ + PUSH(used_); + PUSH(precision_); + PUSH(response_size_); ++ PUSH(max_tree_depth); + if(is_weighted_) + { + std::copy(class_weights_.begin(), +@@ -773,6 +781,7 @@ + PULL(used_, int); + PULL(precision_, double); + PULL(response_size_, int); ++ PULL(max_tree_depth, int); + class_weights_ = in["class_weights_"]; + #undef PUSH + } +@@ -789,6 +798,7 @@ + PUSH(used_); + PUSH(precision_); + PUSH(response_size_); ++ PUSH(max_tree_depth); + in["class_weights_"] = class_weights_; + #undef PUSH + } +@@ -805,7 +815,8 @@ + used_(false), + is_weighted_(false), + precision_(0.0), +- response_size_(1) ++ response_size_(1), ++ max_tree_depth(50) + {} + + +@@ -857,7 +868,7 @@ + is_weighted_ = false; + precision_ = 0.0; + response_size_ = 0; +- ++ max_tree_depth = 50; + } + + bool used() const +diff -urNb vigra-Version-1-10-0/include/vigra/random_forest/rf_decisionTree.hxx Vigra/include/vigra/random_forest/rf_decisionTree.hxx --- vigra-Version-1-10-0/include/vigra/random_forest/rf_decisionTree.hxx 2013-11-18 17:48:16.000000000 +0100 -+++ Vigra-src/include/vigra/random_forest/rf_decisionTree.hxx 2015-01-16 19:16:34.000000000 +0100 ++++ Vigra/include/vigra/random_forest/rf_decisionTree.hxx 2015-03-03 10:49:22.264260358 +0100 @@ -90,6 +90,8 @@ ProblemSpec<> ext_param_; unsigned int classCount_; + std::map m_Parents; + public: /* \brief Create tree with parameters */ @@ -350,6 +352,22 @@ continueLearn(features,labels,stack_entry,split,stop,visitor,randint); } +template < class TRandomForest> +int GetTreeDepthForNode(int nodeIndex, TRandomForest* rf) +{ + int depth = 0; + while (true) + { + if (nodeIndex < 1) + { + break; + } + ++depth; + nodeIndex = rf->m_Parents[nodeIndex]; + } + return depth; +} + template < class U, class C, class U2, class C2, class StackEntry_t, @@ -374,6 +392,11 @@ size_t last_node_pos = 0; StackEntry_t top=stack.back(); + Split_t* splitPointer = &split; + bool isDepthSplitter = true; + + int maximumTreeDepth = splitPointer->GetMaximumTreeDepth(); + while(!stack.empty()) { @@ -392,7 +415,20 @@ //kind of node to make TreeInt NodeID; - if(stop(top)) + bool depthStop = false; + if (isDepthSplitter) + { + int currentDepthLevel; + if (top.leftParent != StackEntry_t::DecisionTreeNoParent) + currentDepthLevel = GetTreeDepthForNode(top.leftParent, this); + else + currentDepthLevel = GetTreeDepthForNode(top.rightParent, this); + + depthStop = (currentDepthLevel >= maximumTreeDepth); + } + if(stop(top) || (depthStop)) + + //if (stop(top) || currentDepthLevel >= MaximumSplitDepth(split)) NodeID = split.makeTerminalNode(features, labels, top, @@ -421,17 +457,20 @@ // Using InteriorNodeBase because exact parameter form not needed. // look at the Node base before getting scared. last_node_pos = topology_.size(); + m_Parents[last_node_pos] = StackEntry_t::DecisionTreeNoParent; if(top.leftParent != StackEntry_t::DecisionTreeNoParent) { NodeBase(topology_, parameters_, top.leftParent).child(0) = last_node_pos; + m_Parents[last_node_pos] = top.leftParent; } else if(top.rightParent != StackEntry_t::DecisionTreeNoParent) { NodeBase(topology_, parameters_, top.rightParent).child(1) = last_node_pos; + m_Parents[last_node_pos] = top.rightParent; } -diff -urNb vigra-Version-1-10-0/include/vigra/random_forest/rf_nodeproxy.hxx Vigra-src/include/vigra/random_forest/rf_nodeproxy.hxx +diff -urNb vigra-Version-1-10-0/include/vigra/random_forest/rf_nodeproxy.hxx Vigra/include/vigra/random_forest/rf_nodeproxy.hxx --- vigra-Version-1-10-0/include/vigra/random_forest/rf_nodeproxy.hxx 2013-11-18 17:48:16.000000000 +0100 -+++ Vigra-src/include/vigra/random_forest/rf_nodeproxy.hxx 2015-01-13 16:56:56.000000000 +0100 ++++ Vigra/include/vigra/random_forest/rf_nodeproxy.hxx 2015-03-03 10:13:57.693725000 +0100 @@ -50,7 +50,11 @@ namespace vigra { - +class DepthSplitterBase +{ +public: + virtual int GetMaximumTreeDepth() const = 0; +}; enum NodeTags { -diff -urNb vigra-Version-1-10-0/include/vigra/random_forest/rf_split.hxx Vigra-src/include/vigra/random_forest/rf_split.hxx +diff -urNb vigra-Version-1-10-0/include/vigra/random_forest/rf_split.hxx Vigra/include/vigra/random_forest/rf_split.hxx --- vigra-Version-1-10-0/include/vigra/random_forest/rf_split.hxx 2013-11-18 17:48:16.000000000 +0100 -+++ Vigra-src/include/vigra/random_forest/rf_split.hxx 2015-01-16 19:11:02.000000000 +0100 ++++ Vigra/include/vigra/random_forest/rf_split.hxx 2015-03-03 10:49:01.164155728 +0100 @@ -108,6 +108,11 @@ \ref SplitBase::findBestSplit() or \ref SplitBase::makeTerminalNode(). **/ + virtual int GetMaximumTreeDepth() + { -+ return 1000000000; ++ return ext_param_.max_tree_depth; + } + template void set_external_parameters(ProblemSpec const & in) { -diff -urNb vigra-Version-1-10-0/include/vigra/random_forest/rf_split-old.hxx Vigra-src/include/vigra/random_forest/rf_split-old.hxx ---- vigra-Version-1-10-0/include/vigra/random_forest/rf_split-old.hxx 1970-01-01 01:00:00.000000000 +0100 -+++ Vigra-src/include/vigra/random_forest/rf_split-old.hxx 2014-03-31 08:46:18.000000000 +0200 -@@ -0,0 +1,1383 @@ -+/************************************************************************/ -+/* */ -+/* Copyright 2008-2009 by Ullrich Koethe and Rahul Nair */ -+/* */ -+/* This file is part of the VIGRA computer vision library. */ -+/* The VIGRA Website is */ -+/* http://hci.iwr.uni-heidelberg.de/vigra/ */ -+/* Please direct questions, bug reports, and contributions to */ -+/* ullrich.koethe@iwr.uni-heidelberg.de or */ -+/* vigra@informatik.uni-hamburg.de */ -+/* */ -+/* Permission is hereby granted, free of charge, to any person */ -+/* obtaining a copy of this software and associated documentation */ -+/* files (the "Software"), to deal in the Software without */ -+/* restriction, including without limitation the rights to use, */ -+/* copy, modify, merge, publish, distribute, sublicense, and/or */ -+/* sell copies of the Software, and to permit persons to whom the */ -+/* Software is furnished to do so, subject to the following */ -+/* conditions: */ -+/* */ -+/* The above copyright notice and this permission notice shall be */ -+/* included in all copies or substantial portions of the */ -+/* Software. */ -+/* */ -+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND */ -+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES */ -+/* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND */ -+/* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT */ -+/* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, */ -+/* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING */ -+/* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR */ -+/* OTHER DEALINGS IN THE SOFTWARE. */ -+/* */ -+/************************************************************************/ -+#ifndef VIGRA_RANDOM_FOREST_SPLIT_HXX -+#define VIGRA_RANDOM_FOREST_SPLIT_HXX -+#include -+#include -+#include -+#include -+#include -+#include "../mathutil.hxx" -+#include "../array_vector.hxx" -+#include "../sized_int.hxx" -+#include "../matrix.hxx" -+#include "../random.hxx" -+#include "../functorexpression.hxx" -+#include "rf_nodeproxy.hxx" -+//#include "rf_sampling.hxx" -+#include "rf_region.hxx" -+//#include "../hokashyap.hxx" -+//#include "vigra/rf_helpers.hxx" -+ -+namespace vigra -+{ -+ -+// Incomplete Class to ensure that findBestSplit is always implemented in -+// the derived classes of SplitBase -+class CompileTimeError; -+ -+ -+namespace detail -+{ -+ template -+ class Normalise -+ { -+ public: -+ template -+ static void exec(Iter begin, Iter end) -+ {} -+ }; -+ -+ template<> -+ class Normalise -+ { -+ public: -+ template -+ static void exec (Iter begin, Iter end) -+ { -+ double bla = std::accumulate(begin, end, 0.0); -+ for(int ii = 0; ii < end - begin; ++ii) -+ begin[ii] = begin[ii]/bla ; -+ } -+ }; -+} -+ -+ -+/** Base Class for all SplitFunctors used with the \ref RandomForest class -+ defines the interface used while learning a tree. -+**/ -+template -+class SplitBase -+{ -+ public: -+ -+ typedef Tag RF_Tag; -+ typedef DT_StackEntry::iterator> -+ StackEntry_t; -+ -+ ProblemSpec<> ext_param_; -+ -+ NodeBase::T_Container_type t_data; -+ NodeBase::P_Container_type p_data; -+ -+ NodeBase node_; -+ -+ /** returns the DecisionTree Node created by -+ \ref SplitBase::findBestSplit() or \ref SplitBase::makeTerminalNode(). -+ **/ -+ -+ template -+ void set_external_parameters(ProblemSpec const & in) -+ { -+ ext_param_ = in; -+ t_data.push_back(in.column_count_); -+ t_data.push_back(in.class_count_); -+ } -+ -+ NodeBase & createNode() -+ { -+ return node_; -+ } -+ -+ int classCount() const -+ { -+ return int(t_data[1]); -+ } -+ -+ int featureCount() const -+ { -+ return int(t_data[0]); -+ } -+ -+ /** resets internal data. Should always be called before -+ calling findBestSplit or makeTerminalNode -+ **/ -+ void reset() -+ { -+ t_data.resize(2); -+ p_data.resize(0); -+ } -+ -+ -+ /** findBestSplit has to be re-implemented in derived split functor. -+ The defaut implementation only insures that a CompileTime error is issued -+ if no such method was defined. -+ **/ -+ -+ template -+ int findBestSplit(MultiArrayView<2, T, C> features, -+ MultiArrayView<2, T2, C2> labels, -+ Region region, -+ ArrayVector childs, -+ Random randint) -+ { -+#ifndef __clang__ -+ // FIXME: This compile-time checking trick does not work for clang. -+ CompileTimeError SplitFunctor__findBestSplit_member_was_not_defined; -+#endif -+ return 0; -+ } -+ -+ /** Default action for creating a terminal Node. -+ sets the Class probability of the remaining region according to -+ the class histogram -+ **/ -+ template -+ int makeTerminalNode(MultiArrayView<2, T, C> features, -+ MultiArrayView<2, T2, C2> labels, -+ Region & region, -+ Random randint) -+ { -+ Node ret(t_data, p_data); -+ node_ = ret; -+ if(ext_param_.class_weights_.size() != region.classCounts().size()) -+ { -+ std::copy(region.classCounts().begin(), -+ region.classCounts().end(), -+ ret.prob_begin()); -+ } -+ else -+ { -+ std::transform(region.classCounts().begin(), -+ region.classCounts().end(), -+ ext_param_.class_weights_.begin(), -+ ret.prob_begin(), std::multiplies()); -+ } -+ detail::Normalise::exec(ret.prob_begin(), ret.prob_end()); -+// std::copy(ret.prob_begin(), ret.prob_end(), std::ostream_iterator(std::cerr, ", " )); -+// std::cerr << std::endl; -+ ret.weights() = region.size(); -+ return e_ConstProbNode; -+ } -+ -+ -+}; -+ -+/** Functor to sort the indices of a feature Matrix by a certain dimension -+**/ -+template -+class SortSamplesByDimensions -+{ -+ DataMatrix const & data_; -+ MultiArrayIndex sortColumn_; -+ double thresVal_; -+ public: -+ -+ SortSamplesByDimensions(DataMatrix const & data, -+ MultiArrayIndex sortColumn, -+ double thresVal = 0.0) -+ : data_(data), -+ sortColumn_(sortColumn), -+ thresVal_(thresVal) -+ {} -+ -+ void setColumn(MultiArrayIndex sortColumn) -+ { -+ sortColumn_ = sortColumn; -+ } -+ void setThreshold(double value) -+ { -+ thresVal_ = value; -+ } -+ -+ bool operator()(MultiArrayIndex l, MultiArrayIndex r) const -+ { -+ return data_(l, sortColumn_) < data_(r, sortColumn_); -+ } -+ bool operator()(MultiArrayIndex l) const -+ { -+ return data_(l, sortColumn_) < thresVal_; -+ } -+}; -+ -+template -+class DimensionNotEqual -+{ -+ DataMatrix const & data_; -+ MultiArrayIndex sortColumn_; -+ -+ public: -+ -+ DimensionNotEqual(DataMatrix const & data, -+ MultiArrayIndex sortColumn) -+ : data_(data), -+ sortColumn_(sortColumn) -+ {} -+ -+ void setColumn(MultiArrayIndex sortColumn) -+ { -+ sortColumn_ = sortColumn; -+ } -+ -+ bool operator()(MultiArrayIndex l, MultiArrayIndex r) const -+ { -+ return data_(l, sortColumn_) != data_(r, sortColumn_); -+ } -+}; -+ -+template -+class SortSamplesByHyperplane -+{ -+ DataMatrix const & data_; -+ Node const & node_; -+ -+ public: -+ -+ SortSamplesByHyperplane(DataMatrix const & data, -+ Node const & node) -+ : -+ data_(data), -+ node_(node) -+ {} -+ -+ /** calculate the distance of a sample point to a hyperplane -+ */ -+ double operator[](MultiArrayIndex l) const -+ { -+ double result_l = -1 * node_.intercept(); -+ for(int ii = 0; ii < node_.columns_size(); ++ii) -+ { -+ result_l += rowVector(data_, l)[node_.columns_begin()[ii]] -+ * node_.weights()[ii]; -+ } -+ return result_l; -+ } -+ -+ bool operator()(MultiArrayIndex l, MultiArrayIndex r) const -+ { -+ return (*this)[l] < (*this)[r]; -+ } -+ -+}; -+ -+/** makes a Class Histogram given indices in a labels_ array -+ * usage: -+ * MultiArrayView<2, T2, C2> labels = makeSomeLabels() -+ * ArrayVector hist(numberOfLabels(labels), 0); -+ * RandomForestClassCounter counter(labels, hist); -+ * -+ * Container indices = getSomeIndices() -+ * std::for_each(indices, counter); -+ */ -+template -+class RandomForestClassCounter -+{ -+ DataSource const & labels_; -+ CountArray & counts_; -+ -+ public: -+ -+ RandomForestClassCounter(DataSource const & labels, -+ CountArray & counts) -+ : labels_(labels), -+ counts_(counts) -+ { -+ reset(); -+ } -+ -+ void reset() -+ { -+ counts_.init(0); -+ } -+ -+ void operator()(MultiArrayIndex l) const -+ { -+ counts_[labels_[l]] +=1; -+ } -+}; -+ -+ -+/** Functor To Calculate the Best possible Split Based on the Gini Index -+ given Labels and Features along a given Axis -+*/ -+ -+namespace detail -+{ -+ template -+ class ConstArr -+ { -+ public: -+ double operator[](size_t) const -+ { -+ return (double)N; -+ } -+ }; -+ -+ -+} -+ -+ -+ -+ -+/** Functor to calculate the entropy based impurity -+ */ -+class EntropyCriterion -+{ -+public: -+ /**calculate the weighted gini impurity based on class histogram -+ * and class weights -+ */ -+ template -+ double operator() (Array const & hist, -+ Array2 const & weights, -+ double total = 1.0) const -+ { -+ return impurity(hist, weights, total); -+ } -+ -+ /** calculate the gini based impurity based on class histogram -+ */ -+ template -+ double operator()(Array const & hist, double total = 1.0) const -+ { -+ return impurity(hist, total); -+ } -+ -+ /** static version of operator(hist total) -+ */ -+ template -+ static double impurity(Array const & hist, double total) -+ { -+ return impurity(hist, detail::ConstArr<1>(), total); -+ } -+ -+ /** static version of operator(hist, weights, total) -+ */ -+ template -+ static double impurity (Array const & hist, -+ Array2 const & weights, -+ double total) -+ { -+ -+ int class_count = hist.size(); -+ double entropy = 0.0; -+ if(class_count == 2) -+ { -+ double p0 = (hist[0]/total); -+ double p1 = (hist[1]/total); -+ entropy = 0 - weights[0]*p0*std::log(p0) - weights[1]*p1*std::log(p1); -+ } -+ else -+ { -+ for(int ii = 0; ii < class_count; ++ii) -+ { -+ double w = weights[ii]; -+ double pii = hist[ii]/total; -+ entropy -= w*( pii*std::log(pii)); -+ } -+ } -+ entropy = total * entropy; -+ return entropy; -+ } -+}; -+ -+/** Functor to calculate the gini impurity -+ */ -+class GiniCriterion -+{ -+public: -+ /**calculate the weighted gini impurity based on class histogram -+ * and class weights -+ */ -+ template -+ double operator() (Array const & hist, -+ Array2 const & weights, -+ double total = 1.0) const -+ { -+ return impurity(hist, weights, total); -+ } -+ -+ /** calculate the gini based impurity based on class histogram -+ */ -+ template -+ double operator()(Array const & hist, double total = 1.0) const -+ { -+ return impurity(hist, total); -+ } -+ -+ /** static version of operator(hist total) -+ */ -+ template -+ static double impurity(Array const & hist, double total) -+ { -+ return impurity(hist, detail::ConstArr<1>(), total); -+ } -+ -+ /** static version of operator(hist, weights, total) -+ */ -+ template -+ static double impurity (Array const & hist, -+ Array2 const & weights, -+ double total) -+ { -+ -+ int class_count = hist.size(); -+ double gini = 0.0; -+ if(class_count == 2) -+ { -+ double w = weights[0] * weights[1]; -+ gini = w * (hist[0] * hist[1] / total); -+ } -+ else -+ { -+ // Gini Impurity is defined as -+ // gini = sum_features-i ( p(i) * (1 - p(i) )) -+ for(int ii = 0; ii < class_count; ++ii) -+ { -+ double w = weights[ii]; -+ gini += w*( hist[ii]*( 1.0 - w * hist[ii]/total ) ); -+ } -+ gini /= total; -+ } -+ return gini; -+ } -+}; -+ -+ -+template -+class ImpurityLoss -+{ -+ -+ DataSource const & labels_; -+ ArrayVector counts_; -+ ArrayVector const class_weights_; -+ double total_counts_; -+ Impurity impurity_; -+ -+ public: -+ -+ template -+ ImpurityLoss(DataSource const & labels, -+ ProblemSpec const & ext_) -+ : labels_(labels), -+ counts_(ext_.class_count_, 0.0), -+ class_weights_(ext_.class_weights_), -+ total_counts_(0.0) -+ {} -+ -+ void reset() -+ { -+ counts_.init(0); -+ total_counts_ = 0.0; -+ } -+ -+ template -+ double increment_histogram(Counts const & counts) -+ { -+ std::transform(counts.begin(), counts.end(), -+ counts_.begin(), counts_.begin(), -+ std::plus()); -+ total_counts_ = std::accumulate( counts_.begin(), -+ counts_.end(), -+ 0.0); -+ return impurity_(counts_, class_weights_, total_counts_); -+ } -+ -+ template -+ double decrement_histogram(Counts const & counts) -+ { -+ std::transform(counts.begin(), counts.end(), -+ counts_.begin(), counts_.begin(), -+ std::minus()); -+ total_counts_ = std::accumulate( counts_.begin(), -+ counts_.end(), -+ 0.0); -+ return impurity_(counts_, class_weights_, total_counts_); -+ } -+ -+ template -+ double increment(Iter begin, Iter end) -+ { -+ for(Iter iter = begin; iter != end; ++iter) -+ { -+ counts_[labels_(*iter, 0)] +=1.0; -+ total_counts_ +=1.0; -+ } -+ return impurity_(counts_, class_weights_, total_counts_); -+ } -+ -+ template -+ double decrement(Iter const & begin, Iter const & end) -+ { -+ for(Iter iter = begin; iter != end; ++iter) -+ { -+ counts_[labels_(*iter,0)] -=1.0; -+ total_counts_ -=1.0; -+ } -+ return impurity_(counts_, class_weights_, total_counts_); -+ } -+ -+ template -+ double init (Iter begin, Iter end, Resp_t resp) -+ { -+ reset(); -+ std::copy(resp.begin(), resp.end(), counts_.begin()); -+ total_counts_ = std::accumulate(counts_.begin(), counts_.end(), 0.0); -+ return impurity_(counts_,class_weights_, total_counts_); -+ } -+ -+ ArrayVector const & response() -+ { -+ return counts_; -+ } -+}; -+ -+ -+ -+ template -+ class RegressionForestCounter -+ { -+ public: -+ typedef MultiArrayShape<2>::type Shp; -+ DataSource const & labels_; -+ ArrayVector mean_; -+ ArrayVector variance_; -+ ArrayVector tmp_; -+ size_t count_; -+ int* end_; -+ -+ template -+ RegressionForestCounter(DataSource const & labels, -+ ProblemSpec const & ext_) -+ : -+ labels_(labels), -+ mean_(ext_.response_size_, 0.0), -+ variance_(ext_.response_size_, 0.0), -+ tmp_(ext_.response_size_), -+ count_(0) -+ {} -+ -+ template -+ double increment (Iter begin, Iter end) -+ { -+ for(Iter iter = begin; iter != end; ++iter) -+ { -+ ++count_; -+ for(unsigned int ii = 0; ii < mean_.size(); ++ii) -+ tmp_[ii] = labels_(*iter, ii) - mean_[ii]; -+ double f = 1.0 / count_, -+ f1 = 1.0 - f; -+ for(unsigned int ii = 0; ii < mean_.size(); ++ii) -+ mean_[ii] += f*tmp_[ii]; -+ for(unsigned int ii = 0; ii < mean_.size(); ++ii) -+ variance_[ii] += f1*sq(tmp_[ii]); -+ } -+ double res = std::accumulate(variance_.begin(), -+ variance_.end(), -+ 0.0, -+ std::plus()); -+ //std::cerr << res << " ) = "; -+ return res; -+ } -+ -+ template //This is BROKEN -+ double decrement (Iter begin, Iter end) -+ { -+ for(Iter iter = begin; iter != end; ++iter) -+ { -+ --count_; -+ } -+ -+ begin = end; -+ end = end + count_; -+ -+ -+ for(unsigned int ii = 0; ii < mean_.size(); ++ii) -+ { -+ mean_[ii] = 0; -+ for(Iter iter = begin; iter != end; ++iter) -+ { -+ mean_[ii] += labels_(*iter, ii); -+ } -+ mean_[ii] /= count_; -+ variance_[ii] = 0; -+ for(Iter iter = begin; iter != end; ++iter) -+ { -+ variance_[ii] += (labels_(*iter, ii) - mean_[ii])*(labels_(*iter, ii) - mean_[ii]); -+ } -+ } -+ double res = std::accumulate(variance_.begin(), -+ variance_.end(), -+ 0.0, -+ std::plus()); -+ //std::cerr << res << " ) = "; -+ return res; -+ } -+ -+ -+ template -+ double init (Iter begin, Iter end, Resp_t resp) -+ { -+ reset(); -+ return this->increment(begin, end); -+ -+ } -+ -+ -+ ArrayVector const & response() -+ { -+ return mean_; -+ } -+ -+ void reset() -+ { -+ mean_.init(0.0); -+ variance_.init(0.0); -+ count_ = 0; -+ } -+ }; -+ -+ -+template -+class RegressionForestCounter2 -+{ -+public: -+ typedef MultiArrayShape<2>::type Shp; -+ DataSource const & labels_; -+ ArrayVector mean_; -+ ArrayVector variance_; -+ ArrayVector tmp_; -+ size_t count_; -+ -+ template -+ RegressionForestCounter2(DataSource const & labels, -+ ProblemSpec const & ext_) -+ : -+ labels_(labels), -+ mean_(ext_.response_size_, 0.0), -+ variance_(ext_.response_size_, 0.0), -+ tmp_(ext_.response_size_), -+ count_(0) -+ {} -+ -+ template -+ double increment (Iter begin, Iter end) -+ { -+ for(Iter iter = begin; iter != end; ++iter) -+ { -+ ++count_; -+ for(int ii = 0; ii < mean_.size(); ++ii) -+ tmp_[ii] = labels_(*iter, ii) - mean_[ii]; -+ double f = 1.0 / count_, -+ f1 = 1.0 - f; -+ for(int ii = 0; ii < mean_.size(); ++ii) -+ mean_[ii] += f*tmp_[ii]; -+ for(int ii = 0; ii < mean_.size(); ++ii) -+ variance_[ii] += f1*sq(tmp_[ii]); -+ } -+ double res = std::accumulate(variance_.begin(), -+ variance_.end(), -+ 0.0, -+ std::plus()) -+ /((count_ == 1)? 1:(count_ -1)); -+ //std::cerr << res << " ) = "; -+ return res; -+ } -+ -+ template //This is BROKEN -+ double decrement (Iter begin, Iter end) -+ { -+ for(Iter iter = begin; iter != end; ++iter) -+ { -+ double f = 1.0 / count_, -+ f1 = 1.0 - f; -+ for(int ii = 0; ii < mean_.size(); ++ii) -+ mean_[ii] = (mean_[ii] - f*labels_(*iter,ii))/(1-f); -+ for(int ii = 0; ii < mean_.size(); ++ii) -+ variance_[ii] -= f1*sq(labels_(*iter,ii) - mean_[ii]); -+ --count_; -+ } -+ double res = std::accumulate(variance_.begin(), -+ variance_.end(), -+ 0.0, -+ std::plus()) -+ /((count_ == 1)? 1:(count_ -1)); -+ //std::cerr << "( " << res << " + "; -+ return res; -+ } -+ /* west's algorithm for incremental variance -+ // calculation -+ template -+ double increment (Iter begin, Iter end) -+ { -+ for(Iter iter = begin; iter != end; ++iter) -+ { -+ ++count_; -+ for(int ii = 0; ii < mean_.size(); ++ii) -+ tmp_[ii] = labels_(*iter, ii) - mean_[ii]; -+ double f = 1.0 / count_, -+ f1 = 1.0 - f; -+ for(int ii = 0; ii < mean_.size(); ++ii) -+ mean_[ii] += f*tmp_[ii]; -+ for(int ii = 0; ii < mean_.size(); ++ii) -+ variance_[ii] += f1*sq(tmp_[ii]); -+ } -+ return std::accumulate(variance_.begin(), -+ variance_.end(), -+ 0.0, -+ std::plus()) -+ /(count_ -1); -+ } -+ -+ template -+ double decrement (Iter begin, Iter end) -+ { -+ for(Iter iter = begin; iter != end; ++iter) -+ { -+ --count_; -+ for(int ii = 0; ii < mean_.size(); ++ii) -+ tmp_[ii] = labels_(*iter, ii) - mean_[ii]; -+ double f = 1.0 / count_, -+ f1 = 1.0 + f; -+ for(int ii = 0; ii < mean_.size(); ++ii) -+ mean_[ii] -= f*tmp_[ii]; -+ for(int ii = 0; ii < mean_.size(); ++ii) -+ variance_[ii] -= f1*sq(tmp_[ii]); -+ } -+ return std::accumulate(variance_.begin(), -+ variance_.end(), -+ 0.0, -+ std::plus()) -+ /(count_ -1); -+ }*/ -+ -+ template -+ double init (Iter begin, Iter end, Resp_t resp) -+ { -+ reset(); -+ return this->increment(begin, end, resp); -+ } -+ -+ -+ ArrayVector const & response() -+ { -+ return mean_; -+ } -+ -+ void reset() -+ { -+ mean_.init(0.0); -+ variance_.init(0.0); -+ count_ = 0; -+ } -+}; -+ -+template -+struct LossTraits; -+ -+struct LSQLoss -+{}; -+ -+template -+struct LossTraits -+{ -+ typedef ImpurityLoss type; -+}; -+ -+template -+struct LossTraits -+{ -+ typedef ImpurityLoss type; -+}; -+ -+template -+struct LossTraits -+{ -+ typedef RegressionForestCounter type; -+}; -+ -+/** Given a column, choose a split that minimizes some loss -+ */ -+template -+class BestGiniOfColumn -+{ -+public: -+ ArrayVector class_weights_; -+ ArrayVector bestCurrentCounts[2]; -+ double min_gini_; -+ std::ptrdiff_t min_index_; -+ double min_threshold_; -+ ProblemSpec<> ext_param_; -+ -+ BestGiniOfColumn() -+ {} -+ -+ template -+ BestGiniOfColumn(ProblemSpec const & ext) -+ : -+ class_weights_(ext.class_weights_), -+ ext_param_(ext) -+ { -+ bestCurrentCounts[0].resize(ext.class_count_); -+ bestCurrentCounts[1].resize(ext.class_count_); -+ } -+ template -+ void set_external_parameters(ProblemSpec const & ext) -+ { -+ class_weights_ = ext.class_weights_; -+ ext_param_ = ext; -+ bestCurrentCounts[0].resize(ext.class_count_); -+ bestCurrentCounts[1].resize(ext.class_count_); -+ } -+ /** calculate the best gini split along a Feature Column -+ * \param column the feature vector - has to support the [] operator -+ * \param labels the label vector -+ * \param begin -+ * \param end (in and out) -+ * begin and end iterators to the indices of the -+ * samples in the current region. -+ * the range begin - end is sorted by the column supplied -+ * during function execution. -+ * \param region_response -+ * ??? -+ * class histogram of the range. -+ * -+ * precondition: begin, end valid range, -+ * class_counts positive integer valued array with the -+ * class counts in the current range. -+ * labels.size() >= max(begin, end); -+ * postcondition: -+ * begin, end sorted by column given. -+ * min_gini_ contains the minimum gini found or -+ * NumericTraits::max if no split was found. -+ * min_index_ contains the splitting index in the range -+ * or invalid data if no split was found. -+ * BestCirremtcounts[0] and [1] contain the -+ * class histogram of the left and right region of -+ * the left and right regions. -+ */ -+ template< class DataSourceF_t, -+ class DataSource_t, -+ class I_Iter, -+ class Array> -+ void operator()(DataSourceF_t const & column, -+ DataSource_t const & labels, -+ I_Iter & begin, -+ I_Iter & end, -+ Array const & region_response) -+ { -+ std::sort(begin, end, -+ SortSamplesByDimensions(column, 0)); -+ typedef typename -+ LossTraits::type LineSearchLoss; -+ LineSearchLoss left(labels, ext_param_); //initialize left and right region -+ LineSearchLoss right(labels, ext_param_); -+ -+ -+ -+ min_gini_ = right.init(begin, end, region_response); -+ min_threshold_ = *begin; -+ min_index_ = 0; //the starting point where to split -+ DimensionNotEqual comp(column, 0); -+ -+ I_Iter iter = begin; -+ I_Iter next = std::adjacent_find(iter, end, comp); -+ //std::cerr << std::distance(begin, end) << std::endl; -+ while( next != end) -+ { -+ double lr = right.decrement(iter, next + 1); -+ double ll = left.increment(iter , next + 1); -+ double loss = lr +ll; -+ //std::cerr <> in; -+ } -+ -+ template -+ double loss_of_region(DataSource_t const & labels, -+ Iter & begin, -+ Iter & end, -+ Array const & region_response) const -+ { -+ typedef typename -+ LossTraits::type LineSearchLoss; -+ LineSearchLoss region_loss(labels, ext_param_); -+ return -+ region_loss.init(begin, end, region_response); -+ } -+ -+}; -+ -+namespace detail -+{ -+ template -+ struct Correction -+ { -+ template -+ static void exec(Region & in, LabelT & labels) -+ {} -+ }; -+ -+ template<> -+ struct Correction -+ { -+ template -+ static void exec(Region & region, LabelT & labels) -+ { -+ if(std::accumulate(region.classCounts().begin(), -+ region.classCounts().end(), 0.0) != region.size()) -+ { -+ RandomForestClassCounter< LabelT, -+ ArrayVector > -+ counter(labels, region.classCounts()); -+ std::for_each( region.begin(), region.end(), counter); -+ region.classCountsIsValid = true; -+ } -+ } -+ }; -+} -+ -+/** Chooses mtry columns and applies ColumnDecisionFunctor to each of the -+ * columns. Then Chooses the column that is best -+ */ -+template -+class ThresholdSplit: public SplitBase -+{ -+ public: -+ -+ -+ typedef SplitBase SB; -+ -+ ArrayVector splitColumns; -+ ColumnDecisionFunctor bgfunc; -+ -+ double region_gini_; -+ ArrayVector min_gini_; -+ ArrayVector min_indices_; -+ ArrayVector min_thresholds_; -+ -+ int bestSplitIndex; -+ -+ double minGini() const -+ { -+ return min_gini_[bestSplitIndex]; -+ } -+ int bestSplitColumn() const -+ { -+ return splitColumns[bestSplitIndex]; -+ } -+ double bestSplitThreshold() const -+ { -+ return min_thresholds_[bestSplitIndex]; -+ } -+ -+ template -+ void set_external_parameters(ProblemSpec const & in) -+ { -+ SB::set_external_parameters(in); -+ bgfunc.set_external_parameters( SB::ext_param_); -+ int featureCount_ = SB::ext_param_.column_count_; -+ splitColumns.resize(featureCount_); -+ for(int k=0; k -+ int findBestSplit(MultiArrayView<2, T, C> features, -+ MultiArrayView<2, T2, C2> labels, -+ Region & region, -+ ArrayVector& childRegions, -+ Random & randint) -+ { -+ -+ typedef typename Region::IndexIterator IndexIterator; -+ if(region.size() == 0) -+ { -+ std::cerr << "SplitFunctor::findBestSplit(): stackentry with 0 examples encountered\n" -+ "continuing learning process...."; -+ } -+ // calculate things that haven't been calculated yet. -+ detail::Correction::exec(region, labels); -+ -+ -+ // Is the region pure already? -+ region_gini_ = bgfunc.loss_of_region(labels, -+ region.begin(), -+ region.end(), -+ region.classCounts()); -+ if(region_gini_ <= SB::ext_param_.precision_) -+ return this->makeTerminalNode(features, labels, region, randint); -+ -+ // select columns to be tried. -+ for(int ii = 0; ii < SB::ext_param_.actual_mtry_; ++ii) -+ std::swap(splitColumns[ii], -+ splitColumns[ii+ randint(features.shape(1) - ii)]); -+ -+ // find the best gini index -+ bestSplitIndex = 0; -+ double current_min_gini = region_gini_; -+ int num2try = features.shape(1); -+ for(int k=0; kmakeTerminalNode(features, labels, region, randint); -+ -+ //create a Node for output -+ Node node(SB::t_data, SB::p_data); -+ SB::node_ = node; -+ node.threshold() = min_thresholds_[bestSplitIndex]; -+ node.column() = splitColumns[bestSplitIndex]; -+ -+ // partition the range according to the best dimension -+ SortSamplesByDimensions > -+ sorter(features, node.column(), node.threshold()); -+ IndexIterator bestSplit = -+ std::partition(region.begin(), region.end(), sorter); -+ // Save the ranges of the child stack entries. -+ childRegions[0].setRange( region.begin() , bestSplit ); -+ childRegions[0].rule = region.rule; -+ childRegions[0].rule.push_back(std::make_pair(1, 1.0)); -+ childRegions[1].setRange( bestSplit , region.end() ); -+ childRegions[1].rule = region.rule; -+ childRegions[1].rule.push_back(std::make_pair(1, 1.0)); -+ -+ return i_ThresholdNode; -+ } -+}; -+ -+typedef ThresholdSplit > GiniSplit; -+typedef ThresholdSplit > EntropySplit; -+typedef ThresholdSplit, RegressionTag> RegressionSplit; -+ -+namespace rf -+{ -+ -+/** This namespace contains additional Splitfunctors. -+ * -+ * The Split functor classes are designed in a modular fashion because new split functors may -+ * share a lot of code with existing ones. -+ * -+ * ThresholdSplit implements the functionality needed for any split functor, that makes its -+ * decision via one dimensional axis-parallel cuts. The Template parameter defines how the split -+ * along one dimension is chosen. -+ * -+ * The BestGiniOfColumn class chooses a split that minimizes one of the Loss functions supplied -+ * (GiniCriterion for classification and LSQLoss for regression). Median chooses the Split in a -+ * kD tree fashion. -+ * -+ * -+ * Currently defined typedefs: -+ * \code -+ * typedef ThresholdSplit > GiniSplit; -+ * typedef ThresholdSplit, RegressionTag> RegressionSplit; -+ * typedef ThresholdSplit MedianSplit; -+ * \endcode -+ */ -+namespace split -+{ -+ -+/** This Functor chooses the median value of a column -+ */ -+class Median -+{ -+public: -+ -+ typedef GiniCriterion LineSearchLossTag; -+ ArrayVector class_weights_; -+ ArrayVector bestCurrentCounts[2]; -+ double min_gini_; -+ std::ptrdiff_t min_index_; -+ double min_threshold_; -+ ProblemSpec<> ext_param_; -+ -+ Median() -+ {} -+ -+ template -+ Median(ProblemSpec const & ext) -+ : -+ class_weights_(ext.class_weights_), -+ ext_param_(ext) -+ { -+ bestCurrentCounts[0].resize(ext.class_count_); -+ bestCurrentCounts[1].resize(ext.class_count_); -+ } -+ -+ template -+ void set_external_parameters(ProblemSpec const & ext) -+ { -+ class_weights_ = ext.class_weights_; -+ ext_param_ = ext; -+ bestCurrentCounts[0].resize(ext.class_count_); -+ bestCurrentCounts[1].resize(ext.class_count_); -+ } -+ -+ template< class DataSourceF_t, -+ class DataSource_t, -+ class I_Iter, -+ class Array> -+ void operator()(DataSourceF_t const & column, -+ DataSource_t const & labels, -+ I_Iter & begin, -+ I_Iter & end, -+ Array const & region_response) -+ { -+ std::sort(begin, end, -+ SortSamplesByDimensions(column, 0)); -+ typedef typename -+ LossTraits::type LineSearchLoss; -+ LineSearchLoss left(labels, ext_param_); -+ LineSearchLoss right(labels, ext_param_); -+ right.init(begin, end, region_response); -+ -+ min_gini_ = NumericTraits::max(); -+ min_index_ = floor(double(end - begin)/2.0); -+ min_threshold_ = column[*(begin + min_index_)]; -+ SortSamplesByDimensions -+ sorter(column, 0, min_threshold_); -+ I_Iter part = std::partition(begin, end, sorter); -+ DimensionNotEqual comp(column, 0); -+ if(part == begin) -+ { -+ part= std::adjacent_find(part, end, comp)+1; -+ -+ } -+ if(part >= end) -+ { -+ return; -+ } -+ else -+ { -+ min_threshold_ = column[*part]; -+ } -+ min_gini_ = right.decrement(begin, part) -+ + left.increment(begin , part); -+ -+ bestCurrentCounts[0] = left.response(); -+ bestCurrentCounts[1] = right.response(); -+ -+ min_index_ = part - begin; -+ } -+ -+ template -+ double loss_of_region(DataSource_t const & labels, -+ Iter & begin, -+ Iter & end, -+ Array const & region_response) const -+ { -+ typedef typename -+ LossTraits::type LineSearchLoss; -+ LineSearchLoss region_loss(labels, ext_param_); -+ return -+ region_loss.init(begin, end, region_response); -+ } -+ -+}; -+ -+typedef ThresholdSplit MedianSplit; -+ -+ -+/** This Functor chooses a random value of a column -+ */ -+class RandomSplitOfColumn -+{ -+public: -+ -+ typedef GiniCriterion LineSearchLossTag; -+ ArrayVector class_weights_; -+ ArrayVector bestCurrentCounts[2]; -+ double min_gini_; -+ std::ptrdiff_t min_index_; -+ double min_threshold_; -+ ProblemSpec<> ext_param_; -+ typedef RandomMT19937 Random_t; -+ Random_t random; -+ -+ RandomSplitOfColumn() -+ {} -+ -+ template -+ RandomSplitOfColumn(ProblemSpec const & ext) -+ : -+ class_weights_(ext.class_weights_), -+ ext_param_(ext), -+ random(RandomSeed) -+ { -+ bestCurrentCounts[0].resize(ext.class_count_); -+ bestCurrentCounts[1].resize(ext.class_count_); -+ } -+ -+ template -+ RandomSplitOfColumn(ProblemSpec const & ext, Random_t & random_) -+ : -+ class_weights_(ext.class_weights_), -+ ext_param_(ext), -+ random(random_) -+ { -+ bestCurrentCounts[0].resize(ext.class_count_); -+ bestCurrentCounts[1].resize(ext.class_count_); -+ } -+ -+ template -+ void set_external_parameters(ProblemSpec const & ext) -+ { -+ class_weights_ = ext.class_weights_; -+ ext_param_ = ext; -+ bestCurrentCounts[0].resize(ext.class_count_); -+ bestCurrentCounts[1].resize(ext.class_count_); -+ } -+ -+ template< class DataSourceF_t, -+ class DataSource_t, -+ class I_Iter, -+ class Array> -+ void operator()(DataSourceF_t const & column, -+ DataSource_t const & labels, -+ I_Iter & begin, -+ I_Iter & end, -+ Array const & region_response) -+ { -+ std::sort(begin, end, -+ SortSamplesByDimensions(column, 0)); -+ typedef typename -+ LossTraits::type LineSearchLoss; -+ LineSearchLoss left(labels, ext_param_); -+ LineSearchLoss right(labels, ext_param_); -+ right.init(begin, end, region_response); -+ -+ -+ min_gini_ = NumericTraits::max(); -+ int tmp_pt = random.uniformInt(std::distance(begin, end)); -+ min_index_ = tmp_pt; -+ min_threshold_ = column[*(begin + min_index_)]; -+ SortSamplesByDimensions -+ sorter(column, 0, min_threshold_); -+ I_Iter part = std::partition(begin, end, sorter); -+ DimensionNotEqual comp(column, 0); -+ if(part == begin) -+ { -+ part= std::adjacent_find(part, end, comp)+1; -+ -+ } -+ if(part >= end) -+ { -+ return; -+ } -+ else -+ { -+ min_threshold_ = column[*part]; -+ } -+ min_gini_ = right.decrement(begin, part) -+ + left.increment(begin , part); -+ -+ bestCurrentCounts[0] = left.response(); -+ bestCurrentCounts[1] = right.response(); -+ -+ min_index_ = part - begin; -+ } -+ -+ template -+ double loss_of_region(DataSource_t const & labels, -+ Iter & begin, -+ Iter & end, -+ Array const & region_response) const -+ { -+ typedef typename -+ LossTraits::type LineSearchLoss; -+ LineSearchLoss region_loss(labels, ext_param_); -+ return -+ region_loss.init(begin, end, region_response); -+ } -+ -+}; -+ -+typedef ThresholdSplit RandomSplit; -+} -+} -+ -+ -+} //namespace vigra -+#endif // VIGRA_RANDOM_FOREST_SPLIT_HXX -diff -urNb vigra-Version-1-10-0/include/vigra/random_forest.hxx Vigra-src/include/vigra/random_forest.hxx +diff -urNb vigra-Version-1-10-0/include/vigra/random_forest.hxx Vigra/include/vigra/random_forest.hxx --- vigra-Version-1-10-0/include/vigra/random_forest.hxx 2013-11-18 17:48:16.000000000 +0100 -+++ Vigra-src/include/vigra/random_forest.hxx 2014-01-23 13:23:00.000000000 +0100 ++++ Vigra/include/vigra/random_forest.hxx 2015-03-03 10:13:57.693725000 +0100 @@ -584,6 +584,7 @@ { vigra_precondition(features.shape(0) == labels.shape(0), "RandomForest::predictLabels(): Label array has wrong size."); +#pragma omp parallel for for(int k=0; k currentRow(rowVector(features, row));