diff --git a/Modules/FiberDissection/MachineLearning/mitkStreamlineFeatureExtractor.cpp b/Modules/FiberDissection/MachineLearning/mitkStreamlineFeatureExtractor.cpp index c92d6c3..8cd76ba 100644 --- a/Modules/FiberDissection/MachineLearning/mitkStreamlineFeatureExtractor.cpp +++ b/Modules/FiberDissection/MachineLearning/mitkStreamlineFeatureExtractor.cpp @@ -1,920 +1,935 @@ /*=================================================================== The Medical Imaging Interaction Toolkit (MITK) Copyright (c) German Cancer Research Center. All rights reserved. This software is distributed WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See LICENSE.txt or http://www.mitk.org for details. ===================================================================*/ #include "mitkStreamlineFeatureExtractor.h" #define _USE_MATH_DEFINES #include #include #include #include namespace mitk{ StreamlineFeatureExtractor::StreamlineFeatureExtractor() : m_NumPoints(40) { } StreamlineFeatureExtractor::~StreamlineFeatureExtractor() { } void StreamlineFeatureExtractor::SetTractogramPrediction(const mitk::FiberBundle::Pointer &TractogramPrediction) { m_TractogramPrediction= TractogramPrediction; } void StreamlineFeatureExtractor::SetTractogramGroundtruth(const mitk::FiberBundle::Pointer &TractogramGroundtruth) { m_TractogramGroundtruth= TractogramGroundtruth; } void StreamlineFeatureExtractor::SetTractogramPlus(const mitk::FiberBundle::Pointer &TractogramPlus) { m_TractogramPlus = TractogramPlus; } void StreamlineFeatureExtractor::SetTractogramMinus(const mitk::FiberBundle::Pointer &TractogramMinus) { m_TractogramMinus = TractogramMinus; } void StreamlineFeatureExtractor::SetTractogramPrototypes(const mitk::FiberBundle::Pointer &TractogramPrototypes, bool standard) { if (standard) { MITK_INFO << "Use Standard Prototypes..."; m_inputPrototypes = mitk::IOUtil::Load("/home/r948e/E132-Projekte/Projects/2022_Peretzke_Interactive_Fiber_Dissection/mitk_diff/prototypes_599671_40.trk"); } else { MITK_INFO << "Use individual Prototypes..."; m_inputPrototypes = TractogramPrototypes; } } void StreamlineFeatureExtractor::SetActiveCycle(int &activeCycle) { m_activeCycle= activeCycle; } void StreamlineFeatureExtractor::SetTractogramTest(const mitk::FiberBundle::Pointer &TractogramTest, std::string TractogramTestName) { MITK_INFO << TractogramTestName; m_TractogramTest= TractogramTest; } std::vector > StreamlineFeatureExtractor::ResampleFibers(mitk::FiberBundle::Pointer tractogram) { MITK_INFO << "Infunction"; // mitk::FiberBundle::Pointer temp_fib = tractogram->GetDeepCopy(); // temp_fib->ResampleToNumPoints(m_NumPoints); MITK_INFO << "Resampling Done"; std::vector< vnl_matrix > out_fib(tractogram->GetFiberPolyData()->GetNumberOfCells()); //#pragma omp parallel for for (int i=0; iGetFiberPolyData()->GetNumberOfCells(); i++) { vtkCell* cell = tractogram->GetFiberPolyData()->GetCell(i); int numPoints = cell->GetNumberOfPoints(); vtkPoints* points = cell->GetPoints(); vnl_matrix streamline; streamline.set_size(3, m_NumPoints); streamline.fill(0.0); for (int j=0; jGetPoint(j, cand); vnl_vector_fixed< float, 3 > candV; candV[0]=cand[0]; candV[1]=cand[1]; candV[2]=cand[2]; streamline.set_column(j, candV); } out_fib.at(i)=streamline; } return out_fib; } std::vector > StreamlineFeatureExtractor::CalculateDmdf(std::vector > tractogram, std::vector > prototypes) { +// Initialize a vector to store distance matrices for each tractogram item +std::vector> dist_vec(tractogram.size()); - std::vector< vnl_matrix > dist_vec(tractogram.size());// - MITK_INFO << "Start Calculating Dmdf"; - +// Loop over each tractogram item #pragma omp parallel for - for (unsigned int i=0; i distances(1, 2 * prototypes.size(), 0.0); + + // Loop over each prototype + for (std::size_t j = 0; j < prototypes.size(); ++j) { - vnl_matrix distances; - distances.set_size(1, prototypes.size()); - distances.fill(0.0); - for (unsigned int j=0; j single_distances(1, tractogram[0].cols(), 0.0); + vnl_matrix single_distances_flip(1, tractogram[0].cols(), 0.0); + vnl_matrix single_end_distances(1, 2, 0.0); + vnl_matrix single_end_distances_flip(1, 2, 0.0); + + // Loop over each point in the current tractogram item + for (std::size_t ik = 0; ik < tractogram[0].cols(); ++ik) { - vnl_matrix single_distances; - single_distances.set_size(1, tractogram.at(0).cols()); - single_distances.fill(0.0); - vnl_matrix single_distances_flip; - single_distances_flip.set_size(1, tractogram.at(0).cols()); - single_distances_flip.fill(0.0); - for (unsigned int ik=0; ik single_distances.mean()) + else if (ik == tractogram[0].cols() - 1) { - distances.put(0,j, single_distances.mean()); + single_end_distances(0, 1) = cur_dist; + single_end_distances_flip(0, 1) = cur_dist_flip; } - else { - distances.put(0,j, single_distances_flip.mean()); - } - } - dist_vec.at(i) = distances; - } + } + + // Calculate the mean distance between the current tractogram item and the current prototype + const double mean_single_distances = single_distances.mean(); + const double mean_single_distances_flip = single_distances_flip.mean(); + distances(0, j) = mean_single_distances_flip > mean_single_distances ? mean_single_distances : mean_single_distances_flip; + + // // Calculate the mean END distance between the current tractogram item and the current prototype + const double mean_single_end_distances = single_end_distances.mean(); + const double mean_single_end_distances_flip = single_end_distances_flip.mean(); + const std::size_t end_distances_index = prototypes.size() + j; + distances(0, end_distances_index) = mean_single_end_distances_flip > mean_single_end_distances ? mean_single_distances : mean_single_end_distances_flip; + } + + dist_vec[i] = distances; + } - MITK_INFO << "Done Calculation"; - MITK_INFO << dist_vec.at(0).size(); + MITK_INFO << "The size of the distances is " < > StreamlineFeatureExtractor::MergeTractogram(std::vector > prototypes, std::vector > positive_local_prototypes, std::vector > negative_local_prototypes) { unsigned int pos_locals; unsigned int neg_locals; if (positive_local_prototypes.size() >= 50) { pos_locals= 50; } else { pos_locals= positive_local_prototypes.size(); } if (negative_local_prototypes.size() >= 50) { neg_locals = 50; } else { neg_locals= negative_local_prototypes.size(); } std::vector< vnl_matrix > merged_prototypes; for (unsigned int k=0; k StreamlineFeatureExtractor::Sort(std::vector sortingVector, int lengths, int start) { std::vector index; std::priority_queue> q; for (unsigned int i = 0; i < sortingVector.size(); ++i) { q.push(std::pair(sortingVector[i], i)); } for (int i = 0; i < lengths; ++i) { int ki = q.top().second; if (i>=start) { index.push_back(ki); } q.pop(); } return index; } std::vector> StreamlineFeatureExtractor::GetData() { MITK_INFO << "Start Function Get Data"; /*Vector which saves Prediction and Fibers to label based on uncertainty*/ std::vector> index_vec; cv::Mat data; cv::Mat labels_arr_vec; int size_plus = 0; /*Create Trainingdata: Go through positive and negative Bundle and save distances as cv::Mat and create vector with labels*/ for ( unsigned int i=0; i seeds; for (int cont = 0; cont < labels_arr_vec.rows; cont++) { seeds.push_back(cont); } cv::randShuffle(seeds); cv::Mat labels_shuffled; cv::Mat samples_shuffled; for (int cont = 0; cont < labels_arr_vec.rows; cont++) { labels_shuffled.push_back(labels_arr_vec.row(seeds[cont])); } for (int cont = 0; cont < labels_arr_vec.rows; cont++) { samples_shuffled.push_back(data.row(seeds[cont])); } /*Create Dataset and initialize Classifier*/ cv::Ptr m_traindata = cv::ml::TrainData::create(samples_shuffled, cv::ml::ROW_SAMPLE, labels_shuffled); statistic_model= cv::ml::RTrees::create(); auto criteria = cv::TermCriteria(); criteria.type = cv::TermCriteria::MAX_ITER; // criteria.epsilon = 1e-8; criteria.maxCount = 300; statistic_model->setMaxDepth(10); //set to three // statistic_model->setMinSampleCount(m_traindata->getNTrainSamples()*0.01); statistic_model->setMinSampleCount(2); statistic_model->setTruncatePrunedTree(false); statistic_model->setUse1SERule(false); statistic_model->setUseSurrogates(false); statistic_model->setTermCriteria(criteria); statistic_model->setCVFolds(1); statistic_model->setPriors(newweight); /*Train Classifier*/ MITK_INFO << "Start Training"; statistic_model->train(m_traindata); /*Predict on Test Data*/ MITK_INFO << "Predicting"; /*Create Dataset as cv::Mat*/ cv::Mat dataTest; for ( unsigned int i=0; i indexPrediction; std::vector e(m_DistancesTest.size()); std::vector pred(m_DistancesTest.size()); /*For every Sample/Streamline get Prediction and entropy (=based on counts of Random Forest)*/ MITK_INFO << "Predicting on all cores"; #pragma omp parallel for for (unsigned int i=0; ipredict(dataTest.row(i)); pred.at(i)=val; #pragma omp critical if (val==1) { indexPrediction.push_back(i); } cv::Mat vote; statistic_model->getVotes(dataTest.row(i), vote, 0); e.at(i) = ( -(vote.at(1,0)*1.0)/ (vote.at(1,0)+vote.at(1,1)) * log2((vote.at(1,0)*1.0)/ (vote.at(1,0)+vote.at(1,1))) - (vote.at(1,1)*1.0)/ (vote.at(1,0)+vote.at(1,1))* log2((vote.at(1,1)*1.0)/ (vote.at(1,0)+vote.at(1,1)))); if (isnan(e.at(i))) { e.at(i)=0; } } MITK_INFO << "Done"; MITK_INFO << "--------------"; MITK_INFO << "Prediction vector size:"; MITK_INFO << indexPrediction.size(); MITK_INFO << "Entropy vector size:"; entropy_vector = e; MITK_INFO << e.size(); MITK_INFO << "--------------"; /*Get index of most unertain data (lengths defines how many data is saved)*/ // int lengths=500; int lengths = std::count_if(e.begin(), e.end(),[&](auto const& val){ return val >= 0.95; }); if (lengths>1500) { lengths=1500; } int lengthsCertain = std::count_if(e.begin(), e.end(),[&](auto const& val){ return val < 0.1; }); std::vector indexUnc = Sort(e, lengths, 0); std::vector indexCertain = Sort(e, e.size() , e.size()-lengthsCertain ); // std::vector indexCertainBetween = Sort(e, e.size()-lengthsCertain , lengths); MITK_INFO << "Index Certainty Vector size"; MITK_INFO << indexCertain.size(); std::vector indexCertainNeg; std::vector indexCertainPos; for (unsigned int i=0; i=0; --i) // std::vector indexCertainBetweenNeg; // std::vector indexCertainBetweenPos; // for (unsigned int i=0; i distances_matrix; // distances_matrix.set_size(lengths, lengths); // distances_matrix.fill(0.0); // std::vector distances_matrix_mean; // for (int i=0; i diff = m_DistancesTest.at(indexUnc.at(i)) - m_DistancesTest.at(indexUnc.at(k)); // /*Into the eucledean difference matrix, put the distance in Feature Space between every sample pair*/ // distances_matrix.put(i,k,diff.absolute_value_sum()/m_DistancesTest.at(0).size()); // } // /*For every Sample/Streamline get the mean eucledean distance to all other Samples => one value for every Sample*/ //// distances_matrix_mean.push_back(distances_matrix.get_row(i).mean()); //// MITK_INFO << meanval.at(i); // } // /*Index to find values in distancematrix*/ // std::vector myidx; // /*Index to find actual streamlines using indexUnc*/ // std::vector indexUncDist; // /*Start with the Streamline of the highest entropy, which is in distance_matrix at idx 0*/ // myidx.push_back(0); // indexUncDist.push_back(indexUnc.at(myidx.at(0))); // /*Vecotr that stores minvalues of current iteration*/ // vnl_matrix cur_vec; // cur_vec.set_size(1,lengths); // cur_vec.fill(0.0); // for (int i=0; i sum_matrix; // sum_matrix.set_size(myidx.size(), lengths); // sum_matrix.fill(0); // for (unsigned int ii=0; ii> StreamlineFeatureExtractor::GetDistanceData(float &value) { /*Vector which saves Fibers to be labeled based on fft subset uncertainty*/ std::vector> index_vec; /*Get index of most unertain data (lengths defines how many data is saved)*/ // int lengths=500; MITK_INFO << entropy_vector.size(); int lengths = std::count_if(entropy_vector.begin(), entropy_vector.end(),[&](auto const& val){ return val >= value; }); if (lengths>1500) { lengths=1500; } MITK_INFO << lengths; /*Maybe shuffling of length so not the most uncertain values are chosen*/ std::vector indexUnc = Sort(entropy_vector, lengths, 0); vnl_matrix distances_matrix; distances_matrix.set_size(lengths, lengths); distances_matrix.fill(0.0); std::vector distances_matrix_mean; for (int i=0; i diff = m_DistancesTest.at(indexUnc.at(i)) - m_DistancesTest.at(indexUnc.at(k)); /*Into the eucledean difference matrix, put the distance in Feature Space between every sample pair*/ distances_matrix.put(i,k,diff.absolute_value_sum()/m_DistancesTest.at(0).size()); } /*For every Sample/Streamline get the mean eucledean distance to all other Samples => one value for every Sample*/ // distances_matrix_mean.push_back(distances_matrix.get_row(i).mean()); // MITK_INFO << meanval.at(i); } MITK_INFO << "Distance Matrix Calculated"; /*Index to find values in distancematrix*/ std::vector myidx; /*Index to find actual streamlines using indexUnc*/ std::vector indexUncDist; /*Start with the Streamline of the highest entropy, which is in distance_matrix at idx 0*/ myidx.push_back(0); indexUncDist.push_back(indexUnc.at(myidx.at(0))); /*Vecotr that stores minvalues of current iteration*/ vnl_matrix cur_vec; cur_vec.set_size(1,lengths); cur_vec.fill(0.0); for (int i=0; i sum_matrix; sum_matrix.set_size(myidx.size(), lengths); sum_matrix.fill(0); for (unsigned int ii=0; ii &index, bool removefrompool) { mitk::FiberBundle::Pointer Prediction; MITK_INFO << "Create Bundle"; vtkSmartPointer FibersData; FibersData = vtkSmartPointer::New(); FibersData->SetPoints(vtkSmartPointer::New()); FibersData->SetLines(vtkSmartPointer::New()); vtkSmartPointer vNewPolyData = vtkSmartPointer::New(); vtkSmartPointer vNewLines = vtkSmartPointer::New(); vtkSmartPointer vNewPoints = vtkSmartPointer::New(); // vtkSmartPointer weights = vtkSmartPointer::New(); // weights->SetNumberOfValues(this->GetNumFibers()+fib->GetNumFibers()); unsigned int indexSize = index.size(); unsigned int counter = 0; MITK_INFO << "Start Loop"; for (unsigned int i=0; iGetFiberPolyData()->GetCell(index[i]); auto numPoints = cell->GetNumberOfPoints(); vtkPoints* points = cell->GetPoints(); vtkSmartPointer container = vtkSmartPointer::New(); for (unsigned int j=0; jGetPoint(j, p); vtkIdType id = vNewPoints->InsertNextPoint(p); container->GetPointIds()->InsertNextId(id); } // weights->InsertValue(counter, fib->GetFiberWeight(i)); vNewLines->InsertNextCell(container); counter++; } if (removefrompool) { for (unsigned int i=0; iGetFiberPolyData()->DeleteCell(index[i]); } } MITK_INFO << "Counter"; MITK_INFO << counter; vNewPolyData->SetLines(vNewLines); vNewPolyData->SetPoints(vNewPoints); FibersData = vtkSmartPointer::New(); FibersData->SetPoints(vtkSmartPointer::New()); FibersData->SetLines(vtkSmartPointer::New()); FibersData->SetPoints(vNewPoints); FibersData->SetLines(vNewLines); Prediction = mitk::FiberBundle::New(vNewPolyData); // Bundle->SetFiberColors(255, 255, 255); MITK_INFO << "Cells Prediciton"; MITK_INFO << Prediction->GetFiberPolyData()->GetNumberOfCells(); MITK_INFO << "Cells Tractorgram"; MITK_INFO << m_TractogramTest->GetFiberPolyData()->GetNumberOfCells(); return Prediction; } std::vector StreamlineFeatureExtractor::CreateLabels(std::vector > Testdata, std::vector > Prediction) { // vnl_vector labels; // vnl_vector.set_size(Testdata.size()); // vnl_vector.fill(0); std::vector labels(Testdata.size(), 0); #pragma omp parallel for for (unsigned int i=0; i > T_Prototypes; std::vector > T_TractogramPlus; std::vector > T_TractogramMinus; std::vector > T_TractogramTest; std::vector > T_mergedPrototypes; MITK_INFO << "Resample Input Prototypes"; T_Prototypes = ResampleFibers(m_inputPrototypes); MITK_INFO << "Resample Input Tractogram Minus"; T_TractogramMinus= ResampleFibers(m_TractogramMinus); MITK_INFO << "Resample Input Tractogram Plus"; T_TractogramPlus= ResampleFibers(m_TractogramPlus); /* Merge T_Prototypes, T_TractogramMinus and T_TractogramPlus for extra Features*/ MITK_INFO << "Merging Prototypes"; T_mergedPrototypes = MergeTractogram(T_Prototypes, T_TractogramPlus, T_TractogramMinus); MITK_INFO << "Calculate Features"; MITK_INFO << "Calculate Minus Features"; m_DistancesMinus = CalculateDmdf(T_TractogramMinus, T_mergedPrototypes); MITK_INFO << "Calculate Plus Features"; m_DistancesPlus = CalculateDmdf(T_TractogramPlus, T_mergedPrototypes); MITK_INFO << "Resample Test Data"; T_TractogramTest= ResampleFibers(m_TractogramTest); MITK_INFO << "Calculate Features of Test Data"; m_DistancesTest= CalculateDmdf(T_TractogramTest, T_mergedPrototypes); MITK_INFO << "Done with Datacreation"; m_index =GetData(); } vnl_vector StreamlineFeatureExtractor::ValidationPipe() { std::vector > T_Prototypes; std::vector > T_TractogramPrediction; std::vector > T_TractogramGroundtruth; std::vector > T_TractogramTest; std::vector > DistancesPrediction; std::vector > DistancesGroundtruth; std::vector > DistancesTest; std::vector LabelsPrediction; std::vector LabelsGroundtruth; MITK_INFO << "Start Resampling"; T_Prototypes = ResampleFibers(m_inputPrototypes); T_TractogramPrediction= ResampleFibers(m_TractogramPrediction); T_TractogramGroundtruth= ResampleFibers(m_TractogramGroundtruth); T_TractogramTest= ResampleFibers(m_TractogramTest); MITK_INFO << "Calculate Features"; DistancesPrediction = CalculateDmdf(T_TractogramPrediction, T_Prototypes); DistancesGroundtruth = CalculateDmdf(T_TractogramGroundtruth, T_Prototypes); DistancesTest = CalculateDmdf(T_TractogramTest, T_Prototypes); LabelsGroundtruth = CreateLabels(DistancesTest, DistancesGroundtruth); LabelsPrediction = CreateLabels(DistancesTest, DistancesPrediction); float FP = 0.0; float FN = 0.0; float TP = 0.0; float TN = 0.0; std::vector indexfp; //#pragma omp parallel for for (unsigned int i=0; i metrics(7); metrics.put(0, TP); metrics.put(1, FP); metrics.put(2, TN); metrics.put(3, FN); metrics.put(4, Precision); metrics.put(5, Recall); metrics.put(6, F1_score); return metrics; } }