diff --git a/Modules/FiberDissection/MachineLearning/mitkStreamlineFeatureExtractor.cpp b/Modules/FiberDissection/MachineLearning/mitkStreamlineFeatureExtractor.cpp index 596c843..9e61d68 100644 --- a/Modules/FiberDissection/MachineLearning/mitkStreamlineFeatureExtractor.cpp +++ b/Modules/FiberDissection/MachineLearning/mitkStreamlineFeatureExtractor.cpp @@ -1,922 +1,922 @@ /*=================================================================== The Medical Imaging Interaction Toolkit (MITK) Copyright (c) German Cancer Research Center. All rights reserved. This software is distributed WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See LICENSE.txt or http://www.mitk.org for details. ===================================================================*/ #include "mitkStreamlineFeatureExtractor.h" #define _USE_MATH_DEFINES #include #include #include #include namespace mitk{ StreamlineFeatureExtractor::StreamlineFeatureExtractor() : m_NumPoints(40) { } StreamlineFeatureExtractor::~StreamlineFeatureExtractor() { } void StreamlineFeatureExtractor::SetTractogramPrediction(const mitk::FiberBundle::Pointer &TractogramPrediction) { m_TractogramPrediction= TractogramPrediction; } void StreamlineFeatureExtractor::SetTractogramGroundtruth(const mitk::FiberBundle::Pointer &TractogramGroundtruth) { m_TractogramGroundtruth= TractogramGroundtruth; } void StreamlineFeatureExtractor::SetTractogramPlus(const mitk::FiberBundle::Pointer &TractogramPlus) { m_TractogramPlus = TractogramPlus; } void StreamlineFeatureExtractor::SetTractogramMinus(const mitk::FiberBundle::Pointer &TractogramMinus) { m_TractogramMinus = TractogramMinus; } void StreamlineFeatureExtractor::SetTractogramPrototypes(const mitk::FiberBundle::Pointer &TractogramPrototypes, bool standard) { if (standard) { MITK_INFO << "Use Standard Prototypes..."; m_inputPrototypes = mitk::IOUtil::Load("/home/r948e/E132-Projekte/Projects/2022_Peretzke_Interactive_Fiber_Dissection/mitk_diff/prototypes_599671_40.trk"); } else { MITK_INFO << "Use individual Prototypes..."; m_inputPrototypes = TractogramPrototypes; } } void StreamlineFeatureExtractor::SetActiveCycle(int &activeCycle) { m_activeCycle= activeCycle; } void StreamlineFeatureExtractor::SetInitRandom(int &initRandom) { m_initRandom= initRandom; } void StreamlineFeatureExtractor::SetTractogramTest(const mitk::FiberBundle::Pointer &TractogramTest, std::string TractogramTestName) { std::string path = "/home/r948e/E132-Projekte/Projects/2022_Peretzke_Interactive_Fiber_Dissection/mitk_diff/storage/"; path.append(TractogramTestName); m_TractogramTest= TractogramTest; auto s = std::to_string(m_NumPoints); m_DistancesTestName= path.append("_distances" + std::to_string(m_NumPoints) + "_" + std::to_string(m_activeCycle) + ".csv"); } std::vector > StreamlineFeatureExtractor::ResampleFibers(mitk::FiberBundle::Pointer tractogram) { MITK_INFO << "Infunction"; mitk::FiberBundle::Pointer temp_fib = tractogram->GetDeepCopy(); temp_fib->ResampleToNumPoints(m_NumPoints); MITK_INFO << "Resampling Done"; std::vector< vnl_matrix > out_fib(temp_fib->GetFiberPolyData()->GetNumberOfCells()); for (int i=0; iGetFiberPolyData()->GetNumberOfCells(); i++) { vtkCell* cell = temp_fib->GetFiberPolyData()->GetCell(i); int numPoints = cell->GetNumberOfPoints(); vtkPoints* points = cell->GetPoints(); vnl_matrix streamline; streamline.set_size(3, m_NumPoints); streamline.fill(0.0); for (int j=0; jGetPoint(j, cand); vnl_vector_fixed< float, 3 > candV; candV[0]=cand[0]; candV[1]=cand[1]; candV[2]=cand[2]; streamline.set_column(j, candV); } // out_fib.push_back(streamline); out_fib.at(i)=streamline; } // }); return out_fib; } std::vector > StreamlineFeatureExtractor::CalculateDmdf(std::vector > tractogram, std::vector > prototypes) { std::vector< vnl_matrix > dist_vec(tractogram.size());// MITK_INFO << "Start Calculating Dmdf"; cv::parallel_for_(cv::Range(0, tractogram.size()), [&](const cv::Range &range) { for (int i = range.start; i < range.end; i++) // for (unsigned int i=0; i distances; distances.set_size(1, prototypes.size()); distances.fill(0.0); for (unsigned int j=0; j single_distances; single_distances.set_size(1, tractogram.at(0).cols()); single_distances.fill(0.0); vnl_matrix single_distances_flip; single_distances_flip.set_size(1, tractogram.at(0).cols()); single_distances_flip.fill(0.0); for (unsigned int ik=0; ik single_distances.mean()) { distances.put(0,j, single_distances.mean()); } else { distances.put(0,j, single_distances_flip.mean()); } } // dist_vec.push_back(distances); dist_vec.at(i) = distances; } }); MITK_INFO << "Done Calculation"; MITK_INFO << dist_vec.at(0).size(); return dist_vec; } std::vector > StreamlineFeatureExtractor::MergeTractogram(std::vector > prototypes, std::vector > positive_local_prototypes, std::vector > negative_local_prototypes) { unsigned int pos_locals; unsigned int neg_locals; if (positive_local_prototypes.size() >= 50) { pos_locals= 50; } else { pos_locals= positive_local_prototypes.size(); } if (pos_locals <= negative_local_prototypes.size()) { neg_locals = pos_locals; } else { neg_locals= negative_local_prototypes.size(); } std::vector< vnl_matrix > merged_prototypes; for (unsigned int k=0; k StreamlineFeatureExtractor::Sort(std::vector sortingVector, int lengths) { std::vector index; std::priority_queue> q; for (unsigned int i = 0; i < sortingVector.size(); ++i) { q.push(std::pair(sortingVector[i], i)); } for (int i = 0; i < lengths; ++i) { int ki = q.top().second; index.push_back(ki); q.pop(); } return index; } std::vector> StreamlineFeatureExtractor::GetData() { MITK_INFO << "Start Function Get Data"; /*Vector which saves Prediction and Fibers to label based on uncertainty*/ std::vector> index_vec; // int labels_arr [m_DistancesPlus.size()+m_DistancesMinus.size()]; cv::Mat data; cv::Mat labels_arr_vec; int size_plus = 0; /*Create Trainingdata: Go through positive and negative Bundle and save distances as cv::Mat and create vector with labels*/ for ( unsigned int i=0; i seeds; for (int cont = 0; cont < labels_arr_vec.rows; cont++) { seeds.push_back(cont); } cv::randShuffle(seeds); cv::Mat labels_shuffled; cv::Mat samples_shuffled; for (int cont = 0; cont < labels_arr_vec.rows; cont++) { labels_shuffled.push_back(labels_arr_vec.row(seeds[cont])); } for (int cont = 0; cont < labels_arr_vec.rows; cont++) { samples_shuffled.push_back(data.row(seeds[cont])); } std::ofstream labelsfile; labelsfile.open("/home/r948e/E132-Projekte/Projects/2022_Peretzke_Interactive_Fiber_Dissection/mitk_diff/storage/Labels_" + std::to_string(m_activeCycle) + ".csv"); labelsfile<< cv::format(labels_shuffled, cv::Formatter::FMT_CSV) << std::endl; labelsfile.close(); std::ofstream featuresfile; featuresfile.open("/home/r948e/E132-Projekte/Projects/2022_Peretzke_Interactive_Fiber_Dissection/mitk_diff/storage/Features_" + std::to_string(m_activeCycle) + ".csv"); featuresfile<< cv::format(samples_shuffled, cv::Formatter::FMT_CSV) << std::endl; featuresfile.close(); /*Create Dataset and initialize Classifier*/ cv::Ptr m_traindata = cv::ml::TrainData::create(samples_shuffled, cv::ml::ROW_SAMPLE, labels_shuffled); auto statistic_model = cv::ml::RTrees::create(); auto criteria = cv::TermCriteria(); criteria.type = cv::TermCriteria::MAX_ITER; // criteria.epsilon = 1e-8; criteria.maxCount = 800; statistic_model->setMaxDepth(50); //set to three // statistic_model->setMinSampleCount(m_traindata->getNTrainSamples()*0.01); statistic_model->setMinSampleCount(2); statistic_model->setTruncatePrunedTree(false); statistic_model->setUse1SERule(false); statistic_model->setUseSurrogates(false); statistic_model->setTermCriteria(criteria); statistic_model->setCVFolds(1); statistic_model->setPriors(newweight); /*Train Classifier*/ MITK_INFO << "Start Training"; statistic_model->train(m_traindata); /*Predict on Test Data*/ MITK_INFO << "Predicting"; /*Create Dataset as cv::Mat*/ cv::Mat dataTest; for ( unsigned int i=0; i indexPrediction; std::vector e(m_DistancesTest.size()); std::vector pred(m_DistancesTest.size()); /*For every Sample/Streamline get Prediction and entropy (=based on counts of Random Forest)*/ MITK_INFO << "Predicting on all cores"; #pragma omp parallel for for (unsigned int i=0; ipredict(dataTest.row(i)); pred.at(i)=val; #pragma omp critical if (val==1) { indexPrediction.push_back(i); } cv::Mat vote; statistic_model->getVotes(dataTest.row(i), vote, 0); e.at(i) = ( -(vote.at(1,0)*1.0)/ (vote.at(1,0)+vote.at(1,1)) * log2((vote.at(1,0)*1.0)/ (vote.at(1,0)+vote.at(1,1))) - (vote.at(1,1)*1.0)/ (vote.at(1,0)+vote.at(1,1))* log2((vote.at(1,1)*1.0)/ (vote.at(1,0)+vote.at(1,1)))); if (isnan(e.at(i))) { e.at(i)=0; } } MITK_INFO << "Done"; /*Save entropy values for analysis*/ std::ofstream entropyfile; entropyfile.open("/home/r948e/mycsv/entropydata_" + std::to_string(m_activeCycle) + ".csv"); for (unsigned int i = 0; i < e.size(); i++) { entropyfile << e.at(i) << ' '; } entropyfile.close(); std::ofstream predictionfile; predictionfile.open("/home/r948e/mycsv/predictiondata_" + std::to_string(m_activeCycle) + ".csv"); for (unsigned int i = 0; i < pred.size(); i++) { predictionfile << pred.at(i) << ' '; } predictionfile.close(); MITK_INFO << "--------------"; MITK_INFO << "Prediction vector size:"; MITK_INFO << indexPrediction.size(); MITK_INFO << "Entropy vector size:"; MITK_INFO << e.size(); MITK_INFO << "--------------"; /*Get index of most unertain data (lengths defines how many data is saved)*/ // int lengths=500; int lengths = std::count_if(e.begin(), e.end(),[&](auto const& val){ return val >= 0.95; }); if (lengths<500) { lengths=500; } else if (lengths>2000) { lengths=2000; } std::vector indexUnc = Sort(e, lengths); MITK_INFO << indexUnc.size(); vnl_matrix distances_matrix; distances_matrix.set_size(lengths, lengths); distances_matrix.fill(0.0); std::vector distances_matrix_mean; for (int i=0; i diff = m_DistancesTest.at(indexUnc.at(i)) - m_DistancesTest.at(indexUnc.at(k)); /*Into the eucledean difference matrix, put the distance in Feature Space between every sample pair*/ distances_matrix.put(i,k,diff.absolute_value_sum()/m_DistancesTest.at(0).size()); } /*For every Sample/Streamline get the mean eucledean distance to all other Samples => one value for every Sample*/ distances_matrix_mean.push_back(distances_matrix.get_row(i).mean()); // MITK_INFO << meanval.at(i); } // vnl_vector sum_matrix; // sum_matrix.set_size(lengths); // sum_matrix.fill(0.0); // MITK_INFO << distances_matrix.get_row(myidx.at(i)+ sum_matrix.get_row(0) // for (int i=0; i sum_matrix; // sum_matrix.set_size(lengths, lengths); // sum_matrix.set_size(lengths, lengths); /*Index to find values in distancematrix*/ std::vector myidx; /*Index to find actual streamlines using indexUnc*/ std::vector indexUncDist; /*Start with the Streamline of the highest entropy, which is in distance_matrix at idx 0*/ myidx.push_back(0); indexUncDist.push_back(indexUnc.at(myidx.at(0))); /*Vecotr that stores minvalues of current iteration*/ vnl_matrix cur_vec; cur_vec.set_size(1,lengths); cur_vec.fill(0.0); for (int i=0; i sum_matrix; sum_matrix.set_size(myidx.size(), lengths); sum_matrix.fill(0); for (unsigned int ii=0; ii &index) { mitk::FiberBundle::Pointer Prediction; MITK_INFO << "Create Bundle"; vtkSmartPointer FibersData; FibersData = vtkSmartPointer::New(); FibersData->SetPoints(vtkSmartPointer::New()); FibersData->SetLines(vtkSmartPointer::New()); vtkSmartPointer vNewPolyData = vtkSmartPointer::New(); vtkSmartPointer vNewLines = vtkSmartPointer::New(); vtkSmartPointer vNewPoints = vtkSmartPointer::New(); // vtkSmartPointer weights = vtkSmartPointer::New(); // weights->SetNumberOfValues(this->GetNumFibers()+fib->GetNumFibers()); unsigned int indexSize = index.size(); unsigned int counter = 0; MITK_INFO << "Start Loop"; for (unsigned int i=0; iGetFiberPolyData()->GetCell(index[i]); auto numPoints = cell->GetNumberOfPoints(); vtkPoints* points = cell->GetPoints(); vtkSmartPointer container = vtkSmartPointer::New(); for (unsigned int j=0; jGetPoint(j, p); vtkIdType id = vNewPoints->InsertNextPoint(p); container->GetPointIds()->InsertNextId(id); } // weights->InsertValue(counter, fib->GetFiberWeight(i)); vNewLines->InsertNextCell(container); counter++; } MITK_INFO << "Counter"; MITK_INFO << counter; vNewPolyData->SetLines(vNewLines); vNewPolyData->SetPoints(vNewPoints); FibersData = vtkSmartPointer::New(); FibersData->SetPoints(vtkSmartPointer::New()); FibersData->SetLines(vtkSmartPointer::New()); FibersData->SetPoints(vNewPoints); FibersData->SetLines(vNewLines); Prediction = mitk::FiberBundle::New(vNewPolyData); // Bundle->SetFiberColors(255, 255, 255); MITK_INFO << "Cells Prediciton"; MITK_INFO << Prediction->GetFiberPolyData()->GetNumberOfCells(); MITK_INFO << "Cells Tractorgram"; MITK_INFO << m_TractogramTest->GetFiberPolyData()->GetNumberOfCells(); return Prediction; } std::vector StreamlineFeatureExtractor::CreateLabels(std::vector > Testdata, std::vector > Prediction) { // vnl_vector labels; // vnl_vector.set_size(Testdata.size()); // vnl_vector.fill(0); std::vector labels(Testdata.size(), 0); #pragma omp parallel for for (unsigned int i=0; i("/home/r948e/E132-Projekte/Projects/2022_Peretzke_Interactive_Fiber_Dissection/data/Synt_tract_40_prototypes.trk"); std::vector > T_Prototypes; std::vector > T_TractogramPlus; std::vector > T_TractogramMinus; std::vector > T_TractogramTest; std::vector > T_mergedPrototypes; T_Prototypes = ResampleFibers(m_inputPrototypes); T_TractogramMinus= ResampleFibers(m_TractogramMinus); T_TractogramPlus= ResampleFibers(m_TractogramPlus); /* Merge T_Prototypes, T_TractogramMinus and T_TractogramPlus for extra Features*/ T_mergedPrototypes = MergeTractogram(T_Prototypes, T_TractogramPlus, T_TractogramMinus); MITK_INFO << "Calculate Features"; m_DistancesMinus = CalculateDmdf(T_TractogramMinus, T_Prototypes); m_DistancesPlus = CalculateDmdf(T_TractogramPlus, T_Prototypes); MITK_INFO << m_DistancesTestName; MITK_INFO << "Resample Test Data"; T_TractogramTest= ResampleFibers(m_TractogramTest); MITK_INFO << "Calculate Features of Test Data"; - m_DistancesTest= CalculateDmdf(T_TractogramTest, T_mergedPrototypes); + m_DistancesTest= CalculateDmdf(T_TractogramTest, T_Prototypes); std::ofstream myFile(m_DistancesTestName); // myFile << colname << "\n"; for(long unsigned int i = 0; i < m_DistancesTest.size(); ++i) { myFile << m_DistancesTest.at(i); } myFile.close(); std::ifstream f(m_DistancesTestName); // if (f.good() && m_activeCycle!=0) // { // MITK_INFO << "Loading Features of Tractogram"; // m_DistancesTest.clear(); // std::ifstream myFile(m_DistancesTestName); // if(!myFile.is_open()) throw std::runtime_error("Could not open file"); // std::string line; // vnl_matrix curline; // curline.set_size(1, m_DistancesPlus.at(0).cols()); // curline.fill(0.0); // float val; // while(std::getline(myFile, line)) // { // // Create a stringstream of the current line // std::stringstream ss(line); //// MITK_INFO << ss; // // Keep track of the current column index // int colIdx = 0; // // Extract each integer // while(ss >> val){ //// // Add the current integer to the 'colIdx' column's values vector // curline.put(0,colIdx, val); //// // If the next token is a comma, ignore it and move on //// if(ss.peek() == ',') ss.ignore(); //// // Increment the column index // colIdx++; // } // m_DistancesTest.push_back(curline); // } // // Close file // myFile.close(); // } // else // { // MITK_INFO << m_DistancesTestName; // MITK_INFO << "Resample Test Data"; // T_TractogramTest= ResampleFibers(m_TractogramTest); // MITK_INFO << "Calculate Features of Test Data"; // m_DistancesTest= CalculateDmdf(T_TractogramTest, T_mergedPrototypes); // std::ofstream myFile(m_DistancesTestName); // // myFile << colname << "\n"; // for(long unsigned int i = 0; i < m_DistancesTest.size(); ++i) // { // myFile << m_DistancesTest.at(i); // } // myFile.close(); // } // MITK_INFO << m_DistancesTest.size(); MITK_INFO << "Sizes of Plus and Minus"; MITK_INFO << m_DistancesPlus.size() + m_DistancesMinus.size(); MITK_INFO << "Sizes of Prototypes"; MITK_INFO << T_mergedPrototypes.size(); MITK_INFO << T_mergedPrototypes.at(0).rows(); MITK_INFO << "Size of Test Data"; MITK_INFO << m_DistancesTest.size(); MITK_INFO << "Done with Datacreation"; m_index =GetData(); } vnl_vector StreamlineFeatureExtractor::ValidationPipe() { std::vector > T_Prototypes; std::vector > T_TractogramPrediction; std::vector > T_TractogramGroundtruth; std::vector > T_TractogramTest; std::vector > DistancesPrediction; std::vector > DistancesGroundtruth; std::vector > DistancesTest; std::vector LabelsPrediction; std::vector LabelsGroundtruth; MITK_INFO << "Start Resampling"; T_Prototypes = ResampleFibers(m_inputPrototypes); T_TractogramPrediction= ResampleFibers(m_TractogramPrediction); T_TractogramGroundtruth= ResampleFibers(m_TractogramGroundtruth); T_TractogramTest= ResampleFibers(m_TractogramTest); MITK_INFO << "Calculate Features"; DistancesPrediction = CalculateDmdf(T_TractogramPrediction, T_Prototypes); DistancesGroundtruth = CalculateDmdf(T_TractogramGroundtruth, T_Prototypes); DistancesTest = CalculateDmdf(T_TractogramTest, T_Prototypes); LabelsGroundtruth = CreateLabels(DistancesTest, DistancesGroundtruth); LabelsPrediction = CreateLabels(DistancesTest, DistancesPrediction); std::ofstream LabelsPredictionFile; LabelsPredictionFile.open("/home/r948e/mycsv/predictionlabels_" + std::to_string(m_activeCycle) + ".csv"); for (unsigned int i = 0; i < LabelsPrediction.size(); i++) { LabelsPredictionFile << LabelsPrediction.at(i) << ' '; } LabelsPredictionFile.close(); std::ofstream LabelsGroundtruthFile; LabelsGroundtruthFile.open("/home/r948e/mycsv/groundtruthlabels_" + std::to_string(m_activeCycle) + ".csv"); for (unsigned int i = 0; i < LabelsGroundtruth.size(); i++) { LabelsGroundtruthFile << LabelsGroundtruth.at(i) << ' '; } LabelsGroundtruthFile.close(); float FP = 0.0; float FN = 0.0; float TP = 0.0; float TN = 0.0; //#pragma omp parallel for for (unsigned int i=0; i metrics(7); metrics.put(0, TP); metrics.put(1, FP); metrics.put(2, TN); metrics.put(3, FN); metrics.put(4, Precision); metrics.put(5, Recall); metrics.put(6, F1_score); return metrics; } }