diff --git a/Modules/OpenCL/mitkOclFilter.cpp b/Modules/OpenCL/mitkOclFilter.cpp index 98d8902987..a3fb6c9ebd 100644 --- a/Modules/OpenCL/mitkOclFilter.cpp +++ b/Modules/OpenCL/mitkOclFilter.cpp @@ -1,348 +1,355 @@ /*=================================================================== The Medical Imaging Interaction Toolkit (MITK) Copyright (c) German Cancer Research Center, Division of Medical and Biological Informatics. All rights reserved. This software is distributed WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See LICENSE.txt or http://www.mitk.org for details. ===================================================================*/ //Ocl #include "mitkOclFilter.h" #include "mitkOclUtils.h" #include "mitkOpenCLActivator.h" //Mitk #include #include //usService #include "usServiceReference.h" #include #include #include #include #include #include //standard library #include mitk::OclFilter::OclFilter() : m_ClCompilerFlags(""), m_ClProgram(nullptr), m_CommandQue(nullptr), m_FilterID("mitkOclFilter"), m_Preambel(" "), m_Initialized(false) { } mitk::OclFilter::OclFilter(const char* filename) : m_ClCompilerFlags(""), m_ClProgram(nullptr), m_CommandQue(nullptr), m_FilterID(filename), m_Preambel(" "), m_Initialized(false) { m_ClFiles.push_back(filename); } mitk::OclFilter::~OclFilter() { MITK_DEBUG << "OclFilter Destructor"; // release program if (m_ClProgram) { us::ServiceReference ref = GetModuleContext()->GetServiceReference(); OclResourceService* resources = GetModuleContext()->GetService(ref); // remove program from storage resources->RemoveProgram(m_FilterID); } } bool mitk::OclFilter::ExecuteKernel( cl_kernel kernel, unsigned int workSizeDim ) { cl_int clErr = 0; clErr = clEnqueueNDRangeKernel( this->m_CommandQue, kernel, workSizeDim, nullptr, this->m_GlobalWorkSize, m_LocalWorkSize, 0, nullptr, nullptr); CHECK_OCL_ERR( clErr ); return ( clErr == CL_SUCCESS ); } bool mitk::OclFilter::ExecuteKernelChunks( cl_kernel kernel, unsigned int workSizeDim, size_t* chunksDim ) { size_t offset[3] ={0, 0, 0}; cl_int clErr = 0; if(workSizeDim == 2) { for(offset[0] = 0; offset[0] < m_GlobalWorkSize[0]; offset[0] += chunksDim[0]) { for(offset[1] = 0; offset[1] < m_GlobalWorkSize[1]; offset[1] += chunksDim[1]) { clErr |= clEnqueueNDRangeKernel(this->m_CommandQue, kernel, workSizeDim, offset, chunksDim, m_LocalWorkSize, 0, nullptr, nullptr); } } } else if(workSizeDim == 3) { for(offset[0] = 0; offset[0] < m_GlobalWorkSize[0]; offset[0] += chunksDim[0]) { for(offset[1] = 0; offset[1] < m_GlobalWorkSize[1]; offset[1] += chunksDim[1]) { for(offset[2] = 0; offset[2] < m_GlobalWorkSize[2]; offset[2] += chunksDim[2]) { clErr |= clEnqueueNDRangeKernel( this->m_CommandQue, kernel, workSizeDim, offset, chunksDim, m_LocalWorkSize, 0, nullptr, nullptr); } } } } CHECK_OCL_ERR(clErr); return ( clErr == CL_SUCCESS ); } bool mitk::OclFilter::ExecuteKernelChunksInBatches(cl_kernel kernel, unsigned int workSizeDim, size_t* chunksDim, size_t batchSize, int waitTimems) { size_t offset[3] = { 0, 0, 0 }; cl_int clErr = 0; unsigned int currentChunk = 0; cl_event* waitFor = new cl_event[batchSize]; if (workSizeDim == 2) { for (offset[0] = 0; offset[0] < m_GlobalWorkSize[0]; offset[0] += chunksDim[0]) { for (offset[1] = 0; offset[1] < m_GlobalWorkSize[1]; offset[1] += chunksDim[1]) { if (currentChunk % batchSize == 0 && currentChunk != 0) { clWaitForEvents(batchSize, &waitFor[0]); std::this_thread::sleep_for(std::chrono::milliseconds(waitTimems)); clErr |= clEnqueueNDRangeKernel(this->m_CommandQue, kernel, workSizeDim, offset, chunksDim, m_LocalWorkSize, 0, nullptr, &waitFor[0]); } else { clErr |= clEnqueueNDRangeKernel(this->m_CommandQue, kernel, workSizeDim, offset, chunksDim, m_LocalWorkSize, 0, nullptr, &waitFor[currentChunk % batchSize]); } currentChunk++; } } } else if (workSizeDim == 3) { for (offset[0] = 0; offset[0] < m_GlobalWorkSize[0]; offset[0] += chunksDim[0]) { for (offset[1] = 0; offset[1] < m_GlobalWorkSize[1]; offset[1] += chunksDim[1]) { for (offset[2] = 0; offset[2] < m_GlobalWorkSize[2]; offset[2] += chunksDim[2]) { if (currentChunk % batchSize == 0 && currentChunk != 0) { clWaitForEvents(batchSize, &waitFor[0]); std::this_thread::sleep_for(std::chrono::milliseconds(waitTimems)); clErr |= clEnqueueNDRangeKernel(this->m_CommandQue, kernel, workSizeDim, offset, chunksDim, m_LocalWorkSize, 0, nullptr, &waitFor[0]); } else { clErr |= clEnqueueNDRangeKernel(this->m_CommandQue, kernel, workSizeDim, offset, chunksDim, m_LocalWorkSize, 0, nullptr, &waitFor[currentChunk % batchSize]); } currentChunk++; } } } } CHECK_OCL_ERR(clErr); return (clErr == CL_SUCCESS); } bool mitk::OclFilter::Initialize() { us::ServiceReference ref = GetModuleContext()->GetServiceReference(); OclResourceService* resources = GetModuleContext()->GetService(ref); m_CommandQue = resources->GetCommandQueue(); cl_int clErr = 0; m_Initialized = CHECK_OCL_ERR(clErr); if ( m_ClFiles.empty()) { MITK_ERROR<<"No OpenCL Source FILE specified"; return false; } if (m_ClProgram == nullptr) { try { this->m_ClProgram = resources->GetProgram( this->m_FilterID ); } catch(const mitk::Exception& e) { MITK_INFO << "Program not stored in resource manager, compiling. " << e; this->CompileSource(); } } return m_Initialized; } void mitk::OclFilter::LoadSourceFiles(CStringList &sourceCode, ClSizeList &sourceCodeSize) { for( CStringList::iterator it = m_ClFiles.begin(); it != m_ClFiles.end(); ++it ) { MITK_DEBUG << "Load file :" << *it; us::ModuleResource mdr = GetModule()->GetResource(*it); if( !mdr.IsValid() ) MITK_WARN << "Could not load resource: " << mdr.GetName() << " is invalid!"; us::ModuleResourceStream rss(mdr); // read resource file to a string std::istreambuf_iterator eos; std::string source(std::istreambuf_iterator(rss), eos); // add preambel and build up string to compile std::string src(m_Preambel); src.append("\n"); src.append(source); // allocate new char buffer char* tmp = new char[src.size() + 1]; strcpy(tmp,src.c_str()); // add source to list sourceCode.push_back((const char*)tmp); sourceCodeSize.push_back(src.size()); } } void mitk::OclFilter::CompileSource() { // helper variable int clErr = 0; CStringList sourceCode; ClSizeList sourceCodeSize; if (m_ClFiles.empty()) { MITK_ERROR("ocl.filter") << "No shader source file was set"; return; } //get a valid opencl context us::ServiceReference ref = GetModuleContext()->GetServiceReference(); OclResourceService* resources = GetModuleContext()->GetService(ref); cl_context gpuContext = resources->GetContext(); // load the program source from file LoadSourceFiles(sourceCode, sourceCodeSize); if ( !sourceCode.empty() ) { // create program from all files in the file list m_ClProgram = clCreateProgramWithSource(gpuContext, sourceCode.size(), &sourceCode[0], &sourceCodeSize[0], &clErr); CHECK_OCL_ERR(clErr); // build the source code MITK_DEBUG << "Building Program Source"; std::string compilerOptions = ""; compilerOptions.append(m_ClCompilerFlags); MITK_DEBUG("ocl.filter") << "cl compiler flags: " << compilerOptions.c_str(); clErr = clBuildProgram(m_ClProgram, 0, nullptr, compilerOptions.c_str(), nullptr, nullptr); CHECK_OCL_ERR(clErr); // if OpenCL Source build failed if (clErr != CL_SUCCESS) { MITK_ERROR("ocl.filter") << "Failed to build source"; oclLogBuildInfo(m_ClProgram, resources->GetCurrentDevice() ); oclLogBinary(m_ClProgram, resources->GetCurrentDevice() ); m_Initialized = false; } // store the succesfully build program into the program storage provided by the resource service resources->InsertProgram(m_ClProgram, m_FilterID, true); // free the char buffers with the source code for( CStringList::iterator it = sourceCode.begin(); it != sourceCode.end(); ++it ) { delete[] *it; } } else { MITK_ERROR("ocl.filter") << "Could not load from source"; m_Initialized = false; } } void mitk::OclFilter::SetWorkingSize(unsigned int locx, unsigned int dimx, unsigned int locy, unsigned int dimy, unsigned int locz, unsigned int dimz) { // set the local work size this->m_LocalWorkSize[0] = locx; this->m_LocalWorkSize[1] = locy; this->m_LocalWorkSize[2] = locz; this->m_GlobalWorkSize[0] = dimx; this->m_GlobalWorkSize[1] = dimy; this->m_GlobalWorkSize[2] = dimz; // estimate the global work size this->m_GlobalWorkSize[0] = iDivUp( dimx, this->m_LocalWorkSize[0]) * this->m_LocalWorkSize[0]; if ( dimy > 1) this->m_GlobalWorkSize[1] = iDivUp( dimy, this->m_LocalWorkSize[1]) * this->m_LocalWorkSize[1]; if( dimz > 1 ) this->m_GlobalWorkSize[2] = iDivUp( dimz, this->m_LocalWorkSize[2]) * this->m_LocalWorkSize[2]; } void mitk::OclFilter::SetSourcePreambel(const char* preambel) { this->m_Preambel = preambel; } void mitk::OclFilter::AddSourceFile(const char* filename) { m_ClFiles.push_back(filename); } void mitk::OclFilter::SetCompilerFlags(const char* flags) { m_ClCompilerFlags = flags; } bool mitk::OclFilter::IsInitialized() { return m_Initialized; } + +long mitk::OclFilter::GetDeviceMemory() +{ + OclResourceService* resources = GetModuleContext()->GetService(GetModuleContext()->GetServiceReference()); + auto device = resources->GetCurrentDevice(); + return oclGetGlobalMemSize(device); +} \ No newline at end of file diff --git a/Modules/OpenCL/mitkOclFilter.h b/Modules/OpenCL/mitkOclFilter.h index 41fcaa8b0f..89f557bd07 100644 --- a/Modules/OpenCL/mitkOclFilter.h +++ b/Modules/OpenCL/mitkOclFilter.h @@ -1,157 +1,162 @@ /*=================================================================== The Medical Imaging Interaction Toolkit (MITK) Copyright (c) German Cancer Research Center, Division of Medical and Biological Informatics. All rights reserved. This software is distributed WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See LICENSE.txt or http://www.mitk.org for details. ===================================================================*/ #ifndef __mitkOclFilter_h #define __mitkOclFilter_h #include "mitkOclUtils.h" #include "mitkCommon.h" #include #include #include namespace mitk { /** @class OclFilter @brief Superclass for all OpenCL based filter. This class takes care of loading and compiling the external GPU program code. */ class MITKOPENCL_EXPORT OclFilter { public: /** * @brief Add a source file from the resource files to the * OpenCL shader file list. Multiple files can be added to the list. * * @param name of the file in the resource system */ void AddSourceFile(const char* filename); /** * @brief Set specific compilerflags to compile the CL source. Default is set to nullptr; * example: "-cl-fast-relaxed-math -cl-mad-enable -cl-strict-aliasing" * * @param flags to the modulefolder that contains the gpuSource */ void SetCompilerFlags(const char* flags); /** * @brief Returns true if the initialization was successfull */ virtual bool IsInitialized(); + /** + * @brief Returns the amount of global memory of the used device in bytes + */ + virtual long GetDeviceMemory(); + /** @brief Destructor */ virtual ~OclFilter(); protected: typedef std::vector CStringList; typedef std::vector ClSizeList; /** @brief Constructor */ OclFilter(); /** @brief Constructor ( overloaded ) */ OclFilter(const char* filename); /** @brief String that contains the compiler flags */ const char* m_ClCompilerFlags; /** @brief The compiled OpenCL program */ cl_program m_ClProgram; /** @brief Command queue for the filter */ cl_command_queue m_CommandQue; /** @brief Unique ID of the filter, needs to be specified in the constructor of the derived class */ std::string m_FilterID; /*! @brief source preambel for e.g. #define commands to be inserted into the OpenCL source */ const char* m_Preambel; /** @brief List of sourcefiles that will be compiled for this filter.*/ CStringList m_ClFiles; /** @brief status of the filter */ bool m_Initialized; /** @brief The local work size fo the filter */ size_t m_LocalWorkSize[3]; /** @brief The global work size of the filter */ size_t m_GlobalWorkSize[3]; /** @brief Set the working size for the following OpenCL kernel call */ void SetWorkingSize(unsigned int locx, unsigned int dimx, unsigned int locy = 1, unsigned int dimy = 1, unsigned int locz = 1, unsigned int dimz = 1); /** @brief Execute the given kernel on the OpenCL Index-Space defined by the local and global work sizes */ bool ExecuteKernel( cl_kernel kernel, unsigned int workSizeDim ); /** @brief Execute the given kernel on the OpenCL Index-Space defined by the local and global work sizes, but divide it into chunks of dimension chunksDim */ bool ExecuteKernelChunks( cl_kernel kernel, unsigned int workSizeDim, size_t* chunksDim ); /** @brief Execute the given kernel on the OpenCL Index-Space defined by the local and global work sizes, but divide it into chunks of dimension chunksDim and wait between * batches of batchSize chunks a time of waitTimems milliseconds */ bool ExecuteKernelChunksInBatches(cl_kernel kernel, unsigned int workSizeDim, size_t* chunksDim, size_t batchSize, int waitTimems); /** * \brief Initialize all necessary parts of the filter * * The Initialize() method creates the command queue and the m_clProgram. * The program is either compiled from the given source or taken from the * OclResourceManager if the program was compiled already. */ bool Initialize(); /** * @brief Compile the program source * * @param preambel e.g. defines for the shader code */ void CompileSource(); /** * @brief Add some source code on the beginning of the loaded source * * In this way, some preprocessor flags for the CL compiler can at the beginning of the filter * @param preambel Source preambel for e.g. #define commands to be inserted into the OpenCL source */ void SetSourcePreambel(const char* preambel); /** * @brief Get the Module of the filter. Needs to be implemented by every subclass. * The filter will load the OpenCL sourcefiles from this module context. */ virtual us::Module* GetModule() = 0; /** * @brief Helper functions that load sourcefiles from the module context in the Initialize function. * @param SourceCodeList holds the sourcecode for every file as string, the SourceCodeSizeList holst the * size of every file in bytes. */ void LoadSourceFiles(CStringList &SourceCodeList, ClSizeList &SourceCodeSizeList); }; } #endif // __mitkOclFilter_h diff --git a/Modules/PhotoacousticsAlgorithms/source/mitkPhotoacousticBeamformingFilter.cpp b/Modules/PhotoacousticsAlgorithms/source/mitkPhotoacousticBeamformingFilter.cpp index ab769fdc21..f4adb633fa 100644 --- a/Modules/PhotoacousticsAlgorithms/source/mitkPhotoacousticBeamformingFilter.cpp +++ b/Modules/PhotoacousticsAlgorithms/source/mitkPhotoacousticBeamformingFilter.cpp @@ -1,785 +1,804 @@ /*=================================================================== mitkPhotoacousticBeamformingFilter The Medical Imaging Interaction Toolkit (MITK) Copyright (c) German Cancer Research Center, Division of Medical and Biological Informatics. All rights reserved. This software is distributed WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See LICENSE.txt or http://www.mitk.org for details. ===================================================================*/ #define _USE_MATH_DEFINES #include "mitkProperties.h" #include "mitkImageReadAccessor.h" #include #include #include #include #include #include #include "mitkImageCast.h" #include "mitkPhotoacousticBeamformingFilter.h" mitk::BeamformingFilter::BeamformingFilter() : m_OutputData(nullptr), m_InputData(nullptr), m_Message("noMessage") { this->SetNumberOfIndexedInputs(1); this->SetNumberOfRequiredInputs(1); m_ProgressHandle = [](int, std::string) {}; m_BeamformingOclFilter = mitk::PhotoacousticOCLBeamformingFilter::New(); m_VonHannFunction = VonHannFunction(m_Conf.apodizationArraySize); m_HammFunction = HammFunction(m_Conf.apodizationArraySize); m_BoxFunction = BoxFunction(m_Conf.apodizationArraySize); } void mitk::BeamformingFilter::SetProgressHandle(std::function progressHandle) { m_ProgressHandle = progressHandle; } mitk::BeamformingFilter::~BeamformingFilter() { delete[] m_VonHannFunction; delete[] m_HammFunction; delete[] m_BoxFunction; } void mitk::BeamformingFilter::GenerateInputRequestedRegion() { Superclass::GenerateInputRequestedRegion(); mitk::Image* output = this->GetOutput(); mitk::Image* input = const_cast (this->GetInput()); if (!output->IsInitialized()) { return; } input->SetRequestedRegionToLargestPossibleRegion(); //GenerateTimeInInputRegion(output, input); } void mitk::BeamformingFilter::GenerateOutputInformation() { mitk::Image::ConstPointer input = this->GetInput(); mitk::Image::Pointer output = this->GetOutput(); if ((output->IsInitialized()) && (this->GetMTime() <= m_TimeOfHeaderInitialization.GetMTime())) return; itkDebugMacro(<< "GenerateOutputInformation()"); unsigned int dim[] = { m_Conf.ReconstructionLines, m_Conf.SamplesPerLine, input->GetDimension(2) }; output->Initialize(mitk::MakeScalarPixelType(), 3, dim); mitk::Vector3D spacing; spacing[0] = m_Conf.Pitch * m_Conf.TransducerElements * 1000 / m_Conf.ReconstructionLines; spacing[1] = (m_Conf.TimeSpacing * m_Conf.inputDim[1]) / 2 * m_Conf.SpeedOfSound * 1000 / m_Conf.SamplesPerLine; spacing[2] = 1; output->GetGeometry()->SetSpacing(spacing); output->GetGeometry()->Modified(); output->SetPropertyList(input->GetPropertyList()->Clone()); m_TimeOfHeaderInitialization.Modified(); } void mitk::BeamformingFilter::GenerateData() { GenerateOutputInformation(); mitk::Image::Pointer input = this->GetInput(); mitk::Image::Pointer output = this->GetOutput(); if (!output->IsInitialized()) return; float* ApodWindow; if (m_ConfOld.apodizationArraySize != m_Conf.apodizationArraySize) { delete[] m_VonHannFunction; delete[] m_HammFunction; delete[] m_BoxFunction; m_VonHannFunction = VonHannFunction(m_Conf.apodizationArraySize); m_HammFunction = HammFunction(m_Conf.apodizationArraySize); m_BoxFunction = BoxFunction(m_Conf.apodizationArraySize); m_ConfOld = m_Conf; } // set the appropiate apodization window switch (m_Conf.Apod) { case BeamformingSettings::Apodization::Hann: ApodWindow = m_VonHannFunction; break; case BeamformingSettings::Apodization::Hamm: ApodWindow = m_HammFunction; break; case BeamformingSettings::Apodization::Box: ApodWindow = m_BoxFunction; break; default: ApodWindow = m_BoxFunction; break; } auto begin = std::chrono::high_resolution_clock::now(); // debbuging the performance... if (!m_Conf.UseGPU) { int progInterval = output->GetDimension(2) / 20 > 1 ? output->GetDimension(2) / 20 : 1; // the interval at which we update the gui progress bar float inputDim[2] = { (float)input->GetDimension(0), (float)input->GetDimension(1) }; float outputDim[2] = { (float)output->GetDimension(0), (float)output->GetDimension(1) }; for (unsigned int i = 0; i < output->GetDimension(2); ++i) // seperate Slices should get Beamforming seperately applied { mitk::ImageReadAccessor inputReadAccessor(input, input->GetSliceData(i)); // first, we check whether the dara is float, other formats are unsupported if (input->GetPixelType().GetTypeAsString() == "scalar (float)" || input->GetPixelType().GetTypeAsString() == " (float)") { m_InputData = (float*)inputReadAccessor.GetData(); } else { MITK_INFO << "Pixel type is not float, abort"; return; } m_OutputData = new float[m_Conf.ReconstructionLines*m_Conf.SamplesPerLine]; // fill the image with zeros for (int l = 0; l < outputDim[0]; ++l) { for (int s = 0; s < outputDim[1]; ++s) { m_OutputData[l*(short)outputDim[1] + s] = 0; } } std::thread *threads = new std::thread[(short)outputDim[0]]; // every line will be beamformed in a seperate thread if (m_Conf.Algorithm == BeamformingSettings::BeamformingAlgorithm::DAS) { if (m_Conf.DelayCalculationMethod == BeamformingSettings::DelayCalc::QuadApprox) { for (short line = 0; line < outputDim[0]; ++line) { threads[line] = std::thread(&BeamformingFilter::DASQuadraticLine, this, m_InputData, m_OutputData, inputDim, outputDim, line, ApodWindow, m_Conf.apodizationArraySize); } } else if (m_Conf.DelayCalculationMethod == BeamformingSettings::DelayCalc::Spherical) { for (short line = 0; line < outputDim[0]; ++line) { threads[line] = std::thread(&BeamformingFilter::DASSphericalLine, this, m_InputData, m_OutputData, inputDim, outputDim, line, ApodWindow, m_Conf.apodizationArraySize); } } } else if (m_Conf.Algorithm == BeamformingSettings::BeamformingAlgorithm::DMAS) { if (m_Conf.DelayCalculationMethod == BeamformingSettings::DelayCalc::QuadApprox) { for (short line = 0; line < outputDim[0]; ++line) { threads[line] = std::thread(&BeamformingFilter::DMASQuadraticLine, this, m_InputData, m_OutputData, inputDim, outputDim, line, ApodWindow, m_Conf.apodizationArraySize); } } else if (m_Conf.DelayCalculationMethod == BeamformingSettings::DelayCalc::Spherical) { for (short line = 0; line < outputDim[0]; ++line) { threads[line] = std::thread(&BeamformingFilter::DMASSphericalLine, this, m_InputData, m_OutputData, inputDim, outputDim, line, ApodWindow, m_Conf.apodizationArraySize); } } } else if (m_Conf.Algorithm == BeamformingSettings::BeamformingAlgorithm::sDMAS) { if (m_Conf.DelayCalculationMethod == BeamformingSettings::DelayCalc::QuadApprox) { for (short line = 0; line < outputDim[0]; ++line) { threads[line] = std::thread(&BeamformingFilter::sDMASQuadraticLine, this, m_InputData, m_OutputData, inputDim, outputDim, line, ApodWindow, m_Conf.apodizationArraySize); } } else if (m_Conf.DelayCalculationMethod == BeamformingSettings::DelayCalc::Spherical) { for (short line = 0; line < outputDim[0]; ++line) { threads[line] = std::thread(&BeamformingFilter::sDMASSphericalLine, this, m_InputData, m_OutputData, inputDim, outputDim, line, ApodWindow, m_Conf.apodizationArraySize); } } } // wait for all lines to finish for (short line = 0; line < outputDim[0]; ++line) { threads[line].join(); } output->SetSlice(m_OutputData, i); if (i % progInterval == 0) m_ProgressHandle((int)((i + 1) / (float)output->GetDimension(2) * 100), "performing reconstruction"); delete[] m_OutputData; m_OutputData = nullptr; m_InputData = nullptr; } } #if defined(PHOTOACOUSTICS_USE_GPU) || DOXYGEN else { try { // first, we check whether the data is float, other formats are unsupported if (!(input->GetPixelType().GetTypeAsString() == "scalar (float)" || input->GetPixelType().GetTypeAsString() == " (float)")) { MITK_ERROR << "Pixel type is not float, abort"; return; } + long availableMemory = m_BeamformingOclFilter->GetDeviceMemory(); + unsigned int batchSize = 16; unsigned int batches = (unsigned int)((float)input->GetDimension(2)/batchSize) + (input->GetDimension(2)%batchSize > 0); - unsigned int batchDim[] = {input->GetDimension(0), input->GetDimension(1), batchSize}; - unsigned int batchDimLast[] = {input->GetDimension(0), input->GetDimension(1), input->GetDimension(2)%batchSize}; + unsigned int batchDim[] = { input->GetDimension(0), input->GetDimension(1), batchSize }; + unsigned int batchDimLast[] = { input->GetDimension(0), input->GetDimension(1), input->GetDimension(2) % batchSize }; + + // the following safeguard is probably only needed for absurdly small GPU memory + for (batchSize = 16; + batchDim[0] * batchDim[1] * 4 + // Input image (float) + m_Conf.ReconstructionLines * m_Conf.SamplesPerLine * 4 // Output image (float) + > availableMemory - + m_Conf.ReconstructionLines / 2 * m_Conf.SamplesPerLine * 2 - // Delays buffer (unsigned short) + m_Conf.ReconstructionLines * m_Conf.SamplesPerLine * 3 * 2 - // UsedLines buffer (unsigned short) + 100 * 1024; // 100 MB buffer for local data, system purposes etc + --batchSize) + {} + if (batchSize < 1) + { + MITK_ERROR << "device memory too small for GPU beamforming"; + return; + } + MITK_INFO << batchSize; mitk::ImageReadAccessor copy(input); for(unsigned int i = 0; i < batches; ++i) { m_ProgressHandle(input->GetDimension(2)/batches * i, "performing reconstruction"); mitk::Image::Pointer inputBatch = mitk::Image::New(); if(i == batches - 1 && (input->GetDimension(2)%batchSize > 0)) { inputBatch->Initialize(mitk::MakeScalarPixelType(), 3, batchDimLast); m_Conf.inputDim[2] = batchDimLast[2]; } else { inputBatch->Initialize(mitk::MakeScalarPixelType(), 3, batchDim); m_Conf.inputDim[2] = batchDim[2]; } inputBatch->SetSpacing(input->GetGeometry()->GetSpacing()); inputBatch->SetImportVolume(&(((float*)copy.GetData())[input->GetDimension(0) * input->GetDimension(1) * batchSize * i])); m_BeamformingOclFilter->SetApodisation(ApodWindow, m_Conf.apodizationArraySize); m_BeamformingOclFilter->SetConfig(m_Conf); m_BeamformingOclFilter->SetInput(inputBatch); m_BeamformingOclFilter->Update(); void* out = m_BeamformingOclFilter->GetOutput(); for(unsigned int slice = 0; slice < m_Conf.inputDim[2]; ++slice) { output->SetImportSlice( &(((float*)out)[m_Conf.ReconstructionLines * m_Conf.SamplesPerLine * slice]), batchSize * i + slice, 0, 0, mitk::Image::ImportMemoryManagementType::CopyMemory); } } } catch (mitk::Exception &e) { std::string errorMessage = "Caught unexpected exception "; errorMessage.append(e.what()); MITK_ERROR << errorMessage; float* dummyData = new float[m_Conf.ReconstructionLines * m_Conf.SamplesPerLine * m_Conf.inputDim[2]]; output->SetImportVolume(dummyData, 0, 0, mitk::Image::ImportMemoryManagementType::ManageMemory); m_Message = "An openCL error occurred; all GPU operations in this and the next session may be corrupted."; } } #endif m_TimeOfHeaderInitialization.Modified(); auto end = std::chrono::high_resolution_clock::now(); MITK_INFO << "Beamforming of " << output->GetDimension(2) << " Images completed in " << ((float)std::chrono::duration_cast(end - begin).count()) / 1000000 << "ms" << std::endl; } float* mitk::BeamformingFilter::VonHannFunction(int samples) { float* ApodWindow = new float[samples]; for (int n = 0; n < samples; ++n) { ApodWindow[n] = (1 - cos(2 * M_PI * n / (samples - 1))) / 2; } return ApodWindow; } float* mitk::BeamformingFilter::HammFunction(int samples) { float* ApodWindow = new float[samples]; for (int n = 0; n < samples; ++n) { ApodWindow[n] = 0.54 - 0.46*cos(2 * M_PI*n / (samples - 1)); } return ApodWindow; } float* mitk::BeamformingFilter::BoxFunction(int samples) { float* ApodWindow = new float[samples]; for (int n = 0; n < samples; ++n) { ApodWindow[n] = 1; } return ApodWindow; } void mitk::BeamformingFilter::DASQuadraticLine(float* input, float* output, float inputDim[2], float outputDim[2], const short& line, float* apodisation, const short& apodArraySize) { float& inputS = inputDim[1]; float& inputL = inputDim[0]; float& outputS = outputDim[1]; float& outputL = outputDim[0]; short AddSample = 0; short maxLine = 0; short minLine = 0; float delayMultiplicator = 0; float l_i = 0; float s_i = 0; float part = 0.07 * inputL; float tan_phi = std::tan(m_Conf.Angle / 360 * 2 * M_PI); float part_multiplicator = tan_phi * m_Conf.TimeSpacing * m_Conf.SpeedOfSound / m_Conf.Pitch * inputL / m_Conf.TransducerElements; float apod_mult = 1; short usedLines = (maxLine - minLine); //quadratic delay l_i = line / outputL * inputL; for (short sample = 0; sample < outputS; ++sample) { s_i = (float)sample / outputS * inputS / 2; part = part_multiplicator*s_i; if (part < 1) part = 1; maxLine = (short)std::min((l_i + part) + 1, inputL); minLine = (short)std::max((l_i - part), 0.0f); usedLines = (maxLine - minLine); apod_mult = (float)apodArraySize / (float)usedLines; delayMultiplicator = pow((1 / (m_Conf.TimeSpacing*m_Conf.SpeedOfSound) * (m_Conf.Pitch*m_Conf.TransducerElements) / inputL), 2) / s_i / 2; for (short l_s = minLine; l_s < maxLine; ++l_s) { AddSample = delayMultiplicator * pow((l_s - l_i), 2) + s_i + (1 - m_Conf.isPhotoacousticImage)*s_i; if (AddSample < inputS && AddSample >= 0) output[sample*(short)outputL + line] += input[l_s + AddSample*(short)inputL] * apodisation[(short)((l_s - minLine)*apod_mult)]; else --usedLines; } output[sample*(short)outputL + line] = output[sample*(short)outputL + line] / usedLines; } } void mitk::BeamformingFilter::DASSphericalLine(float* input, float* output, float inputDim[2], float outputDim[2], const short& line, float* apodisation, const short& apodArraySize) { float& inputS = inputDim[1]; float& inputL = inputDim[0]; float& outputS = outputDim[1]; float& outputL = outputDim[0]; short AddSample = 0; short maxLine = 0; short minLine = 0; float l_i = 0; float s_i = 0; float part = 0.07 * inputL; float tan_phi = std::tan(m_Conf.Angle / 360 * 2 * M_PI); float part_multiplicator = tan_phi * m_Conf.TimeSpacing * m_Conf.SpeedOfSound / m_Conf.Pitch * inputL / (float)m_Conf.TransducerElements; float apod_mult = 1; short usedLines = (maxLine - minLine); //exact delay l_i = (float)line / outputL * inputL; for (short sample = 0; sample < outputS; ++sample) { s_i = (float)sample / outputS * inputS / 2; part = part_multiplicator*s_i; if (part < 1) part = 1; maxLine = (short)std::min((l_i + part) + 1, inputL); minLine = (short)std::max((l_i - part), 0.0f); usedLines = (maxLine - minLine); apod_mult = (float)apodArraySize / (float)usedLines; for (short l_s = minLine; l_s < maxLine; ++l_s) { AddSample = (int)sqrt( pow(s_i, 2) + pow((1 / (m_Conf.TimeSpacing*m_Conf.SpeedOfSound) * (((float)l_s - l_i)*m_Conf.Pitch*(float)m_Conf.TransducerElements) / inputL), 2) ) + (1 - m_Conf.isPhotoacousticImage)*s_i; if (AddSample < inputS && AddSample >= 0) output[sample*(short)outputL + line] += input[l_s + AddSample*(short)inputL] * apodisation[(short)((l_s - minLine)*apod_mult)]; else --usedLines; } output[sample*(short)outputL + line] = output[sample*(short)outputL + line] / usedLines; } } void mitk::BeamformingFilter::DMASQuadraticLine(float* input, float* output, float inputDim[2], float outputDim[2], const short& line, float* apodisation, const short& apodArraySize) { float& inputS = inputDim[1]; float& inputL = inputDim[0]; float& outputS = outputDim[1]; float& outputL = outputDim[0]; short maxLine = 0; short minLine = 0; float delayMultiplicator = 0; float l_i = 0; float s_i = 0; float part = 0.07 * inputL; float tan_phi = std::tan(m_Conf.Angle / 360 * 2 * M_PI); float part_multiplicator = tan_phi * m_Conf.TimeSpacing * m_Conf.SpeedOfSound / m_Conf.Pitch * inputL / (float)m_Conf.TransducerElements; float apod_mult = 1; float mult = 0; short usedLines = (maxLine - minLine); //quadratic delay l_i = line / outputL * inputL; for (short sample = 0; sample < outputS; ++sample) { s_i = sample / outputS * inputS / 2; part = part_multiplicator*s_i; if (part < 1) part = 1; maxLine = (short)std::min((l_i + part) + 1, inputL); minLine = (short)std::max((l_i - part), 0.0f); usedLines = (maxLine - minLine); apod_mult = (float)apodArraySize / (float)usedLines; delayMultiplicator = pow((1 / (m_Conf.TimeSpacing*m_Conf.SpeedOfSound) * (m_Conf.Pitch*m_Conf.TransducerElements) / inputL), 2) / s_i / 2; //calculate the AddSamples beforehand to save some time short* AddSample = new short[maxLine - minLine]; for (short l_s = 0; l_s < maxLine - minLine; ++l_s) { AddSample[l_s] = (short)(delayMultiplicator * pow((minLine + l_s - l_i), 2) + s_i) + (1 - m_Conf.isPhotoacousticImage)*s_i; } float s_1 = 0; float s_2 = 0; for (short l_s1 = minLine; l_s1 < maxLine - 1; ++l_s1) { if (AddSample[l_s1 - minLine] < inputS && AddSample[l_s1 - minLine] >= 0) { for (short l_s2 = l_s1 + 1; l_s2 < maxLine; ++l_s2) { if (AddSample[l_s2 - minLine] < inputS && AddSample[l_s2 - minLine] >= 0) { s_2 = input[l_s2 + AddSample[l_s2 - minLine] * (short)inputL]; s_1 = input[l_s1 + AddSample[l_s1 - minLine] * (short)inputL]; mult = s_2 * apodisation[(int)((l_s2 - minLine)*apod_mult)] * s_1 * apodisation[(int)((l_s1 - minLine)*apod_mult)]; output[sample*(short)outputL + line] += sqrt(fabs(mult)) * ((mult > 0) - (mult < 0)); } } } else --usedLines; } output[sample*(short)outputL + line] = output[sample*(short)outputL + line] / (float)(pow(usedLines, 2) - (usedLines - 1)); delete[] AddSample; } } void mitk::BeamformingFilter::DMASSphericalLine(float* input, float* output, float inputDim[2], float outputDim[2], const short& line, float* apodisation, const short& apodArraySize) { float& inputS = inputDim[1]; float& inputL = inputDim[0]; float& outputS = outputDim[1]; float& outputL = outputDim[0]; short maxLine = 0; short minLine = 0; float l_i = 0; float s_i = 0; float part = 0.07 * inputL; float tan_phi = std::tan(m_Conf.Angle / 360 * 2 * M_PI); float part_multiplicator = tan_phi * m_Conf.TimeSpacing * m_Conf.SpeedOfSound / m_Conf.Pitch * inputL / (float)m_Conf.TransducerElements; float apod_mult = 1; float mult = 0; short usedLines = (maxLine - minLine); //exact delay l_i = (float)line / outputL * inputL; for (short sample = 0; sample < outputS; ++sample) { s_i = (float)sample / outputS * inputS / 2; part = part_multiplicator*s_i; if (part < 1) part = 1; maxLine = (short)std::min((l_i + part) + 1, inputL); minLine = (short)std::max((l_i - part), 0.0f); usedLines = (maxLine - minLine); apod_mult = (float)apodArraySize / (float)usedLines; //calculate the AddSamples beforehand to save some time short* AddSample = new short[maxLine - minLine]; for (short l_s = 0; l_s < maxLine - minLine; ++l_s) { AddSample[l_s] = (short)sqrt( pow(s_i, 2) + pow((1 / (m_Conf.TimeSpacing*m_Conf.SpeedOfSound) * (((float)minLine + (float)l_s - l_i)*m_Conf.Pitch*(float)m_Conf.TransducerElements) / inputL), 2) ) + (1 - m_Conf.isPhotoacousticImage)*s_i; } float s_1 = 0; float s_2 = 0; for (short l_s1 = minLine; l_s1 < maxLine - 1; ++l_s1) { if (AddSample[l_s1 - minLine] < inputS && AddSample[l_s1 - minLine] >= 0) { for (short l_s2 = l_s1 + 1; l_s2 < maxLine; ++l_s2) { if (AddSample[l_s2 - minLine] < inputS && AddSample[l_s2 - minLine] >= 0) { s_2 = input[l_s2 + AddSample[l_s2 - minLine] * (short)inputL]; s_1 = input[l_s1 + AddSample[l_s1 - minLine] * (short)inputL]; mult = s_2 * apodisation[(int)((l_s2 - minLine)*apod_mult)] * s_1 * apodisation[(int)((l_s1 - minLine)*apod_mult)]; output[sample*(short)outputL + line] += sqrt(fabs(mult)) * ((mult > 0) - (mult < 0)); } } } else --usedLines; } output[sample*(short)outputL + line] = output[sample*(short)outputL + line] / (float)(pow(usedLines, 2) - (usedLines - 1)); delete[] AddSample; } } void mitk::BeamformingFilter::sDMASQuadraticLine(float* input, float* output, float inputDim[2], float outputDim[2], const short& line, float* apodisation, const short& apodArraySize) { float& inputS = inputDim[1]; float& inputL = inputDim[0]; float& outputS = outputDim[1]; float& outputL = outputDim[0]; short maxLine = 0; short minLine = 0; float delayMultiplicator = 0; float l_i = 0; float s_i = 0; float part = 0.07 * inputL; float tan_phi = std::tan(m_Conf.Angle / 360 * 2 * M_PI); float part_multiplicator = tan_phi * m_Conf.TimeSpacing * m_Conf.SpeedOfSound / m_Conf.Pitch * inputL / (float)m_Conf.TransducerElements; float apod_mult = 1; float mult = 0; short usedLines = (maxLine - minLine); //quadratic delay l_i = line / outputL * inputL; for (short sample = 0; sample < outputS; ++sample) { s_i = sample / outputS * inputS / 2; part = part_multiplicator*s_i; if (part < 1) part = 1; maxLine = (short)std::min((l_i + part) + 1, inputL); minLine = (short)std::max((l_i - part), 0.0f); usedLines = (maxLine - minLine); apod_mult = (float)apodArraySize / (float)usedLines; delayMultiplicator = pow((1 / (m_Conf.TimeSpacing*m_Conf.SpeedOfSound) * (m_Conf.Pitch*m_Conf.TransducerElements) / inputL), 2) / s_i / 2; //calculate the AddSamples beforehand to save some time short* AddSample = new short[maxLine - minLine]; for (short l_s = 0; l_s < maxLine - minLine; ++l_s) { AddSample[l_s] = (short)(delayMultiplicator * pow((minLine + l_s - l_i), 2) + s_i) + (1 - m_Conf.isPhotoacousticImage)*s_i; } float s_1 = 0; float s_2 = 0; float sign = 0; for (short l_s1 = minLine; l_s1 < maxLine - 1; ++l_s1) { if (AddSample[l_s1 - minLine] < inputS && AddSample[l_s1 - minLine] >= 0) { s_1 = input[l_s1 + AddSample[l_s1 - minLine] * (short)inputL]; sign += s_1; for (short l_s2 = l_s1 + 1; l_s2 < maxLine; ++l_s2) { if (AddSample[l_s2 - minLine] < inputS && AddSample[l_s2 - minLine] >= 0) { s_2 = input[l_s2 + AddSample[l_s2 - minLine] * (short)inputL]; mult = s_2 * apodisation[(int)((l_s2 - minLine)*apod_mult)] * s_1 * apodisation[(int)((l_s1 - minLine)*apod_mult)]; output[sample*(short)outputL + line] += sqrt(fabs(mult)) * ((mult > 0) - (mult < 0)); } } } else --usedLines; } output[sample*(short)outputL + line] = output[sample*(short)outputL + line] / (float)(pow(usedLines, 2) - (usedLines - 1)) * ((sign > 0) - (sign < 0)); delete[] AddSample; } } void mitk::BeamformingFilter::sDMASSphericalLine(float* input, float* output, float inputDim[2], float outputDim[2], const short& line, float* apodisation, const short& apodArraySize) { float& inputS = inputDim[1]; float& inputL = inputDim[0]; float& outputS = outputDim[1]; float& outputL = outputDim[0]; short maxLine = 0; short minLine = 0; float l_i = 0; float s_i = 0; float part = 0.07 * inputL; float tan_phi = std::tan(m_Conf.Angle / 360 * 2 * M_PI); float part_multiplicator = tan_phi * m_Conf.TimeSpacing * m_Conf.SpeedOfSound / m_Conf.Pitch * inputL / (float)m_Conf.TransducerElements; float apod_mult = 1; float mult = 0; short usedLines = (maxLine - minLine); //exact delay l_i = (float)line / outputL * inputL; for (short sample = 0; sample < outputS; ++sample) { s_i = (float)sample / outputS * inputS / 2; part = part_multiplicator*s_i; if (part < 1) part = 1; maxLine = (short)std::min((l_i + part) + 1, inputL); minLine = (short)std::max((l_i - part), 0.0f); usedLines = (maxLine - minLine); apod_mult = (float)apodArraySize / (float)usedLines; //calculate the AddSamples beforehand to save some time short* AddSample = new short[maxLine - minLine]; for (short l_s = 0; l_s < maxLine - minLine; ++l_s) { AddSample[l_s] = (short)sqrt( pow(s_i, 2) + pow((1 / (m_Conf.TimeSpacing*m_Conf.SpeedOfSound) * (((float)minLine + (float)l_s - l_i)*m_Conf.Pitch*(float)m_Conf.TransducerElements) / inputL), 2) ) + (1 - m_Conf.isPhotoacousticImage)*s_i; } float s_1 = 0; float s_2 = 0; float sign = 0; for (short l_s1 = minLine; l_s1 < maxLine - 1; ++l_s1) { if (AddSample[l_s1 - minLine] < inputS && AddSample[l_s1 - minLine] >= 0) { s_1 = input[l_s1 + AddSample[l_s1 - minLine] * (short)inputL]; sign += s_1; for (short l_s2 = l_s1 + 1; l_s2 < maxLine; ++l_s2) { if (AddSample[l_s2 - minLine] < inputS && AddSample[l_s2 - minLine] >= 0) { s_2 = input[l_s2 + AddSample[l_s2 - minLine] * (short)inputL]; mult = s_2 * apodisation[(int)((l_s2 - minLine)*apod_mult)] * s_1 * apodisation[(int)((l_s1 - minLine)*apod_mult)]; output[sample*(short)outputL + line] += sqrt(fabs(mult)) * ((mult > 0) - (mult < 0)); } } } else --usedLines; } output[sample*(short)outputL + line] = output[sample*(short)outputL + line] / (float)(pow(usedLines, 2) - (usedLines - 1)) * ((sign > 0) - (sign < 0)); delete[] AddSample; } }