hpp/doc/spectral_utils_c_u_d_a_8h_source.html

 #ifndef HPP_SPECTRAL_UTILS_CUDA_H
 #define HPP_SPECTRAL_UTILS_CUDA_H

 #include <hpp/config.h>
 HPP_CHECK_CUDA_ENABLED_BUILD
 #include <hpp/crystal.h>
 #include <hpp/cudaUtils.h>

 namespace hpp
 {

 /**
  * @brief Plain aggregate pairing N unsigned integer coordinates with two
  * values of type T.
  *
  * NOTE(review): nothing in the visible part of this header refers to this
  * struct. The two coeff entries presumably hold the real and imaginary parts
  * of one spectral coefficient -- confirm against callers.
  *
  * @tparam T value type of the coefficient entries
  * @tparam N number of coordinate entries
  */
 template<typename T, unsigned int N>
 struct SpectralDataCUDA {
     unsigned int coords[N];
     T coeff[2];
 };

 /**
  * @brief Fixed-size set of N unsigned integer coordinates, usable from both
  * host and device code.
  *
  * No bounds checking is performed by any accessor; callers must pass i < N.
  *
  * @tparam N number of coordinate entries
  */
 template<unsigned int N>
 class SpectralCoordCUDA {
 public:
     /// Default constructor. The coordinate values are left uninitialised.
     __host__ __device__ SpectralCoordCUDA(){;}

     /// Returns a copy of the i-th coordinate.
     __host__ __device__ unsigned int getVal(const unsigned int i) const {
         return data[i];
     }

     /// Mutable access to the i-th coordinate.
     __host__ __device__ unsigned int& operator()(const unsigned int i) {
         return data[i];
     }

     /// Read-only access to the i-th coordinate, so that const objects and
     /// const references can use the same call syntax as mutable ones.
     __host__ __device__ unsigned int operator()(const unsigned int i) const {
         return data[i];
     }

 private:
     unsigned int data[N];
 };

 /**
  * @brief One spectral coefficient stored as two values of type T.
  *
  * In the device IDFT routines of this header, re is multiplied by the second
  * output of sincosIntr and im by the first, and both products are added to
  * the running sum (presumably cosine and sine respectively, if sincosIntr
  * follows the sincos(x, &sin, &cos) argument order -- confirm in cudaUtils.h).
  *
  * @tparam T value type of both components
  */
 template<typename T>
 struct SpectralCoeffCUDA {
     T re;
     T im;
 };

 /**
  * @brief Descriptor of one dataset: parallel arrays of coefficients and
  * coordinates plus the number of terms.
  *
  * The device IDFT routines in this header index coeffs[i] and coords[i] for
  * i in [0, nTerms), so both pointers must be dereferenceable from device code
  * and refer to at least nTerms elements.
  *
  * @tparam T value type of the coefficients
  * @tparam N number of coordinate entries per term
  */
 template<typename T, unsigned int N>
 struct SpectralDatasetCUDA {
     SpectralCoeffCUDA<T> *coeffs;
     SpectralCoordCUDA<N> *coords;
     unsigned int nTerms;
 };

 /**
  * @brief Collection of spectral datasets with device-side routines that
  * evaluate a real-valued inverse DFT at a given spatial coordinate.
  *
  * The constructors and getIDFTRealH are only declared here; their
  * definitions are not part of this header as shown. The raw pointer members
  * are the ones dereferenced by the device routines; each is paired with a
  * std::shared_ptr member (presumably owning the same allocation -- confirm in
  * the constructor definition).
  *
  * @tparam T value type of coefficients and grid parameters
  * @tparam N number of coordinate entries per term
  */
 template<typename T, unsigned int N>
 class SpectralDatabaseCUDA
 {
 public:
     SpectralDatabaseCUDA();
     SpectralDatabaseCUDA(const SpectralDatabase<T>& dbIn, const std::vector<SpectralDatasetID>& dsetIDs);
     // Device IDFT reading every term through the dataset's own pointers
     __device__ T getIDFTRealD(unsigned int dsetIdx, unsigned int *spatialCoord) const;
     // Device IDFT staging terms through caller-provided scratch buffers;
     // contains __syncthreads(), so it must be called by the whole block
     __device__ T getIDFTRealDShared(unsigned int dsetIdx, unsigned int *spatialCoord, unsigned int nShared, SpectralCoordCUDA<N> *sharedCoords, SpectralCoeffCUDA<T> *sharedCoeffs) const;
     // Host counterpart (no __device__ qualifier); defined outside this header
     T getIDFTRealH(unsigned int dsetIdx, std::vector<unsigned int> spatialCoord) const;

     // Getters
     // The pointer getters are device-only; the count getters are host-only
     __device__ T* getGridStarts() {return gridStarts;}
     __device__ T* getGridSteps() {return gridSteps;}
     __device__ unsigned int * getGridDims() {return gridDims;}
     unsigned int getNDsets() const {return nDsets;}
     unsigned int getNTermsTypical() const {return nTermsTypical;}

 protected:

 private:
     // Grid dimensions
     // Only element 0 is read by the device IDFT routines in this header
     unsigned int *gridDims;
     std::shared_ptr<unsigned int> gridDimsSharedPtr;

     // Grid spatial parameters
     T *gridStarts;
     std::shared_ptr<T> gridStartsSharedPtr;
     T *gridSteps;
     std::shared_ptr<T> gridStepsSharedPtr;

     // Number of datasets
     unsigned int nDsets;

     // Number of terms in a typical dataset
     unsigned int nTermsTypical;

     // Spectral data
     // Indexed by dsetIdx in the device IDFT routines
     SpectralDatasetCUDA<T,N> *dsets;

     // Shared pointers to assure correct copying and destruction
     std::shared_ptr<SpectralDatasetCUDA<T,N>> dsetsSharedPtr;
     std::vector<std::shared_ptr<SpectralCoordCUDA<N>>> coordSharedPtrs;
     std::vector<std::shared_ptr<SpectralCoeffCUDA<T>>> coeffSharedPtrs;
 };

 /**
  * @brief Device-side real-valued inverse DFT for one dataset, reading every
  * term directly through the dataset's coords/coeffs pointers.
  *
  * For each term the integer index sum_j spatialCoord[j]*coords[i](j) is
  * formed, masked with gridDims[0]-1, scaled by 2*pi/gridDims[0] and passed to
  * sincosIntr; the two results are combined with the term's re and im parts
  * through two fmaIntr calls. sincosIntr and fmaIntr are defined outside this
  * header (presumably sincos/fma wrappers -- confirm in cudaUtils.h).
  *
  * The mask is only equivalent to a modulo when gridDims[0] is a power of two,
  * and only gridDims[0] is consulted for all N coordinate directions.
  *
  * @param dsetIdx      index into the dsets array (not range checked)
  * @param spatialCoord array of at least N spatial indices
  * @return the accumulated sum over all terms of the dataset
  */
 template <typename T, unsigned int N>
 __device__ T SpectralDatabaseCUDA<T,N>::getIDFTRealD(unsigned int dsetIdx, unsigned int *spatialCoord) const {
     // Pull the dataset descriptor and the grid size into locals
     const SpectralDatasetCUDA<T,N> dset = dsets[dsetIdx];
     SpectralCoordCUDA<N> *termCoords = dset.coords;
     SpectralCoeffCUDA<T> *termCoeffs = dset.coeffs;
     const unsigned int termCount = dset.nTerms;
     const unsigned int gridSize = gridDims[0];

     // Phase increment per unit of the range-reduced exponential index
     const T phaseStep = 2*((T)M_PI)/gridSize;

     // Mask for range reduction; a true modulo only for power-of-two grids.
     // Doing the reduction on the integer index is a significant (~20%)
     // saving compared with having the intrinsic trig functions do it.
     const unsigned int wrapMask = gridSize-1;

     // Running sum
     T total = 0;

     unsigned int term = 0;
     while (term < termCount) {
         // Integer exponential index for this term
         unsigned int phaseIdx = 0;
         for (unsigned int axis=0; axis<N; ++axis) {
             phaseIdx += spatialCoord[axis]*termCoords[term](axis);
         }
         phaseIdx &= wrapMask;

         // Complex exponential of the reduced phase
         const T phase = phaseIdx*phaseStep;
         T sinPart, cosPart;
         sincosIntr(phase, &sinPart, &cosPart);

         // Accumulate the contribution of this term
         total = fmaIntr(termCoeffs[term].re, cosPart, total);
         total = fmaIntr(termCoeffs[term].im, sinPart, total);

         ++term;
     }

     return total;
 }

 /**
  * @brief Device-side real-valued inverse DFT for one dataset, staging the
  * dataset's terms through caller-provided scratch buffers in chunks of
  * nShared terms.
  *
  * The threads of a block cooperatively copy each chunk (thread t copies
  * entries t, t+blockDim.x, ...), synchronise, and then every thread
  * accumulates all terms of the chunk for its own spatialCoord. Only
  * threadIdx.x and blockDim.x are used for the work split.
  *
  * Because the body contains __syncthreads() inside a loop whose trip count
  * depends on dsetIdx and nShared, every thread of the block must call this
  * function with the same dsetIdx and nShared.
  *
  * The range reduction uses a bit mask of gridDims[0]-1, which is only
  * equivalent to a modulo when gridDims[0] is a power of two.
  *
  * @param dsetIdx      index into the dsets array (not range checked)
  * @param spatialCoord array of at least N spatial indices for this thread
  * @param nShared      capacity, in terms, of sharedCoords and sharedCoeffs.
  *                     When zero there is nothing to stage through, so the
  *                     result is computed by getIDFTRealD instead (this also
  *                     avoids the division by zero the chunking arithmetic
  *                     would otherwise perform)
  * @param sharedCoords scratch buffer for nShared coordinates, visible to all
  *                     threads of the block
  * @param sharedCoeffs scratch buffer for nShared coefficients, visible to all
  *                     threads of the block
  * @return the accumulated sum over all terms of the dataset
  */
 template <typename T, unsigned int N>
 __device__ T SpectralDatabaseCUDA<T,N>::getIDFTRealDShared(unsigned int dsetIdx, unsigned int *spatialCoord, unsigned int nShared, SpectralCoordCUDA<N> *sharedCoords, SpectralCoeffCUDA<T> *sharedCoeffs) const {
     // No scratch capacity: compute straight from the dataset's own arrays.
     // nShared must be uniform across the block, so this return is uniform too.
     if (nShared == 0) {
         return this->getIDFTRealD(dsetIdx, spatialCoord);
     }

     // Get correct dataset
     SpectralDatasetCUDA<T,N> dset = dsets[dsetIdx];

     // Dataset size
     unsigned int nTerms = dset.nTerms;

     // Commonly used global memory into registers
     const unsigned int gridDimReg = gridDims[0];

     // Initialise value
     T val = 0;

     // Exponential argument common factor
     T expArgFactor = 2*((T)M_PI)/gridDimReg;

     // Read into the scratch buffers as a block
     // termsPerLargeRead is the number of terms staged per large read
     // nReads is the number of large reads needed to cover the dataset
     // (ceiling division)
     unsigned int termsPerLargeRead = nShared;
     unsigned int nReads = nTerms/termsPerLargeRead;
     if (nTerms % termsPerLargeRead != 0) nReads++;

     // termsPerBlockRead is the number of values that are read when every thread
     // in the block does a single read
     unsigned int termsPerBlockRead = blockDim.x;

     // Loop by large reads
     for (unsigned int iRead=0; iRead<nReads; iRead++) {
         // Stage this chunk: each thread copies a strided subset of the terms
         unsigned int readStartGlobal = iRead*termsPerLargeRead+threadIdx.x;
         unsigned int readEndGlobal = umin((iRead+1)*termsPerLargeRead, nTerms);
         for (unsigned int readIdxGlobal = readStartGlobal; readIdxGlobal<readEndGlobal; readIdxGlobal+=termsPerBlockRead) {
             unsigned int readIdxShared = readIdxGlobal%termsPerLargeRead;
             sharedCoords[readIdxShared] = dset.coords[readIdxGlobal];
             sharedCoeffs[readIdxShared] = dset.coeffs[readIdxGlobal];
         }

         // Sync after read: the whole chunk must be staged before anyone uses it
         __syncthreads();

         // Compute available terms (the last chunk may be partial)
         unsigned int termsStartGlobal = iRead*termsPerLargeRead;
         unsigned int termsEndGlobal = umin(termsStartGlobal+termsPerLargeRead, nTerms);
         unsigned int termsStartShared = 0;
         unsigned int termsEndShared = termsEndGlobal-termsStartGlobal;
         for (unsigned int i=termsStartShared; i<termsEndShared; i++) {
             // Get exponential index
             unsigned int expInd = spatialCoord[0]*sharedCoords[i](0);
             for (unsigned int j=1; j<N; j++) {
                 expInd += spatialCoord[j]*sharedCoords[i](j);
             }

             // Range reduce exponential index.
             expInd = expInd&(gridDimReg-1);// Optimisation for gridDims[0] a power of two

             // Get complex exponential
             T expArg = expInd*expArgFactor;
             T expVal[2];
             sincosIntr(expArg, &(expVal[1]), &(expVal[0]));

             // Add real part of term
             val = fmaIntr(sharedCoeffs[i].re, expVal[0], val);
             val = fmaIntr(sharedCoeffs[i].im, expVal[1], val);
         }

         // Sync after compute: nobody may overwrite the chunk while it is in use
         __syncthreads();
     }

     // Return
     return val;
 }

 /**
  * @brief Kernel wrapper around SpectralDatabaseCUDA::getIDFTRealD.
  *
  * Every launched thread evaluates the same IDFT and stores it to the same
  * location *val; no thread or block index is consulted.
  * NOTE(review): presumably meant to be launched with a single thread --
  * confirm against the call sites.
  *
  * @param db           database to query; dereferenced on the device
  * @param dsetIdx      dataset index forwarded to getIDFTRealD
  * @param spatialCoord spatial indices forwarded to getIDFTRealD
  * @param val          location receiving the result
  */
 template <typename T, unsigned int N>
 __global__ void GET_IDFT_REAL(SpectralDatabaseCUDA<T,N> *db, unsigned int dsetIdx, unsigned int *spatialCoord, T *val) {
     const T idftValue = db->getIDFTRealD(dsetIdx, spatialCoord);
     *val = idftValue;
 }

 // SPECTRAL DATABASE UNIFIED //

 /**
  * @brief One spectral term of a unified database: a single coordinate shared
  * by P coefficients, one per dataset.
  *
  * The unified IDFT routines in this header evaluate the complex exponential
  * once per term from coord and apply it to coeffs[0..P-1]. They also copy
  * arrays of this struct as arrays of int, so sizeof of this struct must be a
  * multiple of sizeof(int) (presumably guaranteed by ALIGN(16), which is
  * defined outside this header -- confirm).
  *
  * @tparam T value type of the coefficients
  * @tparam N number of coordinate entries
  * @tparam P number of coefficients (datasets) per term
  */
 template<typename T, unsigned int N, unsigned int P>
 struct ALIGN(16) SpectralDataUnifiedCUDA {
     SpectralCoordCUDA<N> coord;
     SpectralCoeffCUDA<T> coeffs[P];
 };

 /**
  * @brief Spectral database in which all P datasets share one list of
  * coordinates, with device-side routines that evaluate the real-valued
  * inverse DFT of every dataset at once.
  *
  * The constructors are only declared here; their definitions are not part of
  * this header as shown. The raw pointer members are the ones dereferenced by
  * the device routines; each is paired with a std::shared_ptr member
  * (presumably owning the same allocation -- confirm in the constructor
  * definition).
  *
  * @tparam T value type of coefficients and grid parameters
  * @tparam N number of coordinate entries per term
  * @tparam P number of datasets stored per term
  */
 template<typename T, unsigned int N, unsigned int P>
 class SpectralDatabaseUnifiedCUDA
 {
 public:
     SpectralDatabaseUnifiedCUDA();
     SpectralDatabaseUnifiedCUDA(const SpectralDatabaseUnified<T>& dbIn, const std::vector<SpectralDatasetID>& dsetIDs);
     // Device IDFT of all P datasets at one spatial coordinate; contains
     // __syncthreads(), so it must be called by the whole block
     __device__ void getIDFTRealDShared(unsigned int *spatialCoord, T *outputs, unsigned int nShared, SpectralDataUnifiedCUDA<T,N,P> *sharedData) const;
     // As above, but for two spatial coordinates per staged term
     __device__ void getIDFTRealDSharedPair(unsigned int *spatialCoord0, T *outputs0, unsigned int *spatialCoord1, T *outputs1, unsigned int nShared, SpectralDataUnifiedCUDA<T,N,P> *sharedData) const;
     // Getters
     // The pointer getters are device-only; the count getters are host-only
     __device__ T* getGridStarts() {return gridStarts;}
     __device__ T* getGridSteps() {return gridSteps;}
     __device__ unsigned int * getGridDims() {return gridDims;}
     unsigned int getNDsets() const {return nDsets;}
     unsigned int getNTerms() const {return nTerms;}

 protected:

 private:
     // Grid dimensions
     // Only element 0 is read by the device IDFT routines in this header
     unsigned int *gridDims;
     std::shared_ptr<unsigned int> gridDimsSharedPtr;

     // Grid spatial parameters
     T *gridStarts;
     std::shared_ptr<T> gridStartsSharedPtr;
     T *gridSteps;
     std::shared_ptr<T> gridStepsSharedPtr;

     // Number of datasets
     unsigned int nDsets;

     // Number of terms
     unsigned int nTerms;

     // Spectral data
     // Array of nTerms entries read by the device IDFT routines
     SpectralDataUnifiedCUDA<T,N,P> *data;

     // Shared pointers to assure correct copying and destruction
     std::shared_ptr<SpectralDataUnifiedCUDA<T,N,P>> dataSharedPtr;
 };

 /**
  * @brief Evaluates the complex exponential factor of one spectral term at one
  * spatial coordinate.
  *
  * Forms the integer index sum_j spatialCoord[j]*coord(j), masks it with
  * gridDim-1, multiplies by expArgFactor and hands the result to sincosIntr.
  * The mask is only equivalent to a modulo when gridDim is a power of two.
  * Entry 0 of both inputs is read unconditionally, so N must be at least 1.
  *
  * Note that the parameter gridDim hides the CUDA built-in variable of the
  * same name inside this function.
  *
  * @param spatialCoord array of at least N spatial indices
  * @param coord        spectral coordinate of the term
  * @param gridDim      grid size used for the range-reduction mask
  * @param expArgFactor factor converting the reduced index into the argument
  *                     passed to sincosIntr
  * @param expValRe     receives the second output of sincosIntr (presumably
  *                     the cosine -- sincosIntr is defined outside this header)
  * @param expValIm     receives the first output of sincosIntr (presumably
  *                     the sine)
  */
 template <typename T, unsigned int N>
 __device__ void getExpVal(unsigned int *spatialCoord, SpectralCoordCUDA<N>& coord, unsigned int gridDim, T expArgFactor, T* expValRe, T* expValIm) {
     // Integer exponential index, seeded with the first product
     unsigned int phaseIdx = spatialCoord[0]*coord(0);
     unsigned int axis = 1;
     while (axis < N) {
         phaseIdx += spatialCoord[axis]*coord(axis);
         ++axis;
     }

     // Range reduction on the integer index. This is a significant saving
     // compared with having the intrinsic trig functions do it.
     phaseIdx &= gridDim-1;// Optimisation for gridDim a power of two

     // Complex exponential of the reduced phase
     const T phase = phaseIdx*expArgFactor;
     sincosIntr(phase, expValIm, expValRe);
 }

 /**
  * @brief Device-side real-valued inverse DFT of all P datasets at one spatial
  * coordinate, staging the unified term data through a caller-provided scratch
  * buffer in chunks of nShared terms.
  *
  * The threads of a block cooperatively copy each chunk as a flat array of
  * int (thread t copies elements t, t+blockDim.x, ...), synchronise, and then
  * every thread accumulates all terms of the chunk for its own spatialCoord.
  * Only threadIdx.x and blockDim.x are used for the work split.
  *
  * Because the body contains __syncthreads() inside a loop whose trip count
  * depends on nShared, every thread of the block must call this function with
  * the same nShared.
  *
  * The range reduction uses a bit mask of gridDims[0]-1, which is only
  * equivalent to a modulo when gridDims[0] is a power of two.
  *
  * @param spatialCoord array of at least N spatial indices for this thread
  * @param outputs      array of at least P values; overwritten with the result
  *                     for each dataset
  * @param nShared      capacity, in terms, of sharedData. When zero there is
  *                     nothing to stage through, so the terms are read
  *                     directly from the database's data array (this also
  *                     avoids the division by zero the chunking arithmetic
  *                     would otherwise perform)
  * @param sharedData   scratch buffer for nShared terms, visible to all
  *                     threads of the block
  */
 template <typename T, unsigned int N, unsigned int P>
 __device__ void SpectralDatabaseUnifiedCUDA<T,N,P>::getIDFTRealDShared(unsigned int *spatialCoord, T *outputs, unsigned int nShared, SpectralDataUnifiedCUDA<T,N,P> *sharedData) const {
     // Commonly used global memory into registers
     const unsigned int gridDimReg = gridDims[0];
     const unsigned int nTermsReg = nTerms;

     // Exponential argument common factor
     T expArgFactor = 2*((T)M_PI)/gridDimReg;

     // Initial values
     for (unsigned int i=0; i<P; i++) {
         outputs[i] = (T)0.0;
     }

     // No scratch capacity: accumulate straight from the database's own array.
     // nShared must be uniform across the block, so this return is uniform too.
     if (nShared == 0) {
         for (unsigned int i=0; i<nTermsReg; i++) {
             T expValRe, expValIm;
             getExpVal(spatialCoord, data[i].coord, gridDimReg, expArgFactor, &expValRe, &expValIm);
             for (unsigned int iDset = 0; iDset<P; iDset++) {
                 outputs[iDset] = fmaIntr(data[i].coeffs[iDset].re, expValRe, outputs[iDset]);
                 outputs[iDset] = fmaIntr(data[i].coeffs[iDset].im, expValIm, outputs[iDset]);
             }
         }
         return;
     }

     // The data is interpreted as ints for the purposes of coalesced
     // reading from global memory to shared memory
     int *globalDataAsInt = (int*)data;
     int *sharedDataAsInt = (int*)sharedData;
     unsigned int readElementSize = sizeof(int);

     // Read into the scratch buffer as a block
     // termsPerLargeRead is the number of terms staged per large read
     // nReads is the number of large reads needed to cover all terms
     // (ceiling division)
     unsigned int termsPerLargeRead = nShared;
     unsigned int elementsPerLargeRead = termsPerLargeRead*sizeof(SpectralDataUnifiedCUDA<T,N,P>)/readElementSize;
     unsigned int totalElementsToRead = nTermsReg*sizeof(SpectralDataUnifiedCUDA<T,N,P>)/readElementSize;
     unsigned int nReads = nTermsReg/termsPerLargeRead;
     if (nTermsReg % termsPerLargeRead != 0) nReads++;

     // elementsPerBlockRead is the number of values that are read when every thread
     // in the block does a single read
     unsigned int elementsPerBlockRead = blockDim.x;

     // Loop by large reads
     for (unsigned int iRead=0; iRead<nReads; iRead++) {
         // Stage this chunk: each thread copies a strided subset of the ints
         unsigned int dataReadStartGlobal = iRead*elementsPerLargeRead+threadIdx.x;
         unsigned int dataReadEndGlobal = umin((iRead+1)*elementsPerLargeRead, totalElementsToRead);
         for (unsigned int readIdxGlobal = dataReadStartGlobal; readIdxGlobal<dataReadEndGlobal; readIdxGlobal+=elementsPerBlockRead) {
             unsigned int readIdxShared = readIdxGlobal%elementsPerLargeRead;
             sharedDataAsInt[readIdxShared] = globalDataAsInt[readIdxGlobal];
         }

         // Sync after read: the whole chunk must be staged before anyone uses it
         __syncthreads();

         // Range of terms to compute (the last chunk may be partial)
         unsigned int termsStartGlobal = iRead*termsPerLargeRead;
         unsigned int termsEndGlobal = umin(termsStartGlobal+termsPerLargeRead, nTermsReg);
         unsigned int termsStartShared = 0;
         unsigned int termsEndShared = termsEndGlobal-termsStartGlobal;

         // Compute terms
         for (unsigned int i=termsStartShared; i<termsEndShared; i++) {
             // Get exponential index
             unsigned int expInd = spatialCoord[0]*sharedData[i].coord(0);
             for (unsigned int j=1; j<N; j++) {
                 expInd += spatialCoord[j]*sharedData[i].coord(j);
             }

             // Range reduce exponential index. This is a significant saving
             // compared with having the intrinsic trig functions do it.
             expInd = expInd&(gridDimReg-1);// Optimisation for gridDims[0] a power of two

             // Get complex exponential
             T expArg = expInd*expArgFactor;
             T expVal[2];
             sincosIntr(expArg, &(expVal[1]), &(expVal[0]));

             // Update values: one exponential serves all P datasets
             for (unsigned int iDset = 0; iDset<P; iDset++) {
                 outputs[iDset] = fmaIntr(sharedData[i].coeffs[iDset].re, expVal[0], outputs[iDset]);
                 outputs[iDset] = fmaIntr(sharedData[i].coeffs[iDset].im, expVal[1], outputs[iDset]);
             }
         }

         // Sync after compute: nobody may overwrite the chunk while it is in use
         __syncthreads();
     }
 }

 /**
  * @brief Device-side real-valued inverse DFT of all P datasets at two spatial
  * coordinates, so that each staged term is used twice.
  *
  * The term data is staged through a caller-provided scratch buffer in chunks
  * of nShared terms: the threads of a block cooperatively copy each chunk as a
  * flat array of int (thread t copies elements t, t+blockDim.x, ...),
  * synchronise, and then every thread accumulates all terms of the chunk for
  * both of its spatial coordinates. Only threadIdx.x and blockDim.x are used
  * for the work split.
  *
  * Because the body contains __syncthreads() inside a loop whose trip count
  * depends on nShared, every thread of the block must call this function with
  * the same nShared.
  *
  * The range reduction inside getExpVal uses a bit mask of gridDims[0]-1,
  * which is only equivalent to a modulo when gridDims[0] is a power of two.
  *
  * @param spatialCoord0 array of at least N spatial indices (first point)
  * @param outputs0      array of at least P values; overwritten with the
  *                      results for the first point
  * @param spatialCoord1 array of at least N spatial indices (second point)
  * @param outputs1      array of at least P values; overwritten with the
  *                      results for the second point
  * @param nShared       capacity, in terms, of sharedData. When zero there is
  *                      nothing to stage through, so the terms are read
  *                      directly from the database's data array (this also
  *                      avoids the division by zero the chunking arithmetic
  *                      would otherwise perform)
  * @param sharedData    scratch buffer for nShared terms, visible to all
  *                      threads of the block
  */
 template <typename T, unsigned int N, unsigned int P>
 __device__ void SpectralDatabaseUnifiedCUDA<T,N,P>::getIDFTRealDSharedPair(unsigned int *spatialCoord0, T *outputs0, unsigned int *spatialCoord1, T *outputs1, unsigned int nShared, SpectralDataUnifiedCUDA<T,N,P> *sharedData) const {
     // Commonly used global memory into registers
     const unsigned int gridDimReg = gridDims[0];
     const unsigned int nTermsReg = nTerms;

     // Exponential argument common factor
     T expArgFactor = 2*((T)M_PI)/gridDimReg;

     // Initial values
     for (unsigned int i=0; i<P; i++) {
         outputs0[i] = (T)0.0;
         outputs1[i] = (T)0.0;
     }

     // No scratch capacity: accumulate straight from the database's own array.
     // nShared must be uniform across the block, so this return is uniform too.
     if (nShared == 0) {
         for (unsigned int i=0; i<nTermsReg; i++) {
             // Read in coordinate and coefficients
             SpectralDataUnifiedCUDA<T,N,P> unifiedData = data[i];

             // FIRST SPATIAL COORDINATE
             T expValRe, expValIm;
             getExpVal(spatialCoord0, unifiedData.coord, gridDimReg, expArgFactor, &expValRe, &expValIm);
             for (unsigned int iDset = 0; iDset<P; iDset++) {
                 outputs0[iDset] = fmaIntr(unifiedData.coeffs[iDset].re, expValRe, outputs0[iDset]);
                 outputs0[iDset] = fmaIntr(unifiedData.coeffs[iDset].im, expValIm, outputs0[iDset]);
             }

             // SECOND SPATIAL COORDINATE
             getExpVal(spatialCoord1, unifiedData.coord, gridDimReg, expArgFactor, &expValRe, &expValIm);
             for (unsigned int iDset = 0; iDset<P; iDset++) {
                 outputs1[iDset] = fmaIntr(unifiedData.coeffs[iDset].re, expValRe, outputs1[iDset]);
                 outputs1[iDset] = fmaIntr(unifiedData.coeffs[iDset].im, expValIm, outputs1[iDset]);
             }
         }
         return;
     }

     // The data is interpreted as ints for the purposes of coalesced
     // reading from global memory to shared memory
     int *globalDataAsInt = (int*)data;
     int *sharedDataAsInt = (int*)sharedData;
     unsigned int readElementSize = sizeof(int);

     // Read into the scratch buffer as a block
     // termsPerLargeRead is the number of terms staged per large read
     // nReads is the number of large reads needed to cover all terms
     // (ceiling division)
     unsigned int termsPerLargeRead = nShared;
     unsigned int elementsPerLargeRead = termsPerLargeRead*sizeof(SpectralDataUnifiedCUDA<T,N,P>)/readElementSize;
     unsigned int totalElementsToRead = nTermsReg*sizeof(SpectralDataUnifiedCUDA<T,N,P>)/readElementSize;
     unsigned int nReads = nTermsReg/termsPerLargeRead;
     if (nTermsReg % termsPerLargeRead != 0) nReads++;

     // elementsPerBlockRead is the number of values that are read when every thread
     // in the block does a single read
     unsigned int elementsPerBlockRead = blockDim.x;

     // Loop by large reads
     for (unsigned int iRead=0; iRead<nReads; iRead++) {
         // Stage this chunk: each thread copies a strided subset of the ints
         unsigned int dataReadStartGlobal = iRead*elementsPerLargeRead+threadIdx.x;
         unsigned int dataReadEndGlobal = umin((iRead+1)*elementsPerLargeRead, totalElementsToRead);
         for (unsigned int readIdxGlobal = dataReadStartGlobal; readIdxGlobal<dataReadEndGlobal; readIdxGlobal+=elementsPerBlockRead) {
             unsigned int readIdxShared = readIdxGlobal%elementsPerLargeRead;
             sharedDataAsInt[readIdxShared] = globalDataAsInt[readIdxGlobal];
         }

         // Sync after read: the whole chunk must be staged before anyone uses it
         __syncthreads();

         // Range of terms to compute (the last chunk may be partial)
         unsigned int termsStartGlobal = iRead*termsPerLargeRead;
         unsigned int termsEndGlobal = umin(termsStartGlobal+termsPerLargeRead, nTermsReg);
         unsigned int termsStartShared = 0;
         unsigned int termsEndShared = termsEndGlobal-termsStartGlobal;

         // Compute terms
         for (unsigned int i=termsStartShared; i<termsEndShared; i++) {
             // Read in coordinate and coefficients
             SpectralDataUnifiedCUDA<T,N,P> unifiedData = sharedData[i];

             // FIRST SPATIAL COORDINATE
             T expValRe, expValIm;
             getExpVal(spatialCoord0, unifiedData.coord, gridDimReg, expArgFactor, &expValRe, &expValIm);
             for (unsigned int iDset = 0; iDset<P; iDset++) {
                 outputs0[iDset] = fmaIntr(unifiedData.coeffs[iDset].re, expValRe, outputs0[iDset]);
                 outputs0[iDset] = fmaIntr(unifiedData.coeffs[iDset].im, expValIm, outputs0[iDset]);
             }

             // SECOND SPATIAL COORDINATE
             getExpVal(spatialCoord1, unifiedData.coord, gridDimReg, expArgFactor, &expValRe, &expValIm);
             for (unsigned int iDset = 0; iDset<P; iDset++) {
                 outputs1[iDset] = fmaIntr(unifiedData.coeffs[iDset].re, expValRe, outputs1[iDset]);
                 outputs1[iDset] = fmaIntr(unifiedData.coeffs[iDset].im, expValIm, outputs1[iDset]);
             }
         }

         // Sync after compute: nobody may overwrite the chunk while it is in use
         __syncthreads();
     }
 }

 }//END NAMESPACE HPP

 #endif /* HPP_SPECTRAL_UTILS_CUDA_H */
hpp::SpectralDatabaseCUDA::dsetsSharedPtr
std::shared_ptr< SpectralDatasetCUDA< T, N > > dsetsSharedPtr
Definition: spectralUtilsCUDA.h:130

hpp::SpectralDatabaseUnified
Definition: spectralUtils.h:317

hpp::SpectralDatabaseCUDA::getGridStarts
__device__ T * getGridStarts()
Definition: spectralUtilsCUDA.h:101

hpp::SpectralDatabaseUnifiedCUDA::data
SpectralDataUnifiedCUDA< T, N, P > * data
Definition: spectralUtilsCUDA.h:338

hpp::SpectralDatabaseUnifiedCUDA::gridDims
unsigned int * gridDims
Definition: spectralUtilsCUDA.h:322

hpp::SpectralCoeffCUDA::im
T im
Definition: spectralUtilsCUDA.h:51

hpp::SpectralDataCUDA
Definition: spectralUtilsCUDA.h:28

hpp::SpectralDatabaseCUDA::coeffSharedPtrs
std::vector< std::shared_ptr< SpectralCoeffCUDA< T > > > coeffSharedPtrs
Definition: spectralUtilsCUDA.h:132

hpp::SpectralDatabaseCUDA::getNDsets
unsigned int getNDsets() const
Definition: spectralUtilsCUDA.h:104

hpp::SpectralDatabaseUnifiedCUDA::gridStartsSharedPtr
std::shared_ptr< T > gridStartsSharedPtr
Definition: spectralUtilsCUDA.h:327

hpp::SpectralDatabaseCUDA::gridStarts
T * gridStarts
Definition: spectralUtilsCUDA.h:115

hpp::SpectralDatabaseCUDA::dsets
SpectralDatasetCUDA< T, N > * dsets
Definition: spectralUtilsCUDA.h:127

hpp
Definition: casesUtils.cpp:4

hpp::SpectralCoordCUDA
Definition: spectralUtilsCUDA.h:34

hpp::SpectralCoeffCUDA::re
T re
Definition: spectralUtilsCUDA.h:50

hpp::SpectralDatabaseCUDA::gridDimsSharedPtr
std::shared_ptr< unsigned int > gridDimsSharedPtr
Definition: spectralUtilsCUDA.h:112

hpp::SpectralDatabaseUnifiedCUDA::getGridSteps
__device__ T * getGridSteps()
Definition: spectralUtilsCUDA.h:313

HPP_CHECK_CUDA_ENABLED_BUILD
#define HPP_CHECK_CUDA_ENABLED_BUILD
Definition: config.h:44

config.h

hpp::SpectralDatabaseCUDA::nTermsTypical
unsigned int nTermsTypical
Definition: spectralUtilsCUDA.h:124

hpp::SpectralCoordCUDA::operator()
__host__ __device__ unsigned int & operator()(const unsigned int i)
Definition: spectralUtilsCUDA.h:40

hpp::SpectralDatabase
Definition: spectralUtils.h:225

hpp::SpectralCoordCUDA::SpectralCoordCUDA
__host__ __device__ SpectralCoordCUDA()
Definition: spectralUtilsCUDA.h:36

hpp::SpectralDatabaseUnifiedCUDA::getNTerms
unsigned int getNTerms() const
Definition: spectralUtilsCUDA.h:316

hpp::SpectralDataCUDA::coords
unsigned int coords[N]
Definition: spectralUtilsCUDA.h:29

hpp::SpectralDatabaseUnifiedCUDA
Definition: spectralUtilsCUDA.h:304

hpp::SpectralDatasetCUDA
Definition: spectralUtilsCUDA.h:64

hpp::SpectralDatabaseUnifiedCUDA::getIDFTRealDShared
__device__ void getIDFTRealDShared(unsigned int *spatialCoord, T *outputs, unsigned int nShared, SpectralDataUnifiedCUDA< T, N, P > *sharedData) const
Device IDFTD.
Definition: spectralUtilsCUDA.h:371

hpp::SpectralDatabaseCUDA
Definition: spectralUtilsCUDA.h:91

hpp::SpectralDatabaseCUDA::gridStepsSharedPtr
std::shared_ptr< T > gridStepsSharedPtr
Definition: spectralUtilsCUDA.h:118

cudaUtils.h
Header file CUDA utility functions.

hpp::SpectralDatabaseUnifiedCUDA::gridDimsSharedPtr
std::shared_ptr< unsigned int > gridDimsSharedPtr
Definition: spectralUtilsCUDA.h:323

hpp::SpectralDatabaseUnifiedCUDA::getGridStarts
__device__ T * getGridStarts()
Definition: spectralUtilsCUDA.h:312

hpp::SpectralDatabaseUnifiedCUDA::gridSteps
T * gridSteps
Definition: spectralUtilsCUDA.h:328

hpp::SpectralDatabaseCUDA::nDsets
unsigned int nDsets
Definition: spectralUtilsCUDA.h:121

hpp::SpectralDatabaseUnifiedCUDA::gridStepsSharedPtr
std::shared_ptr< T > gridStepsSharedPtr
Definition: spectralUtilsCUDA.h:329

hpp::SpectralDatabaseUnifiedCUDA::gridStarts
T * gridStarts
Definition: spectralUtilsCUDA.h:326

SpectralDataUnifiedCUDA

hpp::SpectralCoordCUDA::getVal
__host__ __device__ unsigned int getVal(const unsigned int i) const
Definition: spectralUtilsCUDA.h:37

hpp::SpectralDatabaseCUDA::gridSteps
T * gridSteps
Definition: spectralUtilsCUDA.h:117

hpp::GET_IDFT_REAL
__global__ void GET_IDFT_REAL(SpectralDatabaseCUDA< T, N > *db, unsigned int dsetIdx, unsigned int *spatialCoord, T *val)
Definition: spectralUtilsCUDA.h:266

crystal.h
Header file for crystal classes.

hpp::SpectralDatabaseUnifiedCUDA::getIDFTRealDSharedPair
__device__ void getIDFTRealDSharedPair(unsigned int *spatialCoord0, T *outputs0, unsigned int *spatialCoord1, T *outputs1, unsigned int nShared, SpectralDataUnifiedCUDA< T, N, P > *sharedData) const
Device IDFTD.
Definition: spectralUtilsCUDA.h:464

hpp::SpectralDatabaseCUDA::getGridSteps
__device__ T * getGridSteps()
Definition: spectralUtilsCUDA.h:102

hpp::SpectralDatabaseUnifiedCUDA::dataSharedPtr
std::shared_ptr< SpectralDataUnifiedCUDA< T, N, P > > dataSharedPtr
Definition: spectralUtilsCUDA.h:341

hpp::SpectralDatasetCUDA::coeffs
SpectralCoeffCUDA< T > * coeffs
Definition: spectralUtilsCUDA.h:65

hpp::getExpVal
__device__ void getExpVal(unsigned int *spatialCoord, SpectralCoordCUDA< N > &coord, unsigned int gridDim, T expArgFactor, T *expValRe, T *expValIm)
Definition: spectralUtilsCUDA.h:345

hpp::SpectralDatabaseUnifiedCUDA::nDsets
unsigned int nDsets
Definition: spectralUtilsCUDA.h:332

hpp::SpectralDatabaseCUDA::getGridDims
__device__ unsigned int * getGridDims()
Definition: spectralUtilsCUDA.h:103

hpp::SpectralDatasetCUDA::coords
SpectralCoordCUDA< N > * coords
Definition: spectralUtilsCUDA.h:66

hpp::SpectralDatabaseCUDA::coordSharedPtrs
std::vector< std::shared_ptr< SpectralCoordCUDA< N > > > coordSharedPtrs
Definition: spectralUtilsCUDA.h:131

hpp::SpectralDatabaseUnifiedCUDA::nTerms
unsigned int nTerms
Definition: spectralUtilsCUDA.h:335

hpp::SpectralDatabaseUnifiedCUDA::getNDsets
unsigned int getNDsets() const
Definition: spectralUtilsCUDA.h:315

hpp::SpectralDatabaseCUDA::gridStartsSharedPtr
std::shared_ptr< T > gridStartsSharedPtr
Definition: spectralUtilsCUDA.h:116

hpp::SpectralDatasetCUDA::nTerms
unsigned int nTerms
Definition: spectralUtilsCUDA.h:67

hpp::SpectralDatabaseCUDA::getIDFTRealDShared
__device__ T getIDFTRealDShared(unsigned int dsetIdx, unsigned int *spatialCoord, unsigned int nShared, SpectralCoordCUDA< N > *sharedCoords, SpectralCoeffCUDA< T > *sharedCoeffs) const
Device IDFTD.
Definition: spectralUtilsCUDA.h:192

hpp::SpectralDatabaseCUDA::getNTermsTypical
unsigned int getNTermsTypical() const
Definition: spectralUtilsCUDA.h:105

hpp::SpectralDataCUDA::coeff
T coeff[2]
Definition: spectralUtilsCUDA.h:30

hpp::ALIGN
struct ALIGN(16) SpectralDataUnifiedCUDA
Definition: spectralUtilsCUDA.h:285

hpp::SpectralCoeffCUDA
Definition: spectralUtilsCUDA.h:49

hpp::SpectralDatabaseCUDA::getIDFTRealD
__device__ T getIDFTRealD(unsigned int dsetIdx, unsigned int *spatialCoord) const
Definition: spectralUtilsCUDA.h:137

hpp::SpectralDatabaseCUDA::gridDims
unsigned int * gridDims
Definition: spectralUtilsCUDA.h:111

hpp::SpectralDatabaseUnifiedCUDA::getGridDims
__device__ unsigned int * getGridDims()
Definition: spectralUtilsCUDA.h:314