High Performance Plasticity  0.5.0
spectralUtilsCUDA.h
Go to the documentation of this file.
1 
7 #ifndef HPP_SPECTRAL_UTILS_CUDA_H
8 #define HPP_SPECTRAL_UTILS_CUDA_H
9 
10 #include <hpp/config.h>
12 #include <hpp/crystal.h>
13 #include <hpp/cudaUtils.h>
14 
15 namespace hpp
16 {
17 
/**
 * @brief One spectral term: an N-dimensional frequency coordinate together
 * with a two-component coefficient (presumably [real, imaginary] — confirm;
 * no visible code in this file reads coeff).
 *
 * NOTE(review): the struct declaration line (original line 28) was dropped by
 * the documentation extraction and its name is not recoverable from the
 * cross-reference index; "SpectralDataCUDA" is inferred from the naming of the
 * unified variant (SpectralDataUnifiedCUDA). Confirm against the original
 * header before relying on this name.
 */
template<typename T, unsigned int N>
struct SpectralDataCUDA {
    unsigned int coords[N];  ///< frequency-space coordinate of the term
    T coeff[2];              ///< coefficient components (presumably re/im — confirm)
};
32 
/**
 * @brief An N-component unsigned integer coordinate usable from both host and
 * device code.
 *
 * NOTE(review): the declaration line "class SpectralCoordCUDA" (original line
 * 34) was dropped by the documentation extraction and is restored here from
 * the cross-reference index.
 */
template<unsigned int N>
class SpectralCoordCUDA {
public:
    /// Trivial constructor; components are left uninitialized.
    __host__ __device__ SpectralCoordCUDA(){;}

    /// Read component i (unchecked: caller must ensure i < N).
    __host__ __device__ unsigned int getVal(const unsigned int i) const {
        return data[i];
    }

    /// Mutable access to component i (unchecked: caller must ensure i < N).
    __host__ __device__ unsigned int& operator()(const unsigned int i) {
        return data[i];
    }

private:
    // Coordinate components
    unsigned int data[N];
};
47 
/**
 * @brief A complex-valued spectral coefficient, split into real and imaginary
 * parts.
 *
 * NOTE(review): the declaration line "struct SpectralCoeffCUDA" (original line
 * 49) was dropped by the documentation extraction and is restored here from
 * the cross-reference index.
 */
template<typename T>
struct SpectralCoeffCUDA {
    T re;  ///< real part
    T im;  ///< imaginary part
};
53 
/**
 * @brief Descriptor of one spectral dataset on the device: pointers to its
 * coefficient and coordinate arrays plus the number of retained DFT terms.
 *
 * NOTE(review): the declaration line and the two pointer members (original
 * lines 64-66) were dropped by the documentation extraction; they are restored
 * here from the cross-reference index — confirm against the original header.
 */
template<typename T, unsigned int N>
struct SpectralDatasetCUDA {
    SpectralCoeffCUDA<T> *coeffs;  ///< one complex coefficient per term
    SpectralCoordCUDA<N> *coords;  ///< one frequency coordinate per term
    unsigned int nTerms;           ///< number of entries in the arrays above
};
69 
/**
 * @brief GPU-side spectral database holding, for each of several datasets, a
 * list of DFT coordinates and complex coefficients used to evaluate inverse
 * DFTs on the device.
 *
 * NOTE(review): the documentation extraction dropped the hyperlinked
 * declaration lines. The class header (original line 91) and the raw-pointer
 * members gridStarts (115), gridSteps (117), and dsets (127) are restored here
 * from the cross-reference index. One more dropped declaration before the
 * converting constructor (original line 94, possibly a default constructor) is
 * not recoverable and is not reconstructed — confirm against the original
 * header.
 */
template<typename T, unsigned int N>
class SpectralDatabaseCUDA
{
public:
    /// Build the device database from a host-side database and the IDs of the
    /// datasets to upload (defined out of line).
    SpectralDatabaseCUDA(const SpectralDatabase<T>& dbIn, const std::vector<SpectralDatasetID>& dsetIDs);

    /// Per-thread inverse DFT (real part) of dataset dsetIdx at spatialCoord.
    __device__ T getIDFTRealD(unsigned int dsetIdx, unsigned int *spatialCoord) const;

    /// Block-cooperative inverse DFT using caller-provided shared buffers of
    /// capacity nShared terms; all threads of the block must participate.
    __device__ T getIDFTRealDShared(unsigned int dsetIdx, unsigned int *spatialCoord, unsigned int nShared, SpectralCoordCUDA<N> *sharedCoords, SpectralCoeffCUDA<T> *sharedCoeffs) const;

    /// Host-side evaluation of the same inverse DFT (defined out of line).
    T getIDFTRealH(unsigned int dsetIdx, std::vector<unsigned int> spatialCoord) const;

    // Getters
    __device__ T* getGridStarts() {return gridStarts;}
    __device__ T* getGridSteps() {return gridSteps;}
    __device__ unsigned int * getGridDims() {return gridDims;}
    unsigned int getNDsets() const {return nDsets;}
    unsigned int getNTermsTypical() const {return nTermsTypical;}

protected:

private:
    // Grid dimensions; the IDFT evaluators assume gridDims[0] is a power of two
    unsigned int *gridDims;
    std::shared_ptr<unsigned int> gridDimsSharedPtr;

    // Grid spatial parameters
    T *gridStarts;
    std::shared_ptr<T> gridStartsSharedPtr;
    T *gridSteps;
    std::shared_ptr<T> gridStepsSharedPtr;

    // Number of datasets
    unsigned int nDsets;

    // Number of terms in a typical dataset
    unsigned int nTermsTypical;

    // Spectral data (one descriptor per dataset)
    SpectralDatasetCUDA<T,N> *dsets;

    // Shared pointers to assure correct copying and destruction
    std::shared_ptr<SpectralDatasetCUDA<T,N>> dsetsSharedPtr;
    std::vector<std::shared_ptr<SpectralCoordCUDA<N>>> coordSharedPtrs;
    std::vector<std::shared_ptr<SpectralCoeffCUDA<T>>> coeffSharedPtrs;
};
134 
135 // Device IDFT
// Per-thread evaluation of the real part of the inverse DFT for one dataset at
// a single spatial grid point. Requires gridDims[0] to be a power of two (the
// phase index is range-reduced with a bitmask).
template <typename T, unsigned int N>
__device__ T SpectralDatabaseCUDA<T,N>::getIDFTRealD(unsigned int dsetIdx, unsigned int *spatialCoord) const {
    // Local copy of the dataset descriptor (two pointers and a count)
    const SpectralDatasetCUDA<T,N> dataset = dsets[dsetIdx];

    // Accumulator for the returned value
    T accum = 0;

    // Common factor of the complex-exponential argument
    const T argScale = 2*((T)M_PI)/gridDims[0];

    for (unsigned int term = 0; term < dataset.nTerms; term++) {
        // Phase index: dot product of the spatial and frequency coordinates
        unsigned int phaseIdx = 0;
        for (unsigned int d = 0; d < N; d++) {
            phaseIdx += spatialCoord[d]*dataset.coords[term](d);
        }

        // Range-reduce the phase index here rather than inside the trig
        // intrinsics (measured ~20% saving). Valid only because gridDims[0]
        // is a power of two.
        phaseIdx &= gridDims[0]-1;

        // Complex exponential of the phase: cos -> cosPart, sin -> sinPart
        T cosPart, sinPart;
        sincosIntr(phaseIdx*argScale, &sinPart, &cosPart);

        // Accumulate this term's real contribution via fused multiply-adds
        accum = fmaIntr(dataset.coeffs[term].re, cosPart, accum);
        accum = fmaIntr(dataset.coeffs[term].im, sinPart, accum);
    }

    return accum;
}
172 
// Inverse DFT (real part) for one dataset at one spatial point, staging the
// spectral terms through caller-provided shared-memory buffers in batches.
//
// Block-cooperative: every thread of the block must call this together (it
// uses __syncthreads(), and all threads help fill sharedCoords/sharedCoeffs,
// each of which must hold at least nShared terms). Requires gridDims[0] to be
// a power of two for the bitmask range reduction.
template <typename T, unsigned int N>
__device__ T SpectralDatabaseCUDA<T,N>::getIDFTRealDShared(unsigned int dsetIdx, unsigned int *spatialCoord, unsigned int nShared, SpectralCoordCUDA<N> *sharedCoords, SpectralCoeffCUDA<T> *sharedCoeffs) const {
    // Dataset descriptor and its term count
    SpectralDatasetCUDA<T,N> dataset = dsets[dsetIdx];
    const unsigned int termCount = dataset.nTerms;

    // Accumulator for the returned value
    T accum = 0;

    // Common factor of the complex-exponential argument
    const T argScale = 2*((T)M_PI)/gridDims[0];

    // Terms are staged nShared at a time: ceil(termCount/nShared) batches.
    const unsigned int batchSize = nShared;
    unsigned int nBatches = termCount/batchSize;
    if (termCount % batchSize != 0) nBatches++;

    // With one read per thread, the block fetches blockDim.x terms per pass.
    const unsigned int readsPerPass = blockDim.x;

    for (unsigned int batch = 0; batch < nBatches; batch++) {
        // Cooperative copy of this batch from global into shared memory
        const unsigned int copyBegin = batch*batchSize + threadIdx.x;
        const unsigned int copyEnd = umin((batch+1)*batchSize, termCount);
        for (unsigned int src = copyBegin; src < copyEnd; src += readsPerPass) {
            const unsigned int dst = src % batchSize;
            sharedCoords[dst] = dataset.coords[src];
            sharedCoeffs[dst] = dataset.coeffs[src];
        }

        // The whole batch must be staged before any thread consumes it
        __syncthreads();

        // Number of valid terms in this batch (the last one may be partial)
        const unsigned int batchStart = batch*batchSize;
        const unsigned int batchEnd = umin(batchStart + batchSize, termCount);
        const unsigned int validTerms = batchEnd - batchStart;

        for (unsigned int i = 0; i < validTerms; i++) {
            // Phase index: dot product of spatial and frequency coordinates
            unsigned int phaseIdx = spatialCoord[0]*sharedCoords[i](0);
            for (unsigned int j = 1; j < N; j++) {
                phaseIdx += spatialCoord[j]*sharedCoords[i](j);
            }

            // Range reduction; relies on gridDims[0] being a power of two
            phaseIdx &= gridDims[0]-1;

            // Complex exponential of the phase
            T cosPart, sinPart;
            sincosIntr(phaseIdx*argScale, &sinPart, &cosPart);

            // Accumulate this term's real contribution
            accum = fmaIntr(sharedCoeffs[i].re, cosPart, accum);
            accum = fmaIntr(sharedCoeffs[i].im, sinPart, accum);
        }

        // Shared buffers must not be overwritten until all threads are done
        __syncthreads();
    }

    return accum;
}
263 
264 // Kernel
// Kernel wrapper around SpectralDatabaseCUDA::getIDFTRealD.
// NOTE(review): every launched thread evaluates the IDFT and writes the same
// *val; presumably intended for a <<<1,1>>> (single-point) launch — confirm
// with the call sites.
template <typename T, unsigned int N>
__global__ void GET_IDFT_REAL(SpectralDatabaseCUDA<T,N> *db, unsigned int dsetIdx, unsigned int *spatialCoord, T *val) {
    const T result = db->getIDFTRealD(dsetIdx, spatialCoord);
    *val = result;
}
269 
271 // SPECTRAL DATABASE UNIFIED //
273 
/**
 * @brief One "unified" spectral term: a single frequency coordinate shared by
 * P datasets, with one complex coefficient per dataset.
 *
 * The 16-byte alignment supports the int-wise coalesced global-to-shared copy
 * in SpectralDatabaseUnifiedCUDA, which requires sizeof(this struct) to be a
 * multiple of sizeof(int).
 *
 * NOTE(review): the declaration line "struct ALIGN(16) SpectralDataUnifiedCUDA"
 * (original line 285) was dropped by the documentation extraction and is
 * restored here verbatim from the cross-reference index; ALIGN is presumably
 * an alignment macro from the project headers — confirm.
 */
template<typename T, unsigned int N, unsigned int P>
struct ALIGN(16) SpectralDataUnifiedCUDA {
    SpectralCoordCUDA<N> coord;      ///< frequency coordinate shared by all P datasets
    SpectralCoeffCUDA<T> coeffs[P];  ///< one coefficient per dataset
};
289 
/**
 * @brief GPU-side spectral database in "unified" layout: all P datasets share
 * a single list of DFT coordinates, and each term carries P coefficients, so
 * all P inverse DFTs can be evaluated in one pass over the spectral data.
 *
 * NOTE(review): the documentation extraction dropped the hyperlinked
 * declaration lines. The class header (original line 304) and the raw-pointer
 * members gridStarts (326), gridSteps (328), and data (338) are restored here
 * from the cross-reference index. One more dropped declaration before the
 * converting constructor (original line 307, possibly a default constructor)
 * is not recoverable and is not reconstructed — confirm against the original
 * header.
 */
template<typename T, unsigned int N, unsigned int P>
class SpectralDatabaseUnifiedCUDA
{
public:
    /// Build the device database from a host-side unified database and the
    /// IDs of the datasets to upload (defined out of line).
    SpectralDatabaseUnifiedCUDA(const SpectralDatabaseUnified<T>& dbIn, const std::vector<SpectralDatasetID>& dsetIDs);

    /// Block-cooperative inverse DFT of all P datasets at one spatial point.
    __device__ void getIDFTRealDShared(unsigned int *spatialCoord, T *outputs, unsigned int nShared, SpectralDataUnifiedCUDA<T,N,P> *sharedData) const;

    /// Same as getIDFTRealDShared, but evaluates two spatial points per pass
    /// over the spectral data.
    __device__ void getIDFTRealDSharedPair(unsigned int *spatialCoord0, T *outputs0, unsigned int *spatialCoord1, T *outputs1, unsigned int nShared, SpectralDataUnifiedCUDA<T,N,P> *sharedData) const;

    // Getters
    __device__ T* getGridStarts() {return gridStarts;}
    __device__ T* getGridSteps() {return gridSteps;}
    __device__ unsigned int * getGridDims() {return gridDims;}
    unsigned int getNDsets() const {return nDsets;}
    unsigned int getNTerms() const {return nTerms;}

protected:

private:
    // Grid dimensions; the IDFT evaluators assume gridDims[0] is a power of two
    unsigned int *gridDims;
    std::shared_ptr<unsigned int> gridDimsSharedPtr;

    // Grid spatial parameters
    T *gridStarts;
    std::shared_ptr<T> gridStartsSharedPtr;
    T *gridSteps;
    std::shared_ptr<T> gridStepsSharedPtr;

    // Number of datasets
    unsigned int nDsets;

    // Number of terms
    unsigned int nTerms;

    // Spectral data (one entry per term, each holding P coefficients)
    SpectralDataUnifiedCUDA<T,N,P> *data;

    // Shared pointers to assure correct copying and destruction
    std::shared_ptr<SpectralDataUnifiedCUDA<T,N,P>> dataSharedPtr;
};
343 
// Compute the complex exponential of the DFT phase for one (spatial
// coordinate, frequency coordinate) pair: writes cos to *expValRe and sin to
// *expValIm. expArgFactor is the precomputed 2*pi/gridDim factor; gridDim must
// be a power of two for the bitmask range reduction below.
template <typename T, unsigned int N>
__device__ void getExpVal(unsigned int *spatialCoord, SpectralCoordCUDA<N>& coord, unsigned int gridDim, T expArgFactor, T* expValRe, T* expValIm) {
    // Phase index: dot product of the spatial and frequency coordinates
    unsigned int phaseIdx = spatialCoord[0]*coord(0);
    for (unsigned int d = 1; d < N; d++) {
        phaseIdx += spatialCoord[d]*coord(d);
    }

    // Range-reduce here rather than inside the trig intrinsics (a measurable
    // saving); valid only because gridDim is a power of two.
    phaseIdx &= gridDim-1;

    // sincosIntr writes sin into *expValIm and cos into *expValRe
    sincosIntr(phaseIdx*expArgFactor, expValIm, expValRe);
}
360 
// Evaluate, at one spatial point, the real inverse DFT of all P datasets at
// once, staging the unified spectral data through shared memory.
//
// Block-cooperative: every thread of the block must call this together.
// sharedData must hold at least nShared terms, and outputs must have room for
// P values. The global-to-shared copy is done int-by-int for coalescing, which
// assumes sizeof(SpectralDataUnifiedCUDA<T,N,P>) is a multiple of sizeof(int)
// (consistent with the struct's ALIGN(16)). gridDims[0] must be a power of two.
template <typename T, unsigned int N, unsigned int P>
__device__ void SpectralDatabaseUnifiedCUDA<T,N,P>::getIDFTRealDShared(unsigned int *spatialCoord, T *outputs, unsigned int nShared, SpectralDataUnifiedCUDA<T,N,P> *sharedData) const {
    // Pull frequently-used globals into registers
    const unsigned int gridDimLocal = gridDims[0];
    const unsigned int termCount = nTerms;

    // Common factor of the complex-exponential argument
    const T argScale = 2*((T)M_PI)/gridDimLocal;

    // View both buffers as int arrays so the block-wide copy is coalesced
    int *srcAsInt = (int*)data;
    int *dstAsInt = (int*)sharedData;
    const unsigned int elemSize = sizeof(int);

    // Terms are staged nShared at a time: ceil(termCount/nShared) batches.
    const unsigned int batchTerms = nShared;
    const unsigned int batchElems = batchTerms*sizeof(SpectralDataUnifiedCUDA<T,N,P>)/elemSize;
    const unsigned int totalElems = termCount*sizeof(SpectralDataUnifiedCUDA<T,N,P>)/elemSize;
    unsigned int nBatches = termCount/batchTerms;
    if (termCount % batchTerms != 0) nBatches++;

    // With one read per thread, the block fetches blockDim.x ints per pass
    const unsigned int elemsPerPass = blockDim.x;

    // Zero the P accumulators
    for (unsigned int i = 0; i < P; i++) {
        outputs[i] = (T)0.0;
    }

    for (unsigned int batch = 0; batch < nBatches; batch++) {
        // Cooperative coalesced copy of this batch into shared memory
        const unsigned int copyBegin = batch*batchElems + threadIdx.x;
        const unsigned int copyEnd = umin((batch+1)*batchElems, totalElems);
        for (unsigned int src = copyBegin; src < copyEnd; src += elemsPerPass) {
            dstAsInt[src % batchElems] = srcAsInt[src];
        }

        // The whole batch must be staged before any thread consumes it
        __syncthreads();

        // Number of valid terms in this batch (the last one may be partial)
        const unsigned int batchStart = batch*batchTerms;
        const unsigned int batchEnd = umin(batchStart + batchTerms, termCount);
        const unsigned int validTerms = batchEnd - batchStart;

        for (unsigned int i = 0; i < validTerms; i++) {
            // Phase index: dot product of spatial and frequency coordinates
            unsigned int phaseIdx = spatialCoord[0]*sharedData[i].coord(0);
            for (unsigned int j = 1; j < N; j++) {
                phaseIdx += spatialCoord[j]*sharedData[i].coord(j);
            }

            // Range reduction; relies on gridDimLocal being a power of two
            phaseIdx &= gridDimLocal-1;

            // Complex exponential of the phase
            T cosPart, sinPart;
            sincosIntr(phaseIdx*argScale, &sinPart, &cosPart);

            // One shared exponential updates all P dataset accumulators
            for (unsigned int iDset = 0; iDset < P; iDset++) {
                outputs[iDset] = fmaIntr(sharedData[i].coeffs[iDset].re, cosPart, outputs[iDset]);
                outputs[iDset] = fmaIntr(sharedData[i].coeffs[iDset].im, sinPart, outputs[iDset]);
            }
        }

        // Shared data must not be overwritten until all threads are done
        __syncthreads();
    }
}
451 
// Evaluate the real inverse DFT of all P datasets at TWO spatial points,
// amortizing a single pass over the spectral data (and a single shared-memory
// staging) across both points.
//
// Same preconditions as getIDFTRealDShared: block-cooperative call, sharedData
// holds at least nShared terms, outputs0/outputs1 each hold P values,
// sizeof(SpectralDataUnifiedCUDA<T,N,P>) is a multiple of sizeof(int), and
// gridDims[0] is a power of two.
template <typename T, unsigned int N, unsigned int P>
__device__ void SpectralDatabaseUnifiedCUDA<T,N,P>::getIDFTRealDSharedPair(unsigned int *spatialCoord0, T *outputs0, unsigned int *spatialCoord1, T *outputs1, unsigned int nShared, SpectralDataUnifiedCUDA<T,N,P> *sharedData) const {
    // Pull frequently-used globals into registers
    const unsigned int gridDimLocal = gridDims[0];
    const unsigned int termCount = nTerms;

    // Common factor of the complex-exponential argument
    const T argScale = 2*((T)M_PI)/gridDimLocal;

    // View both buffers as int arrays so the block-wide copy is coalesced
    int *srcAsInt = (int*)data;
    int *dstAsInt = (int*)sharedData;
    const unsigned int elemSize = sizeof(int);

    // Terms are staged nShared at a time: ceil(termCount/nShared) batches.
    const unsigned int batchTerms = nShared;
    const unsigned int batchElems = batchTerms*sizeof(SpectralDataUnifiedCUDA<T,N,P>)/elemSize;
    const unsigned int totalElems = termCount*sizeof(SpectralDataUnifiedCUDA<T,N,P>)/elemSize;
    unsigned int nBatches = termCount/batchTerms;
    if (termCount % batchTerms != 0) nBatches++;

    // With one read per thread, the block fetches blockDim.x ints per pass
    const unsigned int elemsPerPass = blockDim.x;

    // Zero both sets of accumulators
    for (unsigned int i = 0; i < P; i++) {
        outputs0[i] = (T)0.0;
        outputs1[i] = (T)0.0;
    }

    for (unsigned int batch = 0; batch < nBatches; batch++) {
        // Cooperative coalesced copy of this batch into shared memory
        const unsigned int copyBegin = batch*batchElems + threadIdx.x;
        const unsigned int copyEnd = umin((batch+1)*batchElems, totalElems);
        for (unsigned int src = copyBegin; src < copyEnd; src += elemsPerPass) {
            dstAsInt[src % batchElems] = srcAsInt[src];
        }

        // The whole batch must be staged before any thread consumes it
        __syncthreads();

        // Number of valid terms in this batch (the last one may be partial)
        const unsigned int batchStart = batch*batchTerms;
        const unsigned int batchEnd = umin(batchStart + batchTerms, termCount);
        const unsigned int validTerms = batchEnd - batchStart;

        for (unsigned int i = 0; i < validTerms; i++) {
            // Copy the term (coordinate + P coefficients) into registers
            SpectralDataUnifiedCUDA<T,N,P> term = sharedData[i];

            // FIRST SPATIAL COORDINATE
            T cosPart, sinPart;
            getExpVal(spatialCoord0, term.coord, gridDimLocal, argScale, &cosPart, &sinPart);
            for (unsigned int iDset = 0; iDset < P; iDset++) {
                outputs0[iDset] = fmaIntr(term.coeffs[iDset].re, cosPart, outputs0[iDset]);
                outputs0[iDset] = fmaIntr(term.coeffs[iDset].im, sinPart, outputs0[iDset]);
            }

            // SECOND SPATIAL COORDINATE (reuses the staged term)
            getExpVal(spatialCoord1, term.coord, gridDimLocal, argScale, &cosPart, &sinPart);
            for (unsigned int iDset = 0; iDset < P; iDset++) {
                outputs1[iDset] = fmaIntr(term.coeffs[iDset].re, cosPart, outputs1[iDset]);
                outputs1[iDset] = fmaIntr(term.coeffs[iDset].im, sinPart, outputs1[iDset]);
            }
        }

        // Shared data must not be overwritten until all threads are done
        __syncthreads();
    }
}
542 
543 }//END NAMESPACE HPP
544 
545 #endif /* HPP_SPECTRAL_UTILS_CUDA_H */
std::shared_ptr< SpectralDatasetCUDA< T, N > > dsetsSharedPtr
Definition: spectralUtilsCUDA.h:130
Definition: spectralUtils.h:317
__device__ T * getGridStarts()
Definition: spectralUtilsCUDA.h:101
SpectralDataUnifiedCUDA< T, N, P > * data
Definition: spectralUtilsCUDA.h:338
unsigned int * gridDims
Definition: spectralUtilsCUDA.h:322
T im
Definition: spectralUtilsCUDA.h:51
Definition: spectralUtilsCUDA.h:28
std::vector< std::shared_ptr< SpectralCoeffCUDA< T > > > coeffSharedPtrs
Definition: spectralUtilsCUDA.h:132
unsigned int getNDsets() const
Definition: spectralUtilsCUDA.h:104
std::shared_ptr< T > gridStartsSharedPtr
Definition: spectralUtilsCUDA.h:327
T * gridStarts
Definition: spectralUtilsCUDA.h:115
SpectralDatasetCUDA< T, N > * dsets
Definition: spectralUtilsCUDA.h:127
Definition: casesUtils.cpp:4
Definition: spectralUtilsCUDA.h:34
T re
Definition: spectralUtilsCUDA.h:50
std::shared_ptr< unsigned int > gridDimsSharedPtr
Definition: spectralUtilsCUDA.h:112
__device__ T * getGridSteps()
Definition: spectralUtilsCUDA.h:313
#define HPP_CHECK_CUDA_ENABLED_BUILD
Definition: config.h:44
unsigned int nTermsTypical
Definition: spectralUtilsCUDA.h:124
__host__ __device__ unsigned int & operator()(const unsigned int i)
Definition: spectralUtilsCUDA.h:40
Definition: spectralUtils.h:225
__host__ __device__ SpectralCoordCUDA()
Definition: spectralUtilsCUDA.h:36
unsigned int getNTerms() const
Definition: spectralUtilsCUDA.h:316
unsigned int coords[N]
Definition: spectralUtilsCUDA.h:29
Definition: spectralUtilsCUDA.h:304
Definition: spectralUtilsCUDA.h:64
__device__ void getIDFTRealDShared(unsigned int *spatialCoord, T *outputs, unsigned int nShared, SpectralDataUnifiedCUDA< T, N, P > *sharedData) const
Device IDFTD.
Definition: spectralUtilsCUDA.h:371
Definition: spectralUtilsCUDA.h:91
std::shared_ptr< T > gridStepsSharedPtr
Definition: spectralUtilsCUDA.h:118
Header file CUDA utility functions.
std::shared_ptr< unsigned int > gridDimsSharedPtr
Definition: spectralUtilsCUDA.h:323
__device__ T * getGridStarts()
Definition: spectralUtilsCUDA.h:312
T * gridSteps
Definition: spectralUtilsCUDA.h:328
unsigned int nDsets
Definition: spectralUtilsCUDA.h:121
std::shared_ptr< T > gridStepsSharedPtr
Definition: spectralUtilsCUDA.h:329
T * gridStarts
Definition: spectralUtilsCUDA.h:326
__host__ __device__ unsigned int getVal(const unsigned int i) const
Definition: spectralUtilsCUDA.h:37
T * gridSteps
Definition: spectralUtilsCUDA.h:117
__global__ void GET_IDFT_REAL(SpectralDatabaseCUDA< T, N > *db, unsigned int dsetIdx, unsigned int *spatialCoord, T *val)
Definition: spectralUtilsCUDA.h:266
Header file for crystal classes.
__device__ void getIDFTRealDSharedPair(unsigned int *spatialCoord0, T *outputs0, unsigned int *spatialCoord1, T *outputs1, unsigned int nShared, SpectralDataUnifiedCUDA< T, N, P > *sharedData) const
Device IDFTD.
Definition: spectralUtilsCUDA.h:464
__device__ T * getGridSteps()
Definition: spectralUtilsCUDA.h:102
std::shared_ptr< SpectralDataUnifiedCUDA< T, N, P > > dataSharedPtr
Definition: spectralUtilsCUDA.h:341
SpectralCoeffCUDA< T > * coeffs
Definition: spectralUtilsCUDA.h:65
__device__ void getExpVal(unsigned int *spatialCoord, SpectralCoordCUDA< N > &coord, unsigned int gridDim, T expArgFactor, T *expValRe, T *expValIm)
Definition: spectralUtilsCUDA.h:345
unsigned int nDsets
Definition: spectralUtilsCUDA.h:332
__device__ unsigned int * getGridDims()
Definition: spectralUtilsCUDA.h:103
SpectralCoordCUDA< N > * coords
Definition: spectralUtilsCUDA.h:66
std::vector< std::shared_ptr< SpectralCoordCUDA< N > > > coordSharedPtrs
Definition: spectralUtilsCUDA.h:131
unsigned int nTerms
Definition: spectralUtilsCUDA.h:335
unsigned int getNDsets() const
Definition: spectralUtilsCUDA.h:315
std::shared_ptr< T > gridStartsSharedPtr
Definition: spectralUtilsCUDA.h:116
unsigned int nTerms
Definition: spectralUtilsCUDA.h:67
__device__ T getIDFTRealDShared(unsigned int dsetIdx, unsigned int *spatialCoord, unsigned int nShared, SpectralCoordCUDA< N > *sharedCoords, SpectralCoeffCUDA< T > *sharedCoeffs) const
Device IDFTD.
Definition: spectralUtilsCUDA.h:192
unsigned int getNTermsTypical() const
Definition: spectralUtilsCUDA.h:105
T coeff[2]
Definition: spectralUtilsCUDA.h:30
struct ALIGN(16) SpectralDataUnifiedCUDA
Definition: spectralUtilsCUDA.h:285
Definition: spectralUtilsCUDA.h:49
__device__ T getIDFTRealD(unsigned int dsetIdx, unsigned int *spatialCoord) const
Definition: spectralUtilsCUDA.h:137
unsigned int * gridDims
Definition: spectralUtilsCUDA.h:111
__device__ unsigned int * getGridDims()
Definition: spectralUtilsCUDA.h:314