Replicate a vector multiple times using CUDA Thrust

前端未结
关注
 3  1401
广开言路 2020-12-17 05:36
I am trying to solve a problem using CUDA Thrust.
I have a host array with 3 elements. Is it possible, using Thrust, to create a device array of 3

      
      
        
          3条回答        

        
                    
            
            
                         
                
              
              
                
                   没有蜡笔的小新
                                             
                
                
                (楼主)
            
              
              
                2020-12-17 05:56
              

            
            
                        
As an apparently simpler alternative to using CUDA Thrust, I'm posting below a worked example that implements Matlab's classical meshgrid function in CUDA.

In Matlab

x = [1 2 3];
y = [4 5 6 7];
[X, Y] = meshgrid(x, y);


produces

X =

     1     2     3
     1     2     3
     1     2     3
     1     2     3


and

Y =

     4     4     4
     5     5     5
     6     6     6
     7     7     7


X is exactly the four-fold replication of the x array, which is what the OP asked for and the first guess in Robert Crovella's answer, while Y is the three-fold consecutive replication of each element of the y array, which is the second guess in Robert Crovella's answer.

Here is the code:

#include <stdio.h>
#include <stdlib.h>

#include <thrust/pair.h>

#include "Utilities.cuh"

/* Thread-block dimensions (threads along x and y) used by meshgrid() when it launches meshgrid_kernel. */
#define BLOCKSIZE_MESHGRID_X    16
#define BLOCKSIZE_MESHGRID_Y    16

/* When defined, enables the extra post-launch checks guarded by #ifdef DEBUG in meshgrid(). */
#define DEBUG

/*******************/
/* MESHGRID KERNEL */
/*******************/
/*
 * Fills the two meshgrid outputs, both stored row-major with Ny rows and Nx columns:
 *   X[row * Nx + col] = x[col]   (the x vector repeated on every row)
 *   Y[row * Nx + col] = y[row]   (each y element repeated along its row)
 *
 * Launch layout: 2D grid of 2D blocks; the x dimension must cover the Nx columns and the
 * y dimension must cover the Ny rows. Threads that fall outside the Ny x Nx extent do nothing.
 * No shared memory is used.
 *
 * x : input vector of Nx elements (read-only)
 * y : input vector of Ny elements (read-only)
 * X : output buffer of Nx * Ny elements
 * Y : output buffer of Nx * Ny elements
 */
template <class T>
__global__ void meshgrid_kernel(const T * __restrict__ x, size_t Nx, const T * __restrict__ y, size_t Ny, T * __restrict__ X, T * __restrict__ Y)
{
    /* Widen before multiplying so that the global index cannot wrap in 32-bit arithmetic. */
    const size_t tidx = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    const size_t tidy = (size_t)blockIdx.y * blockDim.y + threadIdx.y;

    if ((tidx < Nx) && (tidy < Ny)) {
        const size_t idx = tidy * Nx + tidx;   /* row-major linear offset */
        X[idx] = x[tidx];
        Y[idx] = y[tidy];
    }
}

/************/
/* MESHGRID */
/************/
/*
 * Builds the meshgrid of two device vectors.
 *
 * x, y   : device pointers to the input vectors of Nx and Ny elements respectively.
 * returns: a pair (X, Y) of newly cudaMalloc'ed device buffers, each holding Nx * Ny elements
 *          in row-major order (Ny rows, Nx columns). The caller owns both buffers and must
 *          release them with cudaFree. If Nx or Ny is zero, nothing is allocated or launched
 *          and both pointers are null.
 */
template <class T>
thrust::pair<T *, T *> meshgrid(const T *x, const unsigned int Nx, const T *y, const unsigned int Ny) {

    T *X = 0;
    T *Y = 0;

    /* An empty grid would yield a zero-sized launch configuration, which is an error. */
    if (Nx == 0 || Ny == 0) return thrust::make_pair(X, Y);

    /* Multiply in size_t: Nx * Ny can exceed the range of unsigned int. */
    const size_t numElements = (size_t)Nx * Ny;

    gpuErrchk(cudaMalloc((void**)&X, numElements * sizeof(T)));
    gpuErrchk(cudaMalloc((void**)&Y, numElements * sizeof(T)));

    dim3 BlockSize(BLOCKSIZE_MESHGRID_X, BLOCKSIZE_MESHGRID_Y);
    /* The grid must span Nx columns along x and Ny rows along y. */
    dim3 GridSize (iDivUp(Nx, BLOCKSIZE_MESHGRID_X), iDivUp(Ny, BLOCKSIZE_MESHGRID_Y));

    meshgrid_kernel<<<GridSize, BlockSize>>>(x, Nx, y, Ny, X, Y);
#ifdef DEBUG
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());
#endif

    return thrust::make_pair(X, Y);
}

/********/
/* MAIN */
/********/
/*
 * Demo driver: builds x = [1 2 3] and y = [4 5 6 7] on the host, copies them to the device,
 * computes their meshgrid, copies the two Ny x Nx results back and prints every entry.
 * Returns 0 on success, 1 if a host allocation fails.
 */
int main()
{
    const int Nx = 3;
    const int Ny = 4;

    float *h_x = (float *)malloc(Nx * sizeof(float));
    float *h_y = (float *)malloc(Ny * sizeof(float));

    float *h_X = (float *)malloc(Nx * Ny * sizeof(float));
    float *h_Y = (float *)malloc(Nx * Ny * sizeof(float));

    if (!h_x || !h_y || !h_X || !h_Y) {
        fprintf(stderr, "Host memory allocation failed\n");
        /* free(NULL) is a no-op, so releasing all four unconditionally is safe. */
        free(h_x); free(h_y); free(h_X); free(h_Y);
        return 1;
    }

    /* Same inputs as the Matlab example: x = [1 2 3], y = [4 5 6 7]. */
    for (int i = 0; i < Nx; i++) h_x[i] = i + 1.f;
    for (int i = 0; i < Ny; i++) h_y[i] = i + 4.f;

    float *d_x; gpuErrchk(cudaMalloc(&d_x, Nx * sizeof(float)));
    float *d_y; gpuErrchk(cudaMalloc(&d_y, Ny * sizeof(float)));

    gpuErrchk(cudaMemcpy(d_x, h_x, Nx * sizeof(float), cudaMemcpyHostToDevice));
    gpuErrchk(cudaMemcpy(d_y, h_y, Ny * sizeof(float), cudaMemcpyHostToDevice));

    /* meshgrid() allocates the two output buffers; this function is responsible for freeing them. */
    thrust::pair<float *, float *> meshgrid_pointers = meshgrid(d_x, Nx, d_y, Ny);
    float *d_X = meshgrid_pointers.first;
    float *d_Y = meshgrid_pointers.second;

    /* Blocking copies: the results are complete on the host once these return. */
    gpuErrchk(cudaMemcpy(h_X, d_X, Nx * Ny * sizeof(float), cudaMemcpyDeviceToHost));
    gpuErrchk(cudaMemcpy(h_Y, d_Y, Nx * Ny * sizeof(float), cudaMemcpyDeviceToHost));

    for (int j = 0; j < Ny; j++) {
        for (int i = 0; i < Nx; i++) {
            printf("i = %i; j = %i; x = %f; y = %f\n", i, j, h_X[j * Nx + i], h_Y[j * Nx + i]);
        }
    }

    /* Release device and host buffers. */
    gpuErrchk(cudaFree(d_X));
    gpuErrchk(cudaFree(d_Y));
    gpuErrchk(cudaFree(d_x));
    gpuErrchk(cudaFree(d_y));

    free(h_X);
    free(h_Y);
    free(h_x);
    free(h_y);

    return 0;

}

    
             
                                                        
            
            
              
                
                0
              
                   
                
               讨论(0)
              
                                                  
              
              
                          
             
       
          
              
                                       
     查看其它3个回答


            
                         
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
                              			
        
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复