How to pass a C++ class with array of pointers to CUDA?

前端未结

关注

 1  1286

To be clearer, what I want is to pass the pointers, and all the data they point to, to the device. To test how I can achieve this, I wrote a simple class:


                      
              相关标签:


      
      
        
          1条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  醉酒成梦        
                
              
                            
                2020-12-15 02:53
              
            
            
                                                                       
Your code had several errors in it. As I mentioned in the comments, one of the key errors is in how you allocate memory for the data regions referenced by the pointers within the class: you are passing cudaMalloc a pointer that already lives in device memory. We can fix that by creating an extra set of host-side pointers, which we use to allocate the device storage for the arrays that are pointed to within the class. In addition, there were a few other errors, such as the fact that you had no properly allocated device storage for dev_result. The following code fixes all the errors I could find, and I believe it gives the correct result. I've also added a reference form of CUDA error checking that you may find useful in your projects:

#include <stdio.h>

#define N 2
// cudaCheckErrors(msg): reads (and clears) the most recent CUDA runtime error
// with cudaGetLastError(). If it is anything other than cudaSuccess, prints
// `msg`, the CUDA error string, and the file/line of the macro invocation to
// stderr, then terminates the process with exit status 1.
// Place it immediately after the CUDA call (or kernel launch) being checked.
// The do { ... } while (0) wrapper lets the macro be used like a single
// statement followed by a semicolon.
// The local is deliberately not named with a leading double underscore:
// such identifiers are reserved for the implementation.
#define cudaCheckErrors(msg) \
    do { \
        cudaError_t cuda_check_err_ = cudaGetLastError(); \
        if (cuda_check_err_ != cudaSuccess) { \
            fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
                msg, cudaGetErrorString(cuda_check_err_), \
                __FILE__, __LINE__); \
            fprintf(stderr, "*** FAILED - ABORTING\n"); \
            exit(1); \
        } \
    } while (0)

using namespace std;

// Holds N integer arrays by pointer, together with the length of each array.
// The object stores only the pointers and lengths, not the array data itself.
class vecarray{
    public:
        int *vecptr[N];                // array of N pointers, each pointing to an int array
        int dim[N];                     // dim[i] is the length of the array vecptr[i] points to

        __device__ __host__ vecarray(); // constructor, callable from host and device code
        __device__ __host__ int sum();  // sums all the elements of all the arrays being
                                       // pointed to; callable from host and device code
};

vecarray::vecarray(){
    for(int i = 0; i<N; i++)
    {
        vecptr[i] = NULL;
        dim[i] = 0;
    }
}

// Returns the total of every element in every array referenced by vecptr.
// For each slot i, exactly dim[i] elements of vecptr[i] are read; a slot
// whose length is 0 contributes nothing and its pointer is never dereferenced.
__device__ __host__ int vecarray::sum(){
    int total = 0;
    for (int row = 0; row < N; ++row) {
        const int *values = vecptr[row];   // array for this slot
        const int count = dim[row];        // number of valid elements in it
        for (int k = 0; k < count; ++k) {
            total += values[k];
        }
    }
    return total;
}

// Kernel: computes v->sum() and stores the result in *s.
// Both v and s are dereferenced inside the kernel. Every launched thread
// performs the same computation and writes the same location.
__global__ void addvecarray( vecarray * v, int *s){
    const int total = v->sum();
    *s = total;
}

int main(){                                 //copy *V to device, do sum() and pass back
    vecarray *v, *dev_v;                    //the result by dev_v
    v = new vecarray;
    int a[3] = {1,2,3};                     //initialize v manually
    int b[4] = {4,5,6,7};
    int result = 0;
    int *dev_result;
    v->vecptr[0] = a;
    v->vecptr[1] = b;
    v->dim[0] = 3; v->dim[1] = 4;
    int *vptr[N];

    cudaMalloc((void**)&dev_v, sizeof(vecarray));
    cudaCheckErrors("cudaMalloc1 fail");
    cudaMemcpy(dev_v, v, sizeof(vecarray),cudaMemcpyHostToDevice); //copy class object
    cudaCheckErrors("cudaMemcpy1 fail");

    for(int i = 0; i < N; i++){
        cudaMalloc((void**)&(vptr[i]), v->dim[i]*sizeof(int));
        cudaCheckErrors("cudaMalloc2 fail");
        cudaMemcpy(&(dev_v->vecptr[i]), &vptr[i], sizeof(int*), cudaMemcpyHostToDevice);
        cudaCheckErrors("cudaMemcpy2 fail");
    }

    for(int i = 0; i<N; i++ ){                   //copy arrays
        cudaMemcpy(vptr[i], v->vecptr[i], v->dim[i]*sizeof(int), cudaMemcpyHostToDevice);
        cudaCheckErrors("cudaMemcpy3 fail");
    }
    cudaMalloc((void **)&dev_result, sizeof(int));
    cudaCheckErrors("cudaMalloc3 fail");
    addvecarray<<<1,1>>>(dev_v, dev_result);

    cudaMemcpy(&result, dev_result, sizeof(int), cudaMemcpyDeviceToHost);
    cudaCheckErrors("cudaMemcpy4 fail");
    printf("the result is %d\n", result);
    return 0;
}

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复