How to pass a C++ class with array of pointers to CUDA?

前端 未结 1 1286
情歌与酒
情歌与酒 2020-12-15 02:15

To be clearer, what I want is to pass the pointers, together with all the data they point to, to the device. To test how I can achieve this, I wrote a simple class:

c         


        
相关标签:
1条回答
  • 2020-12-15 02:53

    Your code had several errors in it. As I mentioned in the comments, one of the key errors is in how you are allocating memory for the data regions referenced by pointers within the class. The key mistake there is that you are passing a pointer to cudaMalloc that already lives in device memory. We can fix that by creating an extra set of pointers that we will use to allocate the needed device storage for the arrays that are pointed to within the class. In addition there were a few other errors, such as the fact that you had no properly allocated device storage for dev_result. The following code fixes all the errors I could find and I believe gives the correct result. I've also added a reference form of cuda error checking that you may find useful to use in your projects:

    #include <stdio.h>
    
    #define N 2
    // cudaCheckErrors(msg): fetch the runtime's last recorded error with
    // cudaGetLastError(); if it is not cudaSuccess, print msg, the CUDA error
    // string and the file/line of the macro use to stderr, then exit(1).
    // Place it directly after the CUDA call or kernel launch to be checked.
    // The local is deliberately NOT named with a double underscore: such
    // identifiers are reserved for the implementation in C++.
    #define cudaCheckErrors(msg) \
        do { \
            cudaError_t cuda_check_err_ = cudaGetLastError(); \
            if (cuda_check_err_ != cudaSuccess) { \
                fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
                    msg, cudaGetErrorString(cuda_check_err_), \
                    __FILE__, __LINE__); \
                fprintf(stderr, "*** FAILED - ABORTING\n"); \
                exit(1); \
            } \
        } while (0)
    
    using namespace std;
    
    // Holds N (pointer, length) pairs: vecptr[i] addresses an int array and
    // dim[i] records how many elements that array has.
    class vecarray{
    public:
        int *vecptr[N];   // vecptr[i] points to the i-th int array
        int dim[N];       // dim[i] is the length of the array at vecptr[i]

        // Constructor, callable from host and device code.
        __host__ __device__ vecarray();

        // Returns the sum of all elements of all arrays being pointed to;
        // callable from host and device code.
        __host__ __device__ int sum();
    };
    
    // Default constructor: leaves the object empty by setting every pointer
    // slot to NULL and every recorded length to 0.
    // The execution-space qualifiers are repeated here so the definition
    // matches the in-class declaration, as the definition of sum() does.
    __device__ __host__ vecarray::vecarray(){
        for(int i = 0; i<N; i++)
        {
            vecptr[i] = NULL;   // no array attached yet
            dim[i] = 0;         // so sum() reads nothing for this slot
        }
    }
    
    // Adds up every element of every array referenced by this object.
    // For each slot k in [0, N) it reads dim[k] ints starting at vecptr[k];
    // a slot whose dim is 0 contributes nothing and is never dereferenced.
    // Returns the accumulated total as an int.
    __device__ __host__ int vecarray::sum(){
        int total = 0;
        for (int k = 0; k < N; ++k) {
            const int *row = vecptr[k];
            const int len = dim[k];
            for (int idx = 0; idx < len; ++idx)
                total += row[idx];
        }
        return total;
    }
    
    // Kernel: calls v->sum() and stores the returned value in *s.
    // There is no thread indexing or guard, so every launched thread performs
    // the same computation and the same write; main() launches it as <<<1,1>>>.
    // Both v and s are dereferenced inside the kernel.
    __global__ void addvecarray( vecarray * v, int *s){
        const int total = v->sum();
        *s = total;
    }
    
    // Host driver: builds a vecarray on the host, mirrors the object and the
    // arrays it points to into device allocations, runs addvecarray on the
    // device copy, copies the single int result back and prints it.
    // All device allocations and the host object are released before returning.
    int main(){
        int a[3] = {1,2,3};                     //host data for slot 0
        int b[4] = {4,5,6,7};                   //host data for slot 1
        int result = 0;

        vecarray *v = new vecarray;             //host-side object, filled manually
        v->vecptr[0] = a;
        v->vecptr[1] = b;
        v->dim[0] = 3; v->dim[1] = 4;

        vecarray *dev_v = NULL;                 //device copy of *v
        int *dev_result = NULL;                 //device storage for the sum
        int *vptr[N];                           //host-held device pointers, one per array

        cudaMalloc((void**)&dev_v, sizeof(vecarray));
        cudaCheckErrors("cudaMalloc1 fail");
        //copy the class object; its vecptr[] entries still hold host addresses
        //at this point and are overwritten slot by slot below
        cudaMemcpy(dev_v, v, sizeof(vecarray),cudaMemcpyHostToDevice);
        cudaCheckErrors("cudaMemcpy1 fail");

        for(int i = 0; i < N; i++){
            //allocate through the host-resident pointer vptr[i]; cudaMalloc must
            //not be handed the address of a pointer that lives in device memory
            cudaMalloc((void**)&(vptr[i]), v->dim[i]*sizeof(int));
            cudaCheckErrors("cudaMalloc2 fail");
            //store the device address into slot i of the device-side object
            //(&(dev_v->vecptr[i]) is only address arithmetic, no host dereference)
            cudaMemcpy(&(dev_v->vecptr[i]), &vptr[i], sizeof(int*), cudaMemcpyHostToDevice);
            cudaCheckErrors("cudaMemcpy2 fail");
            //copy the array contents
            cudaMemcpy(vptr[i], v->vecptr[i], v->dim[i]*sizeof(int), cudaMemcpyHostToDevice);
            cudaCheckErrors("cudaMemcpy3 fail");
        }

        cudaMalloc((void **)&dev_result, sizeof(int));
        cudaCheckErrors("cudaMalloc3 fail");

        addvecarray<<<1,1>>>(dev_v, dev_result);
        cudaCheckErrors("kernel launch fail");      //bad launch configuration etc.
        cudaDeviceSynchronize();
        cudaCheckErrors("kernel execution fail");   //faults raised while the kernel ran

        cudaMemcpy(&result, dev_result, sizeof(int), cudaMemcpyDeviceToHost);
        cudaCheckErrors("cudaMemcpy4 fail");
        printf("the result is %d\n", result);

        //release everything that was allocated above
        for(int i = 0; i < N; i++){
            cudaFree(vptr[i]);
            cudaCheckErrors("cudaFree1 fail");
        }
        cudaFree(dev_result);
        cudaCheckErrors("cudaFree2 fail");
        cudaFree(dev_v);
        cudaCheckErrors("cudaFree3 fail");
        delete v;
        return 0;
    }
    
    0 讨论(0)
提交回复
热议问题