How to quickly compact a sparse array with CUDA C?

人走茶凉 提交于 2019-12-04 11:04:07

Suppose I have:

int V[] = {1, 2, 0, 0, 5};

And my desired result is:

int R[] = {1, 2, 5}

In effect we are removing elements that are zero, or copying elements only if non-zero.

#include <thrust/device_ptr.h>
#include <thrust/copy.h>
#include <stdio.h>
#define SIZE 5

#define cudaCheckErrors(msg) \
    do { \
        cudaError_t __err = cudaGetLastError(); \
        if (__err != cudaSuccess) { \
            fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
                msg, cudaGetErrorString(__err), \
                __FILE__, __LINE__); \
            fprintf(stderr, "*** FAILED - ABORTING\n"); \
            exit(1); \
        } \
    } while (0)

  struct is_not_zero
  {
    __host__ __device__
    bool operator()(const int x)
    {
      return (x != 0);
    }
  };



int main(){

  int V[] = {1, 2, 0, 0, 5};
  int R[] = {0, 0, 0, 0, 0};
  int *d_V, *d_R;

  cudaMalloc((void **)&d_V, SIZE*sizeof(int));
  cudaCheckErrors("cudaMalloc1 fail");
  cudaMalloc((void **)&d_R, SIZE*sizeof(int));
  cudaCheckErrors("cudaMalloc2 fail");

  cudaMemcpy(d_V, V, SIZE*sizeof(int), cudaMemcpyHostToDevice);
  cudaCheckErrors("cudaMemcpy1 fail");

  thrust::device_ptr<int> dp_V(d_V);
  thrust::device_ptr<int> dp_R(d_R);
  thrust::copy_if(dp_V, dp_V + SIZE, dp_R, is_not_zero());

  cudaMemcpy(R, d_R, SIZE*sizeof(int), cudaMemcpyDeviceToHost);
  cudaCheckErrors("cudaMemcpy2 fail");

  for (int i = 0; i<3; i++)
    printf("R[%d]: %d\n", i, R[i]);

  return 0;


}

the struct defintion provides us with a functor that tests for zero elements. Note that in thrust, there are no kernels and we are not writing device code directly. All that happens behind the scenes. And I'd definitely suggest familiarizing yourself with the quick start guide, so as not to turn this question into a tutorial on thrust.

After reviewing the comments, I think this modified version of the code will work around the cuda 4.0 issues:

#include <thrust/device_ptr.h>
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <stdio.h>
#define SIZE 5

  struct is_not_zero
  {
    __host__ __device__
    bool operator()(const int x)
    {
      return (x != 0);
    }
  };



int main(){

  int V[] = {1, 2, 0, 0, 5};
  int R[] = {0, 0, 0, 0, 0};

  thrust::host_vector<int> h_V(V, V+SIZE);
  thrust::device_vector<int> d_V = h_V;
  thrust::device_vector<int> d_R(SIZE, 0);

  thrust::copy_if(d_V.begin(), d_V.end(), d_R.begin(), is_not_zero());
  thrust::host_vector<int> h_R = d_R;

  thrust::copy(h_R.begin(), h_R.end(), R);

  for (int i = 0; i<3; i++)
    printf("R[%d]: %d\n", i, R[i]);

  return 0;


}
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!