CUDA Thrust: reduce_by_key on only some values in an array, based off values in a “key” array

后端 未结 2 999
無奈伤痛
無奈伤痛 2020-12-09 14:22

Let's say I have two device_vector arrays, d_keys and d_data.

If d_data is, for example, a flattened 2D 3x5 array

2条回答
  •  被撕碎了的回忆
    2020-12-09 14:40

    Given the additional comment that there are thousands of rows rather than 3, we can write a transform functor that sums an entire row. With thousands of rows, this should keep the machine pretty busy:

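    #include <iostream>
    #include <thrust/host_vector.h>
    #include <thrust/device_vector.h>
    #include <thrust/transform.h>
    #include <thrust/sequence.h>
    #include <thrust/fill.h>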
    
    #define ROW   20
    #define COL   10
    
    __device__ int *vals;
    __device__ int *keys;
    
    struct test_functor
    {
      const int a;
    
      test_functor(int _a) : a(_a) {}
    
      __device__
      int operator()(int& x, int& y ) {
        int temp = 0;
        for (int i = 0; i h_vals(ROW*COL);
      thrust::host_vector<int> h_keys(COL);
      thrust::sequence(h_vals.begin(), h_vals.end());
      thrust::fill(h_keys.begin(), h_keys.end(), 1);
      h_keys[0] = 0;
      thrust::device_vector<int> d_vals = h_vals;
      thrust::device_vector<int> d_keys = h_keys;
      thrust::device_vector<int> d_sums(ROW);
      thrust::fill(d_sums.begin(), d_sums.end(), 0);
      s_vals = thrust::raw_pointer_cast(&d_vals[0]);
      s_keys = thrust::raw_pointer_cast(&d_keys[0]);
      cudaMemcpyToSymbol(vals, &s_vals, sizeof(int *));
      cudaMemcpyToSymbol(keys, &s_keys, sizeof(int *));
      thrust::device_vector<int> d_idx(ROW);
      thrust::sequence(d_idx.begin(), d_idx.end());
      thrust::transform(d_sums.begin(), d_sums.end(), d_idx.begin(),  d_sums.begin(), test_functor(COL));
      thrust::host_vector<int> h_sums = d_sums;
      std::cout << "Results :" << std::endl;
      for (unsigned i = 0; i
                                                            
提交回复
热议问题