Finding the maximum element value AND its position using CUDA Thrust

后端 未结 2 369
庸人自扰
庸人自扰 2020-12-28 08:38

How do I get not only the value but also the position of the maximum (minimum) element (res.val and res.pos)?

thrust::host_vector<... [the rest of the question's code snippet is truncated]


        
相关标签:
2条回答
  • 2020-12-28 08:58

    Don't use thrust::reduce. Use thrust::max_element (thrust::min_element) in thrust/extrema.h:

    // Build 100 pseudo-random samples on the host ...
    thrust::host_vector<float> h_vec(100);
    thrust::generate(h_vec.begin(), h_vec.end(), rand);

    // ... and copy them into a device-side vector.
    thrust::device_vector<float> d_vec(h_vec);

    // max_element returns an iterator to the largest element, not its value.
    thrust::device_vector<float>::iterator iter =
      thrust::max_element(d_vec.begin(), d_vec.end());

    // The offset of that iterator from begin() is the element's index;
    // dereferencing the iterator yields the value itself.
    unsigned int position = static_cast<unsigned int>(iter - d_vec.begin());
    float max_val = *iter;

    std::cout << "The maximum value is " << max_val
              << " at position " << position << std::endl;
    

    Be careful when passing an empty range to max_element -- you won't be able to safely dereference the result.

    0 讨论(0)
  • 2020-12-28 09:00

    Jared Hoberock has already satisfactorily answered this question. I want to provide below a slight change to account for the common case when the array has been allocated by cudaMalloc and not through a device_vector container.

    The idea is to wrap a thrust::device_ptr, dev_ptr, around the raw pointer returned by cudaMalloc, and to store the result of thrust::min_element in a second thrust::device_ptr, min_ptr (I'm considering the minimum instead of the maximum, without any loss of generality). The minimum value is then min_ptr[0], and its position is &min_ptr[0] - &dev_ptr[0].

    #include <cstdio>
    #include <cstdlib>
    #include <ctime>
    
    #include "cuda_runtime.h"
    #include "device_launch_parameters.h"
    
    #include <thrust/device_ptr.h>
    #include <thrust/device_vector.h>
    #include <thrust/extrema.h>
    
    /***********************/
    /* CUDA ERROR CHECKING */
    /***********************/
    /*
     * gpuErrchk(ans): evaluates the CUDA runtime call `ans` and forwards its
     * cudaError_t result, together with the call site's file and line, to
     * gpuAssert. The body is wrapped in do { } while (0) so the macro expands
     * to a single statement and stays correct inside unbraced if/else; call
     * sites must end with a semicolon.
     */
    #define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

    /*
     * Reports a failed CUDA runtime call.
     *
     * code  - status returned by the CUDA call being checked.
     * file  - source file of the call site (supplied by gpuErrchk).
     * line  - source line of the call site (supplied by gpuErrchk).
     * abort - when true (the default) the process exits with `code` as its
     *         exit status after printing; when false the function returns
     *         so the caller can continue.
     *
     * Does nothing when code == cudaSuccess.
     */
    inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
    {
       if (code == cudaSuccess) return;

       fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
       if (abort) exit(code);
    }
    
    /********/
    /* MAIN */
    /********/
    /*
     * Demo: locate the minimum element (value and index) of a float array that
     * lives in memory obtained from cudaMalloc, using thrust::min_element.
     *
     * Steps: fill N random host values in [0, 1], copy them to the device, wrap
     * the raw device pointer in a thrust::device_ptr, run min_element, print the
     * result, and release both buffers.
     *
     * Returns 0 on success, EXIT_FAILURE if the host allocation fails. CUDA
     * failures are reported by gpuErrchk, which terminates the process.
     */
    int main() {

        srand(static_cast<unsigned int>(time(NULL)));

        const int N = 10;

        float *h_vec = (float *)malloc(N * sizeof(float));
        if (h_vec == NULL) {
            fprintf(stderr, "Host allocation of %d floats failed\n", N);
            return EXIT_FAILURE;
        }
        for (int i=0; i<N; i++) {
            h_vec[i] = rand() / (float)(RAND_MAX);
            printf("h_vec[%i] = %f\n", i, h_vec[i]);
        }

        float *d_vec = NULL;
        gpuErrchk(cudaMalloc((void**)&d_vec, N * sizeof(float)));
        gpuErrchk(cudaMemcpy(d_vec, h_vec, N * sizeof(float), cudaMemcpyHostToDevice));

        // Wrap the raw device pointer so Thrust algorithms can consume it.
        thrust::device_ptr<float> dev_ptr = thrust::device_pointer_cast(d_vec);

        // min_element yields a pointer to the smallest element in [dev_ptr, dev_ptr + N).
        thrust::device_ptr<float> min_ptr = thrust::min_element(dev_ptr, dev_ptr + N);

        // Reading through the device_ptr copies the single element into a host float.
        float min_value = min_ptr[0];
        printf("\nMininum value = %f\n", min_value);

        // The pointer difference is a signed pointer-sized integer; passing it
        // straight to %i is undefined where that type is wider than int, so
        // narrow it explicitly (the index is always < N here).
        printf("Position = %i\n", static_cast<int>(&min_ptr[0] - &dev_ptr[0]));

        // Release device and host buffers.
        gpuErrchk(cudaFree(d_vec));
        free(h_vec);

        return 0;
    }
    
    0 讨论(0)
提交回复
热议问题