Generating random numbers within a CUDA kernel in a varying range

前端 未结 2 1524
天涯浪人
天涯浪人 2020-12-16 07:59

I am trying to generate random numbers within a CUDA kernel. I wish to generate the random numbers from a uniform distribution and in integer form, startin

2条回答
  •  旧时难觅i
    2020-12-16 08:32

    EDIT: I've edited my answer to fix some of the deficiencies pointed out in the other answers (@tudorturcu) and comments.

    1. Use CURAND to generate a uniform distribution between 0.0 and 1.0. Note: 1.0 is included and 0.0 is excluded
    2. Then multiply this by the desired range (largest value - smallest value + 0.999999).
    3. Then add the offset (+ smallest value).
    4. Then truncate to an integer.

    Something like this in your device code:

    // Global thread index; used below to pick this thread's RNG state and range.
    int idx = threadIdx.x+blockDim.x*blockIdx.x;
    // assume have already set up curand and generated state for each thread...
    // assume ranges vary by thread index
    // curand_uniform returns a float in (0.0, 1.0].
    float myrandf = curand_uniform(&(my_curandstate[idx]));
    // The f suffix keeps the arithmetic in single precision; a bare 0.999999
    // is a double literal and would promote the whole expression to double.
    myrandf *= (max_rand_int[idx] - min_rand_int[idx] + 0.999999f);
    myrandf += min_rand_int[idx];
    int myrand = (int)truncf(myrandf);
    // For wide ranges the float result can round up to max + 1 when the draw is
    // (close to) 1.0, so clamp to keep the value inside [min, max].
    if (myrand > max_rand_int[idx]) myrand = max_rand_int[idx];
    

    You should:

    #include <math.h>
    

    for truncf

    Here's a fully worked example:

    $ cat t527.cu
    #include <assert.h>
    #include <math.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <curand.h>
    #include <curand_kernel.h>
    // Smallest integer to generate (inclusive); main copies it to the device
    // as the range minimum and uses it to size and index the histogram.
    #define MIN 2
    // Largest integer to generate (inclusive); the histogram has MAX-MIN+1 bins.
    #define MAX 7
    // Number of random draws; main passes this as the n argument of generate_kernel.
    #define ITER 10000000
    
    // Initialises one cuRAND generator state per thread.
    //
    // state: device array of curandState; the thread with global index t
    //        initialises state[t]. There is no bounds check, so the array
    //        must hold at least as many entries as threads are launched.
    //
    // Every thread uses the same seed (1234), its own global index as the
    // sequence number, and an offset of 0.
    __global__ void setup_kernel(curandState *state){

      const int tid = blockIdx.x*blockDim.x + threadIdx.x;
      curandState *slot = state + tid;
      curand_init(1234, tid, 0, slot);
    }
    
    // Draws n uniform random integers per thread and histograms them.
    //
    // my_curandstate: per-thread generator states, indexed by global thread
    //                 index; the advanced state is stored back on exit.
    // n:              number of draws each thread performs.
    // max_rand_int /
    // min_rand_int:   per-thread inclusive upper / lower bound, indexed by
    //                 global thread index.
    // result:         histogram; each draw increments
    //                 result[value - min_rand_int[idx]].
    //
    // Preconditions (not checked here): the three per-thread arrays have an
    // entry for every launched thread, max_rand_int[idx] >= min_rand_int[idx],
    // and result has at least (max - min + 1) entries for every thread's range.
    // The value is built in single-precision float, so very wide ranges cannot
    // reach every integer.
    __global__ void generate_kernel(curandState *my_curandstate, const unsigned int n, const unsigned *max_rand_int, const unsigned *min_rand_int,  unsigned int *result){

      int idx = threadIdx.x + blockDim.x*blockIdx.x;

      // Work on a local copy of the generator state and store it back once,
      // instead of updating the caller's array on every draw.
      curandState localState = my_curandstate[idx];
      const unsigned lo = min_rand_int[idx];
      const unsigned hi = max_rand_int[idx];
      // Loop-invariant scale factor. The f suffix keeps the math in float;
      // a bare 0.999999 is a double literal.
      const float span = (float)(hi - lo) + 0.999999f;

      for (unsigned int count = 0; count < n; ++count){
        // curand_uniform returns a float in (0.0, 1.0].
        float myrandf = curand_uniform(&localState);
        myrandf *= span;
        myrandf += (float)lo;
        unsigned myrand = (unsigned)truncf(myrandf);
        // Float rounding can push the value to hi + 1 when the draw is
        // (close to) 1.0 and the range is wide; clamp so the histogram
        // index below can never run past the last bin.
        if (myrand > hi) myrand = hi;

        assert(myrand <= hi);
        assert(myrand >= lo);
        // Atomic so concurrent threads hitting the same bin do not lose counts.
        atomicAdd(&result[myrand - lo], 1u);
      }

      my_curandstate[idx] = localState;
    }
    
    // Terminates the program with a diagnostic when a CUDA runtime call fails.
    // what: short label for the failing step, included in the message.
    static void checkCuda(cudaError_t status, const char *what){
      if (status != cudaSuccess){
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
      }
    }

    // Demo driver: seeds one generator state, draws ITER integers in
    // [MIN, MAX] with a single GPU thread, and prints how often each value
    // occurred. Returns 0 on success; exits with a failure status if any CUDA
    // call or kernel reports an error.
    int main(){

      const size_t result_bytes = (MAX-MIN+1) * sizeof(unsigned);
      // The host-side copies are small and fixed-size, so they live on the stack.
      unsigned h_result[MAX-MIN+1];
      const unsigned h_max_rand_int = MAX;
      const unsigned h_min_rand_int = MIN;

      // Both kernels are launched as <<<1,1>>>, so a single state and a single
      // min/max entry are enough.
      curandState *d_state = NULL;
      unsigned *d_result = NULL, *d_max_rand_int = NULL, *d_min_rand_int = NULL;
      checkCuda(cudaMalloc(&d_state, sizeof(curandState)), "cudaMalloc(d_state)");
      checkCuda(cudaMalloc(&d_result, result_bytes), "cudaMalloc(d_result)");
      checkCuda(cudaMalloc(&d_max_rand_int, sizeof(unsigned)), "cudaMalloc(d_max_rand_int)");
      checkCuda(cudaMalloc(&d_min_rand_int, sizeof(unsigned)), "cudaMalloc(d_min_rand_int)");
      checkCuda(cudaMemset(d_result, 0, result_bytes), "cudaMemset(d_result)");

      setup_kernel<<<1,1>>>(d_state);
      // A kernel launch returns no status; cudaGetLastError reports launch failures.
      checkCuda(cudaGetLastError(), "setup_kernel launch");

      checkCuda(cudaMemcpy(d_max_rand_int, &h_max_rand_int, sizeof(unsigned), cudaMemcpyHostToDevice), "copy of max to device");
      checkCuda(cudaMemcpy(d_min_rand_int, &h_min_rand_int, sizeof(unsigned), cudaMemcpyHostToDevice), "copy of min to device");

      generate_kernel<<<1,1>>>(d_state, ITER, d_max_rand_int, d_min_rand_int, d_result);
      checkCuda(cudaGetLastError(), "generate_kernel launch");
      // Wait for the kernel so that execution errors (including a failed
      // device-side assert) are reported here rather than by a later call.
      checkCuda(cudaDeviceSynchronize(), "generate_kernel execution");

      checkCuda(cudaMemcpy(h_result, d_result, result_bytes, cudaMemcpyDeviceToHost), "copy of result to host");
      printf("Bin:    Count: \n");
      for (int i = MIN; i <= MAX; i++)
        printf("%d    %u\n", i, h_result[i-MIN]);  // counts are unsigned, hence %u

      checkCuda(cudaFree(d_min_rand_int), "cudaFree(d_min_rand_int)");
      checkCuda(cudaFree(d_max_rand_int), "cudaFree(d_max_rand_int)");
      checkCuda(cudaFree(d_result), "cudaFree(d_result)");
      checkCuda(cudaFree(d_state), "cudaFree(d_state)");

      return 0;
    }
    
    
    $ nvcc -arch=sm_20 -o t527 t527.cu -lcurand
    $ cuda-memcheck ./t527
    ========= CUDA-MEMCHECK
    Bin:    Count:
    2    1665496
    3    1668130
    4    1667644
    5    1667435
    6    1665026
    7    1666269
    ========= ERROR SUMMARY: 0 errors
    $
    

提交回复
热议问题