I need to generate a vector with random numbers between 0.0 and 1.0 using Thrust. The only documented example I could find produces ve
There are already satisfactory answers to this questions. In particular, the OP and Robert Crovella have dealt with thrust::generate while talonmies has proposed using thrust::transform.
I think there is another possibility, namely, using thrust::for_each, so I'm posting a fully worked example using such a primitive, just for the record.
I'm also timing the different solutions.
THE CODE
#include
#include
#include
#include
#include
#include
#include "TimingCPU.h"
/**************************************************/
/* RANDOM NUMBERS GENERATION STRUCTS AND FUNCTION */
/**************************************************/
template
struct rand_01 {
__host__ T operator()(T& VecElem) const { return (T)rand() / RAND_MAX; }
};
template
struct rand_01_for_each {
__host__ void operator()(T& VecElem) const { VecElem = (T)rand() / RAND_MAX; }
};
template
__host__ T rand_01_fcn() { return ((T)rand() / RAND_MAX); }
struct prg
{
float a, b;
__host__ __device__
prg(float _a = 0.f, float _b = 1.f) : a(_a), b(_b) {};
__host__ __device__
float operator()(const unsigned int n) const
{
thrust::default_random_engine rng;
thrust::uniform_real_distribution dist(a, b);
rng.discard(n);
return dist(rng);
}
};
/********/
/* MAIN */
/********/
int main() {
TimingCPU timerCPU;
const int N = 2 << 18;
//const int N = 64;
const int numIters = 50;
thrust::host_vector h_v1(N);
thrust::host_vector h_v2(N);
thrust::host_vector h_v3(N);
thrust::host_vector h_v4(N);
printf("N = %d\n", N);
double timing = 0.;
for (int k = 0; k < numIters; k++) {
timerCPU.StartCounter();
thrust::transform(thrust::host, h_v1.begin(), h_v1.end(), h_v1.begin(), rand_01());
timing = timing + timerCPU.GetCounter();
}
printf("Timing using transform = %f\n", timing / numIters);
timing = 0.;
for (int k = 0; k < numIters; k++) {
timerCPU.StartCounter();
thrust::counting_iterator index_sequence_begin(0);
thrust::transform(index_sequence_begin,
index_sequence_begin + N,
h_v2.begin(),
prg(0.f, 1.f));
timing = timing + timerCPU.GetCounter();
}
printf("Timing using transform and internal Thrust random generator = %f\n", timing / numIters);
timing = 0.;
for (int k = 0; k < numIters; k++) {
timerCPU.StartCounter();
thrust::for_each(h_v3.begin(), h_v3.end(), rand_01_for_each());
timing = timing + timerCPU.GetCounter();
}
timerCPU.StartCounter();
printf("Timing using for_each = %f\n", timing / numIters);
//std::cout << "Values generated: " << std::endl;
//for (int k = 0; k < N; k++)
// std::cout << h_v3[k] << " : ";
//std::cout << std::endl;
timing = 0.;
for (int k = 0; k < numIters; k++) {
timerCPU.StartCounter();
thrust::generate(h_v4.begin(), h_v4.end(), rand_01_fcn);
timing = timing + timerCPU.GetCounter();
}
timerCPU.StartCounter();
printf("Timing using generate = %f\n", timing / numIters);
//std::cout << "Values generated: " << std::endl;
//for (int k = 0; k < N; k++)
// std::cout << h_v4[k] << " : ";
//std::cout << std::endl;
//std::cout << "Values generated: " << std::endl;
//for (int k = 0; k < N * 2; k++)
// std::cout << h_v[k] << " : ";
//std::cout << std::endl;
return 0;
}
On a laptop Core i5 platform, I had the following timings
N = 2097152
Timing using transform = 33.202298
Timing using transform and internal Thrust random generator = 264.508662
Timing using for_each = 33.155237
Timing using generate = 35.309399
The timings are equivalent, apart from the second one which uses Thrust's internal random number generator instead of rand().
Please, note that, differently from the other solutions, the one thrust::generate is somewhat more rigid since the function used to generate the random numbers cannot have input parameters. So, for example, it is not possible to scale the input arguments by a constant.