I would like to copy memory from the host to the device using thrust as in
thrust::host_vector h_vec(1 << 28);
thrust::device_vector
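Here's a worked example using thrust::cuda::experimental::pinned_allocator, which backs the host vector with page-locked (pinned) memory so that cudaMemcpyAsync can actually run asynchronously with respect to the host: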
// Compile with:
// nvcc --std=c++11 mem_async.cu -o mem_async
#include <cstdlib>
#include <iostream>

#include <cuda_runtime.h>

#include <thrust/device_vector.h>
#include <thrust/fill.h>
#include <thrust/host_vector.h>
#include <thrust/system/cuda/experimental/pinned_allocator.h>
#define LEN 1024

// Aborts the program with a diagnostic if a CUDA runtime call did not succeed.
//   status: return code of the CUDA runtime call being checked.
//   what:   short label for the call, used only in the error message.
static void checkCuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        std::cerr << what << " failed: " << cudaGetErrorString(status)
                  << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

// Demonstrates an asynchronous device-to-host copy between Thrust containers.
//
// A device vector of LEN floats is filled with -1, copied into a host vector
// with cudaMemcpyAsync, and the first host element is printed. No stream
// argument is passed, so the copy is issued on the default stream.
//
// Returns EXIT_SUCCESS on success; exits with EXIT_FAILURE if a checked CUDA
// call reports an error.
int main(int argc, char *argv[]) {
    // Command-line arguments are not used by this example.
    (void)argc;
    (void)argv;

    // The host vector uses Thrust's pinned allocator so that its storage is
    // page-locked host memory.
    using PinnedFloatAllocator =
        thrust::cuda::experimental::pinned_allocator<float>;
    thrust::host_vector<float, PinnedFloatAllocator> h_vec(LEN);
    thrust::device_vector<float> d_vec(LEN);

    // Float literal: the vector's element type is float, not double.
    thrust::fill(d_vec.begin(), d_vec.end(), -1.0f);

    // Enqueue the device -> host copy; the call may return before the data
    // has arrived in h_vec.
    checkCuda(cudaMemcpyAsync(thrust::raw_pointer_cast(h_vec.data()),
                              thrust::raw_pointer_cast(d_vec.data()),
                              d_vec.size() * sizeof(float),
                              cudaMemcpyDeviceToHost),
              "cudaMemcpyAsync");

    // Comment out this line to see what happens: without it the host may read
    // h_vec[0] before the copy has landed.
    checkCuda(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

    std::cout << h_vec[0] << std::endl;
    return EXIT_SUCCESS;
}
Comment out the synchronize step and you should see 0 printed to the console instead of -1: the memory transfer is asynchronous, so the host reads h_vec[0] before the copy from the device has completed.