How to asynchronously copy memory from the host to the device using thrust and CUDA streams

后端 未结 2 1567
[愿得一人]
[愿得一人] 2020-12-08 12:15

I would like to copy memory from the host to the device using thrust as in

thrust::host_vector<float> h_vec(1 << 28);
thrust::device_vector

        
2条回答
  •  隐瞒了意图╮
    2020-12-08 13:06

    Here's a worked example using thrust::cuda::experimental::pinned_allocator:

    // Compile with:
    // nvcc --std=c++11 mem_async.cu -o mem_async
    
    #include <cuda.h>
    #include <cuda_runtime.h>
    #include <iostream>
    
    #include <thrust/device_vector.h>
    #include <thrust/fill.h>
    #include <thrust/host_vector.h>
    #include <thrust/system/cuda/experimental/pinned_allocator.h>
    
    // Number of float elements in both the host and the device vector.
    #define LEN 1024
    
    // Demonstrates an asynchronous device-to-host copy between Thrust vectors.
    //
    // The host vector uses Thrust's experimental pinned allocator; the device
    // vector is filled with -1, copied to the host with cudaMemcpyAsync on the
    // default stream, and the first host element is printed.
    //
    // Returns 0 on success and 1 if a CUDA runtime call reports an error.
    int main(int argc, char *argv[]) {
        typedef thrust::cuda::experimental::pinned_allocator<float> PinnedAllocator;
    
        thrust::host_vector<float, PinnedAllocator> h_vec(LEN);
        thrust::device_vector<float> d_vec(LEN);
    
        // Float literal: the vectors hold float, so avoid a double constant.
        thrust::fill(d_vec.begin(), d_vec.end(), -1.0f);
    
        // The call returns once the copy is enqueued; h_vec must not be trusted
        // until the device has been synchronized.
        cudaError_t status = cudaMemcpyAsync(thrust::raw_pointer_cast(h_vec.data()),
                                             thrust::raw_pointer_cast(d_vec.data()),
                                             d_vec.size() * sizeof(float),
                                             cudaMemcpyDeviceToHost);
        if (status != cudaSuccess) {
            std::cerr << "cudaMemcpyAsync failed: "
                      << cudaGetErrorString(status) << std::endl;
            return 1;
        }
    
        // Comment out this line to see what happens.
        status = cudaDeviceSynchronize();
        if (status != cudaSuccess) {
            std::cerr << "cudaDeviceSynchronize failed: "
                      << cudaGetErrorString(status) << std::endl;
            return 1;
        }
    
        std::cout << h_vec[0] << std::endl;
        return 0;
    }
    

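    With the synchronize step in place the program prints -1. Comment out the synchronize step and you will most likely see 0 printed instead: cudaMemcpyAsync returns before the transfer has finished, so the host reads h_vec[0] while it still holds its initial value.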

提交回复
热议问题