问题
In the following vector-addition example, why do we need the statement `int tid = threadIdx.x;`? I know it determines the index of the thread, but I am not sure why that index has to be assigned to a variable.
// Element-wise vector addition: dev_c[i] = dev_a[i] + dev_b[i].
// Each thread handles the single element selected by its threadIdx.x; threads
// whose index is N or greater do nothing. Only threadIdx.x is used, so the
// index is relative to the thread's own block.
__global__ void add(int * dev_a, int* dev_b, int* dev_c){
    int tid = threadIdx.x; // this thread's index within its block
    if(tid>=N){
        return; // no element is assigned to this thread
    }
    const int sum = dev_a[tid] + dev_b[tid];
    dev_c[tid] = sum;
}
// Host driver: fills two N-element vectors with 0..N-1, adds them on the GPU
// using a single block of N threads, and prints each sum on its own line.
// Returns 0 on success and 1 if any CUDA runtime call or the launch fails.
int main(){ // main
    int a[N];
    int b[N];
    int c[N];
    for(int i=0;i<N;i++){
        a[i]=i;
        b[i]=i;
    }

    // Reports a failed CUDA runtime call on stderr; true means the call succeeded.
    auto ok = [](cudaError_t err, const char* what) -> bool {
        if(err != cudaSuccess){
            std::cerr<<what<<" failed: "<<cudaGetErrorString(err)<<std::endl;
            return false;
        }
        return true;
    };

    const size_t bytes = sizeof(int)*N;

    // Start as null so the cleanup below is safe even if an allocation fails.
    int* dev_a = nullptr;
    int* dev_b = nullptr;
    int* dev_c = nullptr;

    // Short-circuit: the first failing step skips every later one.
    bool success =
        ok(cudaMalloc(&dev_a, bytes), "cudaMalloc(dev_a)") &&
        ok(cudaMalloc(&dev_b, bytes), "cudaMalloc(dev_b)") &&
        ok(cudaMalloc(&dev_c, bytes), "cudaMalloc(dev_c)") &&
        ok(cudaMemcpy(dev_a,a,bytes, cudaMemcpyHostToDevice), "cudaMemcpy(dev_a)") &&
        ok(cudaMemcpy(dev_b,b,bytes, cudaMemcpyHostToDevice), "cudaMemcpy(dev_b)");

    if(success){
        add<<<1,N>>>(dev_a,dev_b,dev_c);
        // A kernel launch returns nothing; an invalid configuration (for
        // example too many threads per block) is only visible via
        // cudaGetLastError(). The blocking copy that follows waits for the
        // kernel and reports any error raised while it ran.
        success =
            ok(cudaGetLastError(), "add kernel launch") &&
            ok(cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(c)");
    }

    // Release device memory on every path; cudaFree on a null pointer is a no-op.
    cudaFree(dev_a);
    cudaFree(dev_b);
    cudaFree(dev_c);

    if(!success){
        return 1;
    }

    for(auto x:c) std::cout<<x<<std::endl; // print result
    return 0;
}
来源:https://stackoverflow.com/questions/62149638/why-does-cuda-calculate-each-index-in-vector-addition