问题
I have been reading through many of the SO questions related to constant memory and I still don't understand why my program is not working. Overall, it looks as follows:
Common.cuh
// Constant-memory variable. This is a definition, not an extern declaration,
// so every .cu file that includes this header gets its own numElements.
__constant__ int numElements;
// Kernel prototype; the definition is in Common.cu.
__global__
void kernelFunction();
Common.cu
#include "Common.cuh"
#include <stdio.h>
// Kernel that prints the value of the __constant__ variable numElements
// from device code. Takes no arguments; every launched thread executes the
// printf. The return type must be spelled out (kernels return void) so the
// definition matches the prototype in Common.cuh.
__global__
void kernelFunction()
{
    printf("NumElements = %d", numElements);
}
Test.cu
#include "Common.cuh"
// Host entry point: copies the value 100 into numElements, launches
// kernelFunction once, and waits for the device to finish.
int main()
{
int N = 100;
// Copy N into the numElements symbol as seen by this translation unit.
cudaMemcpyToSymbol(numElements,&N,sizeof(int));
// Launch with one block of one thread.
kernelFunction<<<1,1>>>();
// Block until the kernel has completed. None of the CUDA return values
// in this function are checked.
cudaDeviceSynchronize();
return 0;
}
It compiles with no errors, but when I print the value of numElements I just get a random value. Can someone point me in the right direction so I can understand this?
回答1:
This line:
__constant__ int numElements;
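has compilation-unit scope. That means that if you compile it into one module and also into another module, the two modules will each have their own, separate instance of numElements in __constant__ memory. Here, cudaMemcpyToSymbol in Test.cu writes to that module's copy, while the kernel in Common.cu reads its own copy, which was never written.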
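The solution is to use separate compilation and linking (relocatable device code, -rdc=true) and device-link the two modules together, at which point the device linker resolves the symbol between the two modules. Only one module should define the variable; the others should declare it extern. (Replace sm_20 below with your GPU's architecture; recent CUDA toolkits no longer accept sm_20.)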
nvcc -arch=sm_20 -rdc=true -o test common.cu test.cu
example:
$ cat common.cuh
// Shared declarations for common.cu and test.cu.
// common.cu defines COMMON_CU before including this header because it holds
// the actual definition of numElements; every other includer gets the extern
// declaration instead, which the device linker resolves (build with -rdc=true).
#if !defined(COMMON_CU)
extern __constant__ int numElements;
#endif

// Kernel defined in common.cu.
__global__ void kernelFunction();
$ cat common.cu
#define COMMON_CU
#include "common.cuh"
#include <stdio.h>
// The single definition of numElements; other translation units refer to it
// through the extern declaration in common.cuh.
__constant__ int numElements;

// Prints the value of numElements from device code, once per launched thread.
__global__ void kernelFunction()
{
    printf("NumElements = %d\n", numElements);
}
$ cat test.cu
#define TEST_CU
#include "common.cuh"
#include <stdio.h>
// Host entry point: copies the value 100 into the __constant__ symbol
// numElements, launches kernelFunction with one block of one thread, and
// waits for it to finish.
//
// Returns 0 on success. Returns 1 (after printing the CUDA error string to
// stderr) if the symbol copy, the kernel launch, or the kernel execution fails.
int main()
{
    const int N = 100;

    cudaError_t err = cudaMemcpyToSymbol(numElements, &N, sizeof(N));
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMemcpyToSymbol failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    kernelFunction<<<1, 1>>>();

    // A kernel launch returns nothing; launch-time errors (e.g. a bad
    // configuration) are reported through cudaGetLastError().
    err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "kernelFunction launch failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    // Wait for the kernel to finish; errors raised while it ran are
    // reported by this call.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "kernelFunction execution failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    return 0;
}
$ nvcc -arch=sm_20 -rdc=true -o test common.cu test.cu
$ ./test
NumElements = 100
$
来源:https://stackoverflow.com/questions/24617318/cuda-constant-memory-value-not-correct