问题
Please look at the code below, which does a simple char pointer assignment inside a kernel:
// Kernel from the question: scans the first 11 chars of gpuHello for a space
// and tries to hand back the address of the character after it via finalPoint.
__global__ void seehowpointerwork(char* gpuHello, char* finalPoint){
// temp has no initial value; it is only assigned if a space is found below.
char* temp;
bool found = false;
for(int i = 0 ; i < 11; i++){
if(gpuHello[i] == ' '){
// Address of the character immediately after the first space.
temp = &gpuHello[i+1];
found = true;
break;
}
}
// sth is written but never read.
bool sth = found;
// finalPoint is a by-value parameter, so this assignment only changes the
// kernel's local copy; the pointer variable in the caller is not updated.
finalPoint = temp;
}
// Host driver from the question: uploads "Hello World", launches the kernel
// with one thread, then tries to copy 5 chars back through didItFind.
int main()
{
// Testing one concept;
string hello = "Hello World";
char* gpuHello;
// 11 bytes cover the 11 characters of "Hello World" only; the terminating
// '\0' is not allocated or copied. Return codes of the CUDA calls are not checked.
cudaMalloc((void**)&gpuHello, 11 * sizeof(char));
cudaMemcpy(gpuHello, hello.c_str(), 11 * sizeof(char), cudaMemcpyHostToDevice);
// didItFind is never assigned on the host, and it is passed to the kernel
// by value, so it still holds an indeterminate value after the launch.
char* didItFind;
// 5 bytes leave no room for a '\0' terminator after "World".
char* whatIsIt = (char*)malloc(5 * sizeof(char));
seehowpointerwork<<<1,1>>>(gpuHello, didItFind);
// Uses the uninitialized didItFind as the device source address; the
// returned error code is ignored.
cudaMemcpy(whatIsIt,didItFind, 5 * sizeof(char), cudaMemcpyDeviceToHost);
// operator<< on a char* prints up to the first '\0', which whatIsIt is not
// guaranteed to contain.
cout<<"The pointer points to : " << whatIsIt;
return 0;
}
I really don't understand why, when I print whatIsIt, it does not print "World" but just prints some random string.
EDIT: Updated version after accounting for the null terminator, as pointed out:
// Edited kernel from the question: scans the first 11 chars of gpuHello for a
// space and tries to hand a pointer back via finalPoint.
__global__ void seehowpointerwork(char* gpuHello, char* finalPoint){
// temp has no initial value; it is only assigned if a space is found below.
char* temp;
bool found = false;
for(int i = 0 ; i < 11; i++){
if(gpuHello[i] == ' '){
// NOTE: this stores the start of the buffer, not the address of the
// character after the space.
temp = gpuHello;
found = true;
break;
}
}
// sth is written but never read.
bool sth = found;
// finalPoint is a by-value parameter, so this assignment only changes the
// kernel's local copy; the pointer variable in the caller is not updated.
finalPoint = temp;
}
// Edited host driver from the question: same flow as before, but the buffers
// are one byte larger so the '\0' terminator fits.
int main()
{
// Testing one concept;
string hello = "Hello World";
char* gpuHello;
// 12 bytes = 11 characters plus the terminating '\0' provided by c_str().
// Return codes of the CUDA calls are not checked.
cudaMalloc((void**)&gpuHello, 12 * sizeof(char));
cudaMemcpy(gpuHello, hello.c_str(), 12 * sizeof(char), cudaMemcpyHostToDevice);
// didItFind is never assigned on the host, and it is passed to the kernel
// by value, so it still holds an indeterminate value after the launch.
char* didItFind;
char* whatIsIt = (char*)malloc(6 * sizeof(char));
seehowpointerwork<<<1,1>>>(gpuHello, didItFind);
// Uses the uninitialized didItFind as the device source address; the
// returned error code is ignored.
cudaMemcpy(whatIsIt,didItFind, 6 * sizeof(char), cudaMemcpyDeviceToHost);
cout<<"The pointer points to : " << whatIsIt;
return 0;
}
回答1:
You must pass finalPoint by reference (here, as a pointer to the pointer), not by value, if you want the kernel to operate the way you have defined it. Perhaps something like this:
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
using namespace std;
/**
 * Locates the first space within the first 11 characters of gpuHello and
 * stores the address of the character that follows it in *finalPoint.
 *
 * Launch layout: written for a <<<1,1>>> launch. The body does not use
 * threadIdx/blockIdx, so every launched thread performs the same scan and
 * the same store.
 *
 * @param gpuHello   Device pointer to at least 11 readable characters.
 * @param finalPoint Device pointer to a char* slot that receives the result.
 *                   Set to NULL when no space is found in the scanned range.
 */
__global__ void seehowpointerwork(char * gpuHello, char ** finalPoint){
    // Start from NULL so that a string without a space yields a well-defined
    // result instead of an indeterminate pointer.
    char* temp = NULL;
    for (int i = 0; i < 11; i++) {
        if (gpuHello[i] == ' ') {
            temp = &gpuHello[i+1];
            break;
        }
    }
    // Write through the pointer-to-pointer so the result is visible outside
    // the kernel (a by-value char* parameter would only change a local copy).
    *finalPoint = temp;
}
/**
 * Reports a failed CUDA runtime call and optionally terminates the program.
 *
 * @param code  Status returned by a CUDA runtime API call.
 * @param file  Source file of the call site (normally __FILE__).
 * @param line  Source line of the call site (normally __LINE__).
 * @param abort When true (the default), exit with `code` as the process
 *              status after printing the diagnostic.
 */
inline void gpuAssert(cudaError_t code, const char *file, int line,
                      bool abort=true)
{
    if (code != cudaSuccess) {
        // Diagnostics go to stderr so they are not mixed into program output.
        fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}
// Checks the status of a CUDA runtime call at the call site. The
// do { } while (0) wrapper makes the macro expand to a single statement, so
// `if (c) gpuErrchk(x); else ...` parses as intended.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)
/**
 * Uploads "Hello World" to the device, lets the kernel locate the character
 * after the first space, then copies the 5 characters starting there back to
 * the host and prints them.
 */
int main()
{
    // Number of characters to read back ("World").
    const size_t wordLen = 5;

    // Reserve one extra byte: streaming a char* prints up to the first '\0',
    // so the host buffer must be NUL-terminated.
    char* whatIsIt = (char*)malloc((wordLen + 1) * sizeof(char));
    if (whatIsIt == NULL) {
        fprintf(stderr, "Host allocation failed\n");
        return 1;
    }

    string hello = "Hello World";
    char* gpuHello = NULL;
    gpuErrchk( cudaMalloc((void**)&gpuHello, 11 * sizeof(char)) );
    gpuErrchk( cudaMemcpy(gpuHello, hello.data(), 11 * sizeof(char), cudaMemcpyHostToDevice) );

    // didItFinda: device-side slot the kernel writes its result pointer into.
    // didItFindb: host-side copy of that result (still a device address).
    char ** didItFinda = NULL;
    char * didItFindb = NULL;
    gpuErrchk( cudaMalloc((void **)&didItFinda, sizeof(char *)) );

    seehowpointerwork<<<1,1>>>(gpuHello, didItFinda);
    gpuErrchk( cudaPeekAtLastError() );

    // First fetch the pointer value the kernel produced, then use it as the
    // device source address for the actual character copy.
    gpuErrchk( cudaMemcpy(&didItFindb, didItFinda, sizeof(char *), cudaMemcpyDeviceToHost) );
    gpuErrchk( cudaMemcpy(whatIsIt, didItFindb, wordLen * sizeof(char), cudaMemcpyDeviceToHost) );
    whatIsIt[wordLen] = '\0';

    cout<<"The pointer points to : " << whatIsIt << endl;

    // Release device and host allocations.
    gpuErrchk( cudaFree(didItFinda) );
    gpuErrchk( cudaFree(gpuHello) );
    free(whatIsIt);
    return 0;
}
When compiled and run, this version produces:
$ nvcc -arch=sm_12 -Xptxas="-v" programmer.cu
ptxas info : Compiling entry function '_Z17seehowpointerworkPcPS_' for 'sm_12'
ptxas info : Used 4 registers, 8+16 bytes smem, 8 bytes cmem[1]
$ ./a.out
The pointer points to : World
In your original code, the device-to-host copy fails because didItFind is not a valid device pointer: you passed it to the kernel by value, so the kernel cannot modify its value on the host. The code above contains sufficient error checking to find this sort of problem; you should always check the return status of every API call.
来源:https://stackoverflow.com/questions/11487012/simple-char-assignment-not-working-in-cuda