Output of cuda program is not what was expected

前端 未结 3 1114
隐瞒了意图╮
隐瞒了意图╮ 2020-12-12 06:31
#include
#include
#include
#include


__global__ void setVal(char **c){

c[(blockIdx.y * gridDim.x         


        
3条回答
  •  情深已故
    2020-12-12 07:17

    Try this -- I tested it on a GTX 285 under CUDA 3.2 -- so it's a bit more restrictive than the current version, but it works.

    #include <stdio.h>
    #include <stdlib.h>
    #include <cuda_runtime.h>
    
    /*
     * Writes the NUL-terminated string "Hola" (5 bytes) into the buffer that
     * word[blockIdx.x] points to.
     *
     * word: table of destination pointers, indexed by block index only.
     *       Assumes each entry addresses at least 5 writable bytes -- the
     *       kernel performs no bounds check, so launch with no more blocks
     *       than the table has entries.
     *
     * threadIdx is not consulted: every thread of a block stores the same
     * bytes to the same destination.
     */
    __global__ void setValues(char** word)
    {
        const char greeting[] = { 'H', 'o', 'l', 'a', '\0' };

        /* volatile keeps each byte store as an individual write. */
        volatile char* dst = word[blockIdx.x];

        for ( int k = 0; k < 5; ++k )
        {
            dst[k] = greeting[k];
        }
    }
    
    /*
     * Terminates the program with a diagnostic when a CUDA runtime call failed.
     * file/line identify the call site (supplied by CUDA_CHECK).
     */
    static void checkCuda( cudaError_t err, const char* file, int line )
    {
        if ( cudaSuccess != err )
        {
            fprintf( stderr, "Cuda error in file '%s' in line %i : %s.\n",
                    file, line, cudaGetErrorString( err ) );
            /* Stop here: continuing would print or copy from buffers whose
               contents are unknown. */
            exit( EXIT_FAILURE );
        }
    }

    /* Wraps a CUDA runtime call so its return code is never silently dropped. */
    #define CUDA_CHECK(call) checkCuda( (call), __FILE__, __LINE__ )

    /*
     * Demo: builds a device-side table of nObjects device string buffers,
     * lets setValues write "Hola" into each one (one block per buffer), then
     * copies every buffer back and prints it.
     *
     * Returns 0 on success; any CUDA failure exits with EXIT_FAILURE.
     */
    int main()
    {
        const size_t bufferSize = 32;
        const int nObjects = 10;

        /* h_x lives on the host but holds DEVICE pointers (one per buffer);
           d_x is the device-resident copy of that pointer table. */
        char*  h_x[nObjects];
        char** d_x = 0;

        CUDA_CHECK( cudaMalloc( (void**)(&d_x), nObjects * sizeof(char*) ) );

        for ( int i=0; i < nObjects; i++ )
        {
            h_x[i] = NULL;
            CUDA_CHECK( cudaMalloc( (void**)(&h_x[i]), bufferSize * sizeof(char) ) );
            /* %p instead of %lx: unsigned long is 32-bit on 64-bit Windows. */
            printf("h_x[%d] = %p\n",i,(void*)h_x[i]);
        }

        CUDA_CHECK( cudaMemcpy( d_x, h_x, nObjects*sizeof(char*), cudaMemcpyHostToDevice) );
        printf("Copied h_x[] to d_x[]\n");

        /* Pre-fill buffer 0 so the kernel's overwrite is observable. */
        char msg[] = "Hello World!";
        CUDA_CHECK( cudaMemcpy( h_x[0], msg, sizeof(msg), cudaMemcpyHostToDevice ) );

        /*  Force synchronization and surface any pending error.
            cudaDeviceSynchronize (CUDA 4.0+) replaces the deprecated
            cudaThreadSynchronize.  */
        CUDA_CHECK( cudaDeviceSynchronize() );

        /* One block per buffer, one thread per block: the kernel indexes the
           pointer table with blockIdx.x only. */
        setValues<<<nObjects, 1>>>(d_x);

        /* Launch-configuration errors are only visible via cudaGetLastError;
           execution faults surface at the following synchronization. */
        CUDA_CHECK( cudaGetLastError() );
        CUDA_CHECK( cudaDeviceSynchronize() );

        printf("Kernel Completed Successfully.  Woot.\n\n");

        char p[bufferSize];

        printf("d_x = %p\n", (void*)d_x );
        printf("h_x = %p\n", (void*)h_x );

        /* Round-trip the pointer table; the values are unchanged because the
           kernel writes through the pointers, not to the table itself. */
        CUDA_CHECK( cudaMemcpy( h_x, d_x, nObjects*sizeof(char*), cudaMemcpyDeviceToHost) );

        printf("d_x = %p\n", (void*)d_x );
        printf("h_x = %p\n", (void*)h_x );

        for ( int i=0; i < nObjects; i++ )
        {
            CUDA_CHECK( cudaMemcpy( p, h_x[i], bufferSize*sizeof(char), cudaMemcpyDeviceToHost) );
            /* Only the first 5 bytes of each buffer are initialized by the
               kernel; guarantee termination before handing p to %s. */
            p[bufferSize - 1] = '\0';
            printf("%d p[] = %s\n",i,p);
        }

        CUDA_CHECK( cudaDeviceSynchronize() );

        /* Release every device allocation made above. */
        for ( int i=0; i < nObjects; i++ )
        {
            CUDA_CHECK( cudaFree( h_x[i] ) );
        }
        CUDA_CHECK( cudaFree( d_x ) );

        getchar();

        return 0;
    }
    

    As @Jon notes, you can't pass x (as you had declared it) to the GPU, because it's an address that lives on the CPU. In the code above, I create a host array of char* values, each pointing to a buffer allocated on the GPU, and copy that array into a char** table that is also allocated on the GPU. Hope this helps!

提交回复
热议问题