Sorting Pixels from opengl using CUDA and Thrust (Windows Port Issues…)

百般思念 提交于 2019-11-28 12:51:13

问题


I tried to port this example to WINDOWS with GLFW, since I don't have access to Linux box .. but the only thing I get is the clear color and nothing comes up ..

Did others get this example to work / Did I miss something here?

I do not even get the original image, before the sort either ...

#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 

#include <thrust/device_vector.h>
#include <thrust/sort.h>

#include <GL/glew.h>

#include <GL/glfw.h>
#include <cuda_gl_interop.h> 


const int WIDTH=800;  
const int HEIGHT=800;
const int DIM = 800;

GLuint  bufferObj;
cudaGraphicsResource *resource;



struct sort_functor
{
    __host__ __device__
        bool operator()(uchar4 left, uchar4 right) const
    {
        return (left.y < right.y);
    }
};



// create a green/black pattern
__global__ void kernel(uchar4 *ptr) {
    // map from threadIdx/BlockIdx to pixel position 
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;

    // now calculate the value at that position 
    float fx = x / (float)DIM - 0.5f;
    float fy = y / (float)DIM - 0.5f;
    unsigned char   green = 128 + 127 * sin(abs(fx * 100) - abs(fy * 100));

    // accessing uchar4 vs unsigned char* 
    ptr[offset].x = 0;
    ptr[offset].y = green;
    ptr[offset].z = 0;
    ptr[offset].w = 255;
}


static void sort_pixels(){
    cudaGraphicsMapResources(1, &resource, NULL);
    uchar4* devPtr;
    size_t  size;

    cudaGraphicsResourceGetMappedPointer((void**)&devPtr, &size, resource);

    thrust::device_ptr<uchar4> tptr = thrust::device_pointer_cast(devPtr);
    thrust::sort(tptr, tptr + (DIM*DIM), sort_functor());
    cudaGraphicsUnmapResources(1, &resource, NULL);

}


void GLFWCALL Keyboard_Callback(int key, int action)
{
    if (key == 32 && action == GLFW_PRESS)
        sort_pixels();

    return;
}



int main ()
{

    if( !glfwInit() )
    {
        fprintf( stderr, "Failed to initialize GLFW\n" );
        return -1;
    }

    glfwOpenWindowHint(GLFW_FSAA_SAMPLES, 4);
    glfwOpenWindowHint(GLFW_OPENGL_VERSION_MAJOR, 3);
    glfwOpenWindowHint(GLFW_OPENGL_VERSION_MINOR, 3);
    glfwOpenWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);

    if( !glfwOpenWindow( WIDTH, HEIGHT, 0,0,0,0, 32,0, GLFW_WINDOW ) )
    {
        fprintf( stderr, "Failed to open GLFW window. If you have an Intel GPU, they are not 3.3 compatible.\n" );
        glfwTerminate();
        return -1;
    }

    glewExperimental = GL_TRUE; 
    if (glewInit() != GLEW_OK) {
        fprintf(stderr, "Failed to initialize GLEW\n");
        return -1;
    }

    glfwSetWindowTitle( "Sort Test" );
    glfwEnable( GLFW_STICKY_KEYS );

    glEnable(GL_DEPTH_TEST);
    glDepthFunc(GL_LEQUAL);
    glClearColor(0.087, 0.087, 0.087, 1.0);

    glfwSetKeyCallback(Keyboard_Callback);



    // SORT CODE
    glGenBuffers(1, &bufferObj);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bufferObj);
    glBufferData(GL_PIXEL_UNPACK_BUFFER, DIM * DIM * 4, NULL, GL_DYNAMIC_DRAW);

    cudaGraphicsGLRegisterBuffer(&resource, bufferObj, cudaGraphicsMapFlagsNone);

    cudaGraphicsMapResources(1, &resource, NULL);
    uchar4* devPtr;
    size_t  size;
    cudaGraphicsResourceGetMappedPointer((void**)&devPtr, &size, resource);

    dim3    grid(DIM / 16, DIM / 16);
    dim3    threads(16, 16);

    kernel << <grid, threads >> >(devPtr);

    cudaGraphicsUnmapResources(1, &resource, NULL);



    do{ 
        glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT);
        glEnable (GL_BLEND);
        glBlendFunc (GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);

        glDrawPixels(DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0);

        glfwSwapBuffers();


    } // Check if the ESC key was pressed or the window was closed
    while( glfwGetKey( GLFW_KEY_ESC ) != GLFW_PRESS &&
           glfwGetWindowParam( GLFW_OPENED ) );

    // Close OpenGL window and terminate GLFW
    glfwTerminate();

    return 0;

}

Edit:

I added :

static void HandleError(cudaError_t err, int line) {
    if (err != cudaSuccess) {
        printf("%s in %s at line %d\n", cudaGetErrorString(err), "main", line);
        exit(EXIT_FAILURE);
    }
    std::cout << "What happened : " << line << " " << cudaGetErrorString(err)<< std::endl;
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ )) 

with and example call:

HandleError(cudaGraphicsGLRegisterBuffer(&resource, bufferObj, cudaGraphicsMapFlagsNone), 134);

Per the comments, no CUDA errors? so does that mean there is something wrong with the binding of the PBO or draw commands?


回答1:


Here's a version of the program that seems to work for me under windows.

Rather than going through the details of project setup, just drop this code in the simpleGL cuda sample code, in place of the code that is there, and rebuild that project.

windows version:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// OpenGL Graphics includes
#include <GL/glew.h>
#include <GL/freeglut.h>
#include <cuda_gl_interop.h>

#include <thrust/device_ptr.h>
#include <thrust/sort.h>


static void HandleError( cudaError_t err, const char *file,  int line ) {
    if (err != cudaSuccess) {
            printf( "%s in %s at line %d\n", cudaGetErrorString( err ),  file, line );
            exit( EXIT_FAILURE );
    }
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))

#define     DIM    512

GLuint  bufferObj;
cudaGraphicsResource *resource;

struct sort_functor
{
  __host__ __device__
    bool operator()(uchar4 left, uchar4 right) const
    {
      return (left.y < right.y);
    }
};



// create a green/black pattern
__global__ void kernel( uchar4 *ptr ) {
// map from threadIdx/BlockIdx to pixel position
  int x = threadIdx.x + blockIdx.x * blockDim.x;
  int y = threadIdx.y + blockIdx.y * blockDim.y;
  int offset = x + y * blockDim.x * gridDim.x;

// now calculate the value at that position
  float fx = x/(float)DIM - 0.5f;
  float fy = y/(float)DIM - 0.5f;
  unsigned char   green = 128 + 127 * sin( abs(fx*100) - abs(fy*100) );

// accessing uchar4 vs unsigned char*
  ptr[offset].x = 0;
  ptr[offset].y = green;
  ptr[offset].z = 0;
  ptr[offset].w = 255;
}

static void draw_func( void ) {

  glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0 );
  glutSwapBuffers();
}
static void sort_pixels(){
  cudaGraphicsMapResources( 1, &resource, NULL );
  uchar4* devPtr;
  size_t  size;

  cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);

  thrust::device_ptr<uchar4> tptr = thrust::device_pointer_cast(devPtr);
  thrust::sort(tptr, tptr+(DIM*DIM), sort_functor());
  cudaGraphicsUnmapResources( 1, &resource, NULL );
  draw_func();
}

static void key_func( unsigned char key, int x, int y ) {
  switch (key) {
    case 27:
        HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) );
        glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
        glDeleteBuffers( 1, &bufferObj );
        exit(0);
        break;
    case 32:
        sort_pixels();
        break;
    default:
        break;
  }
}

int main(int argc, char *argv[]) {

  cudaGLSetGLDevice( 0 );

  glutInit( &argc, argv );
  glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
  glutInitWindowSize( DIM, DIM );
  glutCreateWindow( "sort test" );
  glewInit();
  glGenBuffers( 1, &bufferObj );
  glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
  glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4, NULL, GL_DYNAMIC_DRAW_ARB );

  cudaGraphicsGLRegisterBuffer( &resource, bufferObj, cudaGraphicsMapFlagsNone );


  cudaGraphicsMapResources( 1, &resource, NULL );
  uchar4* devPtr;
  size_t  size;

  cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
  dim3    grid(DIM/16,DIM/16);
  dim3    threads(16,16);
  kernel<<<grid,threads>>>( devPtr );
  cudaGraphicsUnmapResources( 1, &resource, NULL );

// set up GLUT and kick off main loop
  glutKeyboardFunc( key_func );
  glutDisplayFunc( draw_func );
  glutMainLoop();
}

The previous linux version probably has some cruft in it. It wasn't intended to be a paragon of programming, just something to demonstrate the concept, so I picked up a previous example I had laying around. This version is probably a little bit "cleaner".

Also note that the above code works as-is (for me, anyway) on linux if I compile with:

nvcc -arch=sm_20 -o ogltest3 ogltest3.cu -lglut -lGLEW

Here's an updated version, that works on windows or linux, and does not use glDrawPixels:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// OpenGL Graphics includes
#include <GL/glew.h>
#include <GL/freeglut.h>
#include <cuda_gl_interop.h>

#include <thrust/device_ptr.h>
#include <thrust/sort.h>


static void HandleError( cudaError_t err, const char *file,  int line ) {
    if (err != cudaSuccess) {
            printf( "%s in %s at line %d\n", cudaGetErrorString( err ),  file, line );
            exit( EXIT_FAILURE );
    }
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))

#define     DIM    512

GLuint  bufferObj;
GLuint  textureID;
cudaGraphicsResource *resource;

struct sort_functor
{
  __host__ __device__
    bool operator()(uchar4 left, uchar4 right) const
    {
      return (left.y < right.y);
    }
};



// create a green/black pattern
__global__ void kernel( uchar4 *ptr ) {
// map from threadIdx/BlockIdx to pixel position
  int x = threadIdx.x + blockIdx.x * blockDim.x;
  int y = threadIdx.y + blockIdx.y * blockDim.y;
  int offset = x + y * blockDim.x * gridDim.x;

// now calculate the value at that position
  float fx = x/(float)DIM - 0.5f;
  float fy = y/(float)DIM - 0.5f;
  unsigned char   green = 128 + 127 * sin( abs(fx*100) - abs(fy*100) );

// accessing uchar4 vs unsigned char*
  ptr[offset].x = 0;
  ptr[offset].y = green;
  ptr[offset].z = 0;
  ptr[offset].w = 255;
}

static void draw_func( void ) {

//  glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0 );

  glutSwapBuffers();
}
static void sort_pixels(){
  cudaGraphicsMapResources( 1, &resource, NULL );
  uchar4* devPtr;
  size_t  size;

  cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);

  thrust::device_ptr<uchar4> tptr = thrust::device_pointer_cast(devPtr);
  thrust::sort(tptr, tptr+(DIM*DIM), sort_functor());
  cudaGraphicsUnmapResources( 1, &resource, NULL );
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj);
  glBindTexture(GL_TEXTURE_2D, textureID);
  glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, DIM, DIM, GL_BGRA, GL_UNSIGNED_BYTE, NULL);

  glBegin(GL_QUADS);
  glTexCoord2f( 0, 1.0f);
  glVertex3f(-1.0,1.0f,0);
  glTexCoord2f(0,0);
  glVertex3f(-1.0f,-1.0f,0);
  glTexCoord2f(1.0f,0);
  glVertex3f(1.0f,-1.0f,0);
  glTexCoord2f(1.0f,1.0f);
  glVertex3f(1.0f,1.0f,0);
  glEnd();

  draw_func();
}

static void close_func( void ){
        HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) );
        glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
        glDeleteBuffers( 1, &bufferObj );
        exit(0);
}

static void key_func( unsigned char key, int x, int y ) {
  switch (key) {
    case 27:
        close_func();
        break;
    case 32:
        sort_pixels();
        break;
    default:
        break;
  }
}

int main(int argc, char *argv[]) {

  cudaGLSetGLDevice( 0 );

  glutInit( &argc, argv );
  glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
  glutInitWindowSize( DIM, DIM );
  glutCreateWindow( "sort test" );
  glewInit();
  glGenBuffers( 1, &bufferObj );
  glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
  glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4, NULL, GL_DYNAMIC_DRAW_ARB );

  cudaGraphicsGLRegisterBuffer( &resource, bufferObj, cudaGraphicsMapFlagsNone );
  glEnable(GL_TEXTURE_2D);
  glGenTextures(1, &textureID);
  glBindTexture(GL_TEXTURE_2D, textureID);
  glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA8, DIM, DIM, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);


  cudaGraphicsMapResources( 1, &resource, NULL );
  uchar4* devPtr;
  size_t  size;

  cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
  dim3    grid(DIM/16,DIM/16);
  dim3    threads(16,16);
  kernel<<<grid,threads>>>( devPtr );
  cudaGraphicsUnmapResources( 1, &resource, NULL );

  glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj);
  glBindTexture(GL_TEXTURE_2D, textureID);
  glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, DIM, DIM, GL_BGRA, GL_UNSIGNED_BYTE, NULL);

  glBegin(GL_QUADS);
  glTexCoord2f( 0, 1.0f);
  glVertex3f(-1.0,1.0f,0);
  glTexCoord2f(0,0);
  glVertex3f(-1.0f,-1.0f,0);
  glTexCoord2f(1.0f,0);
  glVertex3f(1.0f,-1.0f,0);
  glTexCoord2f(1.0f,1.0f);
  glVertex3f(1.0f,1.0f,0);
  glEnd();
  draw_func();

// set up GLUT and kick off main loop
  glutCloseFunc( close_func );
  glutKeyboardFunc( key_func );
  glutDisplayFunc( draw_func );
  glutMainLoop();
}


来源:https://stackoverflow.com/questions/25041436/sorting-pixels-from-opengl-using-cuda-and-thrust-windows-port-issues

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!