问题
I tried to port this example to WINDOWS with GLFW, since I don't have access to Linux box .. but the only thing I get is the clear color and nothing comes up ..
Did others get this example to work / Did I miss something here?
I do not even get the original image, before the sort either ...
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <thrust/device_vector.h>
#include <thrust/sort.h>
#include <GL/glew.h>
#include <GL/glfw.h>
#include <cuda_gl_interop.h>
const int WIDTH=800;
const int HEIGHT=800;
const int DIM = 800;
GLuint bufferObj;
cudaGraphicsResource *resource;
struct sort_functor
{
__host__ __device__
bool operator()(uchar4 left, uchar4 right) const
{
return (left.y < right.y);
}
};
// create a green/black pattern
__global__ void kernel(uchar4 *ptr) {
// map from threadIdx/BlockIdx to pixel position
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;
// now calculate the value at that position
float fx = x / (float)DIM - 0.5f;
float fy = y / (float)DIM - 0.5f;
unsigned char green = 128 + 127 * sin(abs(fx * 100) - abs(fy * 100));
// accessing uchar4 vs unsigned char*
ptr[offset].x = 0;
ptr[offset].y = green;
ptr[offset].z = 0;
ptr[offset].w = 255;
}
static void sort_pixels(){
cudaGraphicsMapResources(1, &resource, NULL);
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer((void**)&devPtr, &size, resource);
thrust::device_ptr<uchar4> tptr = thrust::device_pointer_cast(devPtr);
thrust::sort(tptr, tptr + (DIM*DIM), sort_functor());
cudaGraphicsUnmapResources(1, &resource, NULL);
}
void GLFWCALL Keyboard_Callback(int key, int action)
{
if (key == 32 && action == GLFW_PRESS)
sort_pixels();
return;
}
int main ()
{
if( !glfwInit() )
{
fprintf( stderr, "Failed to initialize GLFW\n" );
return -1;
}
glfwOpenWindowHint(GLFW_FSAA_SAMPLES, 4);
glfwOpenWindowHint(GLFW_OPENGL_VERSION_MAJOR, 3);
glfwOpenWindowHint(GLFW_OPENGL_VERSION_MINOR, 3);
glfwOpenWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
if( !glfwOpenWindow( WIDTH, HEIGHT, 0,0,0,0, 32,0, GLFW_WINDOW ) )
{
fprintf( stderr, "Failed to open GLFW window. If you have an Intel GPU, they are not 3.3 compatible.\n" );
glfwTerminate();
return -1;
}
glewExperimental = GL_TRUE;
if (glewInit() != GLEW_OK) {
fprintf(stderr, "Failed to initialize GLEW\n");
return -1;
}
glfwSetWindowTitle( "Sort Test" );
glfwEnable( GLFW_STICKY_KEYS );
glEnable(GL_DEPTH_TEST);
glDepthFunc(GL_LEQUAL);
glClearColor(0.087, 0.087, 0.087, 1.0);
glfwSetKeyCallback(Keyboard_Callback);
// SORT CODE
glGenBuffers(1, &bufferObj);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bufferObj);
glBufferData(GL_PIXEL_UNPACK_BUFFER, DIM * DIM * 4, NULL, GL_DYNAMIC_DRAW);
cudaGraphicsGLRegisterBuffer(&resource, bufferObj, cudaGraphicsMapFlagsNone);
cudaGraphicsMapResources(1, &resource, NULL);
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer((void**)&devPtr, &size, resource);
dim3 grid(DIM / 16, DIM / 16);
dim3 threads(16, 16);
kernel << <grid, threads >> >(devPtr);
cudaGraphicsUnmapResources(1, &resource, NULL);
do{
glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT);
glEnable (GL_BLEND);
glBlendFunc (GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glDrawPixels(DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0);
glfwSwapBuffers();
} // Check if the ESC key was pressed or the window was closed
while( glfwGetKey( GLFW_KEY_ESC ) != GLFW_PRESS &&
glfwGetWindowParam( GLFW_OPENED ) );
// Close OpenGL window and terminate GLFW
glfwTerminate();
return 0;
}
Edit:
I added :
static void HandleError(cudaError_t err, int line) {
if (err != cudaSuccess) {
printf("%s in %s at line %d\n", cudaGetErrorString(err), "main", line);
exit(EXIT_FAILURE);
}
std::cout << "What happened : " << line << " " << cudaGetErrorString(err)<< std::endl;
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
with and example call:
HandleError(cudaGraphicsGLRegisterBuffer(&resource, bufferObj, cudaGraphicsMapFlagsNone), 134);
Per the comments, no CUDA errors? so does that mean there is something wrong with the binding of the PBO or draw commands?
回答1:
Here's a version of the program that seems to work for me under windows.
Rather than going through the details of project setup, just drop this code in the simpleGL cuda sample code, in place of the code that is there, and rebuild that project.
windows version:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// OpenGL Graphics includes
#include <GL/glew.h>
#include <GL/freeglut.h>
#include <cuda_gl_interop.h>
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
static void HandleError( cudaError_t err, const char *file, int line ) {
if (err != cudaSuccess) {
printf( "%s in %s at line %d\n", cudaGetErrorString( err ), file, line );
exit( EXIT_FAILURE );
}
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
#define DIM 512
GLuint bufferObj;
cudaGraphicsResource *resource;
struct sort_functor
{
__host__ __device__
bool operator()(uchar4 left, uchar4 right) const
{
return (left.y < right.y);
}
};
// create a green/black pattern
__global__ void kernel( uchar4 *ptr ) {
// map from threadIdx/BlockIdx to pixel position
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;
// now calculate the value at that position
float fx = x/(float)DIM - 0.5f;
float fy = y/(float)DIM - 0.5f;
unsigned char green = 128 + 127 * sin( abs(fx*100) - abs(fy*100) );
// accessing uchar4 vs unsigned char*
ptr[offset].x = 0;
ptr[offset].y = green;
ptr[offset].z = 0;
ptr[offset].w = 255;
}
static void draw_func( void ) {
glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0 );
glutSwapBuffers();
}
static void sort_pixels(){
cudaGraphicsMapResources( 1, &resource, NULL );
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
thrust::device_ptr<uchar4> tptr = thrust::device_pointer_cast(devPtr);
thrust::sort(tptr, tptr+(DIM*DIM), sort_functor());
cudaGraphicsUnmapResources( 1, &resource, NULL );
draw_func();
}
static void key_func( unsigned char key, int x, int y ) {
switch (key) {
case 27:
HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
glDeleteBuffers( 1, &bufferObj );
exit(0);
break;
case 32:
sort_pixels();
break;
default:
break;
}
}
int main(int argc, char *argv[]) {
cudaGLSetGLDevice( 0 );
glutInit( &argc, argv );
glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
glutInitWindowSize( DIM, DIM );
glutCreateWindow( "sort test" );
glewInit();
glGenBuffers( 1, &bufferObj );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4, NULL, GL_DYNAMIC_DRAW_ARB );
cudaGraphicsGLRegisterBuffer( &resource, bufferObj, cudaGraphicsMapFlagsNone );
cudaGraphicsMapResources( 1, &resource, NULL );
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
dim3 grid(DIM/16,DIM/16);
dim3 threads(16,16);
kernel<<<grid,threads>>>( devPtr );
cudaGraphicsUnmapResources( 1, &resource, NULL );
// set up GLUT and kick off main loop
glutKeyboardFunc( key_func );
glutDisplayFunc( draw_func );
glutMainLoop();
}
The previous linux version probably has some cruft in it. It wasn't intended to be a paragon of programming, just something to demonstrate the concept, so I picked up a previous example I had laying around. This version is probably a little bit "cleaner".
Also note that the above code works as-is (for me, anyway) on linux if I compile with:
nvcc -arch=sm_20 -o ogltest3 ogltest3.cu -lglut -lGLEW
Here's an updated version, that works on windows or linux, and does not use glDrawPixels
:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// OpenGL Graphics includes
#include <GL/glew.h>
#include <GL/freeglut.h>
#include <cuda_gl_interop.h>
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
static void HandleError( cudaError_t err, const char *file, int line ) {
if (err != cudaSuccess) {
printf( "%s in %s at line %d\n", cudaGetErrorString( err ), file, line );
exit( EXIT_FAILURE );
}
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
#define DIM 512
GLuint bufferObj;
GLuint textureID;
cudaGraphicsResource *resource;
struct sort_functor
{
__host__ __device__
bool operator()(uchar4 left, uchar4 right) const
{
return (left.y < right.y);
}
};
// create a green/black pattern
__global__ void kernel( uchar4 *ptr ) {
// map from threadIdx/BlockIdx to pixel position
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;
// now calculate the value at that position
float fx = x/(float)DIM - 0.5f;
float fy = y/(float)DIM - 0.5f;
unsigned char green = 128 + 127 * sin( abs(fx*100) - abs(fy*100) );
// accessing uchar4 vs unsigned char*
ptr[offset].x = 0;
ptr[offset].y = green;
ptr[offset].z = 0;
ptr[offset].w = 255;
}
static void draw_func( void ) {
// glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0 );
glutSwapBuffers();
}
static void sort_pixels(){
cudaGraphicsMapResources( 1, &resource, NULL );
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
thrust::device_ptr<uchar4> tptr = thrust::device_pointer_cast(devPtr);
thrust::sort(tptr, tptr+(DIM*DIM), sort_functor());
cudaGraphicsUnmapResources( 1, &resource, NULL );
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj);
glBindTexture(GL_TEXTURE_2D, textureID);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, DIM, DIM, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
glBegin(GL_QUADS);
glTexCoord2f( 0, 1.0f);
glVertex3f(-1.0,1.0f,0);
glTexCoord2f(0,0);
glVertex3f(-1.0f,-1.0f,0);
glTexCoord2f(1.0f,0);
glVertex3f(1.0f,-1.0f,0);
glTexCoord2f(1.0f,1.0f);
glVertex3f(1.0f,1.0f,0);
glEnd();
draw_func();
}
static void close_func( void ){
HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
glDeleteBuffers( 1, &bufferObj );
exit(0);
}
static void key_func( unsigned char key, int x, int y ) {
switch (key) {
case 27:
close_func();
break;
case 32:
sort_pixels();
break;
default:
break;
}
}
int main(int argc, char *argv[]) {
cudaGLSetGLDevice( 0 );
glutInit( &argc, argv );
glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
glutInitWindowSize( DIM, DIM );
glutCreateWindow( "sort test" );
glewInit();
glGenBuffers( 1, &bufferObj );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4, NULL, GL_DYNAMIC_DRAW_ARB );
cudaGraphicsGLRegisterBuffer( &resource, bufferObj, cudaGraphicsMapFlagsNone );
glEnable(GL_TEXTURE_2D);
glGenTextures(1, &textureID);
glBindTexture(GL_TEXTURE_2D, textureID);
glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA8, DIM, DIM, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
cudaGraphicsMapResources( 1, &resource, NULL );
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
dim3 grid(DIM/16,DIM/16);
dim3 threads(16,16);
kernel<<<grid,threads>>>( devPtr );
cudaGraphicsUnmapResources( 1, &resource, NULL );
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj);
glBindTexture(GL_TEXTURE_2D, textureID);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, DIM, DIM, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
glBegin(GL_QUADS);
glTexCoord2f( 0, 1.0f);
glVertex3f(-1.0,1.0f,0);
glTexCoord2f(0,0);
glVertex3f(-1.0f,-1.0f,0);
glTexCoord2f(1.0f,0);
glVertex3f(1.0f,-1.0f,0);
glTexCoord2f(1.0f,1.0f);
glVertex3f(1.0f,1.0f,0);
glEnd();
draw_func();
// set up GLUT and kick off main loop
glutCloseFunc( close_func );
glutKeyboardFunc( key_func );
glutDisplayFunc( draw_func );
glutMainLoop();
}
来源:https://stackoverflow.com/questions/25041436/sorting-pixels-from-opengl-using-cuda-and-thrust-windows-port-issues