问题
I am trying to apply a sobel filter on a grayscale image in the x direction on each pixel and displaying the result. X direction sobel filter is:-
-1 0 1
-2 0 2
-1 0 1
I am not getting the required results. Can someone point out my mistakes? I am trying to use textures and I am not so sure as to whether I have used it correctly:
#include <cuda.h>
#include<iostream>
using namespace std;
#define CudaSafeCall( err ) __cudaSafeCall( err, __FILE__, __LINE__ )
#define CudaCheckError() __cudaCheckError( __FILE__, __LINE__ )
texture <float,2,cudaReadModeElementType> tex1;
//Kernel for x direction sobel
__global__ void implement_x_sobel(float* garbage,float* output,int width,int height,int widthStep)
{
int x=blockIdx.x*blockDim.x+threadIdx.x;
int y=blockIdx.y*blockDim.y+threadIdx.y;
float output_value=((0*tex2D(tex1,x,y))+(2*tex2D(tex1,x+1,y))+(-2*tex2D(tex1,x- 1,y))+(0*tex2D(tex1,x,y+1))+(1*tex2D(tex1,x+1,y+1))+(-1*tex2D(tex1,x-1,y+1))+ (1*tex2D(tex1,x+1,y-1))+(0*tex2D(tex1,x,y-1))+(-1*tex2D(tex1,x-1,y-1)));
output[y*widthStep+x]=output_value;
}
//Kernel for y direction sobel
//__global__ void implement_y_sobel(float* input,float* output,int width,int height,int widthStep)
//{
//}
//Host Code
inline void __cudaSafeCall( cudaError err, const char *file, const int line )
{
#ifdef CUDA_ERROR_CHECK
if ( cudaSuccess != err )
{
printf("cudaSafeCall() failed at %s:%i : %s\n",
file, line, cudaGetErrorString( err ) );
exit( -1 );
}
#endif
return;
}
inline void __cudaCheckError( const char *file, const int line )
{
#ifdef CUDA_ERROR_CHECK
cudaError err = cudaGetLastError();
if ( cudaSuccess != err )
{
printf("cudaCheckError() failed at %s:%i : %s\n",
file, line, cudaGetErrorString( err ) );
exit( -1 );
}
#endif
return;
}
void sobel(float* input,float* output,int width,int height,int widthStep)
{
cudaChannelFormatDesc channelDesc=cudaCreateChannelDesc(32,32,0,0,cudaChannelFormatKindFloat);
cudaArray * cuArray;
CudaSafeCall(cudaMallocArray(&cuArray,&channelDesc,width,height));
cudaMemcpyToArray(cuArray,0,0,input,widthStep*height,cudaMemcpyHostToDevice);
tex1.addressMode[0]=cudaAddressModeClamp;
tex1.addressMode[1]=cudaAddressModeClamp;
tex1.filterMode=cudaFilterModeLinear;
tex1.normalized=false;
cudaBindTextureToArray(tex1,cuArray,channelDesc);
float * D_output_x;
float * garbage=NULL;
CudaSafeCall(cudaMalloc(&D_output_x,widthStep*height));
dim3 blocksize(16,16);
dim3 gridsize;
gridsize.x=(width+blocksize.x-1)/blocksize.x;
gridsize.y=(height+blocksize.y-1)/blocksize.y;
//kernel call
implement_x_sobel<<<gridsize,blocksize>>>(garbage,D_output_x,width,height,widthStep/sizeof(float));
cudaThreadSynchronize();
CudaCheckError();
CudaSafeCall(cudaMemcpy(output,D_output_x,height*widthStep,cudaMemcpyDeviceToHost));
cudaFree(D_output_x);
cudaFree(garbage);
cudaFreeArray(cuArray);
}
My main file:-
#include<iostream>
#include <stdio.h>
#include <stdlib.h>
#include<opencv/highgui.h>
#include<opencv/cv.h>
#include"header.h"
using namespace std;
void main()
{
IplImage* img1=cvLoadImage("C://test.jpg",CV_LOAD_IMAGE_GRAYSCALE);
if( !img1) {
printf("ERROR: couldnt load file!\n");
}
IplImage* img2=cvCreateImage(cvGetSize(img1),IPL_DEPTH_32F,img1->nChannels);
IplImage* img3=cvCreateImage(cvGetSize(img1),IPL_DEPTH_32F,img1->nChannels);
unsigned char * pseudo_input=(unsigned char *)img1->imageData;
float * output=(float*)img2->imageData;
float *input=(float*)img3->imageData;
int s=img1->widthStep/sizeof(float);
for(int w=0;w<=(img1->height);w++)
for(int h=0;h<(img1->width*img1->nChannels);h++)
{
input[w*s+h]= pseudo_input[w*s+h];
}
sobel(input,output,img1->width,img1->height,img1->widthStep);
cvShowImage("Original Image",img1);
cvShowImage("Sobeled Image",img2);
cvWaitKey(0);
}}
回答1:
cudaCreateChannelDesc
expects as first 4 parameters the number of bits for x, y, z, and w components. It should be 32 for float
texture.
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat);
回答2:
It is hard to diagnose the problem without more information. If you are getting no meaningful output (e.g. texture is reading all 0's), that implies a problem with your texture setup or binding.
If you are off by a little bit, that is probably because you need to offset the coordinates by 0.5f, and while you are at it, be more careful about explicitly converting your ints to floats. The code won't run any slower if you declare and assign float-valued variables before calling tex2D().
来源:https://stackoverflow.com/questions/11091274/sobel-filter-in-x-direction-in-cuda