计算RGB图像的直方图
// kernel
/*
 * Accumulates a 96-bin histogram of a packed image that stores three 8-bit
 * values per pixel.
 *
 * Bin layout (32 bins per channel, each bin spans 8 intensity levels):
 *   [ 0..31]  third byte of every pixel  (R)
 *   [32..63]  second byte of every pixel (G)
 *   [64..95]  first byte of every pixel  (B)
 *
 * imgdata          packed pixel bytes
 * histogram        96 global counters; this kernel only ADDS to them, so the
 *                  caller must provide them already initialised
 * local_histogram  per-work-group scratch space, at least 96 uints
 * data_size_item   number of pixels handled by one work-item
 * all_byte_size    total number of bytes in imgdata
 */
__kernel void histogram(__global uchar* imgdata,
                        __global uint *histogram,
                        __local uint *local_histogram,
                        uint data_size_item,
                        uint all_byte_size)
{
    const size_t local_id = get_local_id(0);
    const size_t local_size = get_local_size(0);

    // Clear every one of the 96 local bins. The work-items of the group share
    // the job with a stride of the group size, so this is correct for any
    // work-group size.
    for (size_t bin = local_id; bin < 96; bin += local_size)
    {
        local_histogram[bin] = 0;
    }
    barrier(CLK_LOCAL_MEM_FENCE); // all bins are zero before anyone counts

    // Byte range [first_byte, end_byte) owned by this work-item, clipped to
    // the size of the image.
    const size_t first_byte = get_global_id(0) * (size_t)data_size_item * 3;
    size_t end_byte = first_byte + (size_t)data_size_item * 3;
    if (end_byte > all_byte_size)
    {
        end_byte = all_byte_size;
    }

    // "i + 2 < end_byte" keeps all three bytes of the pixel inside the range.
    for (size_t i = first_byte; i + 2 < end_byte; i += 3)
    {
        atomic_inc(local_histogram + imgdata[i] / 8 + 64);     // B
        atomic_inc(local_histogram + imgdata[i + 1] / 8 + 32); // G
        atomic_inc(local_histogram + imgdata[i + 2] / 8);      // R
    }

    // The partial counts live in local memory, so a local fence is what makes
    // the whole group's updates visible before they are merged.
    barrier(CLK_LOCAL_MEM_FENCE);

    // Merge the group's partial histogram into the global one; strided so
    // that all 96 bins are covered even by groups smaller than 96 items.
    for (size_t bin = local_id; bin < 96; bin += local_size)
    {
        atomic_add(histogram + bin, local_histogram[bin]);
    }
}
#include <iostream>
#include <opencv2/opencv.hpp>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
// Name of the OpenCL source file that is handed to build_program().
constexpr char histogram_cl_kernel_filename[] = "histogram.cl";
/**
 * Selects the OpenCL device used by this program.
 *
 * Takes the first platform returned by clGetPlatformIDs and the first GPU
 * device on that platform. If either lookup fails, a message containing the
 * OpenCL status code is written to stderr and the process exits with status 1.
 *
 * @return cl_device_id of the selected GPU device
 */
cl_device_id getdevice()
{
    cl_platform_id platform;
    cl_device_id dev;

    // First available platform.
    cl_int err = clGetPlatformIDs(1, &platform, NULL);
    if (err != CL_SUCCESS)
    {
        // OpenCL reports failures through its return value rather than errno,
        // so the status code is printed directly.
        fprintf(stderr, "获取平台失败! (OpenCL error %d)\n", static_cast<int>(err));
        exit(1);
    }

    // First GPU device of that platform.
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &dev, NULL);
    if (err != CL_SUCCESS)
    {
        fprintf(stderr, "获取设备失败! (OpenCL error %d)\n", static_cast<int>(err));
        exit(1);
    }
    return dev;
}
/**
 * Reads an OpenCL C source file and builds it into a program object.
 *
 * Any failure (file access, allocation, program creation, compilation) is
 * reported and terminates the process with exit(1). On a compilation failure
 * the build log of `dev` is printed to stdout first.
 *
 * @param ctx      context the program is created in
 * @param dev      device whose build log is fetched when compilation fails
 * @param filename path of the OpenCL C source file
 * @return the successfully built cl_program
 */
cl_program build_program(cl_context ctx, cl_device_id dev,const char* filename)
{
    // Binary mode: in text mode the size reported by ftell() may differ from
    // the number of bytes fread() delivers (newline translation).
    FILE *program_handle = fopen(filename, "rb");
    if (program_handle == NULL)
    {
        perror("程序文件无法打开!");
        exit(1);
    }

    // Determine the file length.
    long file_length = -1;
    if (fseek(program_handle, 0, SEEK_END) == 0)
    {
        file_length = ftell(program_handle);
    }
    if (file_length < 0)
    {
        perror("程序文件读取失败!");
        fclose(program_handle);
        exit(1);
    }
    rewind(program_handle);

    // One extra byte for the terminating NUL.
    char *program_buffer = static_cast<char*>(malloc(static_cast<size_t>(file_length) + 1));
    if (program_buffer == NULL)
    {
        perror("内存分配失败!");
        fclose(program_handle);
        exit(1);
    }

    // Use the byte count actually read as the source length.
    size_t program_size = fread(program_buffer, sizeof(char),
                                static_cast<size_t>(file_length), program_handle);
    if (ferror(program_handle))
    {
        perror("程序文件读取失败!");
        fclose(program_handle);
        free(program_buffer);
        exit(1);
    }
    fclose(program_handle);
    program_buffer[program_size] = '\0';

    // Create the program object from the source text.
    cl_int err;
    const char *source = program_buffer;
    cl_program program = clCreateProgramWithSource(ctx, 1, &source, &program_size, &err);
    free(program_buffer);
    if (err != CL_SUCCESS)
    {
        // OpenCL does not set errno, so print its own status code.
        fprintf(stderr, "创建cl_program失败! (OpenCL error %d)\n", static_cast<int>(err));
        exit(1);
    }

    // Compile and link for the devices associated with the program.
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        // Fetch and print the build log: first its size, then its content.
        size_t log_size = 0;
        clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
        char *program_log = static_cast<char*>(malloc(log_size + 1));
        if (program_log != NULL)
        {
            program_log[0] = '\0';
            program_log[log_size] = '\0';
            clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, log_size + 1, program_log, NULL);
            std::cout<<"program_log:\n "<<program_log<<std::endl;
            free(program_log);
        }
        fprintf(stderr, "编译cl_program失败! (OpenCL error %d)\n", static_cast<int>(err));
        exit(1);
    }
    return program;
}
void calhistogram()
{
int err;
// 获取设备
cl_device_id device = getdevice();
// 创建上下文
cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
if(err<0)
{
perror("创建上下文失败!");
exit(1);
}
// 创建并编译程序
cl_program program = build_program(context,device,histogram_cl_kernel_filename);
// 创建内核
cl_kernel kernel = clCreateKernel(program,"histogram",&err);
// 创建缓存对象
// 读取图片数据
cv::Mat img = cv::imread("7.jpg");
unsigned char * data = img.data;
unsigned int size = img.cols*img.rows*3;
std::cout<<"字节数:="<<size<<std::endl;
std::cout<<"像素点数:="<<img.cols*img.rows<<std::endl;
cl_mem imgdata = clCreateBuffer(context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,size,(void *)data,&err);
if(err<0)
{
std::cout<<err<<std::endl;
perror("创建图像缓存对象失败!");
exit(1);
}
unsigned int result[96]={0};
cl_mem result_buffer = clCreateBuffer(context,CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,sizeof(result),result,NULL);
// 获取 CU 的个数
unsigned int size_CU =0;
clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(size_CU), &size_CU, NULL);
std::cout<<"计算单元(CU)的个数为: "<<size_CU<<std::endl;
// 获取每个工作组中工作项的 数量限制
size_t item_size_per_group =0;
clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_GROUP_SIZE,sizeof(item_size_per_group),&item_size_per_group,NULL);
std::cout<<"每个工作组中工作的最大数量为: "<<item_size_per_group<<std::endl;
// 工作项的总个数
size_t item_num = size_CU*item_size_per_group;
std::cout<<"工作项的总个数为:"<<item_num<<std::endl;
// 每个工作项负的 点个数
unsigned int size_per_item = img.cols*img.rows/item_num +1;
std::cout<<"每个工作项负责的点数为:"<<size_per_item<<std::endl;
// 设置核参数
err = clSetKernelArg(kernel,0,sizeof(imgdata),&imgdata); // 图像数据
err |= clSetKernelArg(kernel,1,sizeof(result_buffer),&result_buffer); // 存储结果的地址
err |= clSetKernelArg(kernel,2,sizeof(result),NULL); // 局部结果
err |= clSetKernelArg(kernel,3,sizeof(size_per_item),&size_per_item); // 每个项处理的数据点数大小
err |= clSetKernelArg(kernel,4,sizeof(size),&size);
if(err<0)
{
perror("设置参数失败!");
exit(1);
}
// 创建命令队列
cl_command_queue queue = clCreateCommandQueue(context, device , 0 ,&err);
if(err<0)
{
perror("创建命令队列失败!");
exit(1);
}
size_t offset =0;
err = clEnqueueNDRangeKernel(queue,kernel,1,&offset,&item_num, &item_size_per_group,0,NULL,NULL);
if(err<0)
{
perror("Enqueue the kernel failed!");
exit(1);
}
// 读取结果命令
err = clEnqueueReadBuffer(queue,result_buffer,CL_TRUE,0,sizeof(result),&result,0,NULL,NULL);
if(err<0)
{
perror("读取结果失败!");
exit(1);
}
// 输出结果
// R
int temp=0;
for(int i=0;i<32;i++)
{
temp+=result[i];
std::cout<<i*8<<"---"<<(i+1)*8-1<<":"<<result[i]<<std::endl;
}
std::cout<<temp<<std::endl;
}
/**
 * Program entry point: prints a greeting line, then runs the histogram
 * computation.
 */
int main()
{
    // Newline followed by an explicit flush.
    std::cout << "Hello, World!" << '\n' << std::flush;

    calhistogram();

    return EXIT_SUCCESS;
}
来源:https://www.cnblogs.com/feihu-h/p/12107537.html