gpu::morphologyEx is slower than morphologyEx in CPU?

我怕爱的太早我们不能终老 提交于 2019-12-11 10:22:56

问题


I am writing C++ code to compare the performance of OpenCV's morphologyEx method in its CPU and GPU versions. Here is my code:

#include <opencv2/opencv.hpp>
#include <opencv2/gpu/gpu.hpp>
#include <sys/time.h>       
#include <ctime>
using namespace cv;
using namespace std;


/// Captures the current tick count as a double; pass the result to
/// end_timer() to obtain the elapsed time.
double start_timer()
{
    return static_cast<double>(getTickCount());
}

/// Computes the time elapsed since start_time, prints the per-frame average,
/// and returns the total.
///
/// @param start_time  Tick count previously returned by start_timer().
/// @param num_tests   Number of frames processed; used as the divisor for
///                    the printed average.
/// @return Total elapsed time in milliseconds.
double end_timer(double start_time,int num_tests)
{
    // Ticks elapsed since the timer was started.
    const double elapsed_ticks = static_cast<double>(getTickCount()) - start_time;

    // Scale by 1000 before dividing by the tick frequency to get milliseconds.
    const double elapsed_ms = 1000 * elapsed_ticks / getTickFrequency();
    const double average_ms = elapsed_ms / num_tests;

    cout << "Average time of " << num_tests << " frames is: " << average_ms << " ms" << endl;
    return elapsed_ms;
}


/**
 * Benchmarks morphological opening on the CPU (cv::morphologyEx) against the
 * GPU (cv::gpu::morphologyEx) by processing the same image num_frames times
 * with each implementation, then prints the per-frame averages and the
 * CPU/GPU time ratio.
 *
 * The GPU measurement covers the enqueued upload, the morphology operation,
 * and the enqueued download for every frame, plus the final stream
 * synchronization.
 *
 * @return 0 on success, -1 if "images.jpeg" cannot be read.
 */
int main()
{
    // Number of simulated video frames processed by each benchmark.
    const int num_frames = 100;

    Mat cpuSrc;
    cv::gpu::GpuMat src_gpu, dst_gpu;
    Mat dst;
    Mat element;
    int element_shape = MORPH_RECT;
    // 10x10 rectangular structuring element; Point(-1, -1) selects the default anchor.
    element = getStructuringElement(element_shape, Size(10, 10 ), Point(-1, -1) );
    cpuSrc = imread("images.jpeg",CV_LOAD_IMAGE_ANYDEPTH);

    if (!cpuSrc.data)
    {
        cerr << "Cannot read the data" << endl;
        return -1;
    }


    cout << "Starting calculating time for CPU ....." << endl;
    double start_time = start_timer();
    int d = 0;
    // d must advance on every iteration: without the increment the loop
    // never terminates and the GPU benchmark below is never reached.
    for (; d < num_frames; ++d)
    {
        cv::morphologyEx(cpuSrc, dst, CV_MOP_OPEN, element,Point(-1,-1),1);
    }

    // d now equals the number of frames processed, so it is the divisor for the average.
    double total_time_cpu = end_timer(start_time,d);



//--------------------------------------------------------------
    cout << "Starting calculating time for GPU ....." << endl;
    d = 0;
    // Intermediate buffers handed to gpu::morphologyEx on every call.
    cv::gpu::GpuMat buf1, buf2;
    gpu::Stream stream;
    // NOTE(review): the timed region includes the very first GPU calls, which
    // may carry one-time initialization cost -- consider a warm-up iteration
    // before starting the timer if steady-state throughput is the goal.
    double start_time_1 = start_timer();

    for (; d < num_frames; ++d)
    {
        // NOTE(review): the OpenCV 2.4 headers state that asynchronous
        // upload/download expect page-locked host memory (gpu::CudaMem);
        // cpuSrc and dst are plain Mat objects here -- confirm this is intended.
        stream.enqueueUpload(cpuSrc, src_gpu);
        cv::gpu::morphologyEx(src_gpu,dst_gpu,CV_MOP_OPEN,element,
                   buf1,buf2,Point(-1,-1),1,stream);
        stream.enqueueDownload(dst_gpu, dst);

    }
    // Block until every operation enqueued on the stream has finished so the
    // measured time covers the actual GPU work, not just the enqueue calls.
    stream.waitForCompletion();
    double total_time_gpu = end_timer(start_time_1,d);

    cout << "Gain is: " << total_time_cpu / total_time_gpu << endl;
    return 0;
}

I am using a loop to simulate a video that contains 100 frames. I am using an NVIDIA Corporation GF110 [GeForce GTX 570] and an Intel Corporation Xeon E5/Core i7 DMI2. Moreover, I measured the upload and download times: they are very large for the first frame, but after that they are negligible (approximately 0.02 ms per frame for uploading and 0.1 ms for downloading), so the main time consumption is in the morphologyEx operation.


The time results for this simulations are as follows:

For the CPU morphology version, the average time over 100 frames is 0.027349 ms, and for the GPU version it is 18.0128 ms.

Could you please help me figure out what might be the reasons for such unexpected performance?

Thank you so much in advance.


回答1:


In the initialization you should call:

cv::gpu::setDevice(0);

It will speed up initialization.



来源:https://stackoverflow.com/questions/21054874/gpumorphologyex-is-slower-than-morphologyex-in-cpu

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!