Poor opengl image processing performance

纵然是瞬间 提交于 2019-12-07 14:26:45

问题


I'm trying to do some simple image processing using opengl. Since I couldn't find any good library that does this alrdy I've been trying to do my own solution.

I simply want to compose a few images on the gpu and then read them back. However the performance of my implementation seems almost equal to what it takes do on the cpu... something is wrong...

I've tried to follow the best practices I've found on the net. But still it's doing something wrong.

I've tried removing all the irrelevant code.

Any ideas as to why this implementation has poor performance?

int image_width = 1280;
int image_height = 720;
int image_size = image_width * image_height;

class texture
{
public:
    texture()
    {   
        glGenTextures(1, &texture_);    
        bind();
        glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
        glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, image_width, image_height, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
    }

    ~texture(){ glDeleteTextures(1, &texture_); }
    void bind(){ glBindTexture(GL_TEXTURE_2D, texture_); }
    GLuint handle() { return texture_; }

private:
    GLuint texture_;
};
typedef std::shared_ptr<texture> texture_ptr;

class pixel_buffer // pixel buffer with associated texture
{
public:
    pixel_buffer()
    {
        glGenBuffersARB(1, &pbo_);
        bind_pbo();
        glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, image_size, 0, GL_STREAM_DRAW);
    }

    ~pixel_buffer(){ glDeleteBuffers(1, &pbo_); }

    void begin_write(void* src)
    {
        texture_.bind();
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo_);
        glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, image_size, 0, GL_STREAM_DRAW);
        void* ptr = glMapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY);
        assert(ptr); 
        memcpy(ptr, src, image_size);
        glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
    }

    void end_write()
    {
        bind_texture();
        bind_pbo();
        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, image_width, image_height, GL_BGRA, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0));
        unbind_pbo();
    }

    void begin_read(GLuint buffer)
    {
        glReadBuffer(buffer);
        glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo_);
        glBufferData(GL_PIXEL_PACK_BUFFER_ARB, image_size, NULL, GL_STREAM_READ);
        glReadPixels(0, 0, image_width, image_height, GL_BGRA, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0));
    }

    void end_read(void* dest)
    {
        void* ptr = glMapBuffer(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY);   
        memcpy(dest, ptr, image_size);
        glUnmapBuffer(GL_PIXEL_PACK_BUFFER_ARB);
        unbind_pbo();
    }

    void bind_pbo(){ glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo_); }
    void unbind_pbo(){ glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo_); }
    void bind_texture() { texture_.bind(); }

    GLuint texture_handle() { return texture_.handle(); }

private:
    texture texture_;
    GLuint pbo_;
};
typedef std::shared_ptr<pixel_buffer> pixel_buffer_ptr;

class frame_buffer// frame buffer with associated pixel buffer
{
public:
    frame_buffer()
    {
        glGenFramebuffersEXT(1, &fbo_);

        bind();
        pbo_.bind_texture();
        glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_2D, pbo_.texture_handle(), 0);
    }

    ~frame_buffer() { glDeleteFramebuffersEXT(1, &fbo_); }

    void bind() { glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fbo_); }
    void unbind() { glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);    }

    void begin_read()
    {
        bind();
        pbo_.begin_read(GL_COLOR_ATTACHMENT0_EXT);
    }

    void end_read(void* dest)
    {
        pbo_.end_read(dest);
        unbind();
    }

private:
    pixel_buffer pbo_;
    GLuint fbo_;
};
typedef std::shared_ptr<frame_buffer> frame_buffer_ptr;

struct image_processor::implementation
{   
    void compose(const std::vector<image_ptr>& images)
    {                   
        // END PREVIOUS READ
        if(reading_fbo_)
        {
            image_ptr result_image = std::make_shared<image>(image_size);
            reading_fbo_->end_read(result_image->data());
            output_.push(reading_result_image_);

            reading_fbo_ = nullptr;
        }

        // END PREVIOUS WRITE
        frame_buffer_ptr written_fbo;
        if(!writing_pbo_group_.empty())
        {
            // END
            written_fbo = get_fbo();
            written_fbo->bind();
            glClear(GL_COLOR_BUFFER_BIT);   

            for(size_t n = 0; n < writing_pbo_group_.size(); ++n)
            {
                writing_pbo_group_[n]->end_write();
                writing_pbo_group_[n]->bind_texture();
                quad_->draw(); // DRAW FULLSCREEN QUAD
            }
            written_fbo->unbind();

            writing_pbo_group_.clear();
        }

        // BEGIN NEW WRITE
        if(!images.empty())
        {           
            for(size_t n = 0; n < images.size(); ++n)
            {
                auto pbo = get_pbo();
                pbo->begin_write(images[n]->data());

                writing_pbo_group_.push_back(pbo);
            }
        }

        // BEGIN NEW READ       
        if(written_fbo)
        {   
            written_fbo->begin_read();

            reading_fbo_ = written_fbo; 
        }
    }

    pixel_buffer_ptr get_pbo()
    {
        if(pbo_pool_.empty())
            pbo_pool_.push_back(std::make_shared<pixel_buffer>());

        auto pbo = pbo_pool_.front();
        pbo_pool_.pop_front();

        return pixel_buffer_ptr(pbo.get(), [=](pixel_buffer*){pbo_pool_.push_back(pbo);});
    }

    frame_buffer_ptr get_fbo()
    {
        if(fbo_pool_.empty())
            fbo_pool_.push_back(std::make_shared<frame_buffer>());

        auto fbo = fbo_pool_.front();
        fbo_pool_.pop_front();

        return frame_buffer_ptr(fbo.get(), [=](frame_buffer*){fbo_pool_.push_back(fbo);});
    }

    std::vector<pixel_buffer_ptr>   writing_pbo_group_;
    frame_buffer_ptr                reading_fbo_;

    std::deque<pixel_buffer_ptr> pbo_pool_;
    std::deque<frame_buffer_ptr> fbo_pool_;
};

EDIT:

Did some profiling. Most cpu time seems to be spent in begin_write();

Can't see anything wrong with it though...

void begin_write(void* src)
{
    texture_.bind();
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo_);
    glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, image_size, 0, GL_STREAM_DRAW);
    void* ptr = glMapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY);
    assert(ptr); 
    memcpy(ptr, src, image_size);
    glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
}

回答1:


Your bottleneck is the memory transfer from the CPU to the GPU. GPU allows you to boost performance when you need to do many parallel calculations on the same small amount of data that is stored in the GPU memory (for example draw the same scene 30 frames per second). When you need to transfer data back and forth from the main memory and GPU memory, most of the time is wasted on exactly that.




回答2:


Try altering the pixelformat (i.e. BGRA instead of ARGB).

I'm not sure of the specifics, but a mismatch in what the card actually uses and what you want could produce disastrous performance.

(It's hard to help more without info on your dataset, the algorithms you're running and some hard benchmark data...)



来源:https://stackoverflow.com/questions/3755950/poor-opengl-image-processing-performance

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!