我正在尝试使用 OpenGL 进行一些简单的图像处理。由于找不到合适的库来完成这件事,我一直在尝试自己实现一套方案。
我只是想在 GPU 上合成一些图像,然后把结果读回来。然而,我的实现的性能似乎和在 CPU 上处理时几乎一样……一定是哪里出了问题……
我尽量遵循了在网上找到的最佳实践,但实现中肯定还是有什么地方做错了。
下面的代码中,我已尽量删去了所有不相关的部分。
对于这个实现为什么性能不佳,大家有什么想法吗?
// Dimensions, in pixels, of every image handled by the code below.
int image_width = 1280;
int image_height = 720;
// Size of one image in BYTES. image_size is passed as the byte count to
// glBufferData and memcpy, and every pixel transfer in this file uses
// GL_BGRA / GL_UNSIGNED_BYTE, i.e. 4 bytes per pixel. Without the factor
// of 4 the pixel buffers are too small for a full-frame upload or read-back.
int image_size = image_width * image_height * 4;
/// Owns a single OpenGL 2D texture name with image_width x image_height
/// GL_RGBA8 storage. Every member issues OpenGL calls.
class texture
{
public:
    /// Generates the texture name, selects nearest filtering and clamped
    /// wrapping, and defines the level-0 image. The texture is left bound
    /// to GL_TEXTURE_2D when the constructor returns.
    texture()
    {
        glGenTextures(1, &texture_);
        bind();
        glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
        glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
        // Null data pointer: no client pixels are supplied here.
        // NOTE(review): if a pixel unpack buffer is bound at this point the
        // pointer is treated as an offset into that buffer instead - confirm
        // callers construct textures with no unpack buffer bound.
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, image_width, image_height, 0, GL_BGRA, GL_UNSIGNED_BYTE, nullptr);
    }

    /// Deletes the texture name.
    ~texture(){ glDeleteTextures(1, &texture_); }

    // The object owns the GL name; a copy would delete the same name twice.
    texture(const texture&) = delete;
    texture& operator=(const texture&) = delete;

    /// Binds this texture to GL_TEXTURE_2D.
    void bind() const { glBindTexture(GL_TEXTURE_2D, texture_); }

    /// Returns the raw OpenGL texture name.
    GLuint handle() const { return texture_; }

private:
    GLuint texture_ = 0; // texture name produced by glGenTextures
};
typedef std::shared_ptr<texture> texture_ptr;
/// Pixel buffer object (PBO) paired with its own texture.
///
/// The same buffer is used in two directions:
///  - upload:    begin_write() copies client pixels into the buffer,
///               end_write() transfers the buffer into the texture;
///  - read-back: begin_read() issues glReadPixels into the buffer,
///               end_read() maps the buffer and copies it to client memory.
///
/// Every method restores the pixel pack / unpack buffer binding it touched
/// to 0 before returning, so unrelated pointer-based pixel transfers (for
/// example the glTexImage2D call in texture's constructor) are not
/// redirected into this buffer.
class pixel_buffer // pixel buffer with associated texture
{
public:
    /// Generates the buffer name and allocates image_size bytes of storage.
    pixel_buffer()
    {
        glGenBuffersARB(1, &pbo_);
        bind_pbo();
        glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, image_size, nullptr, GL_STREAM_DRAW);
        unbind_pbo();
    }

    /// Deletes the buffer name.
    ~pixel_buffer(){ glDeleteBuffers(1, &pbo_); }

    // The object owns the GL buffer name; a copy would delete it twice.
    pixel_buffer(const pixel_buffer&) = delete;
    pixel_buffer& operator=(const pixel_buffer&) = delete;

    /// Copies image_size bytes from src into the buffer.
    /// @param src  source pixels; must point to at least image_size bytes.
    void begin_write(void* src)
    {
        texture_.bind();
        bind_pbo();
        // Re-specify the data store before mapping it, as the original code did.
        glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, image_size, nullptr, GL_STREAM_DRAW);
        void* ptr = glMapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY);
        assert(ptr);
        memcpy(ptr, src, image_size);
        glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB);
        unbind_pbo();
    }

    /// Transfers the buffer contents into the associated texture.
    void end_write()
    {
        bind_texture();
        bind_pbo();
        // With an unpack buffer bound, the last argument is a byte offset
        // into that buffer rather than a client pointer.
        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, image_width, image_height, GL_BGRA, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0));
        unbind_pbo();
    }

    /// Issues a read of the currently bound framebuffer into this buffer.
    /// @param buffer  colour buffer to read from; forwarded to glReadBuffer.
    void begin_read(GLuint buffer)
    {
        glReadBuffer(buffer);
        glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo_);
        glBufferData(GL_PIXEL_PACK_BUFFER_ARB, image_size, nullptr, GL_STREAM_READ);
        glReadPixels(0, 0, image_width, image_height, GL_BGRA, GL_UNSIGNED_BYTE, BUFFER_OFFSET(0));
        // Release the pack binding; end_read() re-binds the buffer itself.
        glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
    }

    /// Copies the image_size bytes captured by begin_read() into dest.
    /// @param dest  destination; must have room for at least image_size bytes.
    void end_read(void* dest)
    {
        glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, pbo_);
        void* ptr = glMapBuffer(GL_PIXEL_PACK_BUFFER_ARB, GL_READ_ONLY);
        assert(ptr);
        memcpy(dest, ptr, image_size);
        glUnmapBuffer(GL_PIXEL_PACK_BUFFER_ARB);
        glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
    }

    /// Binds this buffer to the pixel unpack target.
    void bind_pbo(){ glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo_); }

    /// Unbinds any buffer from the pixel unpack target (binds buffer 0).
    void unbind_pbo(){ glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0); }

    /// Binds the associated texture to GL_TEXTURE_2D.
    void bind_texture() { texture_.bind(); }

    /// Returns the OpenGL name of the associated texture.
    GLuint texture_handle() { return texture_.handle(); }

private:
    texture texture_; // destination of end_write()
    GLuint pbo_ = 0;  // buffer name produced by glGenBuffersARB
};
typedef std::shared_ptr<pixel_buffer> pixel_buffer_ptr;
/// Framebuffer object whose colour attachment 0 is the texture of an owned
/// pixel_buffer; the same pixel_buffer is used to read the rendered result
/// back to client memory.
class frame_buffer// frame buffer with associated pixel buffer
{
public:
    /// Generates the framebuffer and attaches the pixel buffer's texture as
    /// colour attachment 0. The framebuffer is unbound again before
    /// returning so that construction does not redirect later rendering.
    frame_buffer()
    {
        glGenFramebuffersEXT(1, &fbo_);
        bind();
        pbo_.bind_texture();
        glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_2D, pbo_.texture_handle(), 0);
        unbind();
    }

    /// Deletes the framebuffer name.
    ~frame_buffer() { glDeleteFramebuffersEXT(1, &fbo_); }

    // The object owns the GL framebuffer name; a copy would delete it twice.
    frame_buffer(const frame_buffer&) = delete;
    frame_buffer& operator=(const frame_buffer&) = delete;

    /// Makes this framebuffer the current GL_FRAMEBUFFER_EXT binding.
    void bind() { glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fbo_); }

    /// Binds framebuffer 0 to GL_FRAMEBUFFER_EXT.
    void unbind() { glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0); }

    /// Issues a read of colour attachment 0 into the pixel buffer.
    /// The framebuffer only needs to be bound while the read is issued, so
    /// it is unbound again here rather than staying bound until end_read().
    void begin_read()
    {
        bind();
        pbo_.begin_read(GL_COLOR_ATTACHMENT0_EXT);
        unbind();
    }

    /// Copies the pixels captured by begin_read() into dest.
    /// @param dest  destination; forwarded to pixel_buffer::end_read.
    void end_read(void* dest)
    {
        pbo_.end_read(dest);
        unbind(); // kept from the original: framebuffer 0 is bound on return
    }

private:
    pixel_buffer pbo_; // supplies the attached texture and the read-back buffer
    GLuint fbo_ = 0;   // framebuffer name produced by glGenFramebuffersEXT
};
typedef std::shared_ptr<frame_buffer> frame_buffer_ptr;
/// Pipelined compositor. Each call to compose() advances three stages:
///  1. finish the read-back started by the previous call and publish it;
///  2. finish the uploads started by the previous call and draw them into a
///     framebuffer;
///  3. start uploads for the images passed to this call, then start the
///     read-back of the framebuffer drawn in step 2.
///
/// NOTE(review): output_ and quad_ are used below but are not declared in
/// this excerpt - presumably they were trimmed from the listing.
struct image_processor::implementation
{
    /// Runs one pipeline step.
    /// @param images  source images whose upload is started by this call;
    ///                they are drawn by the NEXT call to compose().
    void compose(const std::vector<image_ptr>& images)
    {
        // END PREVIOUS READ
        if(reading_fbo_)
        {
            image_ptr result_image = std::make_shared<image>(image_size);
            reading_fbo_->end_read(result_image->data());
            // Publish the image that was just filled by end_read().
            output_.push(result_image);
            reading_fbo_ = nullptr;
        }

        // END PREVIOUS WRITE
        frame_buffer_ptr written_fbo;
        if(!writing_pbo_group_.empty())
        {
            written_fbo = get_fbo();
            written_fbo->bind();
            glClear(GL_COLOR_BUFFER_BIT);
            for(const auto& pbo : writing_pbo_group_)
            {
                pbo->end_write();
                pbo->bind_texture();
                quad_->draw(); // DRAW FULLSCREEN QUAD
            }
            written_fbo->unbind();
            writing_pbo_group_.clear();
        }

        // BEGIN NEW WRITE
        for(const auto& source : images)
        {
            auto pbo = get_pbo();
            pbo->begin_write(source->data());
            writing_pbo_group_.push_back(pbo);
        }

        // BEGIN NEW READ
        if(written_fbo)
        {
            written_fbo->begin_read();
            reading_fbo_ = written_fbo;
        }
    }

    /// Takes a pixel buffer from the pool, creating one if the pool is empty.
    /// The returned pointer's deleter does not destroy the buffer; it pushes
    /// the pooled owner back into pbo_pool_.
    pixel_buffer_ptr get_pbo()
    {
        if(pbo_pool_.empty())
            pbo_pool_.push_back(std::make_shared<pixel_buffer>());
        auto pbo = pbo_pool_.front();
        pbo_pool_.pop_front();
        // The deleter keeps `pbo` alive and uses `this`, so it must not run
        // after this object's pools have been destroyed (see member order).
        return pixel_buffer_ptr(pbo.get(), [this, pbo](pixel_buffer*){ pbo_pool_.push_back(pbo); });
    }

    /// Takes a framebuffer from the pool, creating one if the pool is empty.
    /// The returned pointer's deleter pushes the pooled owner back into
    /// fbo_pool_ instead of destroying the framebuffer.
    frame_buffer_ptr get_fbo()
    {
        if(fbo_pool_.empty())
            fbo_pool_.push_back(std::make_shared<frame_buffer>());
        auto fbo = fbo_pool_.front();
        fbo_pool_.pop_front();
        return frame_buffer_ptr(fbo.get(), [this, fbo](frame_buffer*){ fbo_pool_.push_back(fbo); });
    }

    // Members are destroyed in reverse declaration order. The pools are
    // declared first so they are still alive when the deleters of
    // reading_fbo_ / writing_pbo_group_ push their objects back into them.
    std::deque<pixel_buffer_ptr> pbo_pool_;
    std::deque<frame_buffer_ptr> fbo_pool_;
    std::vector<pixel_buffer_ptr> writing_pbo_group_; // uploads started by the previous compose()
    frame_buffer_ptr reading_fbo_;                    // framebuffer with a read-back in flight
};
编辑:
我做了一些性能分析。大部分 CPU 时间似乎都花在了 begin_write() 上。
不过我看不出它有什么不妥……
// Copies image_size bytes from src into this object's pixel unpack buffer.
// src must point to at least image_size readable bytes.
void begin_write(void* src)
{
    texture_.bind();
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo_);
    // Re-specify the data store before mapping it.
    glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, image_size, 0, GL_STREAM_DRAW);
    void* ptr = glMapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY);
    assert(ptr);
    memcpy(ptr, src, image_size);
    glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB);
    // Release the unpack binding: while a buffer is bound to this target,
    // pointer arguments of texture uploads are interpreted as offsets into
    // it, which would affect unrelated glTexImage2D / glTexSubImage2D calls.
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
}