像素缓冲区对象(PBO) 的Streaming-Texture上传 源码解析

举报
ShaderJoy 发表于 2021/12/30 01:41:03 2021/12/30
【摘要】 接这篇文章 OpenGL深入探索——像素缓冲区对象 (PBO)(附完整工程代码地址) 原理示意图: 首先检查显卡是否支持 PBO : #if defined(_WIN32) // check PBO is supported by your video card // 检查显卡是否支持 PBO if (glI...

接这篇文章 OpenGL深入探索——像素缓冲区对象 (PBO)(附完整工程代码地址)

原理示意图:



首先检查显卡是否支持 PBO :


  
  1. #if defined(_WIN32)
  2. // check PBO is supported by your video card
  3. // 检查显卡是否支持 PBO
  4. if (glInfo.isExtensionSupported("GL_ARB_pixel_buffer_object"))
  5. {
  6. // get pointers to GL functions
  7. glGenBuffersARB = (PFNGLGENBUFFERSARBPROC)wglGetProcAddress("glGenBuffersARB");
  8. glBindBufferARB = (PFNGLBINDBUFFERARBPROC)wglGetProcAddress("glBindBufferARB");
  9. glBufferDataARB = (PFNGLBUFFERDATAARBPROC)wglGetProcAddress("glBufferDataARB");
  10. glBufferSubDataARB = (PFNGLBUFFERSUBDATAARBPROC)wglGetProcAddress("glBufferSubDataARB");
  11. glDeleteBuffersARB = (PFNGLDELETEBUFFERSARBPROC)wglGetProcAddress("glDeleteBuffersARB");
  12. glGetBufferParameterivARB = (PFNGLGETBUFFERPARAMETERIVARBPROC)wglGetProcAddress("glGetBufferParameterivARB");
  13. glMapBufferARB = (PFNGLMAPBUFFERARBPROC)wglGetProcAddress("glMapBufferARB");
  14. glUnmapBufferARB = (PFNGLUNMAPBUFFERARBPROC)wglGetProcAddress("glUnmapBufferARB");
  15. // check once again PBO extension
  16. if (glGenBuffersARB && glBindBufferARB && glBufferDataARB && glBufferSubDataARB &&
  17. glMapBufferARB && glUnmapBufferARB && glDeleteBuffersARB && glGetBufferParameterivARB)
  18. {
  19. pboSupported = true;
  20. pboMode = 1; // using 1 PBO
  21. cout << "Video card supports GL_ARB_pixel_buffer_object." << endl;
  22. }
  23. else
  24. {
  25. pboSupported = false;
  26. pboMode = 0; // without PBO
  27. cout << "Video card does NOT support GL_ARB_pixel_buffer_object." << endl;
  28. }
  29. }
  30. // Query the system memory page size and update the default value
  31. SYSTEM_INFO si;
  32. GetSystemInfo(&si);
  33. if (si.dwPageSize > 0)
  34. {
  35. systemPageSize = si.dwPageSize;
  36. }
  37. #elif defined (__gnu_linux__)
  38. // for linux, do not need to get function pointers, it is up-to-date
  39. if (glInfo.isExtensionSupported("GL_ARB_pixel_buffer_object"))
  40. {
  41. pboSupported = true;
  42. cout << "Video card supports GL_ARB_pixel_buffer_object" << endl;
  43. }
  44. else
  45. {
  46. cout << "Video card does NOT support GL_ARB_pixel_buffer_object" << endl;
  47. }
  48. if (glInfo.isExtensionSupported("GL_AMD_pinned_memory"))
  49. {
  50. amdSupported = true;
  51. cout << "Video card supports GL_AMD_pinned_memory" << endl;
  52. }
  53. else
  54. {
  55. cout << "Video card does NOT support GL_AMD_pinned_memory" << endl;
  56. }
  57. // Query the system memory page size and update the default value
  58. if (sysconf(_SC_PAGE_SIZE) > 0)
  59. {
  60. systemPageSize = sysconf(_SC_PAGE_SIZE);
  61. }
  62. #endif


  
  1. // 创建 count 个 PBO
  2. void setPboCount(int count)
  3. {
  4. if (!pboSupported)
  5. return;
  6. // 如果 count 大于 当前的 PBO 数
  7. if (count > pboCount)
  8. {
  9. if (pboMethod != AMD)
  10. {
  11. // Generate each Pixel Buffer object and allocate memory for it(生成 PBO,并为其分配内存)
  12. // Hopefully, PBOs will get allocated in VRAM
  13. glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); // Unbind any buffer object previously bound(释放之前所绑定的 PBO)
  14. for (int i = pboCount; i < count; ++i)
  15. {
  16. GLuint pboId;
  17. glGenBuffers(1, &pboId); // Generate new Buffer Object ID
  18. glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pboId); // Create a zero-sized memory Pixel Buffer Object and bind it
  19. glBufferData(GL_PIXEL_UNPACK_BUFFER, DATA_SIZE, NULL, GL_STREAM_DRAW); // Reserve the memory space for the PBO
  20. glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); // Release the PBO binding
  21. pboIds.push_back(pboId); // Update our list of PBO IDs
  22. pboFences.push_back(NULL);
  23. cout << "Created PBO buffer #" << i << " of size: " << DATA_SIZE << endl;
  24. }
  25. pboCount = pboIds.size();
  26. assert(GL_NO_ERROR == glGetError());
  27. }
  28. // 特殊的 DMA 模式,需要自己手动分配对齐的内存,并提供内存指针
  29. else
  30. {
  31. // Generate each Pixel Buffer object and allocate memory for it
  32. // PBOs will get allocated in System RAM, and GPU will access it through DMA
  33. glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0); // Unbind any buffer object previously bound
  34. for (int i = pboCount; i < count; ++i)
  35. {
  36. GLuint pboId;
  37. glGenBuffers(1, &pboId); // Generate new Buffer Object ID
  38. glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, pboId); // Create a zero-sized memory Pixel Buffer Object and bind it
  39. assert(GL_NO_ERROR == glGetError());
  40. // Memory alignment functions are compiler-specific
  41. GLubyte *ptAlignedBuffer = (GLubyte *)alignedMalloc(systemPageSize, DATA_SIZE);
  42. if (NULL == ptAlignedBuffer)
  43. {
  44. cout << "ERROR [setPboCount] (alignedMalloc) size: " << DATA_SIZE << " alignment: " << systemPageSize << endl;
  45. break;
  46. }
  47. cout << "Created memory buffer #" << i << " of size: " << DATA_SIZE << " alignment: " << systemPageSize << endl;
  48. glBufferData(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, DATA_SIZE, ptAlignedBuffer, GL_STREAM_DRAW); // Take control of the memory space for the PBO
  49. GLenum error = glGetError();
  50. if (GL_NO_ERROR != error)
  51. {
  52. cout << "ERROR [setPboCount] (glBufferData): " << (char *)gluErrorString(error) << endl;
  53. alignedFree(ptAlignedBuffer);
  54. cout << "Freed memory buffer #" << i << endl;
  55. break;
  56. }
  57. glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0); // Release the PBO binding
  58. assert(GL_NO_ERROR == glGetError());
  59. pboIds.push_back(pboId); // Update our list of PBO IDs
  60. pboFences.push_back(NULL);
  61. alignedBuffers.push_back((GLubyte *)ptAlignedBuffer);
  62. cout << "Created PBO buffer #" << i << endl;
  63. }
  64. pboCount = pboIds.size();
  65. assert(GL_NO_ERROR == glGetError());
  66. }
  67. }
  68. // 如果 count 小于当前的 PBO 数
  69. else if (count < pboCount)
  70. {
  71. if (pboMethod != AMD)
  72. {
  73. glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); // Unbind any buffer object previously bound
  74. for (int i = pboCount - 1; i >= count; --i)
  75. {
  76. glDeleteSync(pboFences.back());
  77. pboFences.pop_back();
  78. GLuint pboId = pboIds.back();
  79. glDeleteBuffers(1, &pboId);
  80. pboIds.pop_back(); // Update our list of PBO IDs
  81. cout << "Deleted PBO buffer #" << i << endl;
  82. }
  83. pboCount = pboIds.size();
  84. assert(GL_NO_ERROR == glGetError());
  85. }
  86. else
  87. {
  88. glBindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0); // Unbind any buffer object previously bound
  89. for (int i = pboCount - 1; i >= count; --i)
  90. {
  91. glDeleteSync(pboFences.back());
  92. pboFences.pop_back();
  93. GLuint pboId = pboIds.back();
  94. glDeleteBuffers(1, &pboId);
  95. pboIds.pop_back(); // Update our list of PBO IDs
  96. cout << "Deleted PBO buffer #" << i << endl;
  97. // 手动释放自己分配的内存
  98. alignedFree(alignedBuffers.back());
  99. alignedBuffers.pop_back();
  100. cout << "Freed memory buffer #" << i << endl;
  101. }
  102. pboCount = pboIds.size();
  103. assert(GL_NO_ERROR == glGetError());
  104. }
  105. }
  106. cout << "PBO Count: " << pboCount << endl;
  107. }

最关键的显示回调方法:


  
  1. void displayCB()
  2. {
  3. if (pboMethod == NONE)
  4. {
  5. /*
  6. * Update data in System Memory.
  7. */
  8. t1.start();
  9. updatePixels(imageData, DATA_SIZE); // 更新 imageData 的像素
  10. t1.stop();
  11. updateTime = t1.getElapsedTimeInMilliSec();
  12. /*
  13. * Copy data from System Memory to texture object. (将 imageData 的内容从内存拷贝到纹理当中)
  14. */
  15. t1.start();
  16. glBindTexture(GL_TEXTURE_2D, textureId);
  17. glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, IMAGE_WIDTH, IMAGE_HEIGHT, PIXEL_FORMAT, GL_UNSIGNED_BYTE, (GLvoid *)imageData);
  18. t1.stop();
  19. copyTime = t1.getElapsedTimeInMilliSec();
  20. }
  21. else
  22. {
  23. /*
  24. * Update buffer indices used in data upload & copy.
  25. *
  26. * "uploadIdx": index used to upload pixels to a Pixel Buffer Object.(上传像素至 uploadIdx 指定的 PBO)
  27. * "copyIdx": index used to copy pixels from a Pixel Buffer Object to a GPU texture.(拷贝 cpyIdx 指定的 PBO 的像素到 GPU 纹理)
  28. *
  29. * When (pboCount > 1), this will allow to perform(当 pboCount 数大于1时,就允许使用备用buffer来同时进行上传和拷贝)
  30. * simultaneous upload & copy, by using alternative buffers.
  31. * That is a good thing, unless the double buffering is being already
  32. * done somewhere else in the code.
  33. */
  34. static int copyIdx = 0;
  35. copyIdx = (copyIdx + 1) % pboCount;
  36. int uploadIdx = (copyIdx + 1) % pboCount;
  37. /*
  38. * Upload new data to a Pixel Buffer Object.
  39. */
  40. t1.start();
  41. glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pboIds[uploadIdx]); // Access the Pixel Buffer Object and bind it
  42. // pboMethod 表示不同的纹理流机制(Texture Stream methods)
  43. if (pboMethod == ORPHAN)
  44. {
  45. // GL_STREAM_DRAW 表示每次渲染都会更新该 PBO的像素数据
  46. // GL_DYNAMITC_DRAW 表示每帧都会更新该 Buffer
  47. // GL_STATIC_DRAW 表示几乎或从不更新该 Buffer
  48. glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, DATA_SIZE, NULL, GL_STREAM_DRAW_ARB);
  49. // 获得 PBO 的映射 Buffer 指针,以待写入操作
  50. GLubyte *ptr = (GLubyte *)glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY_ARB);
  51. if (NULL == ptr)
  52. {
  53. cout << "ERROR [displayCB] (glMapBufferARB): " << (char *)gluErrorString(glGetError()) << endl;
  54. return;
  55. }
  56. else
  57. {
  58. // update data directly on the mapped buffer(在映射的 Buffer 上直接更新 PBO 的像素数据)
  59. updatePixels(ptr, DATA_SIZE);
  60. // release pointer to mapping buffer(释放映射 Buffer 的指针)
  61. if (!glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB))
  62. {
  63. cout << "ERROR [displayCB] (glUnmapBufferARB): " << (char *)gluErrorString(glGetError()) << endl;
  64. }
  65. }
  66. }
  67. // 异步模式
  68. else if (pboMethod == UNSYNCH_ORPHAN || pboMethod == UNSYNCH_FENCES)
  69. {
  70. if (pboMethod == UNSYNCH_FENCES)
  71. {
  72. // 检查索引的pboFences 是否是同步对象(Sync Object)
  73. if (glIsSync(pboFences[uploadIdx]))
  74. {
  75. // 阻塞并 wait 同步对象,等待被 signal
  76. GLenum result = glClientWaitSync(pboFences[uploadIdx], 0, GL_TIMEOUT_IGNORED);
  77. switch (result)
  78. {
  79. case GL_ALREADY_SIGNALED:
  80. // Transfer was already done when trying to use buffer
  81. // (说明此时传输已经完成)
  82. cout << "DEBUG (glClientWaitSync): ALREADY_SIGNALED (good timing!) uploadIdx: " << uploadIdx << endl;
  83. break;
  84. case GL_CONDITION_SATISFIED:
  85. // This means that we had to wait for the fence to synchronize us after using all the buffers,
  86. // which implies that the GPU command queue is full and that we are GPU-bound (DMA transfers aren't fast enough).
  87. // (说明我们不得不等待 fence 的同步,即 我们所绑定的 GPU 命令队列已满[DMA传输还不够快])
  88. cout << "WARNING (glClientWaitSync): CONDITION_SATISFIED (had to wait for the sync) uploadIdx: " << uploadIdx << endl;
  89. break;
  90. case GL_TIMEOUT_EXPIRED:
  91. cout << "WARNING (glClientWaitSync): TIMEOUT_EXPIRED (DMA transfers are too slow!) uploadIdx: " << uploadIdx << endl;
  92. break;
  93. case GL_WAIT_FAILED:
  94. cout << "ERROR (glClientWaitSync): WAIT_FAILED: " << (char *)gluErrorString(glGetError()) << endl;
  95. break;
  96. }
  97. // 删除同步对象
  98. glDeleteSync(pboFences[uploadIdx]);
  99. pboFences[uploadIdx] = NULL;
  100. }
  101. }
  102. // 注意和 ORPHAN 的区别
  103. else if (pboMethod == UNSYNCH_ORPHAN)
  104. {
  105. // Buffer 需要重新指定
  106. glBufferData(GL_PIXEL_UNPACK_BUFFER, DATA_SIZE, NULL, GL_STREAM_DRAW); // Buffer re-specification (orphaning)
  107. }
  108. // 获得 PBO 的映射 Buffer 指针,以待写入操作
  109. GLubyte *ptr = (GLubyte *)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, DATA_SIZE, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
  110. if (NULL == ptr)
  111. {
  112. cout << "ERROR [displayCB] (glMapBufferRange): " << (char *)gluErrorString(glGetError()) << endl;
  113. return;
  114. }
  115. else
  116. {
  117. updatePixels(ptr, DATA_SIZE); // Update data directly on the mapped buffer(直接更新映射 Buffer 的数据)
  118. if (!glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER))
  119. {
  120. cout << "ERROR [displayCB] (glUnmapBuffer): " << (char *)gluErrorString(glGetError()) << endl;
  121. }
  122. }
  123. }
  124. else if (pboMethod == AMD)
  125. {
  126. // 同样要进行手动同步
  127. if (glIsSync(pboFences[uploadIdx]))
  128. {
  129. GLenum result = glClientWaitSync(pboFences[uploadIdx], 0, GL_TIMEOUT_IGNORED);
  130. switch (result)
  131. {
  132. case GL_ALREADY_SIGNALED:
  133. // Transfer was already done when trying to use buffer
  134. //cout << "DEBUG (glClientWaitSync): ALREADY_SIGNALED (good timing!) uploadIdx: " << uploadIdx << endl;
  135. break;
  136. case GL_CONDITION_SATISFIED:
  137. // This means that we had to wait for the fence to synchronize us after using all the buffers,
  138. // which implies that the GPU command queue is full and that we are GPU-bound (DMA transfers aren't fast enough).
  139. //cout << "WARNING (glClientWaitSync): CONDITION_SATISFIED (had to wait for the sync) uploadIdx: " << uploadIdx << endl;
  140. break;
  141. case GL_TIMEOUT_EXPIRED:
  142. cout << "WARNING (glClientWaitSync): TIMEOUT_EXPIRED (DMA transfers are too slow!) uploadIdx: " << uploadIdx << endl;
  143. break;
  144. case GL_WAIT_FAILED:
  145. cout << "ERROR (glClientWaitSync): WAIT_FAILED: " << (char *)gluErrorString(glGetError()) << endl;
  146. break;
  147. }
  148. glDeleteSync(pboFences[uploadIdx]);
  149. pboFences[uploadIdx] = NULL;
  150. }
  151. // alignedBuffers 存储的是手动分配的一块对齐过的内存指针
  152. updatePixels(alignedBuffers[uploadIdx], DATA_SIZE); // Update data directly on the mapped buffer
  153. }
  154. t1.stop();
  155. updateTime = t1.getElapsedTimeInMilliSec();
  156. /*
  157. * Protect each Pixel Buffer Object against being overwritten.(防止 PBO 被重复写入)
  158. *
  159. * Tipically the data upload will be slower than our main loop, so this
  160. * function will be called again before the previous frame was uploaded
  161. * and processed. The main bottleneck is the PCI bus transfer speed,
  162. * which limits how fast the DMA (System Memory --> VRAM) can work.
  163. *
  164. * 通常数据上传将会慢于主循环,意味着在先前的帧被上传处理之前,该方法将会被再次调用。
  165. * 主要的性能瓶颈在于 PCI 总线的传输速度, 它限制了 DMA 的传输速度(内存到显存)。
  166. *
  167. * OpenGL Sync Fences will block until the PBO is released.(GL 同步对象将会阻塞主线程,直到 PBO 被释放为止)
  168. */
  169. if (pboMethod == UNSYNCH_FENCES || pboMethod == AMD)
  170. {
  171. // 创建一个同步对象,并将其加入 GL 的命令流中(❤ 具体请参看第八版红宝书的 P589 第11章 Memory ❤)
  172. pboFences[uploadIdx] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
  173. }
  174. /*
  175. * Copy data from a Pixel Buffer Object to a GPU texture.
  176. * glTexSubImage2D() will copy pixels to the corresponding texture in the GPU.
  177. * 传输 PBO 中的数据到 GPU 纹理
  178. */
  179. t1.start();
  180. glBindTexture(GL_TEXTURE_2D, textureId); // Bind the texture
  181. glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pboIds[copyIdx]); // Access the Pixel Buffer Object and bind it
  182. // Use offset instead of pointer(由于使用了 PBO,所以传递的是偏移量)
  183. glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, IMAGE_WIDTH, IMAGE_HEIGHT, PIXEL_FORMAT, GL_UNSIGNED_BYTE, 0);
  184. t1.stop();
  185. copyTime = t1.getElapsedTimeInMilliSec();
  186. // it is good idea to release PBOs with ID 0 after use.
  187. // Once bound with 0, all pixel operations behave normal ways.
  188. glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
  189. }
  190. // clear buffer
  191. glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
  192. // save the initial ModelView matrix before modifying ModelView matrix
  193. glPushMatrix();
  194. // tramsform camera
  195. glTranslatef(0, 0, -cameraDistance);
  196. glRotatef(cameraAngleX, 1, 0, 0); // pitch
  197. glRotatef(cameraAngleY, 0, 1, 0); // heading
  198. // draw a point with texture
  199. glBindTexture(GL_TEXTURE_2D, textureId);
  200. glColor4f(1, 1, 1, 1);
  201. glBegin(GL_QUADS);
  202. glNormal3f(0, 0, 1);
  203. glTexCoord2f(0.0f, 0.0f);
  204. glVertex3f(-1.0f, -1.0f, 0.0f);
  205. glTexCoord2f(1.0f, 0.0f);
  206. glVertex3f(1.0f, -1.0f, 0.0f);
  207. glTexCoord2f(1.0f, 1.0f);
  208. glVertex3f(1.0f, 1.0f, 0.0f);
  209. glTexCoord2f(0.0f, 1.0f);
  210. glVertex3f(-1.0f, 1.0f, 0.0f);
  211. glEnd();
  212. // unbind texture
  213. glBindTexture(GL_TEXTURE_2D, 0);
  214. // draw info messages
  215. showInfo();
  216. //showTransferRate();
  217. printTransferRate();
  218. glPopMatrix();
  219. glutSwapBuffers();
  220. }


运行结果对比:


【未使用 PBO 的情况】



【Orphaning模式,PBO 个数 = 1】



【异步 Orphaning模式, PBO个数 = 1】



【Fences 的 同步模式, PBO个数 = 1】



【Orphaning模式, PBO个数=3】



【异步的 Orphaning 模式, PBO个数=3】



可见使用了 PBO 的确比未使用 PBO 性能要略优,但一味增加 PBO 的数量,并不能显著提高性能。

本例中 不同的 PBO 模式性能差别不大,但是 Orphan 模式的写法最简单,不需要自己手动同步和创建额外对齐的内存。


文章来源: panda1234lee.blog.csdn.net,作者:panda1234lee,版权归原作者所有,如需转载,请联系作者。

原文链接:panda1234lee.blog.csdn.net/article/details/53283007

【版权声明】本文为华为云社区用户转载文章,如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱: cloudbbs@huaweicloud.com
  • 点赞
  • 收藏
  • 关注作者

评论(0)

0/1000
抱歉,系统识别当前为高风险访问,暂不支持该操作

全部回复

上滑加载中

设置昵称

在此一键设置昵称,即可参与社区互动!

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。