NVIDIA 硬解码学习3
今天主要学习的 Demo 是AppDecGL
This sample application illustrates the decoding of a media file and the display of decoded frames in a window. This is done by CUDA interop with OpenGL.
CUDA与GL互操作:
https://www.cnblogs.com/csuftzzk/p/cuda_opengl_interoperability.html
CUDA-GL互操作主要流程
- 在OpenGL里面初始化Buffer Object
- 在CUDA中注册OpenGL中的Buffer Object
- CUDA锁定资源,获取操作资源的指针,在CUDA核函数中进行处理
- CUDA释放资源,在OpenGL中使用Buffer Object
源码阅读
- 主体框架:
/**
 * Demuxes the file at szInFilePath, decodes it with NvDecoder and converts
 * every returned frame to BGRA32 into the buffer handed out by a
 * FramePresenterGL.
 *
 * @param cuContext     CUDA context given to both the decoder and the presenter.
 * @param szInFilePath  Path of the input media file, forwarded to FFmpegDemuxer.
 * @return Always 0.
 */
int Decode(CUcontext cuContext, char *szInFilePath) {
    // Bitstream parser for the input file.
    FFmpegDemuxer demuxer(szInFilePath);
    // Decoder, sized and typed from what the demuxer reports.
    NvDecoder dec(cuContext, demuxer.GetWidth(), demuxer.GetHeight(), true, FFmpeg2NvCodecId(demuxer.GetVideoCodec()));
    // OpenGL presenter with the same dimensions as the video.
    FramePresenterGL presenter(cuContext, demuxer.GetWidth(), demuxer.GetHeight());

    // Demuxer output: one packet of compressed data and its size.
    uint8_t *pPacket = NULL;
    int nPacketBytes = 0;
    // Decoder output: array of frame pointers and how many were produced.
    uint8_t **ppDecoded;
    int nDecoded = 0;
    // Presenter buffer that receives the converted BGRA32 pixels, and its pitch.
    uint8_t *dpTarget = 0;
    int nTargetPitch = 0;
    // Running count of decoded frames.
    int nTotal = 0;

    do {
        // Pull the next packet out of the container.
        demuxer.Demux(&pPacket, &nPacketBytes);
        // Decode it; zero or more frames come back through ppDecoded.
        dec.Decode(pPacket, nPacketBytes, &ppDecoded, &nDecoded);

        // Log the stream description once, when the first frames show up.
        const bool bFirstOutput = (nTotal == 0) && (nDecoded != 0);
        if (bFirstOutput) {
            LOG(INFO) << dec.GetVideoInfo();
        }

        for (int iFrame = 0; iFrame < nDecoded; ++iFrame) {
            // Ask the presenter for its frame buffer pointer and pitch.
            presenter.GetDeviceFrameBuffer(&dpTarget, &nTargetPitch);
            if (dec.GetBitDepth() != 8) {
                // Non-8-bit output: convert with the P016 routine (source pitch is 2 bytes per sample).
                P016ToBgra32((uint8_t *)ppDecoded[iFrame], 2 * dec.GetWidth(), (uint8_t *)dpTarget, nTargetPitch, dec.GetWidth(), dec.GetHeight());
            } else {
                // 8-bit output: convert NV12 to BGRA32 into the presenter buffer.
                Nv12ToBgra32((uint8_t *)ppDecoded[iFrame], dec.GetWidth(), (uint8_t *)dpTarget, nTargetPitch, dec.GetWidth(), dec.GetHeight());
            }
        }
        nTotal += nDecoded;
    } while (nPacketBytes);  // a zero-sized packet ends the loop

    std::cout << "Total frame decoded: " << nTotal << std::endl;
    return 0;
}
- 上述的代码能看到数据流的变化,但是看不到具体的互操作逻辑.
- 下面的是初始化GL渲染对象所具体做的工作:
// FramePresenterGL presenter(cuContext, demuxer.GetWidth(), demuxer.GetHeight())
/**
 * Stores the CUDA context and frame dimensions, starts ThreadProc on a new
 * thread and blocks until pInstance becomes non-null.
 *
 * @param cuContext  CUDA context kept in the cuContext member.
 * @param nWidth     Frame width kept in the nWidth member.
 * @param nHeight    Frame height kept in the nHeight member.
 */
FramePresenterGL(CUcontext cuContext, int nWidth, int nHeight)
    : cuContext(cuContext), nWidth(nWidth), nHeight(nHeight)
{
    // Launch ThreadProc on its own thread, handing it this object.
    pthMessageLoop = new std::thread(ThreadProc, this);

    // Poll in 1 ms steps until pInstance is set.
    // NOTE(review): pInstance is presumably published by the thread started
    // above; its declaration is not visible here, so confirm that this
    // cross-thread read is properly synchronized.
    for (;;) {
        if (pInstance) {
            break;
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }
}
- 由上面代码可见,开启了一个ThreadProc线程.下面看一下这个.
/**
 * Creates the GLUT window and the GL objects used for presentation (a pixel
 * unpack buffer, a rectangle texture and an ARB fragment program), allocates
 * and clears the CUDA frame buffer dpFrame, publishes this object through
 * pInstance, then pumps GLUT events until bStop is set. On exit it clears
 * pInstance and releases everything it created.
 */
void Run() {
    // Window size: the frame size, shrunk (aspect ratio kept) when the frame
    // exceeds 1280x720. Only the window is scaled, not the frame.
    const double dShrink = (std::max)(nWidth / 1280.0, nHeight / 720.0);
    int nWinWidth = nWidth;
    int nWinHeight = nHeight;
    if (dShrink > 1.0) {
        nWinWidth = (int)(nWidth / dShrink);
        nWinHeight = (int)(nHeight / dShrink);
    }

    // Size of one frame at 4 bytes per pixel; shared by the PBO and dpFrame.
    const int nFrameBytes = nWidth * nHeight * 4;

    // GLUT initialisation with a placeholder command line.
    int nArgs = 1;
    const char *ppArgs[] = {"dummy"};
    glutInit(&nArgs, (char **)ppArgs);
    glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE);
    glutInitWindowSize(nWinWidth, nWinHeight);
    glutCreateWindow("FramePresenterGL");
    glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE, GLUT_ACTION_CONTINUE_EXECUTION);

    // Identity model-view, and an orthographic projection over the unit square.
    glViewport(0, 0, nWinWidth, nWinHeight);
    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity();
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0);

    // GLEW initialisation (the ARB entry points below are used after it).
    glewInit();

    // Pixel unpack buffer large enough for one full frame; left unbound afterwards.
    glGenBuffersARB(1, &pbo);
    glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
    glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, nFrameBytes, NULL, GL_STREAM_DRAW_ARB);
    glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);

    // Rectangle texture with RGBA8 storage. The data pointer is NULL and no
    // unpack buffer is bound at this point, so this only allocates storage;
    // no pixels are uploaded here.
    glGenTextures(1, &tex);
    glBindTexture(GL_TEXTURE_RECTANGLE_ARB, tex);
    glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, nWidth, nHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
    glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);

    // ARB fragment program: output colour = rectangle-texture sample at the
    // fragment's texture coordinate.
    static const char *szFragmentProgram =
        "!!ARBfp1.0\n"
        "TEX result.color, fragment.texcoord, texture[0], RECT; \n"
        "END";
    glGenProgramsARB(1, &shader);
    glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader);
    glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(szFragmentProgram), (GLubyte *)szFragmentProgram);

    // Register the draw and window-close callbacks.
    glutDisplayFunc(DisplayProc);
    glutCloseFunc(CloseWindowProc);

    // Make the CUDA context current on this thread, then allocate and zero
    // the device frame buffer.
    ck(cuCtxSetCurrent(cuContext));
    ck(cuMemAlloc(&dpFrame, nFrameBytes));
    ck(cuMemsetD8(dpFrame, 0, nFrameBytes));

    // Setup is complete: publish this object, then process GLUT events until
    // a stop is requested.
    pInstance = this;
    while (!bStop) {
        glutMainLoopEvent();
    }
    pInstance = NULL;

    // Release the CUDA buffer and the GL objects created above.
    ck(cuMemFree(dpFrame));
    glDeleteBuffersARB(1, &pbo);
    glDeleteTextures(1, &tex);
    glDeleteProgramsARB(1, &shader);
}
- 下面主要来看一下 DisplayProc 绘图函数,这里涉及了CUDA-GL互操作
void Display(void) {
CUgraphicsResource cuResource;
// 将pbo在CUDA注册cuResource
ck(cuGraphicsGLRegisterBuffer(&cuResource, pbo, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD));
ck(cuGraphicsMapResources(1, &cuResource, 0));
CUdeviceptr dpBackBuffer;
size_t nSize = 0;
// 获得cuResource映射到CUDA中的指针dpBackBuffer
ck(cuGraphicsResourceGetMappedPointer(&dpBackBuffer, &nSize, cuResource));
// 数据拷贝!!!Device2Device
CUDA_MEMCPY2D m = { 0 };
m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
m.srcDevice = dpFrame; // source,解码获得的NV12转成BGRA8的指针
m.srcPitch = nWidth * 4;
m.dstMemoryType = CU_MEMORYTYPE_DEVICE;
m.dstDevice = dpBackBuffer;// GL映射到CUDA中的指针
m.dstPitch = nSize / nHeight;
m.WidthInBytes = nWidth * 4;
m.Height = nHeight;
ck(cuMemcpy2DAsync(&m, 0)); // 异步拷贝
// 解除map
ck(cuGraphicsUnmapResources(1, &cuResource, 0));
// 解除注册
ck(cuGraphicsUnregisterResource(cuResource));
// 绘图
glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, tex);
glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, nWidth, nHeight, GL_BGRA, GL_UNSIGNED_BYTE, 0);
glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader);
glEnable(GL_FRAGMENT_PROGRAM_ARB);
glDisable(GL_DEPTH_TEST);
glBegin(GL_QUADS);
glTexCoord2f(0, (GLfloat)nHeight);
glVertex2f(0, 0);
glTexCoord2f((GLfloat)nWidth, (GLfloat)nHeight);
glVertex2f(1, 0);
glTexCoord2f((GLfloat)nWidth, 0);
glVertex2f(1, 1);
glTexCoord2f(0, 0);
glVertex2f(0, 1);
glEnd();
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, 0);
glDisable(GL_FRAGMENT_PROGRAM_ARB);
if (!strText.empty()) {
PrintText(0, strText, 0, 0, true);
}
glutSwapBuffers();
glutPostRedisplay();
}