cuda与opengl互操作之VBO

最新推荐文章于 2022-04-18 16:09:37 发布

wozhengtao

最新推荐文章于 2022-04-18 16:09:37 发布

阅读量1k

点赞数

OpenGL 的缓冲区对象可以映射到 CUDA 的地址空间，在 kernel 中当作 global memory 直接访问。

这样做可以使计算得到的数据直接可视化，提升速度。

因为数据始终存储在设备端，没有设备端到主机端的传输开销，所以不论计算还是可视化都相当快。

具体使用步骤：
1、创建VBO

[cpp]view plaincopy 
   
 // Generate one buffer object name and store it through the `vbo` pointer.
 glGenBuffers(1, vbo);  
 // Bind the new buffer to GL_ARRAY_BUFFER so the allocation below targets it.
 glBindBuffer(GL_ARRAY_BUFFER, *vbo);  
 // Allocate `size` bytes of storage; the data pointer is 0, so nothing is uploaded here.
 glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);  
 // Unbind GL_ARRAY_BUFFER again.
 glBindBuffer(GL_ARRAY_BUFFER, 0);  

2、注册VBO

[cpp]view plaincopy 
   
 // Handle through which CUDA refers to the registered OpenGL buffer.
 struct cudaGraphicsResource *cuda_vbo_resource;  
 // Register the VBO with CUDA. The WriteDiscard flag tells CUDA that it will only
 // write to the buffer and does not need its previous contents.
 cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, *vbo, cudaGraphicsMapFlagsWriteDiscard);  

3、映射VBO

[cpp]view plaincopy 
   
 // Map the registered resource for access by CUDA (one resource, stream 0).
 cudaGraphicsMapResources(1, &cuda_vbo_resource, 0);  
 // Retrieve the device pointer (dptr) and the size in bytes (num_bytes) of the mapped buffer.
 cudaGraphicsResourceGetMappedPointer((void**)&dptr, &num_bytes, cuda_vbo_resource);  

4、使用

[cpp]view plaincopy 
   
 // Run the kernel wrapper, which writes vertex positions through the mapped pointer.
 launch_kernel(dptr, mesh_width, mesh_height, animTime);  

5、解除映射

[cpp]view plaincopy 
   
 // Unmap the resource so that OpenGL can use the buffer again (one resource, stream 0).
 cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0);  

6、解除注册

[cpp]view plaincopy 
   
 // Release the CUDA registration of the buffer; done before the buffer is deleted (step 7).
 cudaGraphicsUnregisterResource(cuda_vbo_resource);  

7、删除VBO

[cpp]view plaincopy 
   
 // Bind the buffer, then delete the buffer object named by *vbo.
 glBindBuffer(GL_ARRAY_BUFFER, *vbo);  
 glDeleteBuffers(1, vbo);  

代码：

[cpp]view plaincopy 
   
 //myVBO.cpp  
 #include <gl/glew.h>  
 #include <cuda_runtime.h>  
 #include <cutil_inline.h>  
 #include <cutil_gl_inline.h>  
 #include <cutil_gl_error.h>  
 #include <rendercheck_gl.h>  
   
   
 unsigned int window_width = 512;  
 unsigned int window_height = 512;  
 unsigned int mesh_width = 256;  
 unsigned int mesh_height= 256;  
   
 unsigned int timer = 0;  
   
 int animFlag = 1;  
 float animTime = 0.0f;  
 float animInc  = 0.01f;  
   
 GLuint vbo = NULL;  
   
 float rotate_x = 0.0, rotate_y = 0.0;  
 float translate_z = -3.0;  
   
 struct cudaGraphicsResource *cuda_vbo_resource;  
   
 extern "C" void launch_kernel(float4 *pos, unsigned int mesh_width, unsigned int mesh_height, float time);  
   
 void createVBO(GLuint *vbo)  
 {  
     if (vbo)  
     {  
         glGenBuffers(1, vbo);  
         glBindBuffer(GL_ARRAY_BUFFER, *vbo);  
   
         unsigned int size = mesh_width * mesh_height * 4 * sizeof(float);  
         glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);  
   
         glBindBuffer(GL_ARRAY_BUFFER, 0);     
   
         cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, *vbo, cudaGraphicsMapFlagsWriteDiscard);  
     }  
 }  
   
 void deleteVBO(GLuint *vbo)  
 {  
     if (vbo)  
     {  
         cudaGraphicsUnregisterResource(cuda_vbo_resource);  
   
         glBindBuffer(GL_ARRAY_BUFFER, *vbo);  
         glDeleteBuffers(1, vbo);  
   
         *vbo = NULL;  
     }  
 }  
   
 void cleanupCuda()  
 {  
     if(vbo) deleteVBO(&vbo);  
 }  
   
 void runCuda()  
 {  
     float4 *dptr = NULL;  
     size_t num_bytes;  
   
     cudaGraphicsMapResources(1, &cuda_vbo_resource, 0);  
     cudaGraphicsResourceGetMappedPointer((void**)&dptr, &num_bytes, cuda_vbo_resource);  
   
     launch_kernel(dptr, mesh_width, mesh_height, animTime);  
   
     cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0);  
 }  
   
 void initCuda(int argc, char **argv)  
 {  
     if(cutCheckCmdLineFlag(argc, (const char**)argv, "device"))  
         cutilGLDeviceInit(argc, argv);  
     else  
         cudaGLSetGLDevice(cutGetMaxGflopsDeviceId());  
   
     createVBO(&vbo);  
   
     atexit(cleanupCuda);  
   
     runCuda();  
 }  
   
 void computeFPS()  
 {  
     static int fpsCount = 0;  
     static int fpsLimit = 100;  
   
     fpsCount++;  
   
     if(fpsCount == fpsLimit)  
     {  
         char fps[256];  
         float ifps = 1.0f / (cutGetAverageTimerValue(timer) / 1000.0f);  
         sprintf(fps, "Cuda GL Interop Wrapper: %3.1f fps", ifps);  
   
         glutSetWindowTitle(fps);  
         fpsCount = 0;  
   
         cutilCheckError(cutResetTimer(timer));    
     }  
 }  
   
 void display()  
 {  
     runCuda();  
   
     glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);  
   
     glMatrixMode(GL_MODELVIEW);  
     glLoadIdentity();  
     glTranslatef(0.0, 0.0, translate_z);  
     glRotatef(rotate_x, 1.0, 0.0, 0.0);  
     glRotatef(rotate_y, 0.0, 1.0, 0.0);  
   
     glBindBuffer(GL_ARRAY_BUFFER, vbo);  
     glVertexPointer(4, GL_FLOAT, 0, 0);  
   
     glEnableClientState(GL_VERTEX_ARRAY);  
     glColor3f(1.0, 0.0, 0.0);  
     glDrawArrays(GL_POINTS, 0, mesh_width*mesh_height);  
     glDisableClientState(GL_VERTEX_ARRAY);  
   
     glutSwapBuffers();  
   
     if (animFlag)  
     {  
         glutPostRedisplay();  
         animTime += animInc;  
     }  
   
   
 }  
   
 // GLUT display callback: wraps display() in timer start/stop calls and then
 // calls computeFPS().
 void fpsDisplay()  
 {  
     // Start the timer before rendering.
     cutilCheckError(cutStartTimer(timer));  
   
     display();  
   
     // Stop the timer once display() has returned.
     cutilCheckError(cutStopTimer(timer));  
   
     computeFPS();  
 }  
   
   
   
 CUTBoolean initGL(int argc, char **argv)  
 {  
     glutInit(&argc, argv);  
     glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);  
     glutInitWindowSize(window_width, window_height);  
     glutCreateWindow("Cuda GL Interop Demo (adapted from NVDIA's simpleGL)");  
   
     glutDisplayFunc(fpsDisplay);  
   
     glewInit();  
     if(!glewIsSupported("GL_VERSION_2_0"))  
     {  
         fprintf(stderr, "ERROR: Support for necessary OpengGL extensions missing.");  
         return CUTFalse;  
     }  
   
     glClearColor(0.0, 0.0, 0.0, 1.0);  
     glDisable(GL_DEPTH_TEST);  
   
     glViewport(0, 0, window_width, window_height);  
   
     glMatrixMode(GL_PROJECTION);  
     glLoadIdentity();  
     gluPerspective(60.0, (GLfloat)window_width / (GLfloat)window_height, 0.1, 10.0);  
   
     return CUTTrue;  
 }  
   
   
 int main(int argc, char **argv)  
 {  
     cutilCheckError(cutCreateTimer(&timer));  
   
     if(CUTFalse == initGL(argc, argv))  
         return CUTFalse;  
   
     initCuda(argc, argv);  
     CUT_CHECK_ERROR_GL();  
   
     glutDisplayFunc(fpsDisplay);  
       
   
     glutMainLoop();  
   
     cudaThreadExit();           ///  
     cutilExit(argc, argv);      //  
 }  

[cpp]view plaincopy 
   
 //kernelVBO.cu  
   
 #include <cstdio>
 #include <cuda.h>
 #include <cuda_runtime.h>
   
   
 // Writes an animated sine/cosine height field into `pos`.
 //
 // Launch layout: 2D grid of 2D blocks, one thread per mesh vertex; thread
 // (x, y) writes pos[y * width + x]. The grid may be larger than the mesh,
 // surplus threads exit without touching memory.
 //
 // pos:    device pointer to width * height float4 vertices (row-major).
 // width:  number of vertices per row.
 // height: number of rows.
 // time:   animation phase added to both wave arguments.
 __global__ void kernel(float4 *pos, unsigned int width, unsigned int height, float time)
 {
     const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
     const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

     // Guard the grid tail: required whenever the mesh size is not a multiple
     // of the block size.
     if (x >= width || y >= height)
         return;

     // Map the vertex index to the range [-1, 1).
     float u = x / (float)width;
     float v = y / (float)height;

     u = u * 2.0f - 1.0f;
     v = v * 2.0f - 1.0f;

     const float freq = 4.0f;

     // Height of the wave at (u, v); amplitude 0.5.
     const float w = sinf(u*freq + time) * cosf(v*freq + time) * 0.5f;

     pos[y*width+x] = make_float4(u, w, v, 1.0f);
 }

 // Host wrapper: launches `kernel` over the whole mesh and waits for it to finish.
 //
 // pos:         device pointer to mesh_width * mesh_height float4 vertices.
 // mesh_width:  vertices per row.
 // mesh_height: number of rows.
 // time:        animation phase forwarded to the kernel.
 //
 // Does nothing for a NULL pointer or an empty mesh. Launch and execution
 // errors are reported on stderr.
 extern "C" void launch_kernel(float4 *pos, unsigned int mesh_width, unsigned int mesh_height, float time)
 {
     // A zero-sized grid is an invalid launch configuration.
     if (!pos || mesh_width == 0 || mesh_height == 0)
         return;

     const dim3 block(8, 8, 1);

     // Round up so that meshes whose size is not a multiple of the block size
     // are still covered completely; the kernel discards the surplus threads.
     const dim3 grid((mesh_width  + block.x - 1) / block.x,
                     (mesh_height + block.y - 1) / block.y,
                     1);

     kernel<<<grid, block>>>(pos, mesh_width, mesh_height, time);

     // Launch-configuration errors are only visible through cudaGetLastError();
     // errors raised while the kernel runs surface at the synchronisation.
     cudaError_t err = cudaGetLastError();
     if (err == cudaSuccess)
         err = cudaThreadSynchronize();   // kept for consistency with the cudaThread* API used by the host code

     if (err != cudaSuccess)
         fprintf(stderr, "launch_kernel failed: %s\n", cudaGetErrorString(err));
 }