OpenGL的缓冲区可以映射到CUDA的地址空间,当作global memory被访问。
3、映射VBO
4、使用
5、解除映射
6、解除注册
7、删除VBO
运行结果:
这样做可以使计算得到的数据直接可视化,提升速度。
因为数据存储在设备端,没有设备端到主机端的传输耗费,不论计算还是可视化都相当快。
具体使用步骤:
1、创建VBO
- glGenBuffers(1, vbo);
- glBindBuffer(GL_ARRAY_BUFFER, *vbo);
- glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
- glBindBuffer(GL_ARRAY_BUFFER, 0);
2、注册VBO
- struct cudaGraphicsResource *cuda_vbo_resource;
- cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, *vbo, cudaGraphicsMapFlagsWriteDiscard);
3、映射VBO
- cudaGraphicsMapResources(1, &cuda_vbo_resource, 0);
- cudaGraphicsResourceGetMappedPointer((void**)&dptr, &num_bytes, cuda_vbo_resource);
4、使用
- launch_kernel(dptr, mesh_width, mesh_height, animTime);
5、解除映射
- cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0);
6、解除注册
- cudaGraphicsUnregisterResource(cuda_vbo_resource);
7、删除VBO
- glBindBuffer(GL_ARRAY_BUFFER, *vbo);
- glDeleteBuffers(1, vbo);
代码:
- //myVBO.cpp
- #include <cstdio>
- #include <gl/glew.h>
- #include <cuda_runtime.h>
- #include <cutil_inline.h>
- #include <cutil_gl_inline.h>
- #include <cutil_gl_error.h>
- #include <rendercheck_gl.h>
- // Window size in pixels, used for the GLUT window, viewport and aspect ratio.
- unsigned int window_width = 512;
- unsigned int window_height = 512;
- // Mesh resolution; the VBO holds mesh_width * mesh_height float4 vertices.
- unsigned int mesh_width = 256;
- unsigned int mesh_height = 256;
- // CUTIL timer handle (created in main) used for the FPS readout.
- unsigned int timer = 0;
- // Animation state: animTime advances by animInc per frame while animFlag is non-zero.
- int animFlag = 1;
- float animTime = 0.0f;
- float animInc = 0.01f;
- // OpenGL buffer object name. It is an integer handle, not a pointer, so the
- // "no buffer" value is 0 rather than NULL.
- GLuint vbo = 0;
- // View transform applied in display().
- float rotate_x = 0.0f, rotate_y = 0.0f;
- float translate_z = -3.0f;
- // CUDA handle for the registered VBO; NULL until createVBO() registers it.
- struct cudaGraphicsResource *cuda_vbo_resource = NULL;
- // Host-side kernel launcher with C linkage (defined in kernelVBO.cu).
- extern "C" void launch_kernel(float4 *pos, unsigned int mesh_width, unsigned int mesh_height, float time);
- // Creates an uninitialised vertex buffer large enough for
- // mesh_width * mesh_height float4 vertices and registers it with CUDA.
- //   vbo: receives the generated OpenGL buffer name; nothing is done when NULL.
- // On success the global cuda_vbo_resource refers to the registered buffer.
- void createVBO(GLuint *vbo)
- {
-     if (!vbo)
-         return;
-     glGenBuffers(1, vbo);
-     glBindBuffer(GL_ARRAY_BUFFER, *vbo);
-     // Four floats per vertex; widen before multiplying so that large meshes
-     // do not wrap around in unsigned int arithmetic.
-     const size_t size = (size_t)mesh_width * mesh_height * 4 * sizeof(float);
-     glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
-     glBindBuffer(GL_ARRAY_BUFFER, 0);
-     // A failed registration would leave cuda_vbo_resource unusable for every
-     // later map call, so check it here instead of ignoring the return code.
-     cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, *vbo, cudaGraphicsMapFlagsWriteDiscard));
- }
- // Unregisters the buffer from CUDA and deletes the OpenGL buffer object.
- //   vbo: buffer name to delete; reset to 0 afterwards. Nothing is done when NULL.
- void deleteVBO(GLuint *vbo)
- {
-     if (!vbo)
-         return;
-     if (cuda_vbo_resource)
-     {
-         // This is reached through the atexit() hook (cleanupCuda), so a failure
-         // is reported rather than escalated into another exit() call.
-         const cudaError_t status = cudaGraphicsUnregisterResource(cuda_vbo_resource);
-         if (status != cudaSuccess)
-             fprintf(stderr, "cudaGraphicsUnregisterResource failed: %s\n", cudaGetErrorString(status));
-         // Drop the stale handle so it cannot be mapped or unregistered again.
-         cuda_vbo_resource = NULL;
-     }
-     glBindBuffer(GL_ARRAY_BUFFER, *vbo);
-     glDeleteBuffers(1, vbo);
-     // GLuint is an integer handle; 0 is the "no buffer" name.
-     *vbo = 0;
- }
- // Exit-time hook (registered with atexit in initCuda): releases the global
- // VBO if one exists.
- void cleanupCuda()
- {
-     if (vbo == 0)
-         return;
-     deleteVBO(&vbo);
- }
- // Maps the registered VBO into the CUDA address space, runs the kernel that
- // fills it with vertex positions for the current animTime, then unmaps it.
- void runCuda()
- {
-     float4 *dptr = NULL;
-     size_t num_bytes = 0;
-     // Check every interop call: if mapping fails, dptr stays NULL and the
-     // kernel must not be launched on it.
-     cutilSafeCall(cudaGraphicsMapResources(1, &cuda_vbo_resource, 0));
-     cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void**)&dptr, &num_bytes, cuda_vbo_resource));
-     launch_kernel(dptr, mesh_width, mesh_height, animTime);
-     cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0));
- }
- // Selects the CUDA device used for GL interop, creates and registers the VBO,
- // installs the exit-time cleanup hook and fills the buffer once.
- //   argc/argv: command line; a "device" flag hands device selection to CUTIL.
- void initCuda(int argc, char **argv)
- {
-     if (cutCheckCmdLineFlag(argc, (const char**)argv, "device"))
-     {
-         cutilGLDeviceInit(argc, argv);
-     }
-     else
-     {
-         // Without a usable interop device nothing below can work, so check
-         // the result instead of ignoring it.
-         cutilSafeCall(cudaGLSetGLDevice(cutGetMaxGflopsDeviceId()));
-     }
-     createVBO(&vbo);
-     atexit(cleanupCuda);
-     runCuda();
- }
- // Counts frames and, once every fpsLimit calls, writes the average frame
- // rate into the window title and resets the timer.
- void computeFPS()
- {
-     static int fpsCount = 0;
-     static int fpsLimit = 100;
-     if (++fpsCount != fpsLimit)
-         return;
-     // The average timer value is divided by 1000 before being inverted
-     // (presumably milliseconds -> seconds; confirm against CUTIL).
-     const float average = cutGetAverageTimerValue(timer);
-     const float ifps = 1.0f / (average / 1000.0f);
-     char title[256];
-     sprintf(title, "Cuda GL Interop Wrapper: %3.1f fps", ifps);
-     glutSetWindowTitle(title);
-     fpsCount = 0;
-     cutilCheckError(cutResetTimer(timer));
- }
- // Renders one frame: recomputes the vertices with CUDA, draws the VBO as red
- // points and, while animation is enabled, schedules the next frame.
- void display()
- {
-     // Refresh the vertex positions before drawing them.
-     runCuda();
-     glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);
-     // Model-view transform: translate along Z, then rotate about X and Y.
-     glMatrixMode(GL_MODELVIEW);
-     glLoadIdentity();
-     glTranslatef(0.0, 0.0, translate_z);
-     glRotatef(rotate_x, 1.0, 0.0, 0.0);
-     glRotatef(rotate_y, 0.0, 1.0, 0.0);
-     // Draw straight from the buffer object: 4 floats per vertex, tightly packed.
-     glColor3f(1.0, 0.0, 0.0);
-     glEnableClientState(GL_VERTEX_ARRAY);
-     glBindBuffer(GL_ARRAY_BUFFER, vbo);
-     glVertexPointer(4, GL_FLOAT, 0, 0);
-     const unsigned int vertexCount = mesh_width * mesh_height;
-     glDrawArrays(GL_POINTS, 0, vertexCount);
-     glDisableClientState(GL_VERTEX_ARRAY);
-     glutSwapBuffers();
-     if (!animFlag)
-         return;
-     glutPostRedisplay();
-     animTime += animInc;
- }
- // GLUT display callback: times a single display() call and then updates the
- // FPS counter.
- void fpsDisplay()
- {
-     // Only the rendering itself is inside the timed region.
-     cutilCheckError(cutStartTimer(timer));
-     display();
-     cutilCheckError(cutStopTimer(timer));
-
-     computeFPS();
- }
- // Creates the GLUT window, initialises GLEW and sets up the fixed-function
- // state (clear colour, viewport, perspective projection).
- //   argc/argv: command line, forwarded to glutInit.
- // Returns CUTTrue on success, CUTFalse when GLEW cannot be initialised or
- // OpenGL 2.0 is not available.
- CUTBoolean initGL(int argc, char **argv)
- {
-     glutInit(&argc, argv);
-     glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
-     glutInitWindowSize(window_width, window_height);
-     glutCreateWindow("Cuda GL Interop Demo (adapted from NVDIA's simpleGL)");
-     glutDisplayFunc(fpsDisplay);
-     // glewInit() reports failure through its return value; the extension
-     // query below is only meaningful once it has succeeded.
-     const GLenum glewStatus = glewInit();
-     if (glewStatus != GLEW_OK)
-     {
-         fprintf(stderr, "ERROR: glewInit failed: %s\n", (const char*)glewGetErrorString(glewStatus));
-         return CUTFalse;
-     }
-     if (!glewIsSupported("GL_VERSION_2_0"))
-     {
-         fprintf(stderr, "ERROR: Support for necessary OpengGL extensions missing.");
-         return CUTFalse;
-     }
-     glClearColor(0.0, 0.0, 0.0, 1.0);
-     glDisable(GL_DEPTH_TEST);
-     glViewport(0, 0, window_width, window_height);
-     glMatrixMode(GL_PROJECTION);
-     glLoadIdentity();
-     gluPerspective(60.0, (GLfloat)window_width / (GLfloat)window_height, 0.1, 10.0);
-     return CUTTrue;
- }
- // Program entry point: creates the FPS timer, initialises OpenGL and CUDA,
- // then enters the GLUT main loop.
- // Returns 0 on normal termination and 1 when OpenGL initialisation fails.
- int main(int argc, char **argv)
- {
-     cutilCheckError(cutCreateTimer(&timer));
-     if (CUTFalse == initGL(argc, argv))
-     {
-         // The original returned CUTFalse here, which is used as a false/zero
-         // value and would therefore be reported to the shell as success.
-         return 1;
-     }
-     initCuda(argc, argv);
-     CUT_CHECK_ERROR_GL();
-     glutDisplayFunc(fpsDisplay);
-     glutMainLoop();
-     cudaThreadExit();
-     cutilExit(argc, argv);
-     return 0;
- }
- //kernelVBO.cu
- #include <cuda.h>
- // Writes one vertex of an animated sine/cosine height field.
- // Launch layout: 2D grid of 2D blocks, one thread per (x, y) mesh cell; the
- // grid may overhang the mesh, surplus threads exit without writing.
- //   pos:    device array of at least width * height float4 elements.
- //   width:  mesh width in vertices.
- //   height: mesh height in vertices.
- //   time:   phase offset added to both wave arguments.
- __global__ void kernel(float4 *pos, unsigned int width, unsigned int height, float time)
- {
-     const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
-     const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
-     // Guard the tail blocks so a rounded-up grid never writes outside pos.
-     if (x >= width || y >= height)
-         return;
-     // Map the integer cell index to [-1, 1) in both directions.
-     float u = x / (float)width;
-     float v = y / (float)height;
-     u = u * 2.0f - 1.0f;
-     v = v * 2.0f - 1.0f;
-     const float freq = 4.0f;
-     const float w = sinf(u * freq + time) * cosf(v * freq + time) * 0.5f;
-     // The wave value w is stored in the y component.
-     pos[y * width + x] = make_float4(u, w, v, 1.0f);
- }
- // Host wrapper: launches kernel over the whole mesh with 8x8 thread blocks
- // and waits for it to finish. Does nothing for a NULL buffer or an empty mesh.
- extern "C" void launch_kernel(float4 *pos, unsigned int mesh_width, unsigned int mesh_height, float time)
- {
-     // A zero-sized grid is an invalid launch configuration, and a NULL buffer
-     // cannot be written.
-     if (!pos || mesh_width == 0 || mesh_height == 0)
-         return;
-     const dim3 block(8, 8, 1);
-     // Round up so meshes whose sides are not multiples of the block size are
-     // fully covered; the kernel discards the overhanging threads.
-     const dim3 grid((mesh_width + block.x - 1) / block.x, (mesh_height + block.y - 1) / block.y, 1);
-     kernel<<<grid, block>>>(pos, mesh_width, mesh_height, time);
-     cudaThreadSynchronize();
- }
运行结果:
参考自CUDA SDK