如果算法有多个步骤,一个步骤的计算结果是下一个步骤的输入数据,那么就可以使用乒乓技术(Ping Pong)。有些缓存在前一个步骤可能是作为输入的缓存,然后下一个步骤中,这个输入的缓存又当做输出的缓存,从这个描述中大家可以看到,这种技术适合在循环里面实现,而且缓存的利用效率也大大提升了。关于乒乓技术的更详细的讲解,请参考下面的链接。
这篇博客的目的就是简单地使用乒乓技术,其原理见下图:我把一幅图像加上一个权值矩阵,当然这个权值矩阵也被处理为纹理,放在一个纹理缓存中,然后两者相加,放到第三个缓存中,然后再把这个缓存中的数据和权值矩阵相加,放到原来的图像缓存中,这样循环往复(下面代码中的循环共执行4次)。
下面是代码:
//
#include "stdafx.h"
#include<windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <GL/glew.h>
#include <GL/glut.h>
#include "savePicture.h"
// Texture object names.
static GLuint xTexID;     // source image texture, created by LoadBmpAsTexture()
static GLuint yTexID[2];  // the two ping-pong textures, created by SetFBOTexture()
// Ping-pong management: indices into yTexID[] and attachmentpoints[] that say
// which texture is currently rendered to and which one is sampled.
int writeTex = 0;
int readTex = 1;
GLenum attachmentpoints[] = { GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT };
static GLuint fb;            // framebuffer object name
GLhandleARB progHandle = 0;  // GLSL program handle shared across the file
// Reports pending GL errors tagged with the call site's file and line.
#define printOpenGLError() printOglError(__FILE__, __LINE__)
const GLint imgHeight = 512, imgWidth = 512;
static GLfloat dataY[imgWidth*imgHeight*4];  // weight matrix, 4 floats (RGBA) per texel
// Output data. Sized for 4 floats per texel because createTextures() passes
// this array to SetFBOTexture(), whose glTexImage2D call reads
// imgWidth*imgHeight texels in GL_RGBA/GL_FLOAT layout; with only one float
// per texel that upload read past the end of the array. The read-back in
// readFromFBO() and the conversion loop in main only touch the first
// imgWidth*imgHeight entries.
GLfloat outData[imgWidth*imgHeight*4];
static GLubyte pData[imgHeight * imgWidth];  // final image as 8-bit samples
// Vertex shader source. It copies texture coordinate sets 0 and 1 from the
// incoming vertex attributes into gl_TexCoord[0] / gl_TexCoord[1] and
// positions the vertex with ftransform(). The adjacent string literals are
// concatenated by the compiler into a single source string.
const char *vShader = {
//"#version 110 \n "
"void main()"
"{"
"gl_TexCoord[0] = gl_MultiTexCoord0;"
"gl_TexCoord[1] = gl_MultiTexCoord1;"
"gl_Position = ftransform();"
"}"
};
// Explicit matrix form kept for reference (presumably what ftransform() computes):
//gl_Position = gl_ProjectionMatrix * gl_ModelViewMatrix * gl_Vertex;
// Fragment shader source. It samples LenaTexture at (s, 512 - t) of texture
// coordinate set 0 (the 512.0 is hard-coded in the shader text) and textureY
// at texture coordinate set 1, zeroes the y/z/w components of both samples,
// and writes their sum to gl_FragColor, so only the first component carries data.
const char *fShader = {
//"#version 110 \n "
"#extension GL_ARB_texture_rectangle : enable \n"
"uniform sampler2DRect LenaTexture; \n"
"uniform sampler2DRect textureY; \n"
"void main() \n"
"{ \n"
" vec2 coord = vec2(gl_TexCoord[0].s,512.0-gl_TexCoord[0].t); \n"
"vec4 texColor = texture2DRect(LenaTexture,coord ); \n"
"vec4 texYValue = texture2DRect(textureY, gl_TexCoord[1].st ); \n "
"texColor.yzw = vec3(0.0,0.0,0.0); \n"
"texYValue.yzw = vec3(0.0,0.0,0.0); \n"
" gl_FragColor = texColor + texYValue; \n"
"} \n"
};
//检查FBO的状态,返回是否有错误
bool checkFramebufferStatus() {
GLenum status;
status = (GLenum) glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
switch(status) {
case GL_FRAMEBUFFER_COMPLETE_EXT:
return true;
case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT_EXT:
printf("Framebuffer incomplete, incomplete attachment\n");
return false;
case GL_FRAMEBUFFER_UNSUPPORTED_EXT:
printf("Unsupported framebuffer format\n");
return false;
case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT:
printf("Framebuffer incomplete, missing attachment\n");
return false;
case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS_EXT:
printf("Framebuffer incomplete, attached images must have same dimensions\n");
return false;
case GL_FRAMEBUFFER_INCOMPLETE_FORMATS_EXT:
printf("Framebuffer incomplete, attached images must have same format\n");
return false;
case GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER_EXT:
printf("Framebuffer incomplete, missing draw buffer\n");
return false;
case GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER_EXT:
printf("Framebuffer incomplete, missing read buffer\n");
return false;
}
return false;
}
// Drains the OpenGL error queue, printing one line per pending error.
//   file, line: call-site location to report; the printOpenGLError() macro
//               supplies __FILE__ and __LINE__.
// Returns 1 if at least one error was pending, 0 otherwise.
// `file` is const-qualified because every caller passes a string literal,
// which must not be bound to a plain `char *`.
int printOglError(const char *file, int line)
{
    int retCode = 0;
    for (GLenum glErr = glGetError(); glErr != GL_NO_ERROR; glErr = glGetError())
    {
        printf("glError in file %s @ line %d: %s\n", file, line, gluErrorString(glErr));
        retCode = 1;
    }
    return retCode;
}
// Prints the info log of a shader or program object, if it has one.
// Pending GL errors are reported before the length query, after it, and
// once more at the end. Exits the process if the log buffer cannot be allocated.
void printInfoLog(GLhandleARB obj)
{
    int logBytes = 0;
    int written = 0;

    printOpenGLError();
    glGetObjectParameterivARB(obj, GL_OBJECT_INFO_LOG_LENGTH_ARB, &logBytes);
    printOpenGLError();

    if (logBytes > 0)
    {
        GLcharARB *text = (GLcharARB*)malloc(logBytes);
        if (!text)
        {
            printf("ERROR: Could not allocate InfoLog buffer\n");
            exit(1);
        }
        glGetInfoLogARB(obj, logBytes, &written, text);
        printf("InfoLog:\n%s\n\n", text);
        free(text);
    }

    printOpenGLError();
}
/*************************************************************
function name: initShaders
input:  1. const char *vShaderCode  - vertex shader source
        2. const char *fShaderCode  - fragment shader source
output: 1. (GLhandleARB)-1  compile error
        2. (GLhandleARB)-2  link error
        3. handle of the linked program, which is also made current
description: compiles both shaders, links them into a program object and
             installs it with glUseProgramObjectARB. Compiler and linker
             info logs are printed along the way. Because GLhandleARB may
             be an unsigned type, callers must compare against the cast
             error codes rather than testing for a negative value.
*****************************************************************/
GLhandleARB initShaders( const char *vShaderCode, const char *fShaderCode )
{
    // Status flags start at 0 so a failed query is treated as a failure
    // instead of reading an indeterminate value.
    GLint vertCompiled = 0;
    GLint fragCompiled = 0;
    GLint linked = 0;

    // Create the vertex and fragment shader objects.
    GLhandleARB vertHandle = glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB);
    GLhandleARB fragHandle = glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB);

    // Load the source strings; the ARB entry point matches the ARB handles
    // used everywhere else in this function.
    glShaderSourceARB( vertHandle, 1, &vShaderCode, NULL );
    glShaderSourceARB( fragHandle, 1, &fShaderCode, NULL );

    printf("编译码块顶点着色器并打印编译器日志文件:\n");
    // Compile the vertex shader and print its log.
    glCompileShaderARB(vertHandle);
    printOpenGLError();
    glGetObjectParameterivARB(vertHandle, GL_OBJECT_COMPILE_STATUS_ARB, &vertCompiled);
    printInfoLog(vertHandle);

    printf("编译码块片元着色器并打印编译器日志文件:\n");
    // Compile the fragment shader and print its log.
    glCompileShaderARB(fragHandle);
    printOpenGLError();
    glGetObjectParameterivARB(fragHandle, GL_OBJECT_COMPILE_STATUS_ARB, &fragCompiled);
    printInfoLog(fragHandle);

    if (!vertCompiled || !fragCompiled)
    {
        // Do not leak the shader objects on the failure path.
        glDeleteObjectARB(vertHandle);
        glDeleteObjectARB(fragHandle);
        return (GLhandleARB)-1;
    }

    // Create the program object and attach both compiled shaders.
    // Named `program` so it does not shadow the file-level progHandle.
    GLhandleARB program = glCreateProgramObjectARB();
    glAttachObjectARB(program, vertHandle);
    glAttachObjectARB(program, fragHandle);

    printf("链接程序对象并打印信息日志:\n");
    // Link the program and print its log.
    glLinkProgramARB(program);
    printOpenGLError();
    glGetObjectParameterivARB(program, GL_OBJECT_LINK_STATUS_ARB, &linked);
    printInfoLog(program);

    // The shader objects are no longer needed on their own; deleting them
    // while attached only flags them, and they go away with the program.
    glDeleteObjectARB(vertHandle);
    glDeleteObjectARB(fragHandle);

    if (!linked)
    {
        glDeleteObjectARB(program);
        return (GLhandleARB)-2;
    }

    // Install the program as part of the current rendering state.
    glUseProgramObjectARB(program);
    return program;
}
// Flips the ping-pong roles: after the call the texture index that was
// written becomes the one to read, and vice versa. Any non-zero writeTex
// is treated as "1", so the pair always ends up as (0,1) or (1,0).
void swap(void) {
    const bool wasWritingFirst = (writeTex == 0);
    writeTex = wasWritingFirst ? 1 : 0;
    readTex  = wasWritingFirst ? 0 : 1;
}
// Loads a BMP file through LoadBMP() and uploads it as an
// imgWidth x imgHeight GL_RGB rectangle texture.
//   textureFilePath: path handed to LoadBMP().
//   texID:           receives the generated texture name.
// Returns 0 on success, -1 if LoadBMP() produced no pixel buffer (in which
// case no texture is created and texID is left untouched).
// NOTE(review): the width, height and bit depth reported by LoadBMP() are
// not checked against imgWidth/imgHeight/GL_RGB - confirm the input file
// is a 24-bit image of exactly that size. Ownership of the pixel buffer is
// not visible from here, so it is not released.
int LoadBmpAsTexture(char *textureFilePath, GLuint &texID )
{
    unsigned char *pTexData = NULL;
    long bitCnt = 0;
    long iw = 0;
    long ih = 0;
    long status = LoadBMP( textureFilePath, &pTexData, &iw, &ih, &bitCnt );
    // status is a long, so it needs %ld rather than %d.
    printf("status: %ld \n", status);
    if (pTexData == NULL)
    {
        // Uploading a NULL pointer would only allocate a texture with
        // undefined contents, so report the problem instead.
        printf("ERROR: LoadBMP returned no pixel data for %s\n", textureFilePath);
        return -1;
    }

    glGenTextures( 1, &texID );
    glBindTexture( GL_TEXTURE_RECTANGLE_ARB, texID );
    // Coordinates outside the texture resolve to the border colour.
    glTexParameteri( GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER );
    glTexParameteri( GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER );
    // Nearest filtering: texel values must not be interpolated.
    glTexParameteri( GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
    glTexParameteri( GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
    glTexImage2D( GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGB,
                  imgWidth, imgHeight, 0, GL_RGB, GL_UNSIGNED_BYTE, pTexData );
    glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE );
    return 0;
}
/*定义自己的图像的权值,这个权值矩阵与图像大小相同,
里面的数值加到原图像上去,然后再进行乒乓技术 */
void MakeCoefTexImage1()
{
//初始化权值1
for ( int i = 0; i < imgHeight * imgHeight * 4; ++i )
dataY[ i] = 0.05;
}
// Creates and initialises a rectangle texture intended for use as an FBO
// colour attachment.
//   data:  imgWidth*imgHeight texels in GL_RGBA / GL_FLOAT layout.
//   texID: receives the generated texture name.
void SetFBOTexture( GLfloat data[], GLuint &texID )
{
    const GLenum target = GL_TEXTURE_RECTANGLE_ARB;
    const GLenum wrapAxes[2]    = { GL_TEXTURE_WRAP_S, GL_TEXTURE_WRAP_T };
    const GLenum filterKinds[2] = { GL_TEXTURE_MAG_FILTER, GL_TEXTURE_MIN_FILTER };

    glGenTextures( 1, &texID );
    glBindTexture( target, texID );

    // Both axes use GL_CLAMP_TO_BORDER for out-of-range coordinates.
    for ( int axis = 0; axis < 2; ++axis )
        glTexParameteri( target, wrapAxes[axis], GL_CLAMP_TO_BORDER );

    // Nearest filtering for magnification and minification: no interpolation.
    for ( int kind = 0; kind < 2; ++kind )
        glTexParameteri( target, filterKinds[kind], GL_NEAREST );

    // Author's note: the format has to be GL_RGBA here; with a grayscale
    // format the FBO could not be created.
    glTexImage2D( target, 0, GL_RGBA,
                  imgWidth, imgHeight, 0,
                  GL_RGBA, GL_FLOAT, data );
    glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE );
}
// Sets the fixed GL state used by the computation: a viewport matching the
// image size, a transparent-black clear colour, flat shading, and depth
// testing enabled. The four settings are independent of each other.
void init()
{
    glViewport( 0, 0, imgWidth, imgHeight );
    glClearColor( 0.0f, 0.0f, 0.0f, 0.0f );
    glShadeModel( GL_FLAT );
    glEnable( GL_DEPTH_TEST );
}
void initFBO()
{
//创建FBO,准备屏幕外帧缓存
glGenFramebuffersEXT( 1, &fb );
//绑定屏幕外帧缓存,即避开了窗口系统默认的渲染目标
glBindFramebufferEXT ( GL_FRAMEBUFFER_EXT, fb );
}
// Builds all textures used by the computation: the weight matrix goes into
// the ping-pong texture currently selected for reading, outData initialises
// the one selected for writing, and "texture.bmp" becomes the image texture.
void createTextures( void )
{
    // LoadBmpAsTexture() takes a non-const char *, so hand it a writable
    // copy instead of converting a string literal to char *.
    char texturePath[] = "texture.bmp";

    MakeCoefTexImage1();
    SetFBOTexture( dataY, yTexID[readTex] );
    SetFBOTexture( outData, yTexID[writeTex] );
    // Texture names are unsigned, hence %u.
    printf("ytexID %u \n", yTexID[1]);
    LoadBmpAsTexture( texturePath, xTexID );
    glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE );
}
// Reads back the colour attachment indexed by readTex as one GL_LUMINANCE
// float per pixel.
//   data: destination buffer; the request covers imgWidth x imgHeight pixels.
void readFromFBO( GLfloat *data )
{
    const GLenum sourceAttachment = attachmentpoints[readTex];
    glReadBuffer( sourceAttachment );
    glReadPixels( 0, 0,
                  imgWidth, imgHeight,
                  GL_LUMINANCE, GL_FLOAT,
                  data );
}
void PerformComputation()
{
glClear ( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, attachmentpoints[writeTex],
GL_TEXTURE_RECTANGLE_ARB, yTexID[writeTex], 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, attachmentpoints[readTex],
GL_TEXTURE_RECTANGLE_ARB, yTexID[readTex], 0);
if (!checkFramebufferStatus())
printf("glFramebufferTexture2DEXT():\t [FAIL]\n");
//glUseProgram(glslProgram);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB,xTexID);
glUniform1i( glGetUniformLocation( progHandle, "LenaTexture" ), 0 ); //0 是纹理的句柄
//乒乓技术的实现
for ( int i = 0; i < 4; ++i )
{
glDrawBuffer (attachmentpoints[writeTex]);
// enable texture y_old (read-only)
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB,yTexID[readTex]);
glUniform1i( glGetUniformLocation( progHandle, "textureY" ), 1 );
glPolygonMode(GL_FRONT,GL_FILL);
glBegin( GL_QUADS );
glTexCoord2f( 0.0, 0.0);
glVertex2f( -1.0, -1.0);
glTexCoord2f(0.0, 512.0 );
glVertex2f( -1.0, 1.0 );
glTexCoord2f(512.0, 512.0 );
glVertex2f( 1.0, 1.0 );
glTexCoord2f(512.0, 0.0 );
glVertex2f ( 1.0, -1.0 );
glEnd( );
swap();
}
}
int _tmain(int argc, char* argv[])
{
glutInit( &argc, argv );
glutInitDisplayMode( GLUT_SINGLE| GLUT_LUMINANCE);
glutInitWindowSize ( imgWidth, imgHeight);
glutInitWindowPosition( 100, 100 );
glutCreateWindow(" 着色器 纹理 ");
glewInit();
initFBO();
init();
progHandle = initShaders(vShader, fShader);
if ( progHandle <= 0 )
printf("Failed to run shader.\n");
createTextures();
PerformComputation();
readFromFBO( outData );
for ( int i =0; i < imgHeight * imgWidth; ++i )
{
pData[i] = (char)(outData[i] * 255);
}
asrSaveBMP("result.bmp",pData,imgWidth,imgHeight,8);
return 0;
}
处理的原图
处理后的图片
reference:
http://dev.gameres.com/Program/Visual/3D/GPGPU_math_Tutorial.html#feedback2
http://www.mathematik.uni-dortmund.de/~goeddeke/gpgpu/tutorial.html
《GPGPU编程技术——从GLSL、CUDA到OpenCL》 仇德元编著