在 Pudn.com 上复制了一个 “helloGPGPU_GLSL.cpp”,《GPU通用计算——从Hello GPGPU开始》一文中有它的中文注解。
该程序使用了一个纹理,用来保存着色器的中间数据,并把它再传回着色器。
这里也用1个纹理来回传中间结果相加,其它7个纹理用于卷积。
生成一个空数据纹理:
//Accumulator ("add") texture.
//Creates the single-channel float rectangle texture `texture` on first use and
//(re)uploads an imgWidth x imgHeight block of zeros into it.
void initadd()
{
    const int texelCount = imgWidth * imgHeight;

    //Zero-filled staging buffer used as the initial texture contents
    float *zeros = new float[texelCount];
    memset(zeros, 0, texelCount * sizeof(float));

    if (texture != 0)
    {
        //Texture already exists: rebind it and overwrite its contents with zeros
        glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texture);
        glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_R32F, imgWidth, imgHeight, 0, GL_RED, GL_FLOAT, zeros);
    }
    else
    {
        //First use: create the texture and configure its sampling parameters
        glGenTextures(1, &texture);
        glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texture);
        glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
        glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);

        //Nearest filtering: texels hold data, so they must not be interpolated
        glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST);

        //One 32-bit float channel per texel, initialised from the zero buffer
        glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_R32F, imgWidth, imgHeight, 0, GL_RED, GL_FLOAT, zeros);
        glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
    }

    delete[] zeros;
}
结果回传:
//Move the rendered result into `texture`:
//bind the accumulator texture, then copy an imgWidth x imgHeight region starting at (0,0)
//of the current read framebuffer into the texture at offset (0,0), mip level 0.
glBindTexture( GL_TEXTURE_RECTANGLE_ARB, texture );
glCopyTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, 0, 0, imgWidth, imgHeight);
着色器:
//Convolution fragment shader (convolution.h)
#version 400 compatibility
//Layer data: eight rectangle-texture samplers.
//main() convolves Texture0..Texture(kNum-1) and reads the previous partial sum from Texture<kNum>.
uniform sampler2DRect Texture0;
uniform sampler2DRect Texture1;
uniform sampler2DRect Texture2;
uniform sampler2DRect Texture3;
uniform sampler2DRect Texture4;
uniform sampler2DRect Texture5;
uniform sampler2DRect Texture6;
uniform sampler2DRect Texture7;
//Kernel width (the kernel is kw x kw)
uniform int kw;
//Number of kernels, i.e. number of input layers convolved in this pass
uniform int kNum;
//Sized for 9x9 kernels; smaller kernels (e.g. 5x5) use only the leading part of the array
//Earlier single-kernel declaration, kept for reference:
// uniform float kernel[81];
//648 = 8 * 81 floats: kernels are stored back to back, one per convolved layer
uniform float kernel[648];
//Bias added to the sum
uniform float bias;
//Activation switch: main() applies ReLU when this equals 1
uniform int active;
//Rectified linear unit: passes strictly positive values through, maps everything else to 0.0.
float relu(float i){
    return (i > 0.0) ? i : 0.0;
}
//Applies relu() independently to each of the four components of v.
vec4 vec4relu(vec4 v){
    return vec4(relu(v.x),
                relu(v.y),
                relu(v.z),
                relu(v.w));
}
//Accumulates the kw x kw window of one input layer, centred on pos, into fSum.
//  layer     : input layer to sample
//  pos       : texel coordinate of the fragment being computed
//  halfKw    : half the kernel width (kw / 2, integer division)
//  layerSize : width/height used for the bounds test
//  index     : running position in kernel[]; advanced once per tap, including skipped ones,
//              so consecutive calls walk through consecutive kernels
//  fSum      : running sum the weighted samples are added to
void accumulateLayer(sampler2DRect layer, vec2 pos, int halfKw, ivec2 layerSize,
                     inout int index, inout vec4 fSum)
{
    for (float i = pos.x - halfKw; i < pos.x + halfKw + 1.0; i += 1.0)     //centre the kernel on pos
        for (float j = pos.y - halfKw; j < pos.y + halfKw + 1.0; j += 1.0)
        {
            //Taps outside the layer contribute nothing (equivalent to zero padding)
            if (i >= 0 && j >= 0 && i < layerSize.x && j < layerSize.y)
            {
                fSum += texture2DRect(layer, vec2(i, j)) * kernel[index];  //multiply-accumulate
            }
            index++;                                                        //next kernel weight
        }
}

//Convolves up to eight input layers with their kernels, adds the previous partial sum,
//the bias and (optionally) ReLU, and writes the result to gl_FragColor.
void main()
{
    vec2 pos = gl_TexCoord[0].st;              //fragment coordinate
    vec4 fSum = vec4(0.0);                     //convolution sum
    ivec2 vecSize = textureSize(Texture0);     //layer size; Texture0's size is used for every layer
    int index = 0;                             //position in kernel[]
    int coreSize2 = kw / 2;                    //half kernel width

    //Texture0 is always convolved; TextureK only when more than K kernels are supplied
    accumulateLayer(Texture0, pos, coreSize2, vecSize, index, fSum);
    if (kNum > 1)
        accumulateLayer(Texture1, pos, coreSize2, vecSize, index, fSum);
    if (kNum > 2)
        accumulateLayer(Texture2, pos, coreSize2, vecSize, index, fSum);
    if (kNum > 3)
        accumulateLayer(Texture3, pos, coreSize2, vecSize, index, fSum);
    if (kNum > 4)
        accumulateLayer(Texture4, pos, coreSize2, vecSize, index, fSum);
    if (kNum > 5)
        accumulateLayer(Texture5, pos, coreSize2, vecSize, index, fSum);
    if (kNum > 6)
        accumulateLayer(Texture6, pos, coreSize2, vecSize, index, fSum);
    if (kNum > 7)
        accumulateLayer(Texture7, pos, coreSize2, vecSize, index, fSum);

    //Add the previous result, which is read from the sampler right after the convolved layers.
    //With kNum == 8 every sampler holds an input layer, so no previous result is added.
    if (kNum == 1)
        fSum += texture2DRect(Texture1, vec2(pos.x, pos.y));
    else if (kNum == 2)
        fSum += texture2DRect(Texture2, vec2(pos.x, pos.y));
    else if (kNum == 3)
        fSum += texture2DRect(Texture3, vec2(pos.x, pos.y));
    else if (kNum == 4)
        fSum += texture2DRect(Texture4, vec2(pos.x, pos.y));
    else if (kNum == 5)
        fSum += texture2DRect(Texture5, vec2(pos.x, pos.y));
    else if (kNum == 6)
        fSum += texture2DRect(Texture6, vec2(pos.x, pos.y));
    else if (kNum == 7)
        fSum += texture2DRect(Texture7, vec2(pos.x, pos.y));

    fSum += bias;                              //add the bias

    //Activation
    if (active == 1)
        fSum = vec4relu(fSum);

    gl_FragColor = fSum;
}
层卷积:
//核数据,核宽,输入维度,输出维度,偏置数据,输入数据,输出数据,是否激活
//核大小:kw * kw * 输入维度 * 输出维度。
//输入数据大小:wh * 输入维度。 输出数据大小:wh * 输出维度。
//加法结果在fbo中,中间不用读出,relu也在GLSL中完成
void conv_GLSL_层_add7(float* kernel,int kw,int inNum,int outNum,float *bias,float *inData, float * &outData,bool active=true)
{
//可以不是7个
#define CONV_NUM 7
//7个纹理用于卷积,1个纹理用于加法
int p=0;
int q=0;
//printf("核宽:%d,输入维度:%d,输出维度:%d\n",kw,inNum,outNum);
//加载全部输入通道到多纹理
init(inData,inNum);
initadd();
//最大 7个9x9 核
float kernel648[648]={0.0f};
clock_t start_t, end_t;//计算时间
double total_t;
start_t = clock();
int wh=imgHeight * imgWidth;
outData=new float[wh*outNum];
memset(outData, 0, wh*outNum*sizeof(float));//清零
float * _0Data= new float[imgWidth * imgHeight];
memset(_0Data, 0,imgWidth * imgHeight*sizeof(float));//清零
float *out=outData;
float *in=inData;
float * kernel_s=kernel;//当前核
for(int k=0;k<outNum;++k)
{
//一次实现7个卷积
int kNum=(inNum<CONV_NUM)?inNum:CONV_NUM;
for(int i=0;i<inNum;i+=CONV_NUM)
{
//最多7核
if((kNum==CONV_NUM) && (i+CONV_NUM>inNum))//余数
kNum= inNum % CONV_NUM;
//if(kNum==CONV_NUM)
// printf(".");
//else
// printf("%d ",kNum);
memcpy(kernel648, kernel_s, kNum*kw*kw*sizeof(float));
kernel_s+=kw*kw*kNum;
if ( progHandle <= 0 )
printf("Failed to run shader.\n");
else{
char txt[255];
//传送纹理 Texture0--- Texture7
for(int j=0;j<kNum;++j)
{
sprintf_s(txt,255,"Texture%d\0",j);
glActiveTexture( GL_TEXTURE0 +j);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB,textureN[i+j]);
glUniform1i( glGetUniformLocation( progHandle, txt ), j);
//printf("%s.\n",txt);
}
//激活 纹理
sprintf_s(txt,255,"Texture%d\0",kNum);
glActiveTexture( GL_TEXTURE0 +kNum);
glBindTexture(GL_TEXTURE_RECTANGLE_ARB,texture);
glUniform1i( glGetUniformLocation( progHandle, txt ), kNum);
//printf("%s.\n",txt);
glUniform1i( glGetUniformLocation( progHandle, "kw" ), kw ); //核宽
glUniform1i( glGetUniformLocation( progHandle, "kNum" ), kNum ); //核1--8个
//glUniform1fv( glGetUniformLocation( progHandle, "kernel"),81,kernel81);//核数据
glUniform1fv( glGetUniformLocation( progHandle, "kernel"),kw*kw*kNum,kernel648);//核数据
//加偏置
if(i+CONV_NUM>=inNum && bias!=NULL)
{
glUniform1f( glGetUniformLocation( progHandle, "bias"),bias[k]);//偏置数据
}
else
{
glUniform1f( glGetUniformLocation( progHandle, "bias"),0.0f);//无偏置
}
}
display();
//readFromTexture( outPutFb );
//相加
//for(int j=0;j<wh;++j)
//{
// out[j]+=outPutFb[j];
//}
//取出数据
if(i+kNum==inNum)
{
p++;
readFromTexture( outPutFb );
memcpy(out,outPutFb,wh*sizeof(float));
//清0
glBindTexture( GL_TEXTURE_RECTANGLE_ARB, texture );
glTexImage2D( GL_TEXTURE_RECTANGLE_ARB, 0, GL_R32F, imgWidth, imgHeight, 0,GL_RED,GL_FLOAT,_0Data );
}
else
{
//结果转到 texture 中
glBindTexture( GL_TEXTURE_RECTANGLE_ARB, texture );
//glTexImage2D( GL_TEXTURE_RECTANGLE_ARB, 0, GL_R32F, imgWidth, imgHeight, 0,GL_RED,GL_FLOAT,outPutFb );
glCopyTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, 0, 0, 0, 0, imgWidth, imgHeight);
}
end_t = clock();
total_t = (double)(end_t - start_t) / CLOCKS_PER_SEC;
if(total_t>1.0){
printf("%d/%d\n",k,outNum);
start_t = clock();
}
}
out+=wh;
}
//if(active)//激活
// relu(outData,wh*outNum);
//清理
for(int i=0;i<64;++i)
{
if( textureN[i] !=0)
{
//删除纹理
glDeleteTextures(1,textureN+i);
textureN[i]=0;
}
}
delete []_0Data; _0Data=NULL;
}
这样有没有快一点呢?
结束