perflab 课程设计

初始状态

rotate

版本I

本题的每一步都要进行一次寻址,每一步的目标地址各不相同,而且每个地址只赋值一次,不会被二次访问。因此我首先想到的改进方向是加快寻址:调整循环顺序,让内层循环对 dst 的写入地址连续(按行顺序写入),并把目标行的起始偏移 t 提到内层循环之外计算。

int i, j, t,n;
    n = dim - 1;
    for (j = 0; j < dim; j++){
        t = (n-j)*dim;
        for (i = 0; i < dim; i++)
            dst[t+i] = src[RIDX(i, j, dim)];
    }

 

 版本II

为了降低CPE,需要减少cache miss。为此可以采用分块(blocking):每次只处理一个 8×8 的小块,而不是完整地遍历一整行或一整列,从而改进空间局部性。注意下面的代码要求 dim 是 8 的倍数(实验保证 dim 为 32 的倍数)。

/* Copy the image in 8x8 tiles.  Every tile is walked for a full 8x8
   steps with no check against dim, so dim is expected to be a
   multiple of 8. */
int tile_r, tile_c, dr, dc, r, c;
   for (tile_r = 0; tile_r < dim; tile_r += 8) {
       for (tile_c = 0; tile_c < dim; tile_c += 8) {
           for (dr = 0; dr < 8; ++dr) {
               r = tile_r + dr;
               for (dc = 0; dc < 8; ++dc) {
                   c = tile_c + dc;
                   dst[RIDX(dim-1-c, r, dim)] = src[RIDX(r, c, dim)];
               }
           }
       }
   }

Rotate的Summary由5.0提高至7.9,Dim规模较小时CPE优化不明显,当Dim规模较大时CPE明显下降

版本III

将前两种方法结合

int i, j, a, b, t;
    int sdim = dim - 1;
    for (i = 0; i < dim; i += 8)
    {
        for (j = 0; j < dim; j += 8)
        {
            for (b = j; b < j + 8; b++)
            {
                t = (sdim - b)*dim; 
                for (a = i; a < i + 8; a++)
                {
                    dst[t+a] = src[RIDX(a, b, dim)];
                }
            }
        }
    }

 版本IV

考虑到程序中大量使用RIDX宏来计算下标(RIDX是宏而不是函数),这里将其手工展开,直接写出下标表达式。此外,改善读写顺序。具体来说,先处理矩阵第一列的前32个元素,再处理第二列的前32个元素,以此类推,直到处理完矩阵的前32行,再以相同的方法继续处理余下的矩阵元素。

/* Process the image in horizontal bands of 32 source rows.  Within a
   band, each source column contributes 32 elements that are stored to
   32 consecutive positions of one destination row.  The innermost
   loop always runs 32 steps, so dim is expected to be a multiple
   of 32. */
int band, col, step;
for (band = 0; band < dim; band += 32) {
    for (col = 0; col < dim; ++col) {
        /* offsets of the first element of this 32-element run */
        const int dst_off = (dim - 1 - col) * dim + band;
        const int src_off = band * dim + col;
        for (step = 0; step < 32; ++step) {
            /* reads step down a column (stride dim), writes are contiguous */
            dst[dst_off + step] = src[src_off + step * dim];
        }
    }
}

smooth

版本I

虽然不同位置的像素点参与求平均的像素点数目不同,但数目只有4、6、9三种。对于四个顶点,取包含自身在内的四个像素点的平均值;对于四条边上的非顶点像素点,取包含自身在内的六个像素点的平均值;其余的内部像素点,取包含自身在内的九个像素点的平均值。

	// Smoothing kernel body: every dst pixel becomes the per-channel integer
	// average of the src pixels in the 3x3 window around it, clipped at the
	// image border (4 pixels at corners, 6 on edges, 9 in the interior).
	// Uses dim, src and dst from the enclosing scope; pixels are addressed
	// as row*dim+col.
	int i=1,j=0;
	// top-left corner
	dst[0].red=(src[0].red+src[1].red+src[dim].red+src[dim+1].red)/4;
	dst[0].green=(src[0].green+src[1].green+src[dim].green+src[dim+1].green)/4;
	dst[0].blue=(src[0].blue+src[1].blue+src[dim].blue+src[dim+1].blue)/4;
	// rest of the first row, excluding the top-right corner
	for(j=1; j<dim-1; j++) {
		dst[j].red=(src[j-1].red+src[j].red+src[j+1].red+src[dim+j-1].red+src[dim+j].red+src[dim+j+1].red)/6;
		dst[j].green=(src[j-1].green+src[j].green+src[j+1].green+src[dim+j-1].green+src[dim+j].green+src[dim+j+1].green)/6;
		dst[j].blue=(src[j-1].blue+src[j].blue+src[j+1].blue+src[dim+j-1].blue+src[dim+j].blue+src[dim+j+1].blue)/6;
	}
	// top-right corner (the loop above leaves j == dim-1)
	dst[j].red=(src[j].red+src[j-1].red+src[dim+j].red+src[dim+j-1].red)/4;
	dst[j].green=(src[j].green+src[j-1].green+src[dim+j].green+src[dim+j-1].green)/4;
	dst[j].blue=(src[j].blue+src[j-1].blue+src[dim+j].blue+src[dim+j-1].blue)/4;
	// rows 1 through dim-2
	for(; i<dim-1; i++) {
		// first pixel of the row
		dst[i*dim].red=(src[(i-1)*dim].red+src[(i-1)*dim+1].red+src[i*dim].red+src[i*dim+1].red+src[(i+1)*dim].red+src[(i+1)*dim+1].red)/6;
		dst[i*dim].green=(src[(i-1)*dim].green+src[(i-1)*dim+1].green+src[i*dim].green+src[i*dim+1].green+src[(i+1)*dim].green+src[(i+1)*dim+1].green)/6;
		dst[i*dim].blue=(src[(i-1)*dim].blue+src[(i-1)*dim+1].blue+src[i*dim].blue+src[i*dim+1].blue+src[(i+1)*dim].blue+src[(i+1)*dim+1].blue)/6;
		// interior pixels of the row (columns 1 through dim-2)
		for(j=1; j<dim-1; j++) {
			dst[i*dim+j].red=(src[(i-1)*dim+j-1].red+src[(i-1)*dim+j].red+src[(i-1)*dim+j+1].red+src[i*dim+j-1].red+src[i*dim+j].red+src[i*dim+j+1].red+src[(i+1)*dim+j-1].red+src[(i+1)*dim+j].red+src[(i+1)*dim+j+1].red)/9;
			dst[i*dim+j].green=(src[(i-1)*dim+j-1].green+src[(i-1)*dim+j].green+src[(i-1)*dim+j+1].green+src[i*dim+j-1].green+src[i*dim+j].green+src[i*dim+j+1].green+src[(i+1)*dim+j-1].green+src[(i+1)*dim+j].green+src[(i+1)*dim+j+1].green)/9;
			dst[i*dim+j].blue=(src[(i-1)*dim+j-1].blue+src[(i-1)*dim+j].blue+src[(i-1)*dim+j+1].blue+src[i*dim+j-1].blue+src[i*dim+j].blue+src[i*dim+j+1].blue+src[(i+1)*dim+j-1].blue+src[(i+1)*dim+j].blue+src[(i+1)*dim+j+1].blue)/9;
		}
		// last pixel of the row (j == dim-1 here)
		dst[i*dim+j].red=(src[(i-1)*dim+j-1].red+src[(i-1)*dim+j].red+src[i*dim+j-1].red+src[i*dim+j].red+src[(i+1)*dim+j-1].red+src[(i+1)*dim+j].red)/6;
		dst[i*dim+j].green=(src[(i-1)*dim+j-1].green+src[(i-1)*dim+j].green+src[i*dim+j-1].green+src[i*dim+j].green+src[(i+1)*dim+j-1].green+src[(i+1)*dim+j].green)/6;
		dst[i*dim+j].blue=(src[(i-1)*dim+j-1].blue+src[(i-1)*dim+j].blue+src[i*dim+j-1].blue+src[i*dim+j].blue+src[(i+1)*dim+j-1].blue+src[(i+1)*dim+j].blue)/6;
	}
	// bottom-left corner (the row loop leaves i == dim-1)
	dst[i*dim].red=(src[(i-1)*dim].red+src[(i-1)*dim+1].red+src[i*dim].red+src[i*dim+1].red)/4;
	dst[i*dim].green=(src[(i-1)*dim].green+src[(i-1)*dim+1].green+src[i*dim].green+src[i*dim+1].green)/4;
	dst[i*dim].blue=(src[(i-1)*dim].blue+src[(i-1)*dim+1].blue+src[i*dim].blue+src[i*dim+1].blue)/4;
	// last row, excluding the bottom-left and bottom-right corners
	for(j=1; j<dim-1; j++) {
		dst[i*dim+j].red=(src[(i-1)*dim+j-1].red+src[(i-1)*dim+j].red+src[(i-1)*dim+j+1].red+src[i*dim+j-1].red+src[i*dim+j].red+src[i*dim+j+1].red)/6;
		dst[i*dim+j].green=(src[(i-1)*dim+j-1].green+src[(i-1)*dim+j].green+src[(i-1)*dim+j+1].green+src[i*dim+j-1].green+src[i*dim+j].green+src[i*dim+j+1].green)/6;
		dst[i*dim+j].blue=(src[(i-1)*dim+j-1].blue+src[(i-1)*dim+j].blue+src[(i-1)*dim+j+1].blue+src[i*dim+j-1].blue+src[i*dim+j].blue+src[i*dim+j+1].blue)/6;
	}
	// bottom-right corner (j == dim-1 here)
	dst[i*dim+j].red=(src[(i-1)*dim+j-1].red+src[(i-1)*dim+j].red+src[i*dim+j-1].red+src[i*dim+j].red)/4;
	dst[i*dim+j].green=(src[(i-1)*dim+j-1].green+src[(i-1)*dim+j].green+src[i*dim+j-1].green+src[i*dim+j].green)/4;
	dst[i*dim+j].blue=(src[(i-1)*dim+j-1].blue+src[(i-1)*dim+j].blue+src[i*dim+j-1].blue+src[i*dim+j].blue)/4;	

版本II

由于上个版本在运行过程中存在较多的重复计算,这里基于动态规划的思想,将每一个像素点的计算转换为对一个块(2x2或2x3或3x2或3x3)内的各个像素点取平均值,并将每一块纵向分开为2或3个纵向块,用动态规划数组记录每一列(2个或3个像素点)的RGB之和。相邻两个纵向块之间的递推关系通式为:dp[i][j] = dp[i-1][j] - src[(i-1)*dim+j] + src[(i+2)*dim+j],其中 dp[i][j] 表示第 j 列中第 i 行至第 i+2 行三个像素点之和。

	// Smoothing kernel body using precomputed vertical column sums.
	// r2/g2/b2 hold, per column, the sum of the top two rows ([0]) and of
	// the bottom two rows ([1]); r3/g3/b3[i][j] hold the sum of rows
	// i..i+2 of column j.  Each dst pixel is then the sum of two or three
	// neighbouring column sums divided by the window size (4, 6 or 9).
	// Uses dim, src and dst from the enclosing scope.
	// NOTE(review): r3/g3/b3 are dim x dim variable-length arrays, i.e.
	// 3*dim*dim ints of automatic storage, although only rows 0..dim-3 are
	// ever written or read — worth checking against the stack limit for
	// large dim.  The code also reads row 2 unconditionally, so it needs
	// dim >= 3.
	int i,j;
	int r2[2][dim],g2[2][dim],b2[2][dim];
	int r3[dim][dim],g3[dim][dim],b3[dim][dim];
	for(j=0; j<dim; j++) {
		// initial 2-row block of column j (rows 0 and 1)
		r2[0][j]=src[j].red;
		g2[0][j]=src[j].green;
		b2[0][j]=src[j].blue;
		r2[0][j]+=src[dim+j].red;
		g2[0][j]+=src[dim+j].green;
		b2[0][j]+=src[dim+j].blue;
		// initial 3-row block of column j (rows 0 through 2)
		r3[0][j]=r2[0][j]+src[(dim<<1)+j].red;
		g3[0][j]=g2[0][j]+src[(dim<<1)+j].green;
		b3[0][j]=b2[0][j]+src[(dim<<1)+j].blue;
		// remaining 3-row blocks: slide the window down one row by
		// dropping row i-1 and adding row i+2
		for(i=1; i<dim-2; i++) {
			r3[i][j]=r3[i-1][j]-src[(i-1)*dim+j].red+src[(i+2)*dim+j].red;
			g3[i][j]=g3[i-1][j]-src[(i-1)*dim+j].green+src[(i+2)*dim+j].green;
			b3[i][j]=b3[i-1][j]-src[(i-1)*dim+j].blue+src[(i+2)*dim+j].blue;
		}
		// final 2-row block (rows dim-2 and dim-1): last 3-row block minus row dim-3
		r2[1][j]=r3[dim-3][j]-src[(dim-3)*dim+j].red;
		g2[1][j]=g3[dim-3][j]-src[(dim-3)*dim+j].green;
		b2[1][j]=b3[dim-3][j]-src[(dim-3)*dim+j].blue;
	}
	// top-left corner
	dst[0].red=(r2[0][0]+r2[0][1])/4;
	dst[0].green=(g2[0][0]+g2[0][1])/4;
	dst[0].blue=(b2[0][0]+b2[0][1])/4;
	// rest of the first row, excluding the top-right corner
	for(j=1; j<dim-1; j++) {
		dst[j].red=(r2[0][j-1]+r2[0][j]+r2[0][j+1])/6;
		dst[j].green=(g2[0][j-1]+g2[0][j]+g2[0][j+1])/6;
		dst[j].blue=(b2[0][j-1]+b2[0][j]+b2[0][j+1])/6;
	}
	// top-right corner (the loop above leaves j == dim-1)
	dst[j].red=(r2[0][j-1]+r2[0][j])/4;
	dst[j].green=(g2[0][j-1]+g2[0][j])/4;
	dst[j].blue=(b2[0][j-1]+b2[0][j])/4;
	// rows 1 through dim-2; the window for row i is r3[i-1] (rows i-1..i+1)
	for(i=1; i<dim-1; i++) {
		// first pixel of the row
		dst[i*dim].red=(r3[i-1][0]+r3[i-1][1])/6;
		dst[i*dim].green=(g3[i-1][0]+g3[i-1][1])/6;
		dst[i*dim].blue=(b3[i-1][0]+b3[i-1][1])/6;
		// interior pixels of the row (columns 1 through dim-2)
		for(j=1; j<dim-1; j++) {
			dst[i*dim+j].red=(r3[i-1][j-1]+r3[i-1][j]+r3[i-1][j+1])/9;
			dst[i*dim+j].green=(g3[i-1][j-1]+g3[i-1][j]+g3[i-1][j+1])/9;
			dst[i*dim+j].blue=(b3[i-1][j-1]+b3[i-1][j]+b3[i-1][j+1])/9;
		}
		// last pixel of the row (j == dim-1 here)
		dst[i*dim+j].red=(r3[i-1][j-1]+r3[i-1][j])/6;
		dst[i*dim+j].green=(g3[i-1][j-1]+g3[i-1][j])/6;
		dst[i*dim+j].blue=(b3[i-1][j-1]+b3[i-1][j])/6;
	}
	// bottom-left corner (the row loop leaves i == dim-1)
	dst[i*dim].red=(r2[1][0]+r2[1][1])/4;
	dst[i*dim].green=(g2[1][0]+g2[1][1])/4;
	dst[i*dim].blue=(b2[1][0]+b2[1][1])/4;
	// last row, excluding the bottom-left and bottom-right corners
	for(j=1; j<dim-1; j++) {
		dst[i*dim+j].red=(r2[1][j-1]+r2[1][j]+r2[1][j+1])/6;
		dst[i*dim+j].green=(g2[1][j-1]+g2[1][j]+g2[1][j+1])/6;
		dst[i*dim+j].blue=(b2[1][j-1]+b2[1][j]+b2[1][j+1])/6;	
	}
	// bottom-right corner (j == dim-1 here)
	dst[i*dim+j].red=(r2[1][j-1]+r2[1][j])/4;
	dst[i*dim+j].green=(g2[1][j-1]+g2[1][j])/4;
	dst[i*dim+j].blue=(b2[1][j-1]+b2[1][j])/4;	

版本III

版本I将处理方式不同的位置分别讨论,但大量的重复计算限制了性能;而版本II中,用二维数组保存纵向三个像素点的R、G、B之和的做法不够合理。又因为对任何像素点,参与求平均的像素点所构成的块都不超过三行、每行不超过三个,所以可以使用三个指针,每个指针负责读取一行中相邻的两个或三个像素点。

/* Sum of channel `ch` over the pixel at p and its neighbour at p+off. */
#define SMOOTH3_PAIR(p, off, ch) ((p)->ch + ((p) + (off))->ch)
/* Sum of channel `ch` over the pixel at p and both horizontal neighbours. */
#define SMOOTH3_TRIO(p, ch)      ((p)->ch + ((p) - 1)->ch + ((p) + 1)->ch)

/*
 * smooth3 - replace every pixel by the per-channel integer average of the
 * pixels in the 3x3 window around it, clipped at the image border.
 *
 * dim: side length; rows of src are dim pixels apart.
 * src: input pixels, read only.
 * dst: output pixels, written front to back, dim*dim stores in total.
 *
 * Up to three row pointers (above / cur / below) advance together with
 * dst, so no index multiplication is needed.  The first statements read
 * src[1] and src[dim] unconditionally, so dim must be at least 2.
 */
void smooth3(int dim, pixel *src, pixel *dst) {
    const int last = dim - 1;
    pixel *above, *cur, *below;
    int row, col;

    /* ---- top row: window covers rows 0 and 1 ---- */
    cur = src;
    below = src + dim;

    /* top-left corner */
    dst->red   = (SMOOTH3_PAIR(cur, 1, red)   + SMOOTH3_PAIR(below, 1, red))   >> 2;
    dst->green = (SMOOTH3_PAIR(cur, 1, green) + SMOOTH3_PAIR(below, 1, green)) >> 2;
    dst->blue  = (SMOOTH3_PAIR(cur, 1, blue)  + SMOOTH3_PAIR(below, 1, blue))  >> 2;
    ++cur;
    ++below;
    ++dst;

    /* top edge, columns 1 .. dim-2 */
    for (col = 1; col < last; ++col, ++cur, ++below, ++dst) {
        dst->red   = (SMOOTH3_TRIO(cur, red)   + SMOOTH3_TRIO(below, red))   / 6;
        dst->green = (SMOOTH3_TRIO(cur, green) + SMOOTH3_TRIO(below, green)) / 6;
        dst->blue  = (SMOOTH3_TRIO(cur, blue)  + SMOOTH3_TRIO(below, blue))  / 6;
    }

    /* top-right corner */
    dst->red   = (SMOOTH3_PAIR(cur, -1, red)   + SMOOTH3_PAIR(below, -1, red))   >> 2;
    dst->green = (SMOOTH3_PAIR(cur, -1, green) + SMOOTH3_PAIR(below, -1, green)) >> 2;
    dst->blue  = (SMOOTH3_PAIR(cur, -1, blue)  + SMOOTH3_PAIR(below, -1, blue))  >> 2;
    ++dst;

    /* ---- rows 1 .. dim-2: window covers rows row-1, row, row+1 ---- */
    above = src;
    cur = above + dim;
    below = cur + dim;
    for (row = 1; row < last; ++row) {
        /* left edge */
        dst->red   = (SMOOTH3_PAIR(above, 1, red)   + SMOOTH3_PAIR(cur, 1, red)   + SMOOTH3_PAIR(below, 1, red))   / 6;
        dst->green = (SMOOTH3_PAIR(above, 1, green) + SMOOTH3_PAIR(cur, 1, green) + SMOOTH3_PAIR(below, 1, green)) / 6;
        dst->blue  = (SMOOTH3_PAIR(above, 1, blue)  + SMOOTH3_PAIR(cur, 1, blue)  + SMOOTH3_PAIR(below, 1, blue))  / 6;
        ++above;
        ++cur;
        ++below;
        ++dst;

        /* interior, columns 1 .. dim-2 */
        for (col = 1; col < dim - 1; ++col, ++above, ++cur, ++below, ++dst) {
            dst->red   = (SMOOTH3_TRIO(above, red)   + SMOOTH3_TRIO(cur, red)   + SMOOTH3_TRIO(below, red))   / 9;
            dst->green = (SMOOTH3_TRIO(above, green) + SMOOTH3_TRIO(cur, green) + SMOOTH3_TRIO(below, green)) / 9;
            dst->blue  = (SMOOTH3_TRIO(above, blue)  + SMOOTH3_TRIO(cur, blue)  + SMOOTH3_TRIO(below, blue))  / 9;
        }

        /* right edge; stepping once more lands every pointer on the
           first pixel of its next row */
        dst->red   = (SMOOTH3_PAIR(above, -1, red)   + SMOOTH3_PAIR(cur, -1, red)   + SMOOTH3_PAIR(below, -1, red))   / 6;
        dst->green = (SMOOTH3_PAIR(above, -1, green) + SMOOTH3_PAIR(cur, -1, green) + SMOOTH3_PAIR(below, -1, green)) / 6;
        dst->blue  = (SMOOTH3_PAIR(above, -1, blue)  + SMOOTH3_PAIR(cur, -1, blue)  + SMOOTH3_PAIR(below, -1, blue))  / 6;
        ++above;
        ++cur;
        ++below;
        ++dst;
    }

    /* ---- bottom row: above is row dim-2, cur is row dim-1 ---- */

    /* bottom-left corner */
    dst->red   = (SMOOTH3_PAIR(above, 1, red)   + SMOOTH3_PAIR(cur, 1, red))   >> 2;
    dst->green = (SMOOTH3_PAIR(above, 1, green) + SMOOTH3_PAIR(cur, 1, green)) >> 2;
    dst->blue  = (SMOOTH3_PAIR(above, 1, blue)  + SMOOTH3_PAIR(cur, 1, blue))  >> 2;
    ++dst;
    ++above;
    ++cur;

    /* bottom edge, columns 1 .. dim-2 */
    for (col = 1; col < last; ++col, ++above, ++cur, ++dst) {
        dst->red   = (SMOOTH3_TRIO(above, red)   + SMOOTH3_TRIO(cur, red))   / 6;
        dst->green = (SMOOTH3_TRIO(above, green) + SMOOTH3_TRIO(cur, green)) / 6;
        dst->blue  = (SMOOTH3_TRIO(above, blue)  + SMOOTH3_TRIO(cur, blue))  / 6;
    }

    /* bottom-right corner */
    dst->red   = (SMOOTH3_PAIR(above, -1, red)   + SMOOTH3_PAIR(cur, -1, red))   >> 2;
    dst->green = (SMOOTH3_PAIR(above, -1, green) + SMOOTH3_PAIR(cur, -1, green)) >> 2;
    dst->blue  = (SMOOTH3_PAIR(above, -1, blue)  + SMOOTH3_PAIR(cur, -1, blue))  >> 2;
}

#undef SMOOTH3_TRIO
#undef SMOOTH3_PAIR

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值