对 Jacobi 程序的循环优化

Jacobi 程序是矩阵计算中常见的算法,程序中存在大量的循环语句,通过使用循环优化技术,可以降低程序的运行时间,以下进行详细介绍。

原始程序如下所示:

#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <array>
#include <cmath>
#include <stdint.h>
#include <vector>
#include <algorithm>
#include <iterator>
#include <iostream>


#define N 202
#define K 202

struct RectanglePoints {
	double x_0, x_l;
	double y_0, y_m;
};

namespace InitialFunctions {
	inline double leftSideRectangle(double y) { return 15 * sqrt(y); }
	inline double rightSideRectangle(double y) { return 30 * y * (1 - y); }
	inline double upSideRectangle(double x) { return 15 * (1 - x); }
	inline double downSideRectangle(double x) { return 0; }

}

using namespace InitialFunctions;

class Grid {
private:

	RectanglePoints rectangle;

	double length_x;
	double length_y;
	double h_xyStep;
	
public:
	Grid(); //default constructor
	Grid(double l, double m, double h, std::array<double, N>& x, std::array<double, N>& y);

    void setPoints();
	void createGrid(std::array<double, N>& x, std::array<double, K>& y);
	void getGrid(std::array<double, N>& x, std::array<double, K>& y);
};

Grid::Grid()
{
	length_x = 1;
	length_y = 1;
	h_xyStep = 0.2;

	setPoints();
}

Grid::Grid(double l, double m, double h, std::array<double, N>& x, std::array<double, K>& y) {

	length_x = l;
	length_y = m;
	h_xyStep = h;

	setPoints();
	createGrid(x, y);
}

void Grid::setPoints()
{

	rectangle.x_0 = 0;
	rectangle.y_0 = 0;
	rectangle.x_l = rectangle.x_0 + length_x;
	rectangle.y_m = rectangle.y_0 + length_y;
}

void Grid::createGrid(std::array<double, N>& x, std::array<double, K>& y)
{
	x[0] = rectangle.x_0;
	y[0] = rectangle.y_0;
	
	for (uint8_t i = 0; i < N; ++i)
	{
		x[i] = i * h_xyStep;
	}

	for (uint8_t j = 0; j < K; ++j)
	{
		y[j] = j * h_xyStep;
	}
}

void Grid::getGrid(std::array<double, N>& x, std::array<double, K>& y)
{
	for (uint8_t i = 0; i < N; i++)
		printf("%lf ", x[i]);
	printf("\n\n");
	for (uint8_t j = 0; j < K; j++)
		printf("%lf ", y[j]);
	printf("\n\n");

}


class LaplasEquationSolver {
public:
	double w;
	LaplasEquationSolver(double(*u1)[N], std::array<double, N>& x, std::array<double, K>& y, double w);
};

LaplasEquationSolver::LaplasEquationSolver(double(*u1)[N], std::array<double, N>& x, std::array<double, K>& y, double w)
{
	double u2[N][K] = { 0 }, u_new[N][K] = { 0 };
	std::vector<double> errors;
	double max_error;
	double eps = 0.01;

	for (uint8_t i = 0; i < N; ++i)
		for (uint8_t j = 0; j < K; ++j) {
			u1[i][j] = 0;
			u2[i][j] = 0;
			u_new[i][j] = 0;
		}


	for (uint8_t i = 0; i < N; ++i)
	{
		u1[i][0] = downSideRectangle(x[i]);
		u1[i][K - 1] = upSideRectangle(x[i]);
	}

	for (uint8_t j = 0; j < K; ++j)
	{
		u1[0][j] = leftSideRectangle(y[j]);
		u1[N - 1][j] = rightSideRectangle(y[j]);
	}

	for (uint8_t i = 0; i < N; ++i)
	{
		u2[i][0] = downSideRectangle(x[i]);
		u2[i][K - 1] = upSideRectangle(x[i]);
	}

	for (uint8_t j = 0; j < K; ++j)
	{
		u2[0][j] = leftSideRectangle(y[j]);
		u2[N - 1][j] = rightSideRectangle(y[j]);
	}

	int k = 0;
	clock_t start_s=clock();
	do {
		for (uint8_t j = 1; j < K - 1; ++j) {
			for (uint8_t i = 1; i < N - 1; ++i) {
				u1[i][j] = (1.0 / 4.0) * (u1[i - 1][j] + u1[i + 1][j] + u1[i][j - 1] + u1[i][j + 1]);
			}
		}

		for (uint8_t j = 1; j < K - 1; ++j) {
			for (uint8_t i = 1; i < N - 1; ++i) {
				u2[i][j] = (1.0 / 4.0) * (u1[i - 1][j] + u1[i + 1][j] + u1[i][j - 1] + u1[i][j + 1]);
			}
		}

		for (uint8_t j = 0; j < K; ++j) {
			for (uint8_t i = 0; i < N; ++i) {
				u_new[i][j] = u1[i][j] + w * (u2[i][j] - u1[i][j]);
			}
		}

		for (uint8_t i = 0; i < N; ++i) {
			for (uint8_t j = 0; j < K; ++j) {
				errors.push_back(fabs(u_new[i][j] - u1[i][j]));;
			}
		}
		max_error = *std::max_element(errors.begin(), errors.end());
		//if(k%100==0)std::cout<<k<<" "<<max_error<<"\n";
		errors.clear();
		for (uint8_t i = 0; i < N; ++i) {
			for (uint8_t j = 0; j < K; ++j) {
				u1[i][j] = u_new[i][j];
			}
		}
		k++;
	} while (max_error > eps);
	clock_t end_s=clock();

	std::cout << "Input = "<<N<<" "<<K <<std::endl;
	std::cout << "w= " << w << ", iteration= " << k << ", max error = " << max_error << std::endl;
	std::cout << "cost time = "<< end_s - start_s << " clocks"<< std::endl;
}




int main(void) 
{
	std::array<double, N> x = { 0 };
	std::array<double, K> y = { 0 };
	double u[N][K] = { 0 };

	/*calling 2nd constructor*/
	Grid grid(1, 1, 0.2, x, y); 

	/*print grid to console*/
	//grid.getGrid(x, y);

	double w = 1.9;
	LaplasEquationSolver mySolver(u, x, y, w);



	return 0;
}

 1、对原始程序进行编译,并查看运行时间

 2、使用循环交换进行优化

do {
		
		for (uint8_t i = 1; i < N - 1; ++i) {
			for (uint8_t j = 1; j < K - 1; ++j) {
				u1[i][j] = (1.0 / 4.0) * (u1[i - 1][j] + u1[i + 1][j] + u1[i][j - 1] + u1[i][j + 1]);
			}
		}

		for (uint8_t i = 1; i < N - 1; ++i) {
			for (uint8_t j = 1; j < K - 1; ++j) {
				u2[i][j] = (1.0 / 4.0) * (u1[i - 1][j] + u1[i + 1][j] + u1[i][j - 1] + u1[i][j + 1]);
			}
		}

		
		for (uint8_t i = 0; i < N; ++i) {
			for (uint8_t j = 0; j < K; ++j) {
				u_new[i][j] = u1[i][j] + w * (u2[i][j] - u1[i][j]);
			}
		}

		
		for (uint8_t i = 0; i < N; ++i) {
			for (uint8_t j = 0; j < K; ++j) {
				errors.push_back(fabs(u_new[i][j] - u1[i][j]));;
			}
		}
		max_error = *std::max_element(errors.begin(), errors.end());
		//if(k%100==0)std::cout<<k<<" "<<max_error<<"\n";
		errors.clear();
		for (uint8_t i = 0; i < N; ++i) {
			for (uint8_t j = 0; j < K; ++j) {
				u1[i][j] = u_new[i][j];
			}
		}
		k++;
	} while (max_error > eps);
	clock_t end_s=clock();

编译后查看运行时间

3、 使用循环不变量外提进行优化

do {
		double coff = 1.0 / 4.0;
		for (uint8_t j = 1; j < K - 1; ++j) {
			for (uint8_t i = 1; i < N - 1; ++i) {
				u1[i][j] = coff * (u1[i - 1][j] + u1[i + 1][j] + u1[i][j - 1] + u1[i][j + 1]);
			}
		}

		for (uint8_t j = 1; j < K - 1; ++j) {
			for (uint8_t i = 1; i < N - 1; ++i) {
				u2[i][j] = coff * (u1[i - 1][j] + u1[i + 1][j] + u1[i][j - 1] + u1[i][j + 1]);
			}
		}

		for (uint8_t j = 0; j < K; ++j) {
			for (uint8_t i = 0; i < N; ++i) {
				u_new[i][j] = u1[i][j] + w * (u2[i][j] - u1[i][j]);
			}
		}

		for (uint8_t i = 0; i < N; ++i) {
			for (uint8_t j = 0; j < K; ++j) {
				errors.push_back(fabs(u_new[i][j] - u1[i][j]));;
			}
		}
		max_error = *std::max_element(errors.begin(), errors.end());
		//if(k%100==0)std::cout<<k<<" "<<max_error<<"\n";
		errors.clear();
		for (uint8_t i = 0; i < N; ++i) {
			for (uint8_t j = 0; j < K; ++j) {
				u1[i][j] = u_new[i][j];
			}
		}
		k++;
	} while (max_error > eps);
	clock_t end_s=clock();

编译后查看运行时间

 4、使用循环展开进行优化

5、使用循环交换、循环不变量外提、循环展开混合技术进行优化

do {
		double coff = 1.0 / 4.0;
		for (uint8_t i = 1; i < N - 1; ++i) {
			for (uint8_t j = 1; j < K - 1; j += 4) {
				u1[i][j] = coff * (u1[i - 1][j] + u1[i + 1][j] + u1[i][j - 1] + u1[i][j + 1]);
				u1[i][j + 1] = coff * (u1[i - 1][j + 1] + u1[i + 1][j + 1] + u1[i][j] + u1[i][j + 2]);
				u1[i][j + 2] = coff * (u1[i - 1][j + 2] + u1[i + 1][j + 2] + u1[i][j + 1] + u1[i][j + 3]);
				u1[i][j + 3] = coff * (u1[i - 1][j + 3] + u1[i + 1][j + 3] + u1[i][j + 2] + u1[i][j + 4]);
			}
		}

		for (uint8_t i = 1; i < N - 1; ++i) {
			for (uint8_t j = 1; j < K - 1; j += 4) {
				u2[i][j] = coff * (u1[i - 1][j] + u1[i + 1][j] + u1[i][j - 1] + u1[i][j + 1]);
				u2[i][j + 1] = coff * (u1[i - 1][j + 1] + u1[i + 1][j + 1] + u1[i][j] + u1[i][j + 2]);
				u2[i][j + 2] = coff * (u1[i - 1][j + 2] + u1[i + 1][j + 2] + u1[i][j + 1] + u1[i][j + 3]);
				u2[i][j + 3] = coff * (u1[i - 1][j + 3] + u1[i + 1][j + 3] + u1[i][j + 2] + u1[i][j + 4]);
			}
		}

		for (uint8_t i = 0; i < N; ++i) {
			for (uint8_t j = 0; j < K - 2; j += 4) {
				u_new[i][j] = u1[i][j] + w * (u2[i][j] - u1[i][j]);
				u_new[i][j + 1] = u1[i][j + 1] + w * (u2[i][j + 1] - u1[i][j + 1]);
				u_new[i][j + 2] = u1[i][j + 2] + w * (u2[i][j + 2] - u1[i][j + 2]);
				u_new[i][j + 3] = u1[i][j + 3] + w * (u2[i][j + 3] - u1[i][j + 3]);
			}
			for (uint8_t j = 200; j < 202; j++)
				u_new[i][j] = u1[i][j] + w * (u2[i][j] - u1[i][j]);
		}

		for (uint8_t i = 0; i < N; ++i) {
			for (uint8_t j = 0; j < K - 2; j += 4) {
				errors.push_back(fabs(u_new[i][j] - u1[i][j]));
				errors.push_back(fabs(u_new[i][j + 1] - u1[i][j + 1]));
				errors.push_back(fabs(u_new[i][j + 2] - u1[i][j + 2]));
				errors.push_back(fabs(u_new[i][j + 3] - u1[i][j + 3]));
			}
			for (uint8_t j = 200; j < 202; j++)
				errors.push_back(fabs(u_new[i][j] - u1[i][j]));
		}
		max_error = *std::max_element(errors.begin(), errors.end());
		//if(k%100==0)std::cout<<k<<" "<<max_error<<"\n";
		errors.clear();
		for (uint8_t i = 0; i < N; ++i) {
			for (uint8_t j = 0; j < K - 2; j += 4) {
				u1[i][j] = u_new[i][j];
				u1[i][j + 1] = u_new[i][j + 1];
				u1[i][j + 2] = u_new[i][j + 2];
				u1[i][j + 3] = u_new[i][j + 3];
			}
			for (uint8_t j = 200; j < 202; j++)
				u1[i][j] = u_new[i][j];
		}
		k++;
	} while (max_error > eps);
	clock_t end_s=clock();

 编译后查看运行时间

总结:

在迭代次数和误差不变的情况下,通过使用循环交换、循环不变量外提、循环展开三种技术对程序进行了优化,每一种都可以提高效率,降低运行时间,混合后,更能提升程序的效率,值得推广。

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值