5、梯度下降法，牛顿法，高斯-牛顿迭代法

Adagrad

已于 2022-03-30 14:44:23 修改

阅读量1.6k

点赞数 1

分类专栏：常用算法文章标签：线性代数 c++

于 2022-03-30 14:15:04 首次发布

本文链接：https://blog.csdn.net/u013049912/article/details/123844923

版权

常用算法专栏收录该内容

10 篇文章 0 订阅

订阅专栏

1、梯度下降

2、牛顿法

3、高斯-牛顿迭代法

4、代码部分

1.梯度下降法代码

批量梯度下降法c++代码：

/*
需要参数为theta：theta0，theta1
目标函数:y=theta0*x0+theta1*x1;
*/
#include <iostream>
using namespace std;
 
int main()
{
	float matrix[4][2] = { { 1, 1 }, { 2, 1 }, { 2, 2 }, { 3, 4 } };
	float result[4] = {5,6.99,12.02,18};
	float theta[2] = {0,0};
	float loss = 10.0;
	for (int i = 0; i < 10000 && loss>0.0000001; ++i)
	{
		float ErrorSum = 0.0;
		float cost[2] = { 0.0, 0.0 };
		for (int j = 0; j <4; ++j)
		{
			float h = 0.0;
			for (int k = 0; k < 2; ++k)
			{
				h += matrix[j][k] * theta[k];
			}
			
			ErrorSum = result[j] - h;
 
			for (int k = 0; k < 2; ++k)
			{
				cost[k] = ErrorSum*matrix[j][k];
			}
		}
		for (int k = 0; k < 2; ++k)
		{
			theta[k] = theta[k] + 0.01*cost[k] / 4;
		}
 
		cout << "theta[0]=" << theta[0] << "\n" << "theta[1]=" << theta[1] << endl;
		loss = 0.0;
		for (int j = 0; j < 4; ++j)
		{
			float h2 = 0.0;
			for (int k = 0; k < 2; ++k)
			{
				h2 += matrix[j][k] * theta[k];
			}
			loss += (h2 - result[j])*(h2 - result[j]);
		}
		cout << "loss=" << loss << endl;
	}
	return 0;
}

2、随机梯度下降法C++代码：

/*
需要参数为theta：theta0，theta1
目标函数:y=theta0*x0+theta1*x1;
*/
#include <iostream>
using namespace std;
 
int main()
{
	float matrix[4][2] = { { 1, 1 }, { 2, 1 }, { 2, 2 }, { 3, 4 } };
	float result[4] = {5,6.99,12.02,18};
	float theta[2] = {0,0};
	float loss = 10.0;
	for (int i = 0; i<1000 && loss>0.00001; ++i)
	{
		float ErrorSum = 0.0;
		int j = i % 4;
 
		float h = 0.0;
		for (int k = 0; k<2; ++k)
		{
			h += matrix[j][k] * theta[k];
		}
	
		ErrorSum = result[j] - h;
	
		for (int k = 0; k<2; ++k)
		{
			theta[k] = theta[k] + 0.001*(ErrorSum)*matrix[j][k];
		}
		cout << "theta[0]=" << theta[0] << "\n" << "theta[1]=" << theta[1] << endl;
 
		loss = 0.0;
		for (int j = 0; j<4; ++j)
		{
			float sum = 0.0;
			for (int k = 0; k<2; ++k)
			{
 
				sum += matrix[j][k] * theta[k];
			}
			loss += (sum - result[j])*(sum - result[j]);
		}
		cout << "loss=" << loss << endl;
	}
	return 0;
}

3、牛顿法代码

实例，求解二元方程:

x*x-2*x-y+0.5=0; (1)

x*x+4*y*y-4=0; (2)

#include<iostream>
#include<cmath>
#define N 2                     // 非线性方程组中方程个数
#define Epsilon 0.0001          // 差向量1范数的上限
#define Max 1000            //最大迭代次数
 
using namespace std;
 
const int N2 = 2 * N;
void func_y(float xx[N], float yy[N]);					//计算向量函数的因变量向量yy[N]
void func_y_jacobian(float xx[N], float yy[N][N]);		// 计算雅克比矩阵yy[N][N]
void inv_jacobian(float yy[N][N], float inv[N][N]); //计算雅克比矩阵的逆矩阵inv
void NewtonFunc(float x0[N], float inv[N][N], float y0[N], float x1[N]);   //由近似解向量 x0 计算近似解向量 x1
 
int main()
{
	float x0[N] = { 2.0, 0.5 }, y0[N], jacobian[N][N], invjacobian[N][N], x1[N], errornorm;//再次X0初始值满足方程1，不满足也可
	int i, j, iter = 0;
	cout << "初始近似解向量：" << endl;
	for (i = 0; i < N; i++)
	{
		cout << x0[i] << " ";
	}
	cout << endl;
	cout << endl;
 
	while (iter<Max)
	{
		iter = iter + 1;
		cout << "第 " << iter << " 次迭代" << endl;   
		func_y(x0, y0);                             //计算向量函数的因变量向量
		func_y_jacobian(x0, jacobian);              //计算雅克比矩阵
		inv_jacobian(jacobian, invjacobian);        //计算雅克比矩阵的逆矩阵
		NewtonFunc(x0, invjacobian, y0, x1);        //由近似解向量 x0 计算近似解向量 x1
		errornorm = 0;
		for (i = 0; i<N; i++)
			errornorm = errornorm + fabs(x1[i] - x0[i]);
		if (errornorm<Epsilon)
			break;
		for (i = 0; i<N; i++)
			x0[i] = x1[i];
	}
	return 0;
}
 
void func_y(float xx[N], float yy[N])//求函数的因变量向量
{
	float x, y;
	int i;
	x = xx[0];
	y = xx[1];
	yy[0] = x*x-2*x-y+0.5;
	yy[1] = x*x+4*y*y-4;
	cout << "函数的因变量向量：" << endl;
	for (i = 0; i<N; i++)
		cout << yy[i] << "  ";
	cout << endl;
}
 
void func_y_jacobian(float xx[N], float yy[N][N])	//计算函数雅克比的值
{
	float x, y;
	int i, j;
	x = xx[0];
	y = xx[1];
	//yy[][]分别对x,y求导，组成雅克比矩阵
	yy[0][0] = 2*x-2;
	yy[0][1] = -1;
	yy[1][0] = 2*x;
	yy[1][1] = 8*y;
 
	cout << "雅克比矩阵:" << endl;
	for (i = 0; i<N; i++)
	{
		for (j = 0; j < N; j++)
		{
			cout << yy[i][j] << " ";
		}
		cout << endl;
	}
	cout << endl;
}
 
void inv_jacobian(float yy[N][N], float inv[N][N])//雅克比逆矩阵
{
	float aug[N][N2], L;
	int i, j, k;
 
	cout << "计算雅克比矩阵的逆矩阵：" << endl;
	for (i = 0; i<N; i++)
	{
		for (j = 0; j < N; j++)
		{
			aug[i][j] = yy[i][j];
		}
		for (j = N; j < N2; j++)
		{
			if (j == i + N) 
				aug[i][j] = 1;
			else  
				aug[i][j] = 0;
		}
	}
 
	for (i = 0; i<N; i++)
	{
		for (j = 0; j < N2; j++)
		{
			cout << aug[i][j] << " ";
		}
		cout << endl;
	}
	cout << endl;
 
	for (i = 0; i<N; i++)
	{
		for (k = i + 1; k<N; k++)
		{
			L = -aug[k][i] / aug[i][i];
			for (j = i; j < N2; j++)
			{
				aug[k][j] = aug[k][j] + L*aug[i][j];
			}
		}
	}
 
	for (i = 0; i<N; i++)
	{
		for (j = 0; j<N2; j++)
		{
			cout << aug[i][j] << " ";
		}
		cout << endl;
	}
	cout << endl;
 
	for (i = N - 1; i>0; i--)
	{
		for (k = i - 1; k >= 0; k--)
		{
			L = -aug[k][i] / aug[i][i];
			for (j = N2 - 1; j >= 0; j--)
			{
				aug[k][j] = aug[k][j] + L*aug[i][j];
			}
		}
	}
 
	for (i = 0; i<N; i++)
	{
		for (j = 0; j < N2; j++)
		{
			cout << aug[i][j] << " ";
		}
		cout << endl;
	}
	cout << endl;
 
	for (i = N - 1; i >= 0; i--)
	{
		for (j = N2 - 1; j >= 0; j--)
		{
			aug[i][j] = aug[i][j] / aug[i][i];
		}
	}
 
 
	for (i = 0; i<N; i++)
	{
		for (j = 0; j < N2; j++)
		{
			cout << aug[i][j] << " ";
		}
		cout << endl;
		for (j = N; j < N2; j++)
		{
			inv[i][j - N] = aug[i][j];
		}
	}
	cout << endl;
	cout << "雅克比矩阵的逆矩阵： " << endl;
	for (i = 0; i<N; i++)
	{
		for (j = 0; j < N; j++)
		{
			cout << inv[i][j] << " ";
		}
		cout << endl;
	}
	cout << endl;
}
 
void NewtonFunc(float x0[N], float inv[N][N], float y0[N], float x1[N])
{
	int i, j;
	float sum = 0;
 
	for (i = 0; i<N; i++)
	{
		sum = 0;
		for (j = 0; j < N; j++)
		{
			sum = sum + inv[i][j] * y0[j];
		}
		x1[i] = x0[i] - sum;
	}
	cout << "近似解向量：" << endl;
	for (i = 0; i < N; i++)
	{
		cout << x1[i] << "  ";
	}
	cout << endl;
}

4、高斯-牛顿迭代法代码

#include <cstdio>  
#include <vector>  
#include <opencv2/core/core.hpp>  
 
using namespace std;
using namespace cv;
 
const double DERIV_STEP = 1e-5;
const int MAX_ITER = 100;
 
void GaussNewton(double(*Func)(const Mat &input, const Mat params), 
	const Mat &inputs, const Mat &outputs, Mat params);
 
double Deriv(double(*Func)(const Mat &input, const Mat params),
	const Mat &input, const Mat params, int n);
 
// The user defines their function here  
double Func(const Mat &input, const Mat params);
 
int main()
{
	// For this demo we're going to try and fit to the function  
	// F = A*exp(t*B)， There are 2 parameters: A B  
	int num_params = 2;
 
	// Generate random data using these parameters  
	int total_data = 8;
 
	Mat inputs(total_data, 1, CV_64F);
	Mat outputs(total_data, 1, CV_64F);
 
	//load observation data  
	for (int i = 0; i < total_data; i++) 
	{
		inputs.at<double>(i, 0) = i + 1;  //load year  
	}
	//load America population  
	outputs.at<double>(0, 0) = 8.3;
	outputs.at<double>(1, 0) = 11.0;
	outputs.at<double>(2, 0) = 14.7;
	outputs.at<double>(3, 0) = 19.7;
	outputs.at<double>(4, 0) = 26.7;
	outputs.at<double>(5, 0) = 35.2;
	outputs.at<double>(6, 0) = 44.4;
	outputs.at<double>(7, 0) = 55.9;
 
	// Guess the parameters, it should be close to the true value, else it can fail for very sensitive functions!  
	Mat params(num_params, 1, CV_64F);
 
	//init guess  
	params.at<double>(0, 0) = 6;
	params.at<double>(1, 0) = 0.3;
	GaussNewton(Func, inputs, outputs, params);
	printf("Parameters from GaussNewton: %f %f\n", params.at<double>(0, 0), params.at<double>(1, 0));
 
	return 0;
}
 
double Func(const Mat &input, const Mat params)
{
	// Assumes input is a single row matrix  
	// Assumes params is a column matrix  
	double A = params.at<double>(0, 0);
	double B = params.at<double>(1, 0);
	double x = input.at<double>(0, 0);
	return A*exp(x*B);
}
 
//calc the n-th params' partial derivation ， the params are our  final target  
double Deriv(double(*Func)(const Mat &input, const Mat params), const Mat &input, const Mat params, int n)
{
	// Assumes input is a single row matrix  
	// Returns the derivative of the nth parameter  
	Mat params1 = params.clone();
	Mat params2 = params.clone();
	// Use central difference  to get derivative  
	params1.at<double>(n, 0) -= DERIV_STEP;
	params2.at<double>(n, 0) += DERIV_STEP;
	double p1 = Func(input, params1);
	double p2 = Func(input, params2);
	double d = (p2 - p1) / (2 * DERIV_STEP);
	return d;
}
 
void GaussNewton(double(*Func)(const Mat &input, const Mat params),
	const Mat &inputs, const Mat &outputs, Mat params)
{
	int m = inputs.rows;
	int n = inputs.cols;
	int num_params = params.rows;
 
	Mat r(m, 1, CV_64F); // residual matrix  
	Mat Jf(m, num_params, CV_64F); // Jacobian of Func()  
	Mat input(1, n, CV_64F); // single row input  
 
	double last_mse = 0;
 
	for (int i = 0; i < MAX_ITER; i++) 
	{
		double mse = 0;
		for (int j = 0; j < m; j++) 
		{
			for (int k = 0; k < n; k++) 
			{//copy Independent variable vector, the year  
				input.at<double>(0, k) = inputs.at<double>(j, k);
			}
			r.at<double>(j, 0) = outputs.at<double>(j, 0) - Func(input, params);//diff between estimate and observation population  
			mse += r.at<double>(j, 0)*r.at<double>(j, 0);
			for (int k = 0; k < num_params; k++) 
			{
				Jf.at<double>(j, k) = Deriv(Func, input, params, k);
			}
		}
 
		mse /= m;
		// The difference in mse is very small, so quit  
		if (fabs(mse - last_mse) < 1e-8) 
		{
			break;
		}
 
		Mat delta = ((Jf.t()*Jf)).inv() * Jf.t()*r;
		params += delta;
		//printf("%d: mse=%f\n", i, mse);  
		printf("%d %f\n", i, mse);
		last_mse = mse;
	}
}

#include <cstdio>  
#include <vector>  
#include <opencv2/core/core.hpp>  
 
using namespace std;
using namespace cv;
 
const double DERIV_STEP = 1e-5;
const int MAX_ITER = 100;
 
void GaussNewton(double(*Func)(const Mat &input, const Mat params), 
	const Mat &inputs, const Mat &outputs, Mat params);
 
double Deriv(double(*Func)(const Mat &input, const Mat params),  
	const Mat &input, const Mat params, int n);
 
double Func(const Mat &input, const Mat params);
 
int main()
{
	// For this demo we're going to try and fit to the function  
	// F = A*sin(Bx) + C*cos(Dx)，There are 4 parameters: A, B, C, D  
	int num_params = 4;
	// Generate random data using these parameters  
	int total_data = 100;
 
	double A = 5;
	double B = 1;
	double C = 10;
	double D = 2;
 
	Mat inputs(total_data, 1, CV_64F);
	Mat outputs(total_data, 1, CV_64F);
 
	for (int i = 0; i < total_data; i++) {
		double x = -10.0 + 20.0* rand() / (1.0 + RAND_MAX); // random between [-10 and 10]  
		double y = A*sin(B*x) + C*cos(D*x);
		// Add some noise  
		// y += -1.0 + 2.0*rand() / (1.0 + RAND_MAX);  
		inputs.at<double>(i, 0) = x;
		outputs.at<double>(i, 0) = y;
	}
	// Guess the parameters, it should be close to the true value, else it can fail for very sensitive functions!  
	Mat params(num_params, 1, CV_64F);
	params.at<double>(0, 0) = 1;
	params.at<double>(1, 0) = 1;
	params.at<double>(2, 0) = 8; // changing to 1 will cause it not to find the solution, too far away  
	params.at<double>(3, 0) = 1;
	GaussNewton(Func, inputs, outputs, params);
	printf("True parameters: %f %f %f %f\n", A, B, C, D);
	printf("Parameters from GaussNewton: %f %f %f %f\n", params.at<double>(0, 0), params.at<double>(1, 0),
		params.at<double>(2, 0), params.at<double>(3, 0));
 
	return 0;
}
 
double Func(const Mat &input, const Mat params)
{
	// Assumes input is a single row matrix  
	// Assumes params is a column matrix  
	double A = params.at<double>(0, 0);
	double B = params.at<double>(1, 0);
	double C = params.at<double>(2, 0);
	double D = params.at<double>(3, 0);
	double x = input.at<double>(0, 0);
	return A*sin(B*x) + C*cos(D*x);
}
 
double Deriv(double(*Func)(const Mat &input, const Mat params), const Mat &input, const Mat params, int n)
{
	// Assumes input is a single row matrix  
	// Returns the derivative of the nth parameter  
	Mat params1 = params.clone();
	Mat params2 = params.clone();
	// Use central difference  to get derivative  
	params1.at<double>(n, 0) -= DERIV_STEP;
	params2.at<double>(n, 0) += DERIV_STEP;
	double p1 = Func(input, params1);
	double p2 = Func(input, params2);
	double d = (p2 - p1) / (2 * DERIV_STEP);
	return d;
}
 
void GaussNewton(double(*Func)(const Mat &input, const Mat params),
	const Mat &inputs, const Mat &outputs, Mat params)
{
	int m = inputs.rows;
	int n = inputs.cols;
	int num_params = params.rows;
	Mat r(m, 1, CV_64F); // residual matrix  
	Mat Jf(m, num_params, CV_64F); // Jacobian of Func()  
	Mat input(1, n, CV_64F); // single row input  
 
	double last_mse = 0;
 
	for (int i = 0; i < MAX_ITER; i++) 
	{
		double mse = 0;
		for (int j = 0; j < m; j++) 
		{
			for (int k = 0; k < n; k++) 
			{
				input.at<double>(0, k) = inputs.at<double>(j, k);
			}
			r.at<double>(j, 0) = outputs.at<double>(j, 0) - Func(input, params);
			mse += r.at<double>(j, 0)*r.at<double>(j, 0);
			for (int k = 0; k < num_params; k++) 
			{
				Jf.at<double>(j, k) = Deriv(Func, input, params, k);
			}
		}
		mse /= m;
		// The difference in mse is very small, so quit  
		if (fabs(mse - last_mse) < 1e-8) 
		{
			break;
		}
		Mat delta = ((Jf.t()*Jf)).inv() * Jf.t()*r;
		params += delta; 
		printf("%f\n", mse);
		last_mse = mse;
	}
}

梯度下降法，牛顿法，高斯-牛顿迭代法，附代码实现_Naruto_Q的博客-CSDN博客_梯度下降高斯

Adagrad

关注

1
点赞
踩
13

收藏

觉得还不错? 一键收藏
1
评论
5、梯度下降法，牛顿法，高斯-牛顿迭代法

----------------------代码部分--------------------1.梯度下降法代码批量梯度下降法c++代码：/*需要参数为theta：theta0，theta1目标函数:y=theta0*x0+theta1*x1;*/#include <iostream>using namespace std; int main(){ float matrix[4][2] = { { 1,...
复制链接

扫一扫