Rosenbrock函数介绍
Rosenbrock是可导非凸函数,二维情况下的函数值情况如下:
高维函数定义如下:
f
(
x
)
=
f
(
x
1
,
x
2
,
…
,
x
N
)
=
∑
i
=
1
N
/
2
[
100
(
x
2
i
−
1
2
−
x
2
i
)
2
+
(
x
2
i
−
1
−
1
)
2
]
f(\mathbf{x})=f\left(x_{1}, x_{2}, \ldots, x_{N}\right)=\sum_{i=1}^{N / 2}\left[100\left(x_{2 i-1}^{2}-x_{2 i}\right)^{2}+\left(x_{2 i-1}-1\right)^{2}\right]
f(x)=f(x1,x2,…,xN)=i=1∑N/2[100(x2i−12−x2i)2+(x2i−1−1)2]
具体函数细节见Rosenbrock函数
由以上函数定义对函数求导可得(其中v是梯度向量):
v
2
i
−
1
=
400
(
x
2
i
−
1
3
−
x
2
i
−
1
x
2
i
)
+
2
(
x
2
i
−
1
−
1
)
,
i
=
1
,
2
,
3.....
N
/
2
v_{2i-1}=400\left(x_{2 i-1}^{3}-x_{2 i-1}x_{2 i}\right)+2(x_{2 i-1}-1),i = 1,2,3.....N/2
v2i−1=400(x2i−13−x2i−1x2i)+2(x2i−1−1),i=1,2,3.....N/2
v
2
i
=
−
200
(
x
2
i
−
1
2
−
x
2
i
)
,
i
=
1
,
2
,
3.....
N
/
2
v_{2i}=-200\left(x_{2 i-1}^{2}-x_{2 i}\right),i = 1,2,3.....N/2
v2i=−200(x2i−12−x2i),i=1,2,3.....N/2
具体代码实现
#include<iostream>
#include<algorithm>
#include<fstream>
#include<iomanip>
#include<chrono>
#include<ctime>
#include<sstream>
#include<vector>
#include<cmath>
#include<iterator>
#include<map>
#include<string.h>
#include <complex>
#include <set>
using namespace std;
// 函数值
double func(const vector<double> x)
{
double res = 0.0;
for(int i = 0; i <= int(x.size()/2 - 1); ++i)
{
res += 100 * pow(pow(x[2*i],2) - x[2*i+1], 2) + pow(x[2*i] - 1, 2);
}
return res;
}
// 算函数梯度
vector<double> gradient(const vector<double> x)
{
vector<double> res;
for(int i = 0; i <= int(x.size()/2 - 1); ++i)
{
double per1 = 400 * (pow(x[2*i], 3) - x[2*i]*x[2*i+1]) + 2 * (x[2*i] - 1);
double per2 = -200 * (pow(x[2*i], 2) - x[2*i+1]);
res.push_back(per1);
res.push_back(per2);
}
return res;
}
// 向量 * t
vector<double> timeByt(const vector<double> x, double t)
{
vector<double> res;
for(int i = 0; i < x.size(); ++i)
{
res.push_back(x[i] * t);
}
return res;
}
// 向量的和
vector<double> addVector(const vector<double> x1, const vector<double> x2)
{
vector<double> res;
for(int i = 0; i < x1.size(); ++i){
res.push_back(x1[i] + x2[i]);
}
return res;
}
// 向量点积
double proVector(const vector<double> x1, const vector<double> x2)
{
double res = 0.0;
for(int i = 0; i < x1.size(); ++i){
res += x1[i] * x2[i];
}
return res;
}
int main()
{
vector<double> x = {2, 0, 2, 3};
cout <<"初始函数值:" << func(x) <<endl;
// iteration initialization
double t = 1.0;
double c = 0.5;
double sigma = 1e-5;
vector<double> delta = gradient(x);
vector<double> d = timeByt(delta, -1);
cout <<"初始梯度值:";
for(int i = 0; i < x.size(); ++i) cout << delta[i] << " ";
cout << endl;
cout << func(addVector(x, d)) <<endl;
int cnt = 0;
while(sqrt(proVector(delta, delta)) > sigma)
{
while(func(addVector(x, timeByt(d, t))) > func(x) + proVector(d, timeByt(delta, c*t)))
{
// update
t /= 2;
}
x = addVector(x, timeByt(d,t));
delta = gradient(x);
d = timeByt(delta, -1);
++ cnt;
}
cout <<"运行迭代次数: "<< cnt <<endl;
cout << "最终函数值:" << func(x) <<endl;
cout << "最终步长 " << t <<endl;
cout << "最终收敛时对应的x:";
for(int i = 0; i < x.size(); ++i) cout << x[i] << " ";
cout << endl;
return 0;
}