Python实现:
import sympy
import numpy as np
def twoloop(s, y, rho, gk):
n = len(s) #向量序列的长度
if n >= 1 and type(gk)==np.matrix:
#h0是标量,而非矩阵
h0 = float((s[-1].T*y[-1])/(y[-1].T*y[-1]))
elif n >=1:
h0 = (s[-1]*y[-1])/(y[-1]*y[-1])
else:
h0 = 1
a = np.empty((n,))
if type(gk)==np.matrix:
q = gk.copy()
else:
q = gk
for i in range(n - 1, -1, -1):
if type(gk)==np.matrix:
a[i] = rho[i] * s[i].T * q
else:
a[i] = rho[i] * s[i] * q
q -= a[i] * y[i]
z = h0*q
for i in range(n):
if type(gk)==np.matrix:
b = rho[i] * y[i].T * z
else:
b = rho[i] * y[i] * z
z += s[i] *(float (a[i] - b))
return z
#f为要求极值的函数,x0为初始位置,max_iter为最大迭代次数,epsilon为相邻两次迭代的x改变量
def LBFGS_x(f, x0, max_iter, epsilon):
i = 0 # 记录迭代次数的变量
x0 = float(x0) # 浮点数计算更快
df = sympy.diff(f, x) # 定义一阶导数
beta = 0.5 #beta 0~1
delta = 0.25 #delta 0~0.5
s, y, rho = [], [], []
while i < max_iter:
gk = df.subs(x, x0)
dk = -twoloop(s, y, rho, gk)
mk = 0
while mk < 10:
if f.subs(x, x0+beta**mk*dk) < f.subs(x,x0) + delta*beta**mk*gk*dk:
break
mk += 1
xnew = x0 + beta**mk*dk
sk = xnew - x0
yk = df.subs(x, xnew) - gk
if sk*yk > 0:
rho.append(1/(sk*yk))
s.append(sk)
y.append(yk)
if len(rho) > 5:
rho.pop(0)
s.pop(0)
y.pop(0)
i += 1
print('迭代第%d次:%.5f' %(i, xnew))
if abs(f.subs(x, xnew)-f.subs(x, x0)) < epsilon:
break
x0 = xnew
return xnew
#f为要求极值的函数,X0为初始位置,max_iter为最大迭代次数,epsilon为相邻两次迭代的x改变量
def LBFGS_x0x1(f, X0, max_iter, epsilon):
i = 0 # 记录迭代次数的变量
X0[0], X0[1] = float(X0[0]), float(X0[1]) #浮点数计算更快
df0 = sympy.diff(f, x0) #定义一阶导数
df1 = sympy.diff(f, x1)
beta = 0.5 #beta 0~1
delta = 0.25 #delta 0~0.5
s, y, rho = [], [], []
while i < max_iter:
gk = np.mat([float(df0.subs([(x0, X0[0]), (x1, X0[1])])), float(df1.subs([(x0, X0[0]), (x1, X0[1])]))]).T #梯度矩阵
dk = -twoloop(s, y, rho, gk)
mk = 0
while mk < 10:
if f.subs([(x0, X0[0]+beta**mk*dk[0,0]), (x1, X0[1]+beta**mk*dk[1,0])]) < f.subs([(x0, X0[0]), (x1, X0[1])]) + delta*beta**mk*gk.T*dk:
break
mk += 1
Xnew = [X0[0] + beta**mk*dk[0,0], X0[1] + beta**mk*dk[1,0]]
sk = np.mat([beta**mk*dk[0,0], beta**mk*dk[1,0]]).T
yk = np.mat([float(df0.subs([(x0, Xnew[0]), (x1, Xnew[1])])), float(df1.subs([(x0, Xnew[0]), (x1, Xnew[1])]))]).T - gk
if sk.T*yk > 0:
rho.append(1/(sk.T*yk))
s.append(sk)
y.append(yk)
if len(rho) > 5:
rho.pop(0)
s.pop(0)
y.pop(0)
i += 1
print('迭代第%d次:[%.5f, %.5f]' %(i, Xnew[0], Xnew[1]))
if abs(f.subs([(x0, Xnew[0]), (x1, Xnew[1])])-f.subs([(x0, X0[0]), (x1, X0[1])])) < epsilon:
break
X0 = Xnew
return Xnew
if __name__ == '__main__':
x = sympy.symbols("x")
x0 = sympy.symbols("x0")
x1 = sympy.symbols("x1")
result = LBFGS_x(x**4-4*x, 10, 50, 1e-5)
print('最佳迭代的位置:%.5f' %result)
result = LBFGS_x0x1((x0-1)**2+(x1-1)**4, [10,10], 50, 1e-5)
print('最佳迭代位置:[%.5f, %.5f]' %(result[0], result[1]))
C++实现:
#include <iostream>
#include <vector>
#include <Eigen/Dense>
const double dx = 1e-3;
double f(double x)
{
return pow(x, 4) - 4 * x;
}
double df(double x)
{
//return 4 * pow(x, 3) - 4;
return (f(x + dx) - f(x)) / dx;
}
double f(std::vector<double> X)
{
return pow(X[0] - 1, 2) + pow(X[1] - 1, 4);
}
double twoloop(std::vector<double> s, std::vector<double> y, std::vector<double> rho, double gk)
{
int n = s.size();
double h0;
if (n >= 1)
h0 = (s.back()*y.back()) / (y.back()*y.back());
else
h0 = 1;
std::vector<double> a(n);
double q = gk;
for (int i = n - 1; i >= 0; --i)
{
a[i] = rho[i] * s[i] * q;
q -= a[i] * y[i];
}
double z = h0*q;
for (int i = 0; i < n; ++i)
{
double b = rho[i] * y[i] * z;
z += s[i] * (a[i] - b);
}
return z;
}
double df0(std::vector<double> X)
{
//return 2 * (X[0] - 1);
return (f({ X[0] + dx, X[1] }) - f(X)) / dx;
}
double df1(std::vector<double> X)
{
//return 4 * pow(X[1] - 1, 3);
return (f({ X[0], X[1] + dx }) - f(X)) / dx;
}
Eigen::Vector2d twoloop(std::vector<Eigen::Vector2d> s, std::vector<Eigen::Vector2d> y, std::vector<double> rho, Eigen::Vector2d gk)
{
int n = s.size();
double h0;
if (n >= 1)
h0 = (s.back().transpose()*y.back())(0, 0) / (y.back().transpose()*y.back())(0, 0);
else
h0 = 1;
std::vector<double> a(n);
Eigen::Vector2d q = gk;
for (int i = n - 1; i >= 0; --i)
{
a[i] = rho[i] * s[i].transpose() * q;
q -= a[i] * y[i];
}
Eigen::Vector2d z = h0*q;
for (int i = 0; i < n; ++i)
{
double b = rho[i] * y[i].transpose() * z;
z += s[i] * (a[i] - b);
}
return z;
}
double LBFGS_x(double x0, int max_iter, double epsilon)
{
int i = 0;
double beta = 0.5;
double delta = 0.25;
std::vector<double> s, y, rho;
double xnew;
while (i < max_iter)
{
double gk = df(x0);
double dk = - twoloop(s, y, rho, gk);
int mk = 0;
while (mk < 10)
{
if (f(x0 + pow(beta, mk)*dk) < f(x0) + delta*pow(beta, mk)*gk*dk)
break;
++mk;
}
xnew = x0 + pow(beta, mk)*dk;
float sk = xnew - x0;
float yk = df(xnew) - gk;
if (sk*yk > 0)
{
rho.push_back(1 / (sk*yk));
s.push_back(sk);
y.push_back(yk);
}
if (rho.size() > 5)
{
rho.erase(rho.begin());
s.erase(s.begin());
y.erase(y.begin());
}
++i;
std::cout << "迭代次数:" << i << " " << x0 << std::endl;
if (abs(f(xnew) - f(x0)) < epsilon)
break;
x0 = xnew;
}
return xnew;
}
std::vector<double> LBFGS_x0x1(std::vector<double> X0, int max_iter, double epsilon)
{
int i = 0;
double beta = 0.5;
double delta = 0.25;
std::vector<Eigen::Vector2d> s, y;
std::vector<double> rho;
std::vector<double> Xnew;
while (i < max_iter)
{
Eigen::Vector2d gk;
gk << df0(X0), df1(X0);
Eigen::Vector2d dk = - twoloop(s, y, rho, gk);
int mk = 0;
while (mk < 10)
{
Xnew = { X0[0] + pow(beta, mk)*dk(0), X0[1] + pow(beta, mk)*dk(1) };
if (f(Xnew) < f(X0) + delta*pow(beta, mk)*gk.transpose()*dk)
break;
++mk;
}
Xnew = { X0[0] + pow(beta, mk)*dk(0), X0[1] + pow(beta, mk)*dk(1) };
Eigen::Vector2d sk;
sk << pow(beta, mk)*dk(0), pow(beta, mk)*dk(1);
Eigen::Vector2d yk;
yk << df0(Xnew), df1(Xnew);
yk -= gk;
if ((sk.transpose()*yk)(0, 0) > 0)
{
rho.push_back(1 / (sk.transpose()*yk));
s.push_back(sk);
y.push_back(yk);
}
if (rho.size() > 5)
{
rho.erase(rho.begin());
s.erase(s.begin());
y.erase(y.begin());
}
++i;
std::cout << "迭代次数:" << i << " " << X0[0] << " " << X0[1] << std::endl;
if (abs(f(Xnew) - f(X0)) < epsilon)
break;
X0 = Xnew;
}
return X0;
}
int main(int argc, char* argv[])
{
double result = LBFGS_x(10, 50000, 1e-5);
std::cout << "最佳迭代位置:" << result << std::endl;
std::vector<double> results = LBFGS_x0x1({ 10,10 }, 50000, 1e-5);
std::cout << "最佳迭代位置:" << results[0] << " " << results[1] << std::endl;
system("pause");
return EXIT_SUCCESS;
}