本质都是基于梯度下降法。
牛顿法: 依赖于Hessen矩阵非奇异,收敛较快
高斯 牛顿法:依赖二阶项的jacobian 解决了Hessen非奇异的问题,收敛相对慢
LM下降法:表现是使用一个因子拟合牛顿和高斯牛顿法
#coding=utf-8
from numpy import *
import sympy
def Hessen(f,x,x_value):
#行 列表示Hesse矩阵
num=len(x)
lis2=mat([[1.0 for i in range(num)] for i in range(num)],dtype=float64)
for i in range(num):
for j in range(i,num):
f1=sympy.diff(f,x[i],1)
f2=sympy.diff(f1,x[j],1)
value= f2.subs(x_value)
lis2[i,j]=value
return lis2
def Jacobian(f,x,x_value):
num=len(x)
lis1=mat([1.0 for i in range(num)])
#print lis1
for i in range(num):
s2=sympy.diff(f,x[i],1)
value= s2.subs(x_value)
#print value
lis1[0,i] =value
#print lis1[0,i]
return lis1.T
def handle(n):
x=[1 for i in range(n)]
for i in range(n):
str1='x['+str(i)+']'
x[i] = sympy.Symbol(str1)
return x
def Newton(f,x_value,maxIter):
num=len(x_value.keys())
x=handle(num)
#求逆运算的问题
error=f.subs(x_value)
try:
for i in range(maxIter):
hessen=Hessen(f,x,x_value)
I=mat(zeros((num,num)))
#for q in range(num):
# I[q,q]=0.001
jacobian=Jacobian(f,x,x_value)
delta=-1*dot((linalg.inv(hessen)+I),jacobian)
#delta=dot(delta,error)
#print jacobian,hessen,delta
for j in range(num):
x_value[x[j]]=x_value[x[j]]+delta[j,0]
error=f.subs(x_value)
print i,error
except:
print "gradient is vanish"
return x_value,error
def GaussNewton(f,x_value,maxIter):
num=len(x_value.keys())
x=handle(num)
#求逆运算的问题
error=f.subs(x_value)
try:
for i in range(maxIter):
jacobian=Jacobian(f,x,x_value)
J1=dot(jacobian.T,jacobian)
#print J1
J2=-1.0*dot(linalg.inv(J1),jacobian.T)
delta=dot(J2,error).T
for j in range(num):
x_value[x[j]]=x_value[x[j]]+delta[j,0]
error=f.subs(x_value)
print i,error
except:
pass
return x_value,error
def LM(f,x_value,maxIter):
num=len(x_value.keys())
x=handle(num)
#求逆运算的问题
#last_value=f.subs(x_value)
error=f.subs(x_value)
lr=0.000001
for i in range(maxIter):
jacobian=Jacobian(f,x,x_value)
J1=dot(jacobian.T,jacobian)
#print jacobian
I=mat(zeros((len(J1),len(J1))))
for q in range(len(J1)):
I[q,q]=lr
J1=J1+I
J2=-1.0*dot(linalg.inv(J1),jacobian.T)
delta=dot(J2,error).T
for j in range(num):
x_value[x[j]]=x_value[x[j]]+delta[j,0]
if(f.subs(x_value)>=error):
lr=lr*5
else:
lr=0.000001
error=f.subs(x_value)
print i,error
return x_value,error
x=handle(2)
# sin cos need like : sympy.sin()
f=(3-x[0])**2+(7-x[1])**2
x_value={x[0]:-1.5,x[1]:0.5};
maxIter=100
lis1=Jacobian(f,x,x_value)
#print lis1
lis2=Hessen(f,x,x_value)
#print lis2
#限制因素 非奇异矩阵
#Newton(f,x_value,100)
#GaussNewton(f,x_value,100)
#LM(f,x_value,100)