For a multivariate linear function:
$$f(x) = w_1 x_1 + w_2 x_2 + \dots + w_n x_n + b$$
Anyone who has taken linear algebra will recognize that this can be written in matrix form:
$$f(x) = WX + b$$
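To make the notation concrete, here is a minimal numpy sketch of the matrix form (all values are made up for illustration):

```python
import numpy as np

W = np.array([0.5, -1.2, 3.0])   # weight vector (made-up values)
X = np.array([0.2, 0.1, -1.0])   # one sample's features (made-up values)
b = 0.7

f_x = W.dot(X) + b               # f(x) = WX + b
print(f_x)                       # 0.5*0.2 + (-1.2)*0.1 + 3.0*(-1.0) + 0.7 = -2.32
```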
Now suppose we have the following data:
| x1 | x2 | x3 | … | xn | y |
|----|----|----|----|----|----|
| 0.2 | 0.1 | -1 | … | 2.1 | 0.1 |
| … | … | … | … | … | … |
We want to fit this data with f(x), so we construct a loss function:
$$loss(W, b) = (f(x) - y)^2$$
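Note that this is the error of a single sample; the code further down averages it over all n data points (`np.mean`), so the quantity actually minimized is the mean squared error:

$$loss(W, b) = \frac{1}{n}\sum_{i=1}^{n}\left(f(x^{(i)}) - y^{(i)}\right)^2$$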
$$\frac{\partial loss}{\partial W} = 2X \cdot (f(x) - y)$$

$$\frac{\partial loss}{\partial b} = 2(f(x) - y)$$
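These formulas are easy to sanity-check numerically. The sketch below (my addition; all values made up) compares the analytic gradients against finite differences for a single sample:

```python
import numpy as np

X = np.array([0.2, 0.1, -1.0])   # one sample's features (made up)
y = 0.1                          # its target (made up)
W = np.array([0.5, -1.2, 3.0])   # current weights (made up)
b = 0.7

loss = lambda W, b: (W.dot(X) + b - y) ** 2

# Analytic gradients from the formulas above
dW = 2 * X * (W.dot(X) + b - y)
db = 2 * (W.dot(X) + b - y)

# Finite-difference approximations
eps = 1e-6
dW0_num = (loss(W + np.array([eps, 0, 0]), b) - loss(W, b)) / eps
db_num = (loss(W, b + eps) - loss(W, b)) / eps
print(dW[0], dW0_num)   # should agree to several decimal places
print(db, db_num)
```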
To find the W and b that minimize the loss, set the derivatives to zero:
$$\frac{\partial loss}{\partial W} = 2X \cdot (f(x) - y) = 0$$

$$X \cdot (WX + b - y) = 0$$
This yields:
$$W = (X^T X)^{-1} X^T y$$

$$b = \bar{y} - W\bar{X}$$

(writing $\bar{y}$ and $\bar{X}$ for the sample means, which is how the intercept is recovered in the code below)
From here W and b can be solved for directly. The code:
```python
import matplotlib.pyplot as plt
# %matplotlib inline  # uncomment when running in a Jupyter notebook
import numpy as np

x = np.arange(11)
y = np.array([0.5, 9.36, 52, 191, 350, 571, 912, 1207, 1682, 2135, 2684])
plt.plot(x, y, '*')

# Design matrix with features x^3, x^2, x (a cubic fit), shape (n, m)
X = np.array([x ** 3, x ** 2, x]).T
print(X.shape, y.shape)

# Normal equation: W = (X^T X)^{-1} X^T y
w = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
# Intercept from the means: b = ȳ - W·X̄
b = y.mean() - w.dot(X.mean(axis=0))

f = lambda x: w.dot(np.array([x ** 3, x ** 2, x])) + b
plt.plot(x, y, '*')
X_LINE = np.linspace(0, 12, 100)
plt.plot(X_LINE, f(X_LINE))
plt.show()
```
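As a sanity check (not in the original post), numpy's built-in least-squares solver fits w and b jointly and avoids explicitly inverting X^T X, which is numerically safer. It is not algebraically identical to the two-step recipe above, which fits w without an intercept and then backs b out of the means, so small differences are expected:

```python
# Append a column of ones so the intercept is part of the least-squares fit
X_aug = np.hstack([X, np.ones((X.shape[0], 1))])
coef, *_ = np.linalg.lstsq(X_aug, y, rcond=None)
print(coef[:-1], coef[-1])   # jointly fitted w and b
```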
But what if we solve it with gradient descent instead? Written as a class:
```python
class LineRegression:
    def __init__(self, X, Y):
        '''
        Initialize with the data: m features, n data points.
        :param X: ndarray, shape=(n, m)
        :param Y: ndarray, size=n
        '''
        self.X = X
        self.Y = Y.reshape(1, -1)
        self.set_f()

    def set_f(self):
        '''
        Define the model, the loss, and its derivatives.
        '''
        self.f = lambda w, b: w.dot(self.X.T) + b
        self.loss = lambda w, b: np.mean((self.f(w, b) - self.Y) ** 2)
        self.loss_dw = lambda w, b: np.mean(2 * self.X.T * (self.f(w, b) - self.Y), axis=1)
        self.loss_db = lambda w, b: np.mean(2 * (self.f(w, b) - self.Y), axis=1)[0]

    # learning_rate : learning rate
    # precision     : loss threshold for stopping
    # max_pass      : maximum number of iterations
    def learning(self, learning_rate=0.000001, precision=0.00001, max_pass=5000):
        # Random initial w, b
        min_w = np.random.random(size=(1, self.X.shape[-1])) * 2 - 1
        # min_w = np.zeros((1, 3))
        min_b = 0.01
        # Iteration counter
        count = 0
        # Record the trajectory
        self.wb_list = []
        while True:
            self.wb_list.append([min_w.copy(), min_b, self.loss(min_w, min_b)])
            # Gradient at the current point
            dw_ = self.loss_dw(min_w, min_b)
            db_ = self.loss_db(min_w, min_b)
            # Step along the negative gradient
            step_w = learning_rate * dw_
            step_b = learning_rate * db_
            print('count:{},w:{},b:{},step:{}'.format(count, min_w, min_b, [step_w, step_b]))
            min_w -= step_w
            min_b -= step_b
            count += 1
            if self.loss(min_w, min_b) < precision or count > max(max_pass, 0):
                self.wb_list.append([min_w.copy(), min_b, self.loss(min_w, min_b)])
                break
        print('count:{},w:{},b:{},step:{}'.format(count, min_w, min_b, [step_w, step_b]))
        return self.wb_list
```
Usage:
```python
X = np.arange(11)
X = np.array([X ** 3, X ** 2, X])                 # shape (m, n); transposed below
Y = np.array([0.5, 9.36, 52, 191, 350, 571, 912, 1207, 1682, 2135, 2684])
lr = LineRegression(X.T, Y)                       # constructor expects shape (n, m)
wb_list = lr.learning(learning_rate=0.000001)

import shutil, os
save_path = None                                  # set to a directory to save frames
if save_path:
    shutil.rmtree(save_path, ignore_errors=True)
    os.makedirs(save_path)

W = [wb[0] for wb in wb_list]
B = [wb[1] for wb in wb_list]
LINE_X = np.linspace(0, 13, 20)
fig, ax = plt.subplots()
# Animate every 5th step of the descent
for i in range(0, len(W), 5):
    ax.cla()
    plt.xlim(0, 13)
    plt.ylim(-10, 3000)
    ax.plot(X[-1].reshape(-1), Y.reshape(-1), '.')
    LINE_Y = W[i].dot(np.array([LINE_X ** 3, LINE_X ** 2, LINE_X])) + B[i]
    ax.plot(LINE_X, LINE_Y.reshape(-1))
    if save_path:
        plt.savefig("{}{:05d}.png".format(save_path, i))
    plt.pause(0.001)
plt.show()
```
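The slow convergence has a concrete cause: the features x³, x², and x differ by orders of magnitude, so the Hessian of the mean-squared loss, 2XᵀX/n, is very badly conditioned and no single learning rate suits every coordinate. A quick check (my addition):

```python
H = 2 * lr.X.T.dot(lr.X) / lr.X.shape[0]   # Hessian of the loss w.r.t. w
print(np.linalg.cond(H))                   # enormous condition number => slow descent
```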
However, convergence is very slow, so I tweaked the step size:
```python
class LineRegression2(LineRegression):
    def set_f(self):
        super().set_f()
        # Second-derivative (curvature) estimates, used below to scale the step
        self.loss_ddw = lambda w, b: np.mean(2 * self.X * self.X, axis=0)
        self.loss_ddb = lambda w, b: np.mean(2 * self.X)

    # learning_rate : learning rate
    # precision     : loss threshold for stopping
    # max_pass      : maximum number of iterations
    # www, bbb, kkk : heuristic constants for the per-coordinate step scaling
    def learning(self, learning_rate=0.000005, precision=0.00001, max_pass=5000, www=200, bbb=1, kkk=10):
        # Random initial w, b
        min_w = np.random.random(size=(1, self.X.shape[-1])) * 2 - 1
        min_b = 0.01
        # Iteration counter
        count = 0
        # Record the trajectory
        self.wb_list = []
        while True:
            self.wb_list.append([min_w.copy(), min_b, self.loss(min_w, min_b)])
            # Gradient and curvature at the current point
            dw_ = self.loss_dw(min_w, min_b)
            db_ = self.loss_db(min_w, min_b)
            ddw_ = abs(self.loss_ddw(min_w, min_b))
            ddb_ = abs(self.loss_ddb(min_w, min_b))
            # Step along the negative gradient, boosting low-curvature coordinates
            step_w = learning_rate * dw_ * (np.exp(-ddw_ / kkk) * www + bbb)
            step_b = learning_rate * db_ * (np.exp(-ddb_ / kkk) * www + bbb)
            print('count:{},w:{},b:{},step:{}'.format(count, min_w, min_b, [step_w, step_b]))
            min_w -= step_w
            min_b -= step_b
            count += 1
            if self.loss(min_w, min_b) < precision or count > max(max_pass, 0):
                self.wb_list.append([min_w.copy(), min_b, self.loss(min_w, min_b)])
                break
        print('count:{},w:{},b:{},step:{}'.format(count, min_w, min_b, [step_w, step_b]))
        return self.wb_list
```
When calling:

```python
lr = LineRegression2(X.T, Y)
lr.learning(learning_rate=0.00000030, www=1000, bbb=0.2, kkk=35000)
```
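The exp(-curvature) factor above is a homemade per-coordinate step-size boost. The textbook remedy for the same ill-conditioning is to standardize the features first, after which a single, much larger learning rate works. A minimal sketch (the fitted w and b then live in the standardized feature space; mapping them back to the raw scale is an extra step omitted here):

```python
x_raw = np.arange(11)
X_raw = np.array([x_raw ** 3, x_raw ** 2, x_raw]).T      # shape (n, m)
# Zero mean, unit variance per feature column
X_std = (X_raw - X_raw.mean(axis=0)) / X_raw.std(axis=0)

lr_std = LineRegression(X_std, Y)
wb_list_std = lr_std.learning(learning_rate=0.01)        # iterates settle far sooner
```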
The final result: