1. Extending the Week 1 model to multiple linear regression
$$f_{\mathbf{w},b}(\mathbf{x}) = w_0x_0 + w_1x_1 + \dots + w_{n-1}x_{n-1} + b \tag{1}$$
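Written as a vector product, equation (1) is just $f_{\mathbf{w},b}(\mathbf{x}) = \mathbf{w} \cdot \mathbf{x} + b$, which is how `predict` below computes it. A minimal sketch of the equivalence, assuming NumPy; the feature and weight values here are made up purely for illustration:

```python
import numpy as np

# hypothetical values for n = 3 features, chosen only to illustrate
x = np.array([2104.0, 5.0, 1.0])
w = np.array([0.39, 18.75, -53.36])
b = 785.18

f_sum = w[0]*x[0] + w[1]*x[1] + w[2]*x[2] + b  # equation (1), term by term
f_dot = np.dot(w, x) + b                       # the same value as a dot product
assert np.isclose(f_sum, f_dot)
```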
1.1 Parameters
$$\mathbf{w} = \begin{pmatrix} w_0 \\ w_1 \\ \vdots \\ w_{n-1} \end{pmatrix}, \quad b \text{ is a scalar parameter} \tag{2}$$
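In NumPy terms this is a 1-D array for $\mathbf{w}$ and a plain scalar for $b$; a minimal sketch for the $n=4$ case used in the code below:

```python
import numpy as np

n = 4               # number of features in the training set below
w = np.zeros((n,))  # weight vector, shape (n,)
b = 0.0             # scalar bias
```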
1.2 Cost function
$$J(\mathbf{w},b) = \frac{1}{2m}\sum_{i=0}^{m-1}\left(f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}\right)^2 \tag{3}$$
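The sum in (3) can also be evaluated without an explicit Python loop. A hedged sketch of a vectorized equivalent, assuming `X` has shape `(m, n)` and `y` has shape `(m,)`; the looped version in the listing below computes the same value:

```python
import numpy as np

def compute_cost_vectorized(X, y, w, b):
    """Equation (3) with all m predictions computed at once."""
    m = X.shape[0]
    err = X @ w + b - y              # (m,) vector of residuals
    return np.sum(err ** 2) / (2 * m)
```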
2. Code
As in Week 1, we need to implement three routines:
- cost computation
- gradient computation
- gradient-descent update
2.1 Code
```python
import numpy as np
import copy
import math
import matplotlib.pyplot as plt

plt.style.use("deeplearning.mplstyle")  # style sheet shipped with the course lab
np.set_printoptions(precision=2)


def predict(x, w, b):
    """Predict a single example: f_wb = w . x + b."""
    p = np.dot(x, w) + b
    return p


def compute_cost(X, y, w, b):
    """Compute the cost J(w,b) over all m examples, per equation (3)."""
    m = X.shape[0]
    cost = 0.0
    for i in range(m):
        f_wb_i = np.dot(X[i], w) + b        # prediction for example i
        cost = cost + (f_wb_i - y[i]) ** 2  # squared error
    cost = cost / (2 * m)
    return cost


def compute_gradient(X, y, w, b):
    """Compute the gradient of the cost.

    Returns:
        dj_dw: (n,) gradient with respect to w
        dj_db: scalar gradient with respect to b
    """
    m, n = X.shape
    dj_dw = np.zeros((n,))
    dj_db = 0.0
    for i in range(m):
        err = (np.dot(X[i], w) + b) - y[i]  # prediction error for example i
        for j in range(n):
            dj_dw[j] = dj_dw[j] + err * X[i, j]
        dj_db = dj_db + err
    dj_dw = dj_dw / m
    dj_db = dj_db / m
    return dj_dw, dj_db


def gradient_descent(
        X, y, w_in, b_in, cost_func, gradient_func,
        alpha, num_iters):
    # history of cost values
    J_history = []
    w = copy.deepcopy(w_in)
    b = b_in
    for i in range(num_iters):
        dj_dw, dj_db = gradient_func(X, y, w, b)
        w = w - alpha * dj_dw   # simultaneous parameter update
        b = b - alpha * dj_db
        if i < 100000:          # cap the history to avoid exhausting memory
            J_history.append(cost_func(X, y, w, b))
        if i % math.ceil(num_iters / 10) == 0:
            print(f'Iteration {i:4d}: Cost {J_history[-1]:8.2f}')
    return w, b, J_history


if __name__ == '__main__':
    print('multiple variable solution start'.center(60, '='))
    X_train = np.array([[2104, 5, 1, 45],
                        [1416, 3, 2, 40],
                        [852, 2, 1, 35]])
    y_train = np.array([460, 232, 178])
    print(f'x.shape={X_train.shape},y.shape={y_train.shape}')
    # parameter initialization
    b_init = 0
    w_init = np.zeros(shape=(4,))
    # optional sanity checks (uncomment to run):
    # x_vec = X_train[0, :]
    # f_wb = predict(x_vec, w_init, b_init)
    # print(f_wb)
    # cost = compute_cost(X_train, y_train, w_init, b_init)
    # print(cost)
    # tmp_dj_dw, tmp_dj_db = compute_gradient(X_train, y_train, w_init, b_init)
    # print(f'dj_dw={tmp_dj_dw}')
    # print(f'dj_db={tmp_dj_db}')
    # hyperparameters
    iterations = 1000
    alpha = 5e-7
    w_final, b_final, J_hist = gradient_descent(
        X_train, y_train, w_init, b_init, compute_cost,
        compute_gradient, alpha, iterations
    )
    print(f'b,w found by gradient descent: {b_final:.2f}, {w_final}')
    m, _ = X_train.shape
    for i in range(m):
        print(f'prediction: {predict(X_train[i], w_final, b_final):.2f}',
              f'target value={y_train[i]}')
    # plot the cost history: the full run and the tail after iteration 100
    fig, (ax1, ax2) = plt.subplots(1, 2, constrained_layout=True, figsize=(12, 4))
    ax1.plot(J_hist)
    ax2.plot(100 + np.arange(len(J_hist[100:])), J_hist[100:])
    ax1.set_title("Cost vs. iteration")
    ax2.set_title("Cost vs. iteration (tail)")
    ax1.set_ylabel('Cost')
    ax2.set_ylabel("Cost")
    ax1.set_xlabel("iteration step")
    ax2.set_xlabel('iteration step')
    plt.show()
```
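For reference, the double loop in `compute_gradient` collapses into two matrix operations: $\frac{\partial J}{\partial \mathbf{w}} = \frac{1}{m} X^T \mathbf{e}$ and $\frac{\partial J}{\partial b} = \frac{1}{m}\sum_i e_i$, where $\mathbf{e}$ is the vector of residuals. A sketch of that alternative (not part of the listing above), under the same shape assumptions:

```python
def compute_gradient_vectorized(X, y, w, b):
    """Same gradient as compute_gradient, via matrix ops."""
    m = X.shape[0]
    err = X @ w + b - y    # (m,) residuals
    dj_dw = X.T @ err / m  # (n,) gradient w.r.t. w
    dj_db = err.mean()     # scalar gradient w.r.t. b
    return dj_dw, dj_db
```

Passing this in place of `compute_gradient` to `gradient_descent` should trace the same cost curve, up to floating-point rounding.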
2.2 Results
```
==============multiple variable solution start==============
x.shape=(3, 4),y.shape=(3,)
Iteration    0: Cost  2529.46
Iteration  100: Cost   695.99
Iteration  200: Cost   694.92
Iteration  300: Cost   693.86
Iteration  400: Cost   692.81
Iteration  500: Cost   691.77
Iteration  600: Cost   690.73
Iteration  700: Cost   689.71
Iteration  800: Cost   688.70
Iteration  900: Cost   687.69
b,w found by gradient descent: -0.00, [ 0.2   0.   -0.01 -0.07]
prediction: 426.19 target value=460
prediction: 286.17 target value=232
prediction: 171.47 target value=178
```