Multiple Linear Regression Model
$$
w=[w_1,w_2,\dots,w_n]
$$

$$
\hat{y}^{(i)}=f_{\vec{w},b}(\vec{x})=\vec{w}\cdot\vec{x}+b
$$
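As a quick illustration (a minimal sketch that is not part of the original notes; the values of `w`, `b`, and `x_i` below are made up), the prediction for a single example is just a dot product plus the bias:

```python
import numpy as np

# hypothetical weights, bias, and one example with 3 features (illustrative values only)
w = np.array([0.5, -1.2, 3.0])
b = 4.0
x_i = np.array([2.0, 1.0, 0.5])

# y_hat^(i) = w . x^(i) + b
y_hat_i = np.dot(w, x_i) + b
print(y_hat_i)  # 0.5*2.0 + (-1.2)*1.0 + 3.0*0.5 + 4.0 = 5.3
```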
Cost function
Squared error (mean squared error) cost function:
$$
J(\vec{w},b)=\frac{1}{2m}\sum_{i=1}^{m}\left(\hat{y}^{(i)}-y^{(i)}\right)^2
$$
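The code later in these notes computes this cost with an explicit loop (`get_cost`). As an alternative sketch, the same value can be computed in vectorized form, assuming `x` is an (m, n) matrix and `y` an (m,) vector:

```python
import numpy as np

def get_cost_vectorized(x, y, w, b):
    """Vectorized J(w, b); equivalent to the looped get_cost defined below."""
    m = x.shape[0]
    errors = x @ w + b - y            # shape (m,): w.x^(i) + b - y^(i) for every example
    return np.sum(errors ** 2) / (2 * m)
```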
Computing the partial derivatives
$$
\begin{aligned}
\frac{\partial}{\partial w_j}J(\vec{w},b)
&=\frac{\partial}{\partial w_j}\frac{1}{2m}\sum_{i=1}^{m}\left(\vec{w}\cdot\vec{x}^{(i)}+b-y^{(i)}\right)^2\\
&=\frac{1}{2m}\sum_{i=1}^{m}2\left(\vec{w}\cdot\vec{x}^{(i)}+b-y^{(i)}\right)x_j^{(i)}
=\frac{1}{m}\sum_{i=1}^{m}x_j^{(i)}\left(\vec{w}\cdot\vec{x}^{(i)}+b-y^{(i)}\right)
\end{aligned}
$$
$$
\begin{aligned}
\frac{\partial}{\partial b}J(\vec{w},b)
&=\frac{\partial}{\partial b}\frac{1}{2m}\sum_{i=1}^{m}\left(\vec{w}\cdot\vec{x}^{(i)}+b-y^{(i)}\right)^2\\
&=\frac{1}{2m}\sum_{i=1}^{m}2\left(\vec{w}\cdot\vec{x}^{(i)}+b-y^{(i)}\right)
=\frac{1}{m}\sum_{i=1}^{m}\left(\vec{w}\cdot\vec{x}^{(i)}+b-y^{(i)}\right)
\end{aligned}
$$
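Both partial derivatives can also be computed in a single vectorized pass. This is a sketch under the same assumptions as before (`x` is (m, n), `y` is (m,)), mirroring what the looped `get_gradient` below does:

```python
import numpy as np

def get_gradient_vectorized(x, y, w, b):
    """Vectorized gradients of J(w, b); equivalent to the looped get_gradient defined below."""
    m = x.shape[0]
    errors = x @ w + b - y        # shape (m,)
    dj_dw = x.T @ errors / m      # (1/m) * sum_i x_j^(i) * error^(i), for every feature j
    dj_db = np.sum(errors) / m    # (1/m) * sum_i error^(i)
    return dj_dw, dj_db
```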
Gradient descent
$$
w_j=w_j-\alpha\frac{\partial}{\partial w_j}J(\vec{w},b)\qquad(j=1,2,\dots,n)
$$

$$
b=b-\alpha\frac{\partial}{\partial b}J(\vec{w},b)
$$
```python
import math
import numpy as np

# Cost function
def get_cost(x, y, w, b):
    # number of training examples
    m = x.shape[0]
    total_cost = 0
    for i in range(m):
        error = np.dot(x[i, :], w) + b - y[i]
        total_cost = total_cost + error ** 2
    cost = total_cost / (2 * m)
    return cost

# Gradient function
def get_gradient(x, y, w, b):
    # number of examples and number of features
    m = x.shape[0]
    n = x.shape[1]
    dj_dw = np.zeros((n,))
    dj_db = 0.0
    for i in range(m):
        error = np.dot(x[i, :], w) + b - y[i]
        dj_db += error
        for j in range(n):
            dj_dw[j] += error * x[i, j]
    dj_db = dj_db / m
    dj_dw = dj_dw / m
    return dj_dw, dj_db

# Gradient descent
def gradient_descent(x, y, w_in, b_in, alpha, num_iters, cost_function, gradient_function):
    '''
    x: input matrix (m examples x n features), numpy.ndarray
    y: target vector, numpy.ndarray
    w_in: initial w vector
    b_in: initial b
    alpha: learning rate
    num_iters: number of iterations
    cost_function: cost function
    gradient_function: gradient function
    '''
    J_history = []  # record the cost at every iteration
    b = b_in
    w = w_in
    for i in range(num_iters):
        # compute the gradients and update the parameters w, b
        dj_dw, dj_db = gradient_function(x, y, w, b)
        w = w - dj_dw * alpha
        b = b - dj_db * alpha
        # save the current cost J and parameters (w, b) -> useful for later visualization
        J_history.append(cost_function(x, y, w, b))
        # print roughly ten progress updates during training
        if i % math.ceil(num_iters / 10) == 0:
            print(f"Iteration {i}: Cost {J_history[-1]} ",
                  f"dj_dw: {dj_dw}, dj_db: {dj_db} ",
                  f"w: {w}, b:{b}")
    print(f'final w:{w}, b:{b}')
    # compare target values with predicted values
    y_hat = np.dot(x, w) + b
    for i in range(x.shape[0]):
        print(f'target value:{y[i]}, predicted value:{y_hat[i]}, error:{y[i]-y_hat[i]}')
    return w, b, J_history
```
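A small end-to-end usage sketch of the functions above (the toy dataset and hyperparameters are assumptions chosen only to show the call pattern): the targets follow y = 2*x1 + 3*x2 + 1, so the learned `w` should move toward [2, 3] and `b` toward 1.

```python
import numpy as np

# hypothetical toy dataset: y = 2*x1 + 3*x2 + 1
x_train = np.array([[1.0, 2.0],
                    [2.0, 1.0],
                    [3.0, 3.0],
                    [4.0, 5.0]])
y_train = np.array([9.0, 8.0, 16.0, 24.0])

# start from zero parameters
w_init = np.zeros(x_train.shape[1])
b_init = 0.0

# alpha and num_iters are illustrative; tune them for your own data
w_final, b_final, J_history = gradient_descent(
    x_train, y_train, w_init, b_init,
    alpha=0.01, num_iters=10000,
    cost_function=get_cost, gradient_function=get_gradient)
# w_final should approach [2, 3] and b_final should approach 1
```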