Preface
- This series records the homework I, a complete beginner, did while following the Bilibili up主 刘二大人's PyTorch course; the original video is linked here.
Problem
Compute the gradient of the loss with respect to each weight in the model $\hat{y} = w_1 x^2 + w_2 x + b$.
Analysis
Flowchart
- First, draw the flowchart (the mermaid version didn't look good, so I drew it shape by shape in PPT instead).
- Forward propagation
- Backward propagation
Formulas
- Forward propagation
$$\hat{y} = w_1 x^2 + w_2 x + b \\ r = \hat{y} - y \\ loss = r^2$$
- Backward propagation, i.e., the partial derivative of the loss with respect to each weight
$$\frac{\partial loss}{\partial \hat{y}} = \frac{\partial loss}{\partial r} \cdot \frac{\partial r}{\partial \hat{y}} = 2r = 2(\hat{y} - y) = 2(w_1 x^2 + w_2 x + b - y)$$
$$\frac{\partial loss}{\partial w_1} = \frac{\partial loss}{\partial \hat{y}} \cdot \frac{\partial \hat{y}}{\partial w_1} = 2r \cdot x^2 = 2x^2(w_1 x^2 + w_2 x + b - y)$$
$$\frac{\partial loss}{\partial w_2} = \frac{\partial loss}{\partial \hat{y}} \cdot \frac{\partial \hat{y}}{\partial w_2} = 2r \cdot x = 2x(w_1 x^2 + w_2 x + b - y)$$
$$\frac{\partial loss}{\partial b} = \frac{\partial loss}{\partial \hat{y}} \cdot \frac{\partial \hat{y}}{\partial b} = 2r = 2(w_1 x^2 + w_2 x + b - y)$$
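As a quick sanity check of these formulas, plugging in the values used in the verification below ($x=1$, $y=2$, $w_1=1$, $w_2=2$, $b=2$) gives $\hat{y} = 1 + 2 + 2 = 5$ and $r = 5 - 2 = 3$, so
$$\frac{\partial loss}{\partial w_1} = 2x^2 r = 6, \quad \frac{\partial loss}{\partial w_2} = 2xr = 6, \quad \frac{\partial loss}{\partial b} = 2r = 6$$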
Verifying the formulas
```python
import torch

x = 1.0
y = 2.0
w = torch.Tensor([1.0, 2.0])
b = torch.Tensor([2.0])
w.requires_grad = True
b.requires_grad = True

def forward(x):
    return w[0] * x ** 2 + w[1] * x + b
```
- Computed with the formulas
In[]: w1_ = 2 * x ** 2 * (forward(x).item() - y)
w2_ = 2 * x * (forward(x).item() - y)
b_ = 2 * (forward(x).item() - y)
w1_, w2_, b_
Out[]: (6.0, 6.0, 6.0)
- Computing the gradients with `torch`
In[]: loss = (forward(x) - y) ** 2
loss.backward()
w.grad[0].item(), w.grad[1].item(), b.grad.item()
Out[]: (6.0, 6.0, 6.0)
- The results match.
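As an extra cross-check (not from the original post), the same gradients can also be obtained with `torch.autograd.grad`, which returns them directly instead of writing them into `w.grad` / `b.grad`. A minimal sketch, assuming the `forward`, `w`, `b`, `x`, and `y` defined above:

```python
# Sketch: recompute the loss and ask autograd for the gradients directly.
# The values should match the ones above, i.e. 6.0 for each parameter.
loss = (forward(x) - y) ** 2
grad_w, grad_b = torch.autograd.grad(loss, [w, b])
print(grad_w, grad_b)  # expected: tensor([6., 6.]) tensor([6.])
```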
Code walkthrough
- Next, let's look at the relationship between the loss and the gradient of each weight.
- The training set is as follows
```python
X = [1.0, 2.0, 3.0]
y = [2.0, 4.0, 6.0]
```
- Plain version
```python
import time
import matplotlib.pyplot as plt

# Initialize the weights
w = torch.Tensor([2.0, 0.0])
b = torch.Tensor([2.0])
w.requires_grad = True
b.requires_grad = True

def forward(x):
    return w[0] * x ** 2 + w[1] * x + b

def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) ** 2

# Collected for plotting
result = {
    'w1': [],
    'w2': [],
    'b': [],
    'loss': []
}

for step in range(1, 1001):
    for train_x, train_y in zip(X, y):
        # Compute the loss and backpropagate
        l = loss(train_x, train_y)
        l.backward()
        result['w1'].append(w.grad[0].item())
        result['w2'].append(w.grad[1].item())
        result['b'].append(b.grad.item())
        result['loss'].append(l.item())
        print('\rEpoch: {:>5d}/1000 [{}{}] Gradient: w1={:> .2f} w2={:> .2f} b={:> .2f} loss={:.4f}'.format(
            step, '■' * int(step / 50), '□' * (20 - int(step / 50)),
            w.grad[0], w.grad[1], b.grad[0], l.item()
        ), end='')
        # Update the weights, then clear the gradients for the next step
        w.data -= 0.01 * w.grad.data
        b.data -= 0.01 * b.grad.data
        w.grad.data.zero_()
        b.grad.data.zero_()
        time.sleep(0.001)

for i, pic in enumerate(list(result.keys())[:-1]):
    plt.plot(result[pic], result['loss'], '-.', label=pic)
plt.xlabel('gradient')
plt.ylabel('loss')
plt.legend()
plt.show()
```
- Training progress
- Prediction result (after only 1000 epochs the parameters have not yet reached the exact fit $w_1=0$, $w_2=2$, $b=0$, so the prediction at $x=4$ is still noticeably above 8)
In[]: print('Predict (after training): x=%.2f, w1=%.4f, w2=%.4f, b=%.4f, predict=%.4f' % (4, w[0], w[1], b, forward(4).item()))
Out[]: Predict (after training): x=4.00, w1=0.2878, w2=0.7126, b=1.1960, predict=8.6515
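As a side note (not part of the original post), the manual `w.data -= ...` updates and `grad.data.zero_()` calls can also be handled by `torch.optim.SGD`. A minimal sketch of the same per-sample loop, assuming the `X`, `y`, `forward`, and `loss` definitions from above:

```python
# Sketch: same training loop, but with torch.optim.SGD doing the update.
w = torch.Tensor([2.0, 0.0])
b = torch.Tensor([2.0])
w.requires_grad = True
b.requires_grad = True
optimizer = torch.optim.SGD([w, b], lr=0.01)

for step in range(1, 1001):
    for train_x, train_y in zip(X, y):
        l = loss(train_x, train_y)  # forward() reads the global w and b
        optimizer.zero_grad()       # clear gradients from the previous step
        l.backward()                # backpropagate
        optimizer.step()            # w -= lr * w.grad, b -= lr * b.grad
```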
- Encapsulated as a class
```python
class BackPropagation:
    def __init__(self, w1=1.0, w2=0.0, b=2.0, n_iter=10000, eta=0.01):
        self.w = torch.Tensor([w1, w2])
        self.b = torch.Tensor([b])
        self.w.requires_grad = True
        self.b.requires_grad = True
        # Number of iterations
        self.n_iter = n_iter
        # Learning rate
        self.eta = eta
        # Delay between updates (slows down the progress display)
        self.delay = 0.1 ** (len(str(n_iter)) + 1) * n_iter / pow(10, len(str(n_iter)) - 1)

    def __forward(self, x):
        return self.w[0] * x ** 2 + self.w[1] * x + self.b

    def __loss(self, x, y):
        y_pred = self.__forward(x)
        return (y_pred - y) ** 2

    def train(self, X, y):
        result = {
            'w1': [],
            'w2': [],
            'b': [],
            'loss': []
        }
        for step in range(1, self.n_iter + 1):
            for train_x, train_y in zip(X, y):
                loss = self.__loss(train_x, train_y)
                loss.backward()
                result['w1'].append(self.w.grad[0].item())
                result['w2'].append(self.w.grad[1].item())
                result['b'].append(self.b.grad[0].item())
                result['loss'].append(loss.item())
                print('\rEpoch: {:>{}d}/{} [{}{}] Gradient: w1={:>.2f} w2={:>.2f} b={:>.2f} loss={:>.4f}'.format(
                    step, len(str(self.n_iter)), self.n_iter,
                    '■' * int(step / self.n_iter * 20),
                    '□' * (20 - int(step / self.n_iter * 20)),
                    self.w.grad[0], self.w.grad[1], self.b.grad[0], loss.item()
                ), end='')
                self.w.data -= self.eta * self.w.grad.data
                self.b.data -= self.eta * self.b.grad.data
                self.w.grad.data.zero_()
                self.b.grad.data.zero_()
                time.sleep(self.delay)
        for i, pic in enumerate(list(result.keys())[:-1]):
            plt.plot(result[pic], result['loss'], '-.', label=pic)
        plt.xlabel('gradient')
        plt.ylabel('loss')
        plt.legend()
        plt.show()

    def predict(self, x):
        result = self.__forward(x)
        print('Predict: x=%.2f result=%.4f' % (x, result))
        return result
```
- Training progress
In[]: model = BackPropagation()
model.train(X, y)
Out[]: Epoch: 10000/10000 [■■■■■■■■■■■■■■■■■■■■] Gradient: w1=0.01 w2=0.00 b=0.00 loss=0.00000003
- Prediction result
In[]: model.predict(4)
Out[]: Predict: x=4.00 result=8.0038
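Since the three training points are fit exactly by $w_1=0$, $w_2=2$, $b=0$, another way to judge convergence (not in the original post) is to look at the learned parameters themselves:

```python
# Inspect the learned parameters; with enough iterations they should be
# close to the exact fit w1=0, w2=2, b=0 for this training set.
print(model.w.data, model.b.data)
```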
Closing
That's all I have to share. My knowledge is still limited, so there are bound to be mistakes; corrections from more experienced readers are very welcome.
Feel free to leave any questions in the comments.