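"""Fit a small neural network to toy 1-D data and use it to verify my understanding of backpropagation.

The original note describes the task as "simulating a straight line"; the
target built in the training section is actually a noisy parabola
(x ** 2 plus uniform noise).
The detailed derivation is given in the notes at the end of this file.
Chain-rule reference: https://zhuanlan.zhihu.com/p/97476874
"""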
import torch
import torch.nn as nn
import torch.optim as optim
import math
import random
import numpy as np
import torch.nn.functional as F
# Set the random seeds
def setup_seed(seed):
    """Seed every random number generator used by this script.

    Seeds Python's ``random`` module, NumPy and PyTorch with the same value,
    seeds the CUDA generators as well when CUDA is available, and sets
    ``cudnn.deterministic = True`` and ``cudnn.benchmark = False``.

    Args:
        seed: Value handed to each seeding function.
    """
    # These generators are independent of one another, so the order in which
    # they are seeded does not affect the resulting state.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)      # seed the GPU generator
        torch.cuda.manual_seed_all(seed)  # seed every GPU when several are used

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


setup_seed(666)
class model(nn.Module):
    """One-hidden-layer network whose forward pass returns the MSE loss.

    The network is ``Linear(n_feature, n_hidden) -> ReLU ->
    Linear(n_hidden, n_feature)``. ``forward`` does not return the
    prediction; it returns the mean-squared error between the prediction
    and ``target``.

    Args:
        n_feature: Size of the input features; also used as the output size.
        n_hidden: Number of units in the hidden layer.
    """

    def __init__(self, n_feature, n_hidden):
        # Zero-argument super() does not depend on the module-level name
        # `model` still referring to this class when __init__ runs.
        super().__init__()
        self.hidden = nn.Linear(n_feature, n_hidden)   # hidden layer
        self.predict = nn.Linear(n_hidden, n_feature)  # output layer

    def forward(self, x, target):
        """Run the network on ``x`` and return the MSE loss against ``target``.

        Args:
            x: Input tensor fed to the hidden layer.
            target: Tensor the network output is compared with.

        Returns:
            The mean-squared-error loss tensor.
        """
        # The names y1 / y2 / y3 match the tensor dumps in the notes at the
        # end of the file.
        y1 = self.hidden(x)    # hidden-layer pre-activation
        y2 = F.relu(y1)        # the hidden layer uses ReLU
        y3 = self.predict(y2)  # network output
        # Compute the loss locally instead of reading a module-level `MSE`
        # global, so the class works on its own. F.mse_loss with its default
        # reduction matches nn.MSELoss().
        return F.mse_loss(y3, target)
# Training loop: two SGD-with-momentum steps on a tiny data set, keeping
# snapshots of the parameters and gradients so each update can be checked by
# hand against the notes at the end of the file.
x_data = torch.linspace(-1, 1, 10)  # keep the data set small to get results quickly
x = torch.unsqueeze(x_data, dim=-1)  # add a trailing dimension of size 1
target = x.pow(2) + 0.2 * torch.rand(x.size())  # x ** 2 plus uniform noise in [0, 0.2)
# print(x)
# print(target)

# Bind the instance to its own name so the class `model` is not shadowed.
net = model(n_feature=1, n_hidden=10)
# A large learning rate also makes the effect of each step visible quickly.
optimizer = optim.SGD(net.parameters(), lr=0.5, momentum=0.9)
# Module-level loss criterion; kept as a global because model.forward may
# look it up by name.
MSE = nn.MSELoss()

for epoch in range(2):
    loss = net(x, target)
    # print(loss)
    optimizer.zero_grad()
    loss.backward()

    params = optimizer.param_groups[0]['params']
    # optimizer.step() updates the parameters in place, so copies are needed
    # for the "before" values to survive the step.
    res1 = [p.detach().clone() for p in params]  # parameters before the step
    # print(res1)
    res2 = [None if p.grad is None else p.grad.clone() for p in params]  # gradients
    # print(res2)
    optimizer.step()
    res3 = [p.detach().clone() for p in params]  # parameters after the step
    # print(res3)

# Stop here; `exit()` is a helper installed by the `site` module for
# interactive sessions, so raise SystemExit directly instead.
raise SystemExit
'''
x = [[-1.0000], [-0.7778], [-0.5556], [-0.3333], [-0.1111], [ 0.1111], [ 0.3333], [ 0.5556], [ 0.7778], [ 1.0000]]
y1 =
tensor([[ 0.0227, -0.3667, -0.7500, 0.7615, -0.2314, -0.4504, -0.3309, -0.9571,
0.0028, 0.1209],
[-0.0875, -0.3437, -0.5818, 0.7740, -0.2332, -0.2375, -0.2946, -0.9627,
-0.1905, 0.2323],
[-0.1977, -0.3206, -0.4136, 0.7865, -0.2349, -0.0245, -0.2583, -0.9682,
-0.3838, 0.3437],
[-0.3079, -0.2976, -0.2454, 0.7990, -0.2367, 0.1884, -0.2220, -0.9738,
-0.5772, 0.4551],
[-0.4182, -0.2746, -0.0772, 0.8115, -0.2384, 0.4014, -0.1856, -0.9794,
-0.7705, 0.5665],
[-0.5284, -0.2516, 0.0910, 0.8240, -0.2402, 0.6143, -0.1493, -0.9849,
-0.9639, 0.6779],
[-0.6386, -0.2286, 0.2592, 0.8365, -0.2419, 0.8273, -0.1130, -0.9905,
-1.1572, 0.7893],
[-0.7488, -0.2056, 0.4274, 0.8490, -0.2437, 1.0402, -0.0767, -0.9961,
-1.3506, 0.9007],
[-0.8590, -0.1826, 0.5956, 0.8615, -0.2454, 1.2532, -0.0404, -1.0017,
-1.5439, 1.0121],
[-0.9692, -0.1596, 0.7638, 0.8740, -0.2471, 1.4661, -0.0041, -1.0072,
-1.7373, 1.1234]], grad_fn=<AddmmBackward>)
y2 =
tensor([[0.0227, 0.0000, 0.0000, 0.7615, 0.0000, 0.0000, 0.0000, 0.0000, 0.0028,
0.1209],
[0.0000, 0.0000, 0.0000, 0.7740, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.2323],
[0.0000, 0.0000, 0.0000, 0.7865, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.3437],
[0.0000, 0.0000, 0.0000, 0.7990, 0.0000, 0.1884, 0.0000, 0.0000, 0.0000,
0.4551],
[0.0000, 0.0000, 0.0000, 0.8115, 0.0000, 0.4014, 0.0000, 0.0000, 0.0000,
0.5665],
[0.0000, 0.0000, 0.0910, 0.8240, 0.0000, 0.6143, 0.0000, 0.0000, 0.0000,
0.6779],
[0.0000, 0.0000, 0.2592, 0.8365, 0.0000, 0.8273, 0.0000, 0.0000, 0.0000,
0.7893],
[0.0000, 0.0000, 0.4274, 0.8490, 0.0000, 1.0402, 0.0000, 0.0000, 0.0000,
0.9007],
[0.0000, 0.0000, 0.5956, 0.8615, 0.0000, 1.2532, 0.0000, 0.0000, 0.0000,
1.0121],
[0.0000, 0.0000, 0.7638, 0.8740, 0.0000, 1.4661, 0.0000, 0.0000, 0.0000,
1.1234]], grad_fn=<ReluBackward0>)
y3 =
tensor([[ 0.1421],
[ 0.1106],
[ 0.0757],
[ 0.0540],
[ 0.0341],
[ 0.0031],
[-0.0372],
[-0.0776],
[-0.1179],
[-0.1583]], grad_fn=<AddmmBackward>)
target = [[1.0624], [0.6590], [0.3310], [0.1313], [0.0499], [0.0160], [0.1774], [0.3256], [0.7196], [1.0016]]
'''
'''
w1_weight_01 =
tensor([[-0.4960],
[ 0.1035],
[ 0.7569],
[ 0.0563],
[-0.0078],
[ 0.9583],
[ 0.1634],
[-0.0251],
[-0.8700],
[ 0.5013]])
w1_weight_grad =
[[-0.0272],
[ 0.0000],
[ 0.0512],
[ 0.0158],
[ 0.0000],
[-0.0294],
[ 0.0000],
[ 0.0000],
[-0.0113],
[ 0.0354]]
w1_weight_02 =
tensor([[-0.4824],
[ 0.1035],
[ 0.7313],
[ 0.0484],
[-0.0078],
[ 0.9730],
[ 0.1634],
[-0.0251],
[-0.8644],
[ 0.4836]])
'''
'''
w2_weight_01 = tensor([[-0.1475, -0.2782, -0.1215, -0.1338, 0.2476, 0.0707, -0.0606, -0.0268,
-0.0613, -0.2990]])
w2_weight_grad_01 (gradient used for the 01 -> 02 update) = tensor([[-4.1780e-03, 0.0000e+00, -3.2279e-01, -7.3368e-01, 0.0000e+00,
-6.7517e-01, 0.0000e+00, 0.0000e+00, -5.2251e-04, -6.1250e-01]])
w2_weight_02 = tensor([[-0.1454, -0.2782, 0.0399, 0.2330, 0.2476, 0.4083, -0.0606, -0.0268,
-0.0610, 0.0072]])
w2_weight_grad_02 (gradient used for the 02 -> 03 update) = tensor([[0.0000, 0.0000, 0.3009, 1.0942, 0.0000, 1.0365, 0.0000, 0.0000, 0.0000,
0.8401]])
w2_weight_03 = tensor([[-0.1436, -0.2782, 0.0347, 0.0161, 0.2476, 0.1939, -0.0606, -0.0268,
-0.0608, -0.1372]])
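Update rule (SGD with momentum; t = 1, 2, ... counts optimizer steps):
v_t = momentum * v_{t-1} + grad_t, with v_0 = 0
w_{t+1} = w_t - lr * v_t
Check with the 4th output-layer weight (lr = 0.5, momentum = 0.9):
step 1: v_1 = -0.73368, so w = -0.1338 - 0.5 * (-0.73368) = 0.2330
step 2: v_2 = 0.9 * (-0.73368) + 1.0942 = 0.4339, so w = 0.2330 - 0.5 * 0.4339 = 0.0161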
'''