The network architecture is described in https://blog.csdn.net/Acmer_future_victor/article/details/103490422.
# __author: Y
# date: 2019/12/11
import numpy as np
import torch
import torch.nn as nn
# N is the number of training samples, D_in the input dimension,
# H the hidden-layer width, D_out the output dimension
N, D_in, H, D_out = 64, 1000, 100, 10
# Randomly create some training data
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
learning_rate = 1e-4
# Define the network model
model = nn.Sequential(
    nn.Linear(D_in, H, bias=False),  # w_1 * x; nn.Linear adds a bias b_1 by default, bias=False drops it
    nn.ReLU(),                       # activation; nn.ReLU takes no size arguments
    nn.Linear(H, D_out, bias=False)
)
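# What the stack above computes, written out (W1 = model[0].weight, W2 = model[2].weight):
#   h      = relu(x @ W1.T)   W1 has shape (H, D_in)
#   y_pred = h @ W2.T         W2 has shape (D_out, H)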
# Initialize the weights, to get a good optimization result
nn.init.normal_(model[0].weight)
nn.init.normal_(model[2].weight)
# model = model.cuda()
# Loss function
loss_fn = nn.MSELoss(reduction='sum')
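# reduction='sum' adds up the squared errors over all N * D_out elements;
# the default reduction='mean' would average them instead, shrinking the gradients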
# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
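# Adam keeps per-parameter running averages of the gradient and its square,
# adapting the effective step size for each weight individually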
for it in range(500):
    # Forward pass
    y_pred = model(x)  # calls model.forward(x) under the hood
    # Compute the loss; this builds the computation graph
    loss = loss_fn(y_pred, y)
    print(it, loss.item())
    optimizer.zero_grad()
    # Backward pass: compute the gradient of the loss w.r.t. every parameter
    loss.backward()  # this single call computes all the gradients at once
    # Update the weights w1 and w2; the commented lines below are the manual alternative
    # with torch.no_grad():
    #     for param in model.parameters():
    #         param -= learning_rate * param.grad
    # model.zero_grad()  # keeps param.grad from accumulating; clearing before the next backward is enough
    optimizer.step()
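For comparison, the commented-out lines inside the loop sketch the manual update that optimizer.step() replaces. Below is a minimal plain-SGD version of the same loop, assuming the model, loss_fn, x, y, and learning_rate defined above; it is an illustrative sketch, not part of the original script.

for it in range(500):
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    model.zero_grad()  # clear accumulated gradients before this backward pass
    loss.backward()
    with torch.no_grad():  # disable autograd while updating the weights in place
        for param in model.parameters():
            param -= learning_rate * param.grad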