1. 设定参数
- n_train, n_test, num_inputs
- true_w, true_b
- features(n_train+n_test,num_inputs)和对应的labels=torch.matmul(features,true_w)+true_b
- 从features和labels中选择train_features,test_features和train_labels,test_labels
2. 初始化w和b的方法
1. 定义初始化函数:
def init_params():
    """Randomly initialize and return the linear-regression parameters.

    Relies on the module-level ``num_inputs`` for the weight shape.

    Returns:
        (w, b): weight tensor of shape (num_inputs, 1) drawn from a
        standard normal distribution, and a scalar zero bias, both
        created with ``requires_grad=True`` so autograd tracks them.
    """
    # Fixed: the original line was missing the closing parenthesis on
    # torch.randn(...) and the function never returned the parameters.
    w = torch.randn((num_inputs, 1), requires_grad=True)
    b = torch.zeros(1, requires_grad=True)
    return w, b
2. 直接赋值
W = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_outputs)), dtype=torch.float)
b = torch.zeros(num_outputs, dtype=torch.float)
W.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)
3. 调用 init 模块提供的初始化方法
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)
3. 定义net
1. d2l方法
net = d2l.linreg
2. nn.Sequential
# 写法一
net = nn.Sequential(
nn.Linear(num_inputs, 1)
# 此处还可以传入其他层
)
# 写法二
net = nn.Sequential()
net.add_module('linear', nn.Linear(num_inputs, 1))
# net.add_module ......
# 写法三
from collections import OrderedDict
net = nn.Sequential(OrderedDict([
('linear', nn.Linear(num_inputs, 1))
# ......
]))
print(net)
print(net[0])
#输出
Sequential(
(linear): Linear(in_features=2, out_features=1, bias=True)
)
Linear(in_features=2, out_features=1, bias=True)
可以通过net.parameters()来查看模型所有的可学习参数,此函数将返回一个生成器。
for param in net.parameters():
print(param)
#输出:
Parameter containing:
tensor([[-0.0277, 0.2771]], requires_grad=True)
Parameter containing:
tensor([0.3395], requires_grad=True)
4. 准备训练:
1. 定义loss:
loss = d2l.squared_loss
loss = nn.MSELoss()
2. 定义准确度
def accuracy(y_hat, y):
    """Fraction of rows in ``y_hat`` whose argmax class equals the label in ``y``."""
    predictions = y_hat.argmax(dim=1)
    hits = (predictions == y).float()
    return hits.mean().item()
3. 选取优化器optimizer:
optimizer = optim.SGD(net.parameters(), lr=0.03)
为不同子网络设置不同的学习率:
optimizer =optim.SGD([
# 如果对某个参数不指定学习率,就使用最外层的默认学习率
{'params': net.subnet1.parameters()}, # lr=0.03
{'params': net.subnet2.parameters(), 'lr': 0.01}
], lr=0.03)
4. 定义dataset 和train_iter:
train_ls,test_ls = [ ] , [ ]
dataset=torch.utils.data.TensorDataset(train_features,train_labels)
train_iter=torch.utils.data.DataLoader(dataset,batch_size,shuffle=True)
5. 开始训练:
1. 先在num_epochs中循环训练
for _ in range(num_epochs):
每个epoch都将所有样本训练一次
2. 在train_features中训练
for X, y in train_iter:
3. 计算loss
l = loss(net(X, w, b), y)
l = l.sum()
4. 清空梯度
1.
if w.grad is not None:
w.grad.data.zero_()
b.grad.data.zero_()
2.
optimizer.zero_grad() # 梯度清零,等价于net.zero_grad()
3.
# 梯度清零
if optimizer is not None:
optimizer.zero_grad()
elif params is not None and params[0].grad is not None:
for param in params:
param.grad.data.zero_()
if optimizer is None:
d2l.sgd(params, lr, batch_size)
5. 对l进行backward
l.backward()
6. 优化器进行优化
1.
d2l.sgd([w, b], lr, batch_size)
2.
optimizer.step()
7. 对 train_ls 和 test_ls 进行append
进行下一次 train_iter 循环
进行下一次epochs循环
8. 评价模型
# 本函数已保存在d2lzh_pytorch包中方便以后使用。该函数将被逐步改进:它的完整实现将在“图像增广”一节中描述
def evaluate_accuracy(data_iter, net):
    """Average classification accuracy of ``net`` over all of ``data_iter``.

    Walks every (X, y) mini-batch, counts the samples whose argmax
    prediction matches the label, and divides by the total sample count.
    """
    correct, total = 0.0, 0
    for X, y in data_iter:
        predicted = net(X).argmax(dim=1)
        correct += (predicted == y).float().sum().item()
        total += y.shape[0]
    return correct / total