lr = 0.03
num_epochs = 5
net = linreg
loss = squared_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)  # minibatch loss on X and y
        # Because l has shape (batch_size, 1) rather than a scalar, sum all of
        # its elements before computing the gradient with respect to [w, b]
        l.sum().backward()  # backpropagation
        sgd([w, b], lr, batch_size)  # update parameters using their gradients
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')

print(f'estimation error of w: {true_w - w.reshape(true_w.shape)}')
print(f'estimation error of b: {true_b - b}')
epoch 1, loss 0.039145
epoch 2, loss 0.000148
epoch 3, loss 0.000052
epoch 4, loss 0.000052
epoch 5, loss 0.000052
estimation error of w: tensor([ 0.0002, -0.0001], grad_fn=<SubBackward0>)
estimation error of b: tensor([-0.0003], grad_fn=<RsubBackward1>)
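The loop above relies on helpers defined earlier in the chapter (data_iter, linreg, squared_loss, sgd), which this excerpt does not show. For reference, a minimal d2l-style sketch of those definitions, assuming they match the standard versions from the book:

import random
import torch

def data_iter(batch_size, features, labels):
    # Yield shuffled minibatches of (features, labels)
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        batch_indices = torch.tensor(indices[i: i + batch_size])
        yield features[batch_indices], labels[batch_indices]

def linreg(X, w, b):
    # The linear regression model
    return torch.matmul(X, w) + b

def squared_loss(y_hat, y):
    # Elementwise squared loss; result has shape (batch_size, 1)
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2

def sgd(params, lr, batch_size):
    # Minibatch stochastic gradient descent
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()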
2. Try different learning rates and observe how quickly the value of the loss function decreases.
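The experiments below call a train helper whose definition is not shown in this excerpt. Presumably it wraps the training loop above; a minimal sketch under that assumption (w, b, features, and labels are read from the enclosing scope, as in the loop itself):

def train(net, loss, sgd, lr, batch_size, num_epochs):
    # Hypothetical wrapper around the training loop from the first section
    for epoch in range(num_epochs):
        for X, y in data_iter(batch_size, features, labels):
            l = loss(net(X, w, b), y)
            l.sum().backward()
            sgd([w, b], lr, batch_size)
        with torch.no_grad():
            train_l = loss(net(features, w, b), labels)
            print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')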
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)  # requires_grad enables autograd
b = torch.zeros(1, requires_grad=True)
lr = 0.1
train(net, loss, sgd, lr, batch_size, num_epochs)
epoch 1, loss 0.000052
epoch 2, loss 0.000053
epoch 3, loss 0.000053
epoch 4, loss 0.000052
epoch 5, loss 0.000052
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)  # requires_grad enables autograd
b = torch.zeros(1, requires_grad=True)
lr = 0.001
train(net, loss, sgd, lr, batch_size, num_epochs)
epoch 1, loss 13.501316
epoch 2, loss 11.077716
epoch 3, loss 9.089215
epoch 4, loss 7.457697
epoch 5, loss 6.119079
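With lr = 0.1 the loss reaches its noise floor (about 0.00005) within a single epoch, while with lr = 0.001 it is still above 6 after five epochs: a larger learning rate descends much faster on this problem, though a rate that is too large can make the updates overshoot and diverge. To compare several rates in one pass, a small sweep sketch reusing the assumed train helper:

for lr in (0.001, 0.01, 0.03, 0.1):
    # Reinitialize the parameters so each rate starts from scratch
    w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
    b = torch.zeros(1, requires_grad=True)
    print(f'--- lr = {lr} ---')
    train(net, loss, sgd, lr, batch_size, num_epochs)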