3.1 线性回归
import torch
from time import time
from time import perf_counter  # high-resolution clock meant for measuring short intervals

# Compare adding two 1000-element vectors one element at a time in a Python
# loop against a single vectorized tensor addition.
a = torch.ones(1000)
b = torch.ones(1000)
c = torch.zeros(1000)

# time() can be too coarse to resolve very short intervals (the vectorized add
# below was reported as 0.0 with it), so perf_counter() is used for both runs.
start = perf_counter()
for i in range(1000):
    c[i] = a[i] + b[i]
print(perf_counter() - start)

start = perf_counter()
d = a + b
print(perf_counter() - start)
0.016954660415649414
0.0
3.2 线性回归的从零开始实现
#使画出的图内嵌在notebook中,不会弹出额外的图框
%matplotlib inline
from IPython import display #用于显示图片
from matplotlib import pyplot as plt
import numpy as np
import random
3.2.1 生成数据集
# Synthetic linear-regression data:
#   labels = true_w[0] * x0 + true_w[1] * x1 + true_b + noise
num_inputs = 2
num_examples = 1000
true_w = [2,-3.4]
true_b = 4.2

# Standard-normal features; built from a NumPy array, so the tensor is float64.
raw_features = np.random.normal(0, 1, (num_examples, num_inputs))
features = torch.from_numpy(raw_features)

# Noise-free targets computed from the ground-truth weights and bias.
first_column, second_column = features[:, 0], features[:, 1]
labels = true_w[0] * first_column + true_w[1] * second_column + true_b

# Add Gaussian noise (mean 0, standard deviation 0.01) to the targets in place.
noise = np.random.normal(0, 0.01, labels.size())
labels += torch.from_numpy(noise)

print(features[0], labels[0])
tensor([1.4474, 0.8296], dtype=torch.float64) tensor(4.2919, dtype=torch.float64)
# Helpers for drawing the scatter plots of features against labels.
def use_svg_display():
    """Tell IPython to render Matplotlib figures as SVG.

    SVG (Scalable Vector Graphics) is an open-standard vector image format:
    the picture is described in code and can be embedded in HTML and viewed
    in a browser.
    """
    image_format = 'svg'
    display.set_matplotlib_formats(image_format)
def set_figsize(figsize=(3.5,2.5)):
    """Switch figure rendering to SVG and set the default figure size.

    Args:
        figsize: Size stored under ``plt.rcParams['figure.figsize']``;
            defaults to ``(3.5, 2.5)``.
    """
    use_svg_display()
    # Written into the global rcParams, so it applies to figures created later.
    rc_params = plt.rcParams
    rc_params['figure.figsize'] = figsize
# Apply the figure settings, then plot each feature column against the labels
# on the same axes: column 0 in blue, column 1 in red, marker size 1.
set_figsize()
label_values = labels.numpy()
plt.scatter(features[:, 0].numpy(), label_values, 1, color = 'b')
plt.scatter(features[:, 1].numpy(), label_values, 1, color = "r")
<matplotlib.collections.PathCollection at 0x21e598a6508>
3.2.2 读取数据
def data_iter(batch_size, features, labels):
    """Yield shuffled mini-batches of ``(features, labels)``.

    Args:
        batch_size: Number of examples per batch; the last batch is smaller
            when the dataset size is not a multiple of it.
        features: Tensor whose first dimension indexes the examples.
        labels: Tensor of targets, indexed the same way as ``features``.

    Yields:
        Tuples ``(feature_batch, label_batch)`` selected with the same indices.
    """
    total = len(features)
    order = list(range(total))
    random.shuffle(order)  # read the examples in a random order
    for begin in range(0, total, batch_size):
        # List slicing stops at the end of the list, so the final batch is
        # simply shorter when fewer than batch_size examples remain.
        picked = torch.LongTensor(order[begin:begin + batch_size])
        yield features.index_select(0, picked), labels.index_select(0, picked)
batch_size = 10
# Pull a single mini-batch from the iterator and print it as a sanity check.
first_batch = next(data_iter(batch_size, features, labels), None)
if first_batch is not None:
    x, y = first_batch
    print(x, y)
tensor([[ 0.1813, -0.0854],
[ 0.1292, -1.0831],
[ 1.6538, 1.0937],
[-0.3127, -1.5689],
[ 0.3194, -0.1455],
[-0.3495, 0.4521],
[ 1.3258, -0.4229],
[ 0.2867, -1.1319],
[ 0.7810, -0.5923],
[-0.4121, 0.1861]], dtype=torch.float64) tensor([4.8550, 8.1427, 3.7869, 8.9058, 5.3368, 1.9858, 8.2940, 8.6263, 7.7772,
2.7421], dtype=torch.float64)
3.2.3 初始化模型参数
# Weights start as small Gaussian noise (mean 0, std 0.01), shape (num_inputs, 1).
# w has to be float64 (the original author notes that training in section 3.2.7
# fails otherwise). np.random.normal already produces float64, so the dtype is
# stated explicitly here instead of being converted afterwards.
initial_w = np.random.normal(0, 0.01, (num_inputs, 1))
w = torch.tensor(initial_w, dtype=torch.float64)
# NOTE: the bias is created as float32 while w is float64.
b = torch.zeros(1, dtype = torch.float32)
# Enable gradient tracking on both parameters so backward() fills their .grad.
w.requires_grad_(True)
b.requires_grad_(True)
tensor([0.], requires_grad=True)
3.2.4 定义模型
def linreg(X, w, b):
    """Linear regression model: matrix product of ``X`` and ``w`` plus ``b``.

    Args:
        X: 2-D input matrix (``torch.mm`` accepts only matrices).
        w: 2-D weight matrix whose row count matches the columns of ``X``.
        b: Bias added to the product by broadcasting.

    Returns:
        The tensor ``X.mm(w) + b``.
    """
    projection = X.mm(w)
    return projection + b
3.2.5 定义损失函数
def squared_loss(y_hat, y):
    """Element-wise halved squared error, ``(y_hat - y) ** 2 / 2``.

    Args:
        y_hat: Predictions.
        y: Targets; viewed with the shape of ``y_hat`` before subtracting so
            the difference is taken element by element rather than broadcast.

    Returns:
        A tensor shaped like ``y_hat`` holding one loss value per element
        (no reduction is applied).
    """
    residual = y_hat - y.view(y_hat.size())
    return residual.pow(2) / 2
3.2.6 定义优化算法
def sgd(params, lr, batch_size):
    """Apply one mini-batch stochastic gradient descent step in place.

    Every parameter is updated as ``param -= lr * param.grad / batch_size``.
    Gradients are not cleared here; the caller is responsible for zeroing them.

    Args:
        params: Iterable of tensors to update.
        lr: Learning rate.
        batch_size: Divisor applied to each gradient before the step.
    """
    # The update must not be recorded by autograd. torch.no_grad() achieves
    # that without writing through the discouraged ``.data`` attribute.
    with torch.no_grad():
        for param in params:
            # A parameter that took no part in the backward pass has no
            # gradient yet; leave it untouched instead of failing on None.
            if param.grad is None:
                continue
            param -= lr * param.grad / batch_size
3.2.7 训练模型
# Train the from-scratch linear model with mini-batch SGD.
lr = 0.03
num_epoches = 3
net = linreg
loss = squared_loss

for epoch in range(num_epoches):
    for X, y in data_iter(batch_size, features, labels):
        # Sum the per-example losses; sgd() divides the gradient by batch_size.
        batch_loss = loss(net(X, w, b), y).sum()
        batch_loss.backward()
        sgd([w, b], lr, batch_size)
        # backward() accumulates into .grad, so reset it before the next batch.
        w.grad.zero_()
        b.grad.zero_()
    # The end-of-epoch loss over the full dataset is only reported, never
    # back-propagated, so compute it without building an autograd graph.
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
    print('epoch %d, loss %f' % (epoch+1, train_l.mean().item()))
epoch 1, loss 0.046463
epoch 2, loss 0.000186
epoch 3, loss 0.000051
# Show each ground-truth value followed by the parameter learned for it.
for truth, learned in ((true_w, w), (true_b, b)):
    print(truth, '\n', learned)
[2, -3.4]
tensor([[ 1.9996],
[-3.3992]], dtype=torch.float64, requires_grad=True)
4.2
tensor([4.1995], requires_grad=True)
3.3 线性回归的简洁实现
3.3.1 生成数据集
与3.2.1节内容相同
3.3.2 读取数据
import torch.utils.data as Data
# Make float64 the default floating-point dtype. set_default_dtype replaces
# the deprecated set_default_tensor_type(torch.DoubleTensor) call.
torch.set_default_dtype(torch.float64)
batch_size = 10
# Pair up features and labels, then let DataLoader shuffle and batch them.
dataset = Data.TensorDataset(features, labels)
# NOTE: this rebinds the name data_iter to a DataLoader object.
data_iter = Data.DataLoader(dataset, batch_size, shuffle=True)
# Print just the first mini-batch as a sanity check.
for X, y in data_iter:
    print(X, y)
    break
tensor([[-1.8507, -0.3694],
[-1.7290, -0.1033],
[ 1.6665, -0.3100],
[ 0.4747, 0.5740],
[-0.1191, 0.8525],
[ 0.5377, 2.2123],
[-0.2951, -0.7979],
[ 3.8129, 1.1159],
[-1.0820, 0.0823],
[-0.4296, 1.0366]]) tensor([ 1.7562, 1.1124, 8.5759, 3.2043, 1.0542, -2.2329, 6.3421, 8.0257,
1.7580, -0.1773])
3.3.3 定义模型
# Disabled alternative: a custom torch.nn.Module subclass wrapping a single
# Linear layer. It sits inside a bare string literal, so none of it executes.
'''
from torch.nn import*
class LinearNet(torch.nn.Module):
def __init__(self, n_features):
super(LinearNet, self).__init__()
self.linear = torch.nn.Linear(n_features, 1)
def forward(self, x):
y = self.linear(x)
return y
net = LinearNet(num_inputs)
print(net)
'''
'\nfrom torch.nn import*\nclass LinearNet(torch.nn.Module):\n def __init__(self, n_features):\n super(LinearNet, self).__init__()\n self.linear = torch.nn.Linear(n_features, 1)\n \n def forward(self, x):\n y = self.linear(x)\n return y\nnet = LinearNet(num_inputs)\nprint(net)\n'
# Build the network with Sequential: an ordered container whose layers are
# added to the computation graph in the order they are passed in.
'''
net = torch.nn.Sequential(
torch.nn.Linear(num_inputs, 1)
)
'''
# Start from an empty container and register the one linear layer by name.
linear_layer = torch.nn.Linear(num_inputs, 1)
net = torch.nn.Sequential()
net.add_module('linear', linear_layer)
print(net[0])
# Print every parameter of the network, one per line.
print(*net.parameters(), sep='\n')
Linear(in_features=2, out_features=1, bias=True)
Parameter containing:
tensor([[ 0.5318, -0.4140]], requires_grad=True)
Parameter containing:
tensor([-0.3767], requires_grad=True)
3.3.4 初始化模型参数
from torch.nn import init
# Re-initialise the first layer in place: weights drawn from a normal
# distribution with mean 0 and std 0.01, bias set to the constant 0.
linear_layer = net[0]
init.normal_(linear_layer.weight, mean = 0, std = 0.01)
init.constant_(linear_layer.bias, val = 0)
Parameter containing:
tensor([0.], requires_grad=True)
3.3.5 定义损失函数
# Mean squared error, averaged over all elements (MSELoss's default reduction).
loss = torch.nn.MSELoss(reduction='mean')
3.3.6 定义优化算法
import torch.optim as optim
# Plain SGD over all parameters of the network, with a learning rate of 0.03.
learning_rate = 0.03
optimizer = optim.SGD(net.parameters(), lr=learning_rate)
print(optimizer)
SGD (
Parameter Group 0
dampening: 0
lr: 0.03
momentum: 0
nesterov: False
weight_decay: 0
)
3.3.7 训练模型
# Train the torch.nn model; after each epoch print the loss of its last batch.
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()
        # Labels are viewed as a column so they line up with the network output.
        l = loss(net(X), y.view(-1, 1))
        l.backward()
        optimizer.step()
    print('epoch %d, loss: %f' % (epoch, l.item()))
epoch 1, loss: 0.000319
epoch 2, loss: 0.000156
epoch 3, loss: 0.000059
# Compare the ground-truth weights and bias with those learned by the layer.
dense = net[0]
for truth, learned in ((true_w, dense.weight), (true_b, dense.bias)):
    print(truth, learned)
[2, -3.4] Parameter containing:
tensor([[ 1.9998, -3.3996]], requires_grad=True)
4.2 Parameter containing:
tensor([4.2000], requires_grad=True)
总结时刻:
利用PyTorch实现线性回归训练的步骤:
- 手动生成数据集
- 通过torch.utils.data模块中TensorDataset()、DataLoader()函数读取数据集
- 通过 torch.nn.Sequential()定义神经网络
- 通过 init.normal_()、init.constant_()初始化模型参数
- torch.nn模块定义损失函数
- torch.optim模块定义优化算法,比如SGD、Adam、RMSProp
- 训练模型(optimizer.step()通常用在每个mini-batch之中,只有用了optimizer.step(),模型才会更新)
一般,使用optimizer的流程就是三行代码:
- optimizer.zero_grad() #梯度清零
- loss.backward() #反向梯度传播
- optimizer.step() #更新模型参数
最小化交叉熵损失函数等价于最大化训练数据集所有标签类别的联合概率。
欢迎关注【OAOA】