Concise Implementation of Linear Regression
# -*- coding: utf-8 -*-
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l
from torch import nn
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = d2l.synthetic_data(true_w, true_b, 1000)
# Read the dataset
def load_array(data_arrays, batch_size, is_train=True):
    """Construct a PyTorch data iterator."""
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)
batch_size = 10
data_iter = load_array((features, labels), batch_size)
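# Optional check: fetch one minibatch from the iterator; it should be a list
# [X, y] with X of shape (10, 2) and y of shape (10, 1)
next(iter(data_iter))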
# Define the model
net = nn.Sequential(nn.Linear(2, 1))
# Initialize model parameters
net[0].weight.data.normal_(0, 0.01)
net[0].bias.data.fill_(0)
# Define the loss function (MSELoss returns the mean squared error by default)
loss = nn.MSELoss()
# Define the optimization algorithm
trainer = torch.optim.SGD(net.parameters(), lr=0.03)
# Training
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)   # forward pass: minibatch loss
        trainer.zero_grad()   # clear accumulated gradients
        l.backward()          # backward pass
        trainer.step()        # update parameters
    l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')
# Estimated parameter values
print("estimated w:", net[0].weight.data)
print("estimated b:", net[0].bias.data)
Implementing Linear Regression from Scratch
# -*- coding: utf-8 -*-
import random
import torch
from d2l import torch as d2l
# Generate the dataset: y = Xw + b + noise
def synthetic_data(w, b, num_examples):
    """Generate y = Xw + b + Gaussian noise."""
    X = torch.normal(0, 1, (num_examples, len(w)))
    y = torch.mv(X, w) + b
    y += torch.normal(0, 0.01, y.shape)
    return X, y.reshape((-1, 1))
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)
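# Optional: inspect the first example to confirm shapes,
# features should be (1000, 2) and labels (1000, 1)
print('features:', features[0], '\nlabel:', labels[0])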
d2l.set_figsize()
# Scatter plot of the second feature against the labels; the linear trend should be visible
d2l.plt.scatter(features[:, 1].detach().numpy(), labels.detach().numpy(), s=1)
# Read the dataset
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    # Read the examples in random order
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        batch_indices = torch.tensor(
            indices[i: min(i + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]
batch_size = 10
for X, y in data_iter(batch_size, features, labels):
    print(X, "\n", y)
    break
# Initialize model parameters
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
# Define the model
def linreg(X, w, b):
    """The linear regression model."""
    return torch.matmul(X, w) + b
# Define the loss function
def squared_loss(y_hat, y):
    """Squared loss."""
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2
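# Quick check of the loss: for y_hat = 3 and y = 1 it should be (3 - 1)**2 / 2 = 2
print(squared_loss(torch.tensor([3.0]), torch.tensor([1.0])))  # tensor([2.])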
# Define the optimization algorithm. The computed loss is a sum over the
# minibatch, so the update is normalized by batch_size.
def sgd(params, lr, batch_size):
    """Minibatch stochastic gradient descent."""
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()
# Training
lr = 0.03
num_epochs = 3
net = linreg
loss = squared_loss
for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)    # minibatch loss
        l.sum().backward()           # gradients of the summed loss
        sgd([w, b], lr, batch_size)  # update parameters
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
print("w的估计值:",w)
print("b的估计值:",b)
Comparison of Different Optimizers
# -*- coding: utf-8 -*-
import torch
import torch.nn.functional as F
import torch.utils.data as Data
import matplotlib.pyplot as plt
# hyperparameters
LR = 0.01
BATCH_SIZE = 32
EPOCH = 12
# unsqueeze adds a dimension: shape (1000,) -> (1000, 1)
x = torch.unsqueeze(torch.linspace(-1, 1, 1000), dim=1)
y = x.pow(2) + 0.1 * torch.normal(torch.zeros(*x.size()))  # y = x^2 plus noise
torch_dataset = Data.TensorDataset(x, y)
loader = Data.DataLoader(dataset=torch_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=True)
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(1, 20)   # hidden layer
        self.predict = torch.nn.Linear(20, 1)  # output layer

    def forward(self, x):
        x = F.relu(self.hidden(x))
        x = self.predict(x)
        return x
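# Optional shape check: an input batch of shape (N, 1) should map to (N, 1)
print(Net()(torch.randn(4, 1)).shape)  # torch.Size([4, 1])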
net_SGD = Net()
net_Momentum = Net()
net_RMSprop = Net()
net_Adam = Net()
nets = [net_SGD, net_Momentum, net_RMSprop, net_Adam]
opt_SGD = torch.optim.SGD(net_SGD.parameters(), lr=LR)
opt_Momentum = torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum=0.8)
opt_RMSprop = torch.optim.RMSprop(net_RMSprop.parameters(), lr=LR, alpha=0.9)
opt_Adam = torch.optim.Adam(net_Adam.parameters(), lr=LR, betas=(0.9, 0.99))
optimizers = [opt_SGD, opt_Momentum, opt_RMSprop, opt_Adam]
loss_func = torch.nn.MSELoss()
# record the loss history of each optimizer
losses_his = [[], [], [], []]
for epoch in range(EPOCH):
    print(epoch)
    for step, (b_x, b_y) in enumerate(loader):
        # train each network on the same batch with its own optimizer
        for net, opt, l_his in zip(nets, optimizers, losses_his):
            output = net(b_x)              # forward pass
            loss = loss_func(output, b_y)  # compute loss
            opt.zero_grad()                # clear gradients for this step
            loss.backward()                # backward pass, compute gradients
            opt.step()                     # apply the update
            l_his.append(loss.item())      # record the scalar loss
labels = ["SGD","Momentum","RMSprop","Adam"]
for i, l_his in enumerate(losses_his):
plt.plot(l_his, label = labels[i])
plt.legend(loc="best")
plt.xlabel("Steps")
plt.ylabel("Loss")
plt.ylim((0,0.2))
plt.show()
Code Summary
Dataset

Official docs: the TORCH.UTILS.DATA classes

- torch.utils.data.TensorDataset(): Dataset wrapping tensors.
- torch.utils.data.DataLoader(): its commonly used parameters (see the sketch after this list):
  - dataset: the dataset from which to load the data
  - batch_size: how many samples to load per batch
  - shuffle: whether to reshuffle the data at every epoch
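A minimal sketch putting TensorDataset and DataLoader together (the tensors here are invented purely for illustration):

import torch
from torch.utils import data

features = torch.randn(100, 2)  # 100 made-up examples with 2 features each
labels = torch.randn(100, 1)    # one made-up target per example
dataset = data.TensorDataset(features, labels)  # wrap tensors as a dataset
loader = data.DataLoader(dataset, batch_size=10, shuffle=True)
for X, y in loader:
    print(X.shape, y.shape)     # torch.Size([10, 2]) torch.Size([10, 1])
    break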
Model Definition

The torch.nn module already provides many predefined layers that can be used directly.
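For instance, nn.Linear on its own is a fully connected layer whose weight and bias come pre-registered as learnable parameters; a small sketch:

from torch import nn

layer = nn.Linear(4, 2)    # 4 input features, 2 output features
print(layer.weight.shape)  # torch.Size([2, 4])
print(layer.bias.shape)    # torch.Size([2])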
Optimization
opt_SGD = torch.optim.SGD(net.parameters(), lr=LR)
Judging from the comparison of the four optimization methods above, Adam performs well.
Iterative Training

One full pass over the training data is an epoch, and within an epoch the data is split into batches. For each batch, the net is called to generate predictions and compute the loss (forward pass), backpropagation computes the gradients, and the optimizer is called to update the parameters.
for epoch in range(epochs):
    ....
    zero_grad()  # gradients accumulate rather than being replaced, so clear them once per batch
    backward()
    step()
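A tiny standalone demonstration (unrelated to the models above) of the accumulation behavior that makes zero_grad() necessary:

import torch

x = torch.ones(2, requires_grad=True)
y = (2 * x).sum()
y.backward()
print(x.grad)      # tensor([2., 2.])
y = (2 * x).sum()  # rebuild the graph and run backward again without clearing
y.backward()
print(x.grad)      # tensor([4., 4.]) -- the second backward added to the first
x.grad.zero_()     # this reset is what optimizer.zero_grad() does for each parameter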
Reference materials for this PyTorch basics series:
- Morvan (莫烦)'s PyTorch video tutorials
- Dive Into Deep Learning
- The official PyTorch documentation