PyTorch是由美国互联网巨头Facebook在深度学习框架Torch的基础上使用Python重写的一个全新的深度学习框架,它更像Numpy的替代产物,不仅继承了Numpy的众多优点,还支持GPUs计算,计算效率要比Numpy有更明显的优势。此外,PyTorch还有丰富的API,可以快速完成深度神经网络模型的搭建和训练,所以深受众多开发人员和科研人员的喜爱。
以下以简单的二层网络的搭建作为PyTorch的入门例子:
1.自动梯度
# PyTorch: a simple two-layer network built by hand, trained with autograd
import torch
from torch.autograd import Variable  # NOTE: Variable is deprecated in modern PyTorch; kept for tutorial fidelity
# Dimensions of the input, hidden and output layers, and the batch size
batch_n = 100
hidden_layer = 100
input_data = 1000
output_data = 10
# requires_grad=False: x and y are the training features and labels, so no gradients are needed
x = Variable(torch.randn(batch_n, input_data), requires_grad = False)
y = Variable(torch.randn(batch_n, output_data), requires_grad = False)
# w1 and w2 are trainable weights, so requires_grad=True
w1 = Variable(torch.randn(input_data, hidden_layer), requires_grad = True)
w2 = Variable(torch.randn(hidden_layer, output_data), requires_grad = True)
# Learning rate and number of training epochs
epoch_n = 20
learning_rate = 1e-6
# Run 20 training iterations
for epoch in range(epoch_n):
    y_pred = x.mm(w1).clamp(min = 0).mm(w2)  # forward pass: linear -> ReLU (clamp at 0) -> linear
    loss = (y_pred-y).pow(2).sum()  # sum-of-squares error
    # .item() extracts the Python float so the format spec applies cleanly
    print("Epoch:{}, Loss:{:.4f}".format(epoch, loss.item()))
    # Backpropagate the error
    loss.backward()
    # Gradient-descent update of w1 and w2
    w1.data -= learning_rate*w1.grad.data
    w2.data -= learning_rate*w2.grad.data
    # Zero the gradients, otherwise they accumulate across epochs
    w1.grad.data.zero_()
    w2.grad.data.zero_()
2.自定义传播函数
自定义传播函数类Model(继承自torch.nn.Module),重写forward函数和backward函数,其中forward函数一般为矩阵的乘法运算,backward函数一般不做改变
import torch
from torch.autograd import Variable  # NOTE: deprecated wrapper; kept for tutorial fidelity

batch_n = 64
hidden_layer = 100
input_data = 1000
output_data = 10


# Custom forward function: must subclass torch.nn.Module
class Model(torch.nn.Module):
    """Two-layer network with an explicit forward pass; weights are passed in by the caller."""

    def __init__(self):
        super(Model, self).__init__()

    def forward(self, input, w1, w2):
        # linear -> ReLU (clamp at 0) -> linear
        x = torch.mm(input, w1)
        x = torch.clamp(x, min=0)
        x = torch.mm(x, w2)
        return x

    def backward(self):
        # autograd derives the gradients automatically; nothing to override here
        pass


model = Model()
x = Variable(torch.randn(batch_n,input_data), requires_grad = False)
y = Variable(torch.randn(batch_n,output_data), requires_grad = False)
w1 = Variable(torch.randn(input_data, hidden_layer), requires_grad = True)
w2 = Variable(torch.randn(hidden_layer, output_data), requires_grad = True)
epoch_n = 30
learning_rate = 1e-6
# Iterate over the training data
for epoch in range(epoch_n):
    # Forward pass through the custom module
    y_pred = model(x, w1, w2)
    # Compute the sum-of-squares error
    loss = (y_pred - y).pow(2).sum()
    print("Epoch:{}, Loss:{:.4f}".format(epoch, loss.item()))
    loss.backward()
    # Manual SGD step, then zero the gradients so they don't accumulate
    w1.data -= learning_rate*w1.grad.data
    w2.data -= learning_rate*w2.grad.data
    w1.grad.data.zero_()
    w2.grad.data.zero_()
3.模型搭建PyTorch之torch.nn
torch.nn包中提供了很多与实现神经网络中的具体功能相关的类
其中torch.nn.Sequential类是torch.nn中的一种序列容器,通过在容器中嵌套各种实现神经网络中具体功能的类来完成神经网络的搭建。
import torch
from torch.autograd import Variable  # NOTE: deprecated wrapper; kept for tutorial fidelity
from collections import OrderedDict

batch_n = 100
hidden_layer = 100
input_data = 1000
output_data = 10
x = Variable(torch.randn(batch_n, input_data), requires_grad = False)
y = Variable(torch.randn(batch_n, output_data), requires_grad = False)
# torch.nn.Sequential is a sequence container; modules can be added in two ways
# 1) Nest the modules directly:
'''
models = torch.nn.Sequential(
torch.nn.Linear(input_data, hidden_layer),
torch.nn.ReLU(),
torch.nn.Linear(hidden_layer, output_data)
)
'''
# 2) Add them via an OrderedDict: giving each module a custom name makes it
#    easier to look up and manipulate individual modules in the model later
models = torch.nn.Sequential(OrderedDict([
    ("Line1", torch.nn.Linear(input_data, hidden_layer)),
    ("Relu1", torch.nn.ReLU()),
    ("Line2", torch.nn.Linear(hidden_layer, output_data))])
)
epoch_n = 10000
learning_rate = 1e-4
# Instantiate the mean-squared-error loss
loss_fn = torch.nn.MSELoss()
# Training loop
for epoch in range(epoch_n):
    y_pred = models(x)
    loss = loss_fn(y_pred, y)
    if epoch%1000 == 0:
        print("Epoch:{}, Loss:{:.4f}".format(epoch, loss.item()))
    # Zero the gradients before backprop so they don't accumulate
    models.zero_grad()
    loss.backward()
    # Manual gradient-descent update of every parameter
    for param in models.parameters():
        param.data -= param.grad.data*learning_rate
4.模型优化PyTorch之torch.optim
在torch.optim包中提供了非常多的可实现参数自动优化的类,比如SGD、AdaGrad、Adam等,实现了神经网络权重参数优化和更新的自动化,这里以torch.optim.Adam类作为模型参数的优化函数,在此类中输入的是被优化的参数和学习速率的初始值(默认为0.001),Adam函数可以对梯度更新使用到的学习速率进行自适应的调节,大大缩短了训练循环次数,快速实现模型较高的精度(训练集上较低的错误率)
import torch
from torch.autograd import Variable  # NOTE: deprecated wrapper; kept for tutorial fidelity

batch_n = 100
hidden_layer = 100
input_data = 1000
output_data = 10
x = Variable(torch.randn(batch_n, input_data), requires_grad = False)
y = Variable(torch.randn(batch_n, output_data), requires_grad = False)
# Build the network by nesting modules directly inside torch.nn.Sequential
models = torch.nn.Sequential(
    torch.nn.Linear(input_data, hidden_layer),
    torch.nn.ReLU(),
    torch.nn.Linear(hidden_layer, output_data)
)
epoch_n = 20
learning_rate = 1e-4
# Instantiate the loss function
loss_fn = torch.nn.MSELoss()
# Adam adapts the effective learning rate per parameter during training
optimzer = torch.optim.Adam(models.parameters(), lr = learning_rate)
# Training loop
for epoch in range(epoch_n):
    y_pred = models(x)
    loss = loss_fn(y_pred, y)
    print("Epoch:{}, Loss:{:.4f}".format(epoch, loss.item()))
    # Clear old gradients, backpropagate, then let the optimizer update the weights
    models.zero_grad()
    loss.backward()
    optimzer.step()
5.实战手写数字识别
import matplotlib.pyplot as plt
import torch
import torchvision
from torchvision import datasets, transforms
from torch.autograd import Variable  # NOTE: deprecated wrapper; kept for tutorial fidelity

# Image preprocessing: convert to Tensor, then normalize.
# mean/std of 0.5 are simple placeholder values, not the true MNIST statistics.
transform = transforms.Compose([transforms.ToTensor(),
                                # transforms.Lambda(lambda x: x.repeat(3, 1, 1)),
                                transforms.Normalize(mean=[0.5],
                                                     std=[0.5])])
# Download and load the training data
data_train = datasets.MNIST(
    transform=transform,
    root='./data/',
    train=True,
    download=True
)
# Download and load the test data (download=True is a no-op if the files already exist)
data_test = datasets.MNIST(
    transform=transform,
    root="./data/",
    train=False,
    download=True
)
# Wrap the datasets in DataLoaders: shuffled batches of 64 images
data_loader_train = torch.utils.data.DataLoader(
    dataset=data_train,
    batch_size=64,
    shuffle=True
)
data_loader_test = torch.utils.data.DataLoader(
    dataset=data_test,
    batch_size=64,
    shuffle=True
)
# Use iter/next to grab one batch of images and the matching labels
images, labels = next(iter(data_loader_train))
# torchvision.utils.make_grid arranges the batch into a single image grid
img = torchvision.utils.make_grid(images)
img = img.numpy().transpose(1, 2, 0)
std = [0.5, 0.5, 0.5]
mean = [0.5, 0.5, 0.5]
# Undo the normalization so the image displays with correct intensities
img = img * std+mean
# print([labels[i] for i in range(64)])
# plt.imshow(img)
# plt.show()


# Build the convolutional neural network
class Model(torch.nn.Module):
    """Small CNN for MNIST: two conv layers + max-pool, then two dense layers."""

    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(stride=2, kernel_size=2)
        )
        self.dense = torch.nn.Sequential(
            torch.nn.Linear(14 * 14 * 128, 1024),
            torch.nn.ReLU(),
            torch.nn.Dropout(p=0.5),
            torch.nn.Linear(1024, 10)
        )

    def forward(self, x):
        x = self.conv1(x)
        # The 28x28 input is pooled to 14x14 with 128 channels; flatten for the dense layers
        x = x.view(-1, 14 * 14 * 128)
        x = self.dense(x)
        return x


model = Model()
cost = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())
# print(model)
# Train the model and optimize its parameters
n_epochs = 5
for epoch in range(n_epochs):
    running_loss = 0.0
    running_correct = 0
    print("Epoch{}/{}".format(epoch, n_epochs))
    print("-" * 10)
    for data in data_loader_train:
        x_train, y_train = data
        x_train, y_train = Variable(x_train), Variable(y_train)
        outputs = model(x_train)
        _, pred = torch.max(outputs.data, 1)
        optimizer.zero_grad()
        loss = cost(outputs, y_train)
        loss.backward()
        optimizer.step()
        # Accumulate plain Python numbers (.item()) rather than tensors,
        # so later division prints a float instead of doing tensor integer math
        running_loss += loss.item()
        running_correct += torch.sum(pred == y_train.data).item()
    # Evaluate on the test set after each epoch
    testing_correct = 0
    for data in data_loader_test:
        x_test, y_test = data
        x_test, y_test = Variable(x_test), Variable(y_test)
        outputs = model(x_test)
        _, pred = torch.max(outputs.data, 1)
        testing_correct += torch.sum(pred == y_test.data).item()
    print("Loss is:{:.4f}, Train Accuracy is:{:.4f}%, Test Accuracy is:{:.4f}%"
          .format(running_loss / len(data_train), 100 * running_correct / len(data_train),
                  100*testing_correct / len(data_test)))
# Check the model on a small batch of test data
# Reload the test data in batches of 4
data_loader_test = torch.utils.data.DataLoader(
    dataset=data_test,
    batch_size=4,
    shuffle=True
)
# Take one batch of test data and predict its labels
x_test, y_test = next(iter(data_loader_test))
inputs = Variable(x_test)
pred = model(inputs)
_,pred = torch.max(pred, 1)
print("Predict Label is:", [i for i in pred.data])
print("Real Label is:", [i for i in y_test])
img = torchvision.utils.make_grid(x_test)
img = img.numpy().transpose(1, 2, 0)
std = [0.5, 0.5, 0.5]
mean = [0.5, 0.5, 0.5]
img = img*std+mean
plt.imshow(img)