Pytorch建模笔记
伤不起,用了好久的Tensorflow如今要切换到Pytorch,遂记录下来Pytorch的核心用法。
目前来看,和Tensorflow创建类的建模方式基本一致,不知是否存在Tensorflow中Input类似的函数式建模,持续完善。
文章目录
一、继承式建模
1. 构建数据集
1.1 构建数据集
Pytorch的数据集需要手工将ndarray或dataframe转化为一种pytorch通用的数据集形式,即tensor,这与Tensorflow基本一样。
首先需要将ndarray数据转为tensor
# Convert the ndarray features to a float32 tensor.
x_train = torch.tensor(x_train.astype(np.float32))
# Labels must be int64 (torch.long): CrossEntropyLoss rejects float targets — forcing
# float32 here previously raised an error when the loss was computed.
y_train = torch.tensor(y_train.astype(np.int64))
1.2 构建Dataset
将tensor类型的x_train, y_train构建为标准的TensorDataset
# Pair features and labels into standard TensorDatasets.
train_dataset = TensorDataset(x_train, y_train)
test_dataset = TensorDataset(x_test, y_test)
1.3 构建DataLoader
将Dataset构建为DataLoader
# Wrap each Dataset in a DataLoader that yields shuffled mini-batches of 32.
train_loader = DataLoader(dataset = train_dataset,
                          batch_size = 32,
                          shuffle=True)
test_loader = DataLoader(dataset = test_dataset,
                         batch_size = 32,
                         shuffle=True)
至此,标准的可迭代的训练集和测试集构建完成
2. 网络搭建
Pytorch与Tensorflow相比,建模过程简化了一些,通过初步调研,发现建模方式基本上和Tensorflow中继承式建模相似。
首先,不论如何,我们都需要构建类来继承nn.Module父类
class MLP(nn.Module):
    """Three-layer fully connected network: 784 -> 200 -> 200 -> 10.

    Every linear layer is followed by a LeakyReLU activation.
    NOTE(review): the final LeakyReLU is applied to the logits that are later
    fed to CrossEntropyLoss. LeakyReLU is monotonic so the argmax is unchanged,
    but raw logits are the conventional input to that loss — kept as-is to
    preserve the original behavior.
    """

    def __init__(self):
        super().__init__()
        # Alternative style: the identical network as one nn.Sequential of
        # Linear/LeakyReLU pairs (784->200, 200->200, 200->10).
        self.fc1 = nn.Linear(784, 200)
        self.relu1 = nn.LeakyReLU(inplace=True)
        self.fc2 = nn.Linear(200, 200)
        self.relu2 = nn.LeakyReLU(inplace=True)
        self.fc3 = nn.Linear(200, 10)
        self.out = nn.LeakyReLU(inplace=True)

    def forward(self, x):
        """Propagate a (batch, 784) input through all layers in order."""
        for layer in (self.fc1, self.relu1, self.fc2,
                      self.relu2, self.fc3, self.out):
            x = layer(x)
        return x
如上所示,一个神经网络的模型搭建工作完成,可以看出,除了要继承父类nn.Module以外,必须要定义forward(self, x)前向传播的函数,实际喂入模型的数据也是在forward中定义的网络中进行传播
3. 损失函数及优化器
常用的损失函数定义在 torch.nn 中,优化器定义在 torch.optim 中,因此需加载相应的库
# Optimizers live in torch.optim (loss functions come from torch.nn).
# Fixed: the original repeated this import on two consecutive lines.
import torch.optim as optim

# Hyperparameters.
batch_size = 200   # NOTE(review): unused — the DataLoaders above were built with batch_size=32
learning_rate = 0.001
epochs = 20

# Fall back to CPU when no CUDA device exists; the original hard-coded
# 'cuda:0' and crashed on CPU-only machines.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
net = MLP().to(device)
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
criteon = nn.CrossEntropyLoss().to(device)
4. 迭代训练及验证
# 迭代训练
for epoch in range(epochs):
for batch_idx, (data, target) in enumerate(train_loader):
#data = data.view(-1, 28*28)
data, target = data.to(device), target.to(device)
logits = net(data)
loss = criteon(logits, target)
optimizer.zero_grad()
loss.backward()
optimizer.step()
if batch_idx % 100 == 0:
print('Train Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx*len(data), len(train_loader.dataset),
100.*batch_idx/len(train_loader), loss.item()
))
test_loss = 0
correct = 0
for data, target in test_loader:
#data = data.view(-1, 28*28)
data, target = data.to(device), target.to(device)
logits = net(data)
test_loss += criteon(logits, target).item()
pred = logits.data.max(1)[1]
correct += pred.eq(target.data).sum()
test_loss /= len(test_loader.dataset)
print('\nTest set : Averge loss: {:.4f}, Accurancy: {}/{}({:.3f}%)'.format(
test_loss, correct, len(test_loader.dataset),
100.*correct/len(test_loader.dataset)
))
完整代码,基于MNIST数据集
import torch
import torch.utils.data
from torch import optim, nn
from torchvision import datasets
from torchvision.transforms import transforms
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
import pandas as pd
import numpy as np
# Read the raw CSV files (no header row; presumably col 0 is the digit label
# and cols 1-784 are the 28x28 pixel values — matches the MNIST layout).
df_train = pd.read_csv(r'./data/mnist_train.csv', header = None)
df_test = pd.read_csv(r'./data/mnist_test.csv', header = None)
# Give the columns uniform names: col0 ... colN.
df_train.columns = ['col{}'.format(i) for i in range(df_train.shape[1])]
df_test.columns = ['col{}'.format(i) for i in range(df_test.shape[1])]
label = 'col0'  # first column holds the class label
feature_columns = ['col{}'.format(i) for i in range(1,785)]  # 784 pixel columns
x_train = df_train[feature_columns]
y_train = df_train[label]
x_test = df_test[feature_columns]
y_test = df_test[label]
print('x_train.shape:', x_train.shape)
print('y_train.shape:', y_train.shape)
print('x_test.shape:', x_test.shape)
print('y_test.shape:', y_test.shape)
# Build tensors: features as float32, labels as int64 (torch.long) —
# CrossEntropyLoss requires integer class targets.
x_train = torch.tensor(x_train.values.astype(np.float32))
y_train = torch.tensor(y_train.values.astype(np.int64))
x_test = torch.tensor(x_test.values.astype(np.float32))
y_test = torch.tensor(y_test.values.astype(np.int64))
# Build Datasets and shuffled DataLoaders (mini-batch size 32).
train_dataset = TensorDataset(x_train, y_train)
test_dataset = TensorDataset(x_test, y_test)
train_loader = DataLoader(dataset = train_dataset,
                          batch_size = 32,
                          shuffle=True)
test_loader = DataLoader(dataset = test_dataset,
                         batch_size = 32,
                         shuffle=True)
# Hyperparameters.
# NOTE(review): batch_size is never used — the DataLoaders above were
# constructed with batch_size=32, not 200.
batch_size = 200
learning_rate = 0.001
epochs = 20
class MLP(nn.Module):
    """Multi-layer perceptron: 784 -> 200 -> 200 -> 10 with LeakyReLU activations.

    NOTE(review): the final LeakyReLU is applied to the logits later fed to
    CrossEntropyLoss; it is monotonic so argmax is unaffected, but raw logits
    are the conventional input to that loss.
    """
    def __init__(self):
        super(MLP, self).__init__()
        # Alternative: the same network expressed as one nn.Sequential.
        # self.model = nn.Sequential(
        #     nn.Linear(784, 200),
        #     nn.LeakyReLU(inplace=True),
        #     nn.Linear(200, 200),
        #     nn.LeakyReLU(inplace=True),
        #     nn.Linear(200, 10),
        #     nn.LeakyReLU(inplace=True)
        # )
        self.fc1 = nn.Linear(784,200)        # input layer: 784 flattened pixels
        self.relu1 = nn.LeakyReLU(inplace=True)
        self.fc2 = nn.Linear(200,200)        # hidden layer
        self.relu2 = nn.LeakyReLU(inplace=True)
        self.fc3 = nn.Linear(200, 10)        # output layer: 10 classes
        self.out = nn.LeakyReLU(inplace=True)
    def forward(self, x):
        """Forward pass over a (batch, 784) input; returns (batch, 10)."""
        # x = self.model(x)  # used with the Sequential variant above
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        x = self.out(x)
        return x
# Fall back to CPU when CUDA is unavailable; the original hard-coded
# 'cuda:0' and crashed on CPU-only machines.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
net = MLP().to(device)
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
criteon = nn.CrossEntropyLoss().to(device)
# Training and per-epoch evaluation loop.
for epoch in range(epochs):
    net.train()  # ensure train-mode behavior of layers such as Dropout/BatchNorm
    for batch_idx, (data, target) in enumerate(train_loader):
        # data = data.view(-1, 28*28)  # only needed when inputs arrive as images
        data, target = data.to(device), target.to(device)
        logits = net(data)
        loss = criteon(logits, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx*len(data), len(train_loader.dataset),
                100.*batch_idx/len(train_loader), loss.item()
            ))

    # Evaluation: switch to eval mode and disable gradient tracking —
    # the original computed (and stored) gradients for the test pass too.
    net.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            # data = data.view(-1, 28*28)
            data, target = data.to(device), target.to(device)
            logits = net(data)
            test_loss += criteon(logits, target).item()
            pred = logits.argmax(dim=1)          # predicted class per sample
            correct += pred.eq(target).sum().item()
    test_loss /= len(test_loader.dataset)
    # Fixed typos in the report string: "Averge" -> "Average", "Accurancy" -> "Accuracy".
    print('\nTest set : Average loss: {:.4f}, Accuracy: {}/{}({:.3f}%)'.format(
        test_loss, correct, len(test_loader.dataset),
        100.*correct/len(test_loader.dataset)
    ))
API对比
Pytorch | Tensorflow |
---|---|
Tensor.view() | tf.reshape() |
torch.unsqueeze() | tf.expand_dims() |
nn.Linear() | layers.Dense() |
torch.cat() | tf.concat() / layers.concatenate() |
model.eval() : 将网络切换到评估模式,改变 Dropout、BatchNorm 等层在推理阶段的行为;注意它并不会冻结参数——参数之所以不更新,是因为评估阶段没有调用 backward/step;若要禁用梯度计算,应配合 torch.no_grad() 使用
class Net(nn.Module):
    """LeNet-style CNN: two conv+pool stages followed by three FC layers."""

    def __init__(self):
        # Initialize nn.Module internals before registering submodules.
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)    # 1 input channel -> 6 feature maps, 5x5 kernels
        self.conv2 = nn.Conv2d(6, 16, 5)   # 6 -> 16 feature maps, 5x5 kernels
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    # forward() is mandatory for every nn.Module subclass.
    def forward(self, x):
        """Run the forward pass; returns (batch, 10) class scores."""
        pooled1 = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))   # 2x2 max pooling
        pooled2 = F.max_pool2d(F.relu(self.conv2(pooled1)), 2)  # square window: single int suffices
        flat = pooled2.view(-1, 16 * 5 * 5)                     # flatten feature maps (like reshape)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)