PyTorch Learning Notes 5

Notes compiled from the video source:
CNN-Image-Classification

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
print("PyTorch Version: ",torch.__version__)
'''PyTorch Version:  1.0.0'''

First, we define a simple ConvNet-based neural network.

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4*4*50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 4*4*50)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
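
A quick shape check (my addition, not from the original notes) makes the 4*4*50 flatten size concrete: a 28x28 MNIST image becomes 24x24 after conv1 (5x5 kernel), 12x12 after max-pooling, 8x8 after conv2, and 4x4 after the second pooling, with 50 channels.

dummy = torch.randn(2, 1, 28, 28)   # a fake batch of 2 grayscale 28x28 images
out = Net()(dummy)
print(out.shape)                    # torch.Size([2, 10]): one log-probability per class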

Definition of the NLL loss:

\ell(x, y) = L = \{l_1, \dots, l_N\}^\top, \quad l_n = -w_{y_n}\, x_{n, y_n}, \quad w_c = \text{weight}[c] \cdot \mathbb{1}\{c \neq \text{ignore\_index}\}
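
A small numerical check (my own sketch, not from the original notes): with the default weights (all w_c = 1) and reduction='mean', F.nll_loss is just the average of the negative log-probabilities picked out at the target indices, which is what the formula above says.

logits = torch.randn(3, 10)                     # 3 samples, 10 classes
log_probs = F.log_softmax(logits, dim=1)
target = torch.tensor([1, 0, 4])
manual = -log_probs[torch.arange(3), target].mean()
print(torch.allclose(F.nll_loss(log_probs, target), manual))   # True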

def train(model, device, train_loader, optimizer, epoch, log_interval=100):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print("Train Epoch: {} [{}/{} ({:0f}%)]\tLoss: {:.6f}".format(
                epoch, batch_idx * len(data), len(train_loader.dataset), 
                100. * batch_idx / len(train_loader), loss.item()
            ))
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
torch.manual_seed(53113)

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
batch_size = test_batch_size = 32
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./mnist_data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./mnist_data', train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=test_batch_size, shuffle=True, **kwargs)


lr = 0.01
momentum = 0.5
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

epochs = 2
for epoch in range(1, epochs + 1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

save_model = True
if (save_model):
    torch.save(model.state_dict(),"mnist_cnn.pt")
'''Train Epoch: 1 [0/60000 (0.000000%)]	Loss: 2.297938
Train Epoch: 1 [3200/60000 (5.333333%)]	Loss: 0.567845
Train Epoch: 1 [6400/60000 (10.666667%)]	Loss: 0.206370
Train Epoch: 1 [9600/60000 (16.000000%)]	Loss: 0.094653
Train Epoch: 1 [12800/60000 (21.333333%)]	Loss: 0.180530
Train Epoch: 1 [16000/60000 (26.666667%)]	Loss: 0.041645
Train Epoch: 1 [19200/60000 (32.000000%)]	Loss: 0.135092
Train Epoch: 1 [22400/60000 (37.333333%)]	Loss: 0.054001
Train Epoch: 1 [25600/60000 (42.666667%)]	Loss: 0.111863
Train Epoch: 1 [28800/60000 (48.000000%)]	Loss: 0.059039
Train Epoch: 1 [32000/60000 (53.333333%)]	Loss: 0.089227
Train Epoch: 1 [35200/60000 (58.666667%)]	Loss: 0.186015
Train Epoch: 1 [38400/60000 (64.000000%)]	Loss: 0.093208
Train Epoch: 1 [41600/60000 (69.333333%)]	Loss: 0.077090
Train Epoch: 1 [44800/60000 (74.666667%)]	Loss: 0.038075
Train Epoch: 1 [48000/60000 (80.000000%)]	Loss: 0.036247
Train Epoch: 1 [51200/60000 (85.333333%)]	Loss: 0.052358
Train Epoch: 1 [54400/60000 (90.666667%)]	Loss: 0.013201
Train Epoch: 1 [57600/60000 (96.000000%)]	Loss: 0.036660

Test set: Average loss: 0.0644, Accuracy: 9802/10000 (98%)

Train Epoch: 2 [0/60000 (0.000000%)]	Loss: 0.054402
Train Epoch: 2 [3200/60000 (5.333333%)]	Loss: 0.032239
Train Epoch: 2 [6400/60000 (10.666667%)]	Loss: 0.092350
Train Epoch: 2 [9600/60000 (16.000000%)]	Loss: 0.058544
Train Epoch: 2 [12800/60000 (21.333333%)]	Loss: 0.029762
Train Epoch: 2 [16000/60000 (26.666667%)]	Loss: 0.012521
Train Epoch: 2 [19200/60000 (32.000000%)]	Loss: 0.101891
Train Epoch: 2 [22400/60000 (37.333333%)]	Loss: 0.127773
Train Epoch: 2 [25600/60000 (42.666667%)]	Loss: 0.009259
Train Epoch: 2 [28800/60000 (48.000000%)]	Loss: 0.013482
Train Epoch: 2 [32000/60000 (53.333333%)]	Loss: 0.039676
Train Epoch: 2 [35200/60000 (58.666667%)]	Loss: 0.016707
Train Epoch: 2 [38400/60000 (64.000000%)]	Loss: 0.168691
Train Epoch: 2 [41600/60000 (69.333333%)]	Loss: 0.056318
Train Epoch: 2 [44800/60000 (74.666667%)]	Loss: 0.008174
Train Epoch: 2 [48000/60000 (80.000000%)]	Loss: 0.075149
Train Epoch: 2 [51200/60000 (85.333333%)]	Loss: 0.205798
Train Epoch: 2 [54400/60000 (90.666667%)]	Loss: 0.019762
Train Epoch: 2 [57600/60000 (96.000000%)]	Loss: 0.012056

Test set: Average loss: 0.0464, Accuracy: 9850/10000 (98%)'''
torch.manual_seed(53113)

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
batch_size = test_batch_size = 32
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.FashionMNIST('./fashion_mnist_data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.FashionMNIST('./fashion_mnist_data', train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=test_batch_size, shuffle=True, **kwargs)


lr = 0.01
momentum = 0.5
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

epochs = 2
for epoch in range(1, epochs + 1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

save_model = True
if (save_model):
    torch.save(model.state_dict(),"fashion_mnist_cnn.pt")

Transfer learning with CNN models

  1. Very often, when we need to train a new image classification task, we do not start from a randomly initialized model; instead we use a pretrained model to speed up training. Models pretrained on ImageNet are the usual choice.

  2. This is a form of transfer learning. Two approaches are commonly used:
    2.1 Fine-tuning: start from a pretrained model, modify part of its architecture, and then continue training all of the model's parameters.
    2.2 Feature extraction: keep the pretrained parameters frozen and only update the parameters of the layers we replaced. It is called feature extraction because the pretrained CNN is treated as a fixed feature extractor, and only the extracted features are used for our task.
    The basic steps for building and training a transfer-learning model are listed below (a minimal sketch follows this list):

  3. Initialize the pretrained model

  4. Replace the final output layer so that it outputs the number of classes we want

  5. Define an optimizer to update the parameters

  6. Train the model
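
A minimal sketch of the difference between the two approaches (my addition, using torchvision's resnet18; the tutorial's own version of this logic appears in set_parameter_requires_grad and initialize_model below). For feature extraction every pretrained parameter is frozen and only the newly created head remains trainable; for fine-tuning we would simply skip the freezing loop.

import torch.nn as nn
from torchvision import models

m = models.resnet18(pretrained=True)
for p in m.parameters():                 # feature extraction: freeze the pretrained backbone
    p.requires_grad = False
m.fc = nn.Linear(m.fc.in_features, 2)    # replace the head; its new parameters are trainable by default

print([n for n, p in m.named_parameters() if p.requires_grad])   # ['fc.weight', 'fc.bias']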

import numpy as np
import torchvision
from torchvision import datasets, transforms, models

import matplotlib.pyplot as plt
import time
import os
import copy
print("Torchvision Version: ",torchvision.__version__)
'''Torchvision Version:  0.2.0'''

Data
We will use the hymenoptera_data dataset.

This dataset contains two classes of images, bees and ants, already arranged in a format that can be read with ImageFolder (https://pytorch.org/docs/stable/torchvision/datasets.html#torchvision.datasets.ImageFolder). We only need to point data_dir at the data root directory and set model_name to the pretrained model we want to use: [resnet, alexnet, vgg, squeezenet, densenet, inception]

The other parameters are:

  1. num_classes is the number of classes in the dataset
  2. batch_size
  3. num_epochs
  4. feature_extract indicates whether we train with fine-tuning or feature extraction. If feature_extract = False, the whole model is updated; if feature_extract = True, only the last layer of the model is updated.
# Top level data directory. Here we assume the format of the directory conforms 
#   to the ImageFolder structure
data_dir = "./hymenoptera_data"
# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]
model_name = "resnet"
# Number of classes in the dataset
num_classes = 2
# Batch size for training (change depending on how much memory you have)
batch_size = 32
# Number of epochs to train for 
num_epochs = 15
# Flag for feature extracting. When False, we finetune the whole model, 
#   when True we only update the reshaped layer params
feature_extract = True
def train_model(model, dataloaders, criterion, optimizer, num_epochs=5):
    since = time.time()
    val_acc_history = []
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.
    for epoch in range(num_epochs):
        print("Epoch {}/{}".format(epoch, num_epochs-1))
        print("-"*10)
        
        for phase in ["train", "val"]:
            running_loss = 0.
            running_corrects = 0.
            if phase == "train":
                model.train()
            else: 
                model.eval()
            
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                with torch.autograd.set_grad_enabled(phase=="train"):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    
                _, preds = torch.max(outputs, 1)
                if phase == "train":
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds.view(-1) == labels.view(-1)).item()
            
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects / len(dataloaders[phase].dataset)
       
            print("{} Loss: {} Acc: {}".format(phase, epoch_loss, epoch_acc))
            if phase == "val" and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == "val":
                val_acc_history.append(epoch_acc)
            
        print()
    
    time_elapsed = time.time() - since
    print("Training compete in {}m {}s".format(time_elapsed // 60, time_elapsed % 60))
    print("Best val Acc: {}".format(best_acc))
    
    model.load_state_dict(best_model_wts)
    return model, val_acc_history
# it = iter(dataloaders_dict["train"])
# inputs, labels = next(it)
# for inputs, labels in dataloaders_dict["train"]:
#     print(labels.size())

# Note: dataloaders_dict is only created in the data-loading cell further below;
# these two checks were run after that cell and both report the training-set size.
len(dataloaders_dict["train"].dataset.imgs)
#244
len(dataloaders_dict["train"].dataset)
#244
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    if model_name == "resnet":
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224
    else:
        # only "resnet" is handled in these notes; any other name would leave model_ft undefined
        raise ValueError("unsupported model_name: {}".format(model_name))

    return model_ft, input_size
model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)
print(model_ft)

Loading the data
Now that we know the model's input size, we can preprocess the data into the matching format.

data_transforms = {
    "train": transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    "val": transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

print("Initializing Datasets and Dataloaders...")

# Create training and validation datasets
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val']}
# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True, num_workers=4) for x in ['train', 'val']}

# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

'''Initializing Datasets and Dataloaders...'''

# Send the model to GPU
model_ft = model_ft.to(device)

# Gather the parameters to be optimized/updated in this run. If we are
#  finetuning we will be updating all parameters. However, if we are 
#  doing feature extract method, we will only update the parameters
#  that we have just initialized, i.e. the parameters with requires_grad
#  is True.
params_to_update = model_ft.parameters()
print("Params to learn:")
if feature_extract:
    params_to_update = []
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)
            print("\t",name)
else:
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            print("\t",name)

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
'''Params to learn:
	 fc.weight
	 fc.bias'''
# Setup the loss fxn
criterion = nn.CrossEntropyLoss()

# Train and evaluate
model_ft, ohist = train_model(model_ft, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs)
'''train Loss / Acc and val Loss / Acc are printed for each epoch (output omitted)'''
# Initialize the non-pretrained version of the model used for this run
scratch_model,_ = initialize_model(model_name, num_classes, feature_extract=False, use_pretrained=False)
scratch_model = scratch_model.to(device)
scratch_optimizer = optim.SGD(scratch_model.parameters(), lr=0.001, momentum=0.9)
scratch_criterion = nn.CrossEntropyLoss()
_,scratch_hist = train_model(scratch_model, dataloaders_dict, scratch_criterion, scratch_optimizer, num_epochs=num_epochs)
'''train Loss / Acc and val Loss / Acc are printed for each epoch (output omitted)'''
# Plot the training curves of validation accuracy vs. number 
#  of training epochs for the transfer learning method and
#  the model trained from scratch
# ohist = []
# shist = []

# ohist = [h.cpu().numpy() for h in ohist]
# shist = [h.cpu().numpy() for h in scratch_hist]

plt.title("Validation Accuracy vs. Number of Training Epochs")
plt.xlabel("Training Epochs")
plt.ylabel("Validation Accuracy")
plt.plot(range(1,num_epochs+1),ohist,label="Pretrained")
plt.plot(range(1,num_epochs+1),scratch_hist,label="Scratch")
plt.ylim((0,1.))
plt.xticks(np.arange(1, num_epochs+1, 1.0))
plt.legend()
plt.show()

[Figure: validation accuracy vs. number of training epochs for the pretrained and from-scratch models]

Sentiment Analysis

Step 1: Load the IMDb movie review dataset, which comes with only a training set and a test set

  1. An important concept in TorchText is the Field. A Field determines how your data will be processed. In our sentiment classification task, the data consists of the raw text strings and two sentiment labels, "pos" or "neg".
  2. The arguments of a Field specify how the data will be processed.
  3. We use the TEXT field to define how the movie reviews are processed, and the LABEL field to handle the two sentiment classes.
  4. Our TEXT field is created with tokenize='spacy', which means the spaCy tokenizer will be used to tokenize the English sentences (see the short demo after this list). If we do not specify the tokenize argument, the default is to split on whitespace.
  5. Install spaCy:
    pip install -U spacy
    python -m spacy download en
  6. LABEL is defined with LabelField, a special kind of Field for handling labels. We will explain dtype later.
  7. For more about Fields, see https://github.com/pytorch/text/blob/master/torchtext/data/field.py
  8. As before, we set random seeds so that the experiments are reproducible.
  9. TorchText supports many common natural language processing datasets.
  10. The code below automatically downloads the IMDb dataset and splits it into train/test torchtext.datasets objects. The data is processed with the Fields defined above. The IMDb dataset contains 50,000 movie reviews, each labeled positive or negative.
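
A short illustration (my addition, assuming the spaCy 'en' model from step 5 is installed) of why tokenize='spacy' matters: spaCy separates punctuation and contractions, while the default tokenizer only splits the string on whitespace.

import spacy
nlp = spacy.load('en')

sentence = "This film isn't great, is it?"
print(sentence.split())
# ['This', 'film', "isn't", 'great,', 'is', 'it?']
print([tok.text for tok in nlp.tokenizer(sentence)])
# ['This', 'film', 'is', "n't", 'great', ',', 'is', 'it', '?']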
import torch
from torchtext import data

SEED = 1234

torch.manual_seed(SEED) # set the random seed for the CPU
torch.cuda.manual_seed(SEED) # set the random seed for the GPU
torch.backends.cudnn.deterministic = True  # make cuDNN deterministic so that results are reproducible

TEXT = data.Field(tokenize='spacy') # torchtext.data.Field: defines how a field (text field, label field) is processed
LABEL = data.LabelField(dtype=torch.float)
# First we create two Field objects; they describe how we intend to preprocess the text data.
# spaCy: an English tokenizer, similar to NLTK. If no tokenize argument is passed, the default simply splits the string on whitespace.
# LabelField is a special subclass of Field used specifically for handling labels.

from torchtext import datasets
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
print(vars(train_data.examples[0])) # inspect what one example looks like

Step 2: Split the training set into training and validation sets

  1. Since we currently only have train/test splits, we need to create a new validation set. We can do this with .split().
  2. By default the split is 70/30; passing split_ratio changes this ratio, e.g. split_ratio=0.8 means 80% of the data goes to the training set and 20% to the validation set.
  3. We also pass random_state so that we get the same split every time.
import random
train_data, valid_data = train_data.split(random_state=random.seed(SEED)) # default split_ratio=0.7
print(f'Number of training examples: {len(train_data)}')
print(f'Number of validation examples: {len(valid_data)}')
print(f'Number of testing examples: {len(test_data)}')

Step 3: Build the vocabulary from the training set, i.e. map each word to an index.

  1. Next we need to create the vocabulary. The vocabulary maps each word to an index.
  2. We build the vocabulary from the 25k most frequent words; the max_size argument does this.
  3. All other words are represented by the <unk> token.
# TEXT.build_vocab(train_data, max_size=25000)
# LABEL.build_vocab(train_data)
TEXT.build_vocab(train_data, max_size=25000, vectors="glove.6B.100d", unk_init=torch.Tensor.normal_)
#Extract the vectors for the words in the current corpus vocabulary from the pretrained word vectors (vectors), building the Vocab for this corpus.
#The pretrained vectors come from a GloVe model, 100 dimensions per word. The GloVe vectors were trained on a very large corpus,
#while our movie-review corpus is smaller, so the word vectors will still be updated; the GloVe vectors are a good initialization.
LABEL.build_vocab(train_data)
print(f"Unique tokens in TEXT vocabulary: {len(TEXT.vocab)}")
print(f"Unique tokens in LABEL vocabulary: {len(LABEL.vocab)}")
print(TEXT.vocab.freqs.most_common(20))
print(TEXT.vocab.stoi) # the more frequent a word is in the corpus, the smaller its index; the first two entries default to <unk> and <pad>.
print(TEXT.vocab.itos[:10]) # inspect the TEXT vocabulary

Step 4: Create the iterators; each iteration returns one batch of examples.

  1. The last step of data preparation is to create the iterators. Each iteration returns a batch of examples.
  2. We will use BucketIterator, which groups sentences of similar length into the same batch so that each batch contains as little padding as possible.
  3. Strictly speaking, the model code in this notebook has one issue: the <pad> token is also fed to the model as input during training. A better approach would be to mask out, inside the model, the outputs produced by <pad>. In this lesson we keep things simple and use <pad> as input as well; since there are not many of them, the model still performs reasonably.
  4. If we have a GPU, we can also make each iteration return tensors that already live on the GPU.
BATCH_SIZE = 64

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#This splits the examples into batches, with one extra step: examples of similar length are put into the same batch as far as possible, and shorter ones are padded.
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_size=BATCH_SIZE,
    device=device)
next(iter(train_iterator)).label 
next(iter(train_iterator)).text 
next(iter(train_iterator)) # running this more than once shows that the review length (sent len) changes from batch to batch
next(iter(train_iterator)).text 

Step 5: Build the Word Averaging model
The Word Averaging model

  1. We start with a simple Word Averaging model. The model is very simple: each word is projected into a word embedding vector by the Embedding layer, and the word vectors of a sentence are averaged to give the vector representation of the whole sentence. This sentence vector is then passed through a Linear layer to do the classification.

  2. We use avg_pool2d to do the average pooling. The goal is to average the sentence-length dimension down to 1 while keeping the embedding dimension.

  3. The kernel size of avg_pool2d is (embedded.shape[1], 1), so the sentence-length dimension gets collapsed (a small shape demo follows).
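
A standalone shape demo (my addition, not from the original notes) of the pooling trick described in points 2 and 3: with kernel size (sent len, 1), avg_pool2d averages over the sentence-length dimension and leaves the embedding dimension untouched.

import torch
import torch.nn.functional as F

fake = torch.randn(64, 1000, 100)                    # [batch size, sent len, emb dim]
pooled = F.avg_pool2d(fake, (fake.shape[1], 1)).squeeze(1)
print(pooled.shape)                                  # torch.Size([64, 100])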

import torch.nn as nn
import torch.nn.functional as F

class WordAVGModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, output_dim, pad_idx):
        # initialize the parameters
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
        # vocab_size = size of the vocabulary, embedding_dim = dimensionality of each word vector
        # padding_idx: entries with this index are output as zero vectors, so <pad> tokens are embedded as zeros.
        
        self.fc = nn.Linear(embedding_dim, output_dim)
        # output_dim is the output dimension; a single number (=1) is enough here
        
    def forward(self, text):
        embedded = self.embedding(text) 
        # text is one batch of data (passed in by the train function below)
        # embedded = [sent len, batch size, emb dim] 
        # sent len: number of words in a review
        # batch size: number of reviews in a batch
        # emb dim: dimensionality of a word vector
        # e.g. [sent len, batch size, emb dim] = (1000, 64, 100)
        # conceptually this is like multiplying a one-hot encoding of text, (1000, 64, 25000), by the (25000, 100) embedding matrix,
        # giving (1000, 64, 100); in practice it is just an index lookup.
        
        embedded = embedded.permute(1, 0, 2) 
        # [batch size, sent len, emb dim] after swapping the first two dimensions
        
        pooled = F.avg_pool2d(embedded, (embedded.shape[1], 1)).squeeze(1) 
        # [batch size, embedding_dim]: the sentence-length dimension is averaged down to 1 and squeezed away
        
        return self.fc(pooled)  
        # (batch size, embedding_dim) x (embedding_dim, output_dim) = (batch size, output_dim)
INPUT_DIM = len(TEXT.vocab) #25002
EMBEDDING_DIM = 100
OUTPUT_DIM = 1
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token] 
#PAD_IDX = 1, the index of <pad>

model = WordAVGModel(INPUT_DIM, EMBEDDING_DIM, OUTPUT_DIM, PAD_IDX)
def count_parameters(model): # count the trainable parameters
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

Step 6: Initialize the parameters

pretrained_embeddings = TEXT.vocab.vectors 
model.embedding.weight.data.copy_(pretrained_embeddings) # methods ending in _ modify the tensor in place, no extra assignment needed
#use the vectors="glove.6B.100d" word vectors loaded above as the initial embedding weights: 25000*100 parameters
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token] #UNK_IDX=0

model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
#the vocabulary has 25002 words; the first two, <unk> and <pad>, also need to be initialized (here with zero vectors)

Step 7: Train the model

import torch.optim as optim

optimizer = optim.Adam(model.parameters()) # define the optimizer
criterion = nn.BCEWithLogitsLoss()  # define the loss function; BCEWithLogitsLoss is the binary-classification loss (sigmoid + binary cross-entropy)
model = model.to(device) # move the model to the GPU (if available)
criterion = criterion.to(device) # move the loss function to the GPU (if available)

Computing the prediction accuracy

def binary_accuracy(preds, y): # compute the accuracy
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """

    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    #torch.round rounds to the nearest integer
    
    correct = (rounded_preds == y).float() #convert into float for division 
    acc = correct.sum()/len(correct)
    return acc
def train(model, iterator, optimizer, criterion):
    epoch_loss = 0
    epoch_acc = 0
    total_len = 0
    model.train() # put the model in training mode
    # This step matters because it distinguishes training from evaluation:
    # layers such as dropout and batch normalization behave differently at training and test time.
    
    for batch in iterator: # iterator is train_iterator
        optimizer.zero_grad() # clear the gradients so they do not accumulate across batches
        
        predictions = model(batch.text).squeeze(1)
        # batch.text is the text argument of the forward function above;
        # squeeze the extra dimension so the shape matches batch.label
        
        loss = criterion(predictions, batch.label)
        acc = binary_accuracy(predictions, batch.label)
        
        loss.backward() # backpropagation
        optimizer.step() # gradient descent step
        
        epoch_loss += loss.item() * len(batch.label)
        # loss.item() is already the mean over the batch,
        # so multiply by the batch size to get the batch total, then accumulate over all samples.
        
        epoch_acc += acc.item() * len(batch.label)
        # (acc.item(): accuracy of one batch) * batch size = number of correct predictions,
        # accumulated over all batches of train_iterator.
        
        total_len += len(batch.label)
        # total number of examples in train_iterator; this should come out to 17500
        
    return epoch_loss / total_len, epoch_acc / total_len
    # epoch_loss / total_len: average loss over all of train_iterator
    # epoch_acc / total_len: accuracy over all of train_iterator
def evaluate(model, iterator, criterion):
    epoch_loss = 0
    epoch_acc = 0
    total_len = 0
    
    model.eval()
    # switch to evaluation mode, which freezes dropout and similar layers.
    
    with torch.no_grad():
        for batch in iterator: 
            # iterator is valid_iterator
            
            # no backpropagation and no gradient descent here
            predictions = model(batch.text).squeeze(1)
            loss = criterion(predictions, batch.label)
            acc = binary_accuracy(predictions, batch.label)
            
            epoch_loss += loss.item() * len(batch.label)
            epoch_acc += acc.item() * len(batch.label)
            total_len += len(batch.label)
    model.train() # switch back to training mode
    
    return epoch_loss / total_len, epoch_acc / total_len
import time 

def epoch_time(start_time, end_time):  # time spent on each epoch
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

Step 8: Run the model and check the results

N_EPOCHS = 10

best_valid_loss = float('inf') # positive infinity

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss: # save the model whenever the validation loss improves
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'wordavg-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

Step 9: Make predictions

model.load_state_dict(torch.load("wordavg-model.pt"))
#use the saved model parameters to make predictions
import spacy  # tokenizer, similar to NLTK
nlp = spacy.load('en')

def predict_sentiment(sentence):
    tokenized = [tok.text for tok in nlp.tokenizer(sentence)] # tokenize
    indexed = [TEXT.vocab.stoi[t] for t in tokenized] 
    # indices of the words in the sentence
    
    tensor = torch.LongTensor(indexed).to(device) # seq_len
    tensor = tensor.unsqueeze(1) 
    # seq_len * batch_size(1)
    
    prediction = torch.sigmoid(model(tensor))
    # this tensor plays the same role as text in forward
    
    return prediction.item()

predict_sentiment("I love This film bad ")
predict_sentiment("This film is great")

The RNN model

  1. Next we try replacing the model with a recurrent neural network (RNN). RNNs are often used to encode a sequence:
    h_t = \mathrm{RNN}(x_t, h_{t-1})

  2. We use the last hidden state h_T to represent the whole sentence.

  3. We then pass h_T through a linear transformation f and use the result to predict the sentiment of the sentence (see the shape check after this list).
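
A small shape check (my own sketch, not part of the original notes) for the 2-layer bidirectional LSTM used below: hidden has shape [num layers * num directions, batch size, hid dim], and hidden[-2] / hidden[-1] are the final forward and backward states of the top layer, which get concatenated before the linear layer.

import torch
import torch.nn as nn

rnn = nn.LSTM(input_size=100, hidden_size=256, num_layers=2, bidirectional=True)
x = torch.randn(50, 8, 100)                      # [sent len, batch size, emb dim]
output, (hidden, cell) = rnn(x)

print(output.shape)                              # torch.Size([50, 8, 512]): hid dim * 2 directions
print(hidden.shape)                              # torch.Size([4, 8, 256]): 2 layers * 2 directions
sent_vec = torch.cat((hidden[-2], hidden[-1]), dim=1)
print(sent_vec.shape)                            # torch.Size([8, 512])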

class RNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, 
                 n_layers, bidirectional, dropout, pad_idx):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers, 
                           bidirectional=bidirectional, dropout=dropout)
        #embedding_dim: dimensionality of each word vector
        #hidden_dim: dimensionality of the hidden state
        #num_layers: number of stacked LSTM layers (the "vertical" depth of the network)
        #bidirectional: whether the RNN is bidirectional
        #it helps to understand the LSTM tensor dimensions first, otherwise this is easy to get confused by
        
        
        self.fc = nn.Linear(hidden_dim*2, output_dim)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, text):
        embedded = self.dropout(self.embedding(text)) #[sent len, batch size, emb dim]
        output, (hidden, cell) = self.rnn(embedded)
        #output = [sent len, batch size, hid dim * num directions]
        #hidden = [num layers * num directions, batch size, hid dim]
        #cell = [num layers * num directions, batch size, hid dim]
        
        #concat the final forward (hidden[-2,:,:]) and backward (hidden[-1,:,:]) hidden layers
        #and apply dropout
        hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1)) # [batch size, hid dim * num directions]
        return self.fc(hidden.squeeze(0))
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, 
            N_LAYERS, BIDIRECTIONAL, DROPOUT, PAD_IDX)
print(f'The model has {count_parameters(model):,} trainable parameters')
model.embedding.weight.data.copy_(pretrained_embeddings)
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

print(model.embedding.weight.data)

Training the RNN model

optimizer = optim.Adam(model.parameters())
model = model.to(device)
N_EPOCHS = 5
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
    start_time = time.time()
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'lstm-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')
model.load_state_dict(torch.load('lstm-model.pt'))
test_loss, test_acc = evaluate(model, test_iterator, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

The CNN model

class CNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, n_filters, 
                 filter_sizes, output_dim, dropout, pad_idx):
        super().__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
        self.convs = nn.ModuleList([
                                    nn.Conv2d(in_channels = 1, out_channels = n_filters, 
                                              kernel_size = (fs, embedding_dim)) 
                                    for fs in filter_sizes
                                    ])
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, text):
        text = text.permute(1, 0) # [batch size, sent len]
        embedded = self.embedding(text) # [batch size, sent len, emb dim]
        embedded = embedded.unsqueeze(1) # [batch size, 1, sent len, emb dim]
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
            
        #conv_n = [batch size, n_filters, sent len - filter_sizes[n]]
        
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        
        #pooled_n = [batch size, n_filters]
        
        cat = self.dropout(torch.cat(pooled, dim=1))

        #cat = [batch size, n_filters * len(filter_sizes)]
            
        return self.fc(cat)
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
N_FILTERS = 100
FILTER_SIZES = [3,4,5]
OUTPUT_DIM = 1
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]


model = CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)
model.embedding.weight.data.copy_(pretrained_embeddings)
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
model = model.to(device)
optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()
criterion = criterion.to(device)

N_EPOCHS = 5

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'CNN-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')
model.load_state_dict(torch.load('CNN-model.pt'))
test_loss, test_acc = evaluate(model, test_iterator, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')