Pytorch---使用Pytorch的预训练模型实现四种天气分类问题

一、代码中的数据集可以通过以下链接获取

百度网盘提取码:lala

二、代码运行环境

Pytorch-gpu==1.7.1
Python==3.7

三、数据集处理代码如下所示

import torchvision
from torchvision import transforms
import os
from torch.utils.data import DataLoader


def loader_data():
    """Build the train/test DataLoaders for the 4-class weather dataset.

    Training images get augmentation (random crop/flip/rotation/color jitter);
    test images are only resized. Both are normalized to roughly [-1, 1].

    Returns:
        (train_dl, test_dl, class_to_idx) where class_to_idx maps each
        class-folder name to its integer label.
    """
    batch_size = 64
    # Same normalization for both splits, so build it once.
    normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                     std=[0.5, 0.5, 0.5])
    augment = [
        transforms.Resize(224),
        transforms.RandomCrop(192),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(0.2),
        transforms.ColorJitter(brightness=0.5),
        transforms.ColorJitter(contrast=0.5),
    ]
    train_tf = transforms.Compose(augment + [transforms.ToTensor(), normalize])
    test_tf = transforms.Compose([
        transforms.Resize((192, 192)),
        transforms.ToTensor(),
        normalize,
    ])
    train_ds = torchvision.datasets.ImageFolder(
        root=os.path.join('dataset', 'train_weather'), transform=train_tf)
    test_ds = torchvision.datasets.ImageFolder(
        root=os.path.join('dataset', 'test_weather'), transform=test_tf)
    train_dl = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)
    test_dl = DataLoader(dataset=test_ds, batch_size=batch_size)
    return train_dl, test_dl, test_ds.class_to_idx

四、模型的构建代码如下所示

import torch
import torchvision


def load_model():
    """Load an ImageNet-pretrained VGG16 adapted for 4-class weather classification.

    The convolutional feature extractor is frozen; only the classifier head
    remains trainable.

    Returns:
        torch.nn.Module: VGG16 whose final layer outputs 4 logits.
    """
    model = torchvision.models.vgg16(pretrained=True)
    for p in model.features.parameters():
        p.requires_grad = False
    # BUG FIX: the original code did ``model.classifier[-1].out_features = 4``,
    # which only rewrites the attribute — the layer's weight matrix and bias
    # stay 1000-way, so the network would still emit 1000 logits. The final
    # Linear layer must be *replaced* to actually change the output size.
    in_f = model.classifier[-1].in_features
    model.classifier[-1] = torch.nn.Linear(in_features=in_f, out_features=4)
    return model


def load_resnet18():
    """Load an ImageNet-pretrained ResNet-18 for 4-class weather classification.

    Every pretrained parameter is frozen; the fully-connected head is replaced
    by a fresh, trainable 4-way Linear layer.

    Returns:
        torch.nn.Module: the adapted ResNet-18.
    """
    net = torchvision.models.resnet18(pretrained=True)
    # Freeze the whole backbone — only the new head below will learn.
    for weight in net.parameters():
        weight.requires_grad = False
    net.fc = torch.nn.Linear(in_features=net.fc.in_features, out_features=4)
    return net

五、模型的训练代码如下所示

import torch
from data_loader import loader_data
from model_loader import load_model, load_resnet18
import numpy as np
import tqdm
import os
from sklearn.metrics import accuracy_score
from torch.optim import lr_scheduler

# Load the train/test DataLoaders and the class-name -> index mapping.
train_dl, test_dl, class_to_idx = loader_data()

# Load the model: frozen pretrained ResNet-18 with a new 4-way head.
model = load_resnet18()

# Training configuration.
# Only the replaced fc head is optimized — the backbone is frozen in load_resnet18().
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.0001)
# Multiply the learning rate by 0.9 every 5 epochs.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.9)
loss_fn = torch.nn.CrossEntropyLoss()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
EPOCHS = 100

# Training loop.
model = model.to(device)
for epoch in range(EPOCHS):
    # --- training phase ---
    model.train()
    train_tqdm = tqdm.tqdm(iterable=train_dl, total=len(train_dl))
    train_tqdm.set_description_str('Train epoch {:2d}'.format(epoch))
    train_accuracy_sum = []  # per-batch accuracies for this epoch
    train_loss_sum = []      # per-batch losses for this epoch
    for images, labels in train_tqdm:
        images, labels = images.to(device), labels.to(device)
        pred = model(images)
        loss = loss_fn(pred, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Show the running mean loss/accuracy on the progress bar.
        # NOTE(review): the unweighted mean over batches slightly over-weights a
        # smaller final batch — acceptable for display purposes.
        train_loss_sum.append(loss.item())
        pred = torch.argmax(input=pred, dim=-1)
        train_accuracy_sum.append(accuracy_score(y_true=labels.cpu().numpy(), y_pred=pred.cpu().numpy()))
        train_tqdm.set_postfix_str(
            'loss is {:14f}, accuracy is {:14f}'.format(np.mean(train_loss_sum), np.mean(train_accuracy_sum)))
    train_tqdm.close()

    # Step the learning-rate schedule once per epoch.
    exp_lr_scheduler.step()

    # --- validation phase (no gradient tracking) ---
    with torch.no_grad():
        model.eval()
        test_tqdm = tqdm.tqdm(iterable=test_dl, total=len(test_dl))
        test_tqdm.set_description_str('Val epoch {:2d}'.format(epoch))
        test_accuracy_sum = []
        test_loss_sum = []
        for images, labels in test_tqdm:
            images, labels = images.to(device), labels.to(device)
            pred = model(images)
            loss = loss_fn(pred, labels)
            # Show the running mean validation loss/accuracy.
            test_loss_sum.append(loss.item())
            pred = torch.argmax(input=pred, dim=-1)
            test_accuracy_sum.append(accuracy_score(y_true=labels.cpu().numpy(), y_pred=pred.cpu().numpy()))
            test_tqdm.set_postfix_str(
                'loss is {:14f}, accuracy is {:14f}'.format(np.mean(test_loss_sum), np.mean(test_accuracy_sum)))
        test_tqdm.close()

# Save the trained weights (state_dict only, not the full module).
if not os.path.exists(os.path.join('model_data')):
    os.mkdir(os.path.join('model_data'))
torch.save(model.state_dict(), os.path.join('model_data', 'model.pth'))

六、模型的预测代码如下所示

import os
import torch
from data_loader import loader_data
from model_loader import load_model, load_resnet18
import matplotlib.pyplot as plt
import matplotlib

# Load the data; only the test loader and the class mapping are used below.
train_dl, test_dl, class_index = loader_data()
image, label = next(iter(test_dl))
# Invert class_to_idx so predicted integer labels map back to class names.
new_class = dict((v, k) for k, v in class_index.items())

# Rebuild the model architecture and load the trained weights.
model = load_resnet18()
model_state_dict = torch.load(os.path.join('model_data', 'model.pth'))
model.load_state_dict(model_state_dict)
model.eval()

# Predict on one batch and visualize the sample at position `index`.
index = 23  # NOTE(review): assumes the first test batch holds >= 24 images — confirm batch size
with torch.no_grad():
    pred = model(image)
    pred = torch.argmax(input=pred, dim=-1)
    # matplotlib.rc("font", family='Microsoft YaHei')
    plt.axis('off')
    plt.title('predict result: ' + new_class.get(pred[index].item()) + ', label result: ' + new_class.get(
        label[index].item()),
              )
    # CHW tensor -> HWC for matplotlib display.
    plt.imshow(image[index].permute(1, 2, 0))
    plt.savefig('result.png')
    plt.show()

七、代码的运行结果如下所示

(运行结果截图:训练/验证进度条与预测可视化图,原文图片未随文本导出)

首先,需要安装pkuseg和pytorch的包: ```bash pip install pkuseg torch ``` 接下来,我们需要加载预训练的pkuseg模型: ```python import pkuseg import torch seg = pkuseg.pkuseg() # 加载默认的模型 ``` 然后,我们可以使用pytorch的API来微调pkuseg模型。这里以微调分词模型为例,首先需要定义模型的结构和训练数据: ```python from torch import nn from torch.utils.data import DataLoader, Dataset class SegDataset(Dataset): def __init__(self, data): self.data = data def __len__(self): return len(self.data) def __getitem__(self, idx): return self.data[idx] class SegModel(nn.Module): def __init__(self, num_labels): super(SegModel, self).__init__() self.bert = pkuseg.pkuseg(model_name='web_bert') self.linear = nn.Linear(768, num_labels) def forward(self, input_ids): output = self.bert(input_ids) output = self.linear(output) return output ``` 在这个例子,我们使用了pkuseg的BERT模型,并在其之上添加了一个线性层作为输出。接下来,我们需要定义训练的过程: ```python def train(model, train_data, num_epochs, batch_size, learning_rate): # 定义损失函数和优化器 criterion = nn.CrossEntropyLoss() optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate) # 将数据划分为batch train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True) # 开始训练 for epoch in range(num_epochs): total_loss = 0 for batch in train_loader: optimizer.zero_grad() input_ids = [model.bert.convert_tokens_to_ids(sent) for sent in batch] input_ids = torch.tensor(input_ids) labels = [model.bert.label_to_id(sent) for sent in batch] labels = torch.tensor(labels) outputs = model(input_ids) loss = criterion(outputs, labels) loss.backward() optimizer.step() total_loss += loss.item() print('Epoch {}/{}: Loss={}'.format(epoch+1, num_epochs, total_loss/len(train_data))) ``` 在训练过程,我们需要将句子转换为BERT模型可以接受的输入格式,并将标签转换为数字。这里使用pytorch的自动求导机制来计算梯度,并使用AdamW优化器来更新模型参数。最后,我们可以使用训练好的模型进行分词: ```python def predict(model, text): seg_list = model.bert(text) return seg_list ``` 完整的代码示例: ```python import pkuseg import torch from torch import nn from torch.utils.data import DataLoader, Dataset class SegDataset(Dataset): def __init__(self, data): self.data = data def __len__(self): 
return len(self.data) def __getitem__(self, idx): return self.data[idx] class SegModel(nn.Module): def __init__(self, num_labels): super(SegModel, self).__init__() self.bert = pkuseg.pkuseg(model_name='web_bert') self.linear = nn.Linear(768, num_labels) def forward(self, input_ids): output = self.bert(input_ids) output = self.linear(output) return output def train(model, train_data, num_epochs, batch_size, learning_rate): # 定义损失函数和优化器 criterion = nn.CrossEntropyLoss() optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate) # 将数据划分为batch train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True) # 开始训练 for epoch in range(num_epochs): total_loss = 0 for batch in train_loader: optimizer.zero_grad() input_ids = [model.bert.convert_tokens_to_ids(sent) for sent in batch] input_ids = torch.tensor(input_ids) labels = [model.bert.label_to_id(sent) for sent in batch] labels = torch.tensor(labels) outputs = model(input_ids) loss = criterion(outputs, labels) loss.backward() optimizer.step() total_loss += loss.item() print('Epoch {}/{}: Loss={}'.format(epoch+1, num_epochs, total_loss/len(train_data))) def predict(model, text): seg_list = model.bert(text) return seg_list # 加载默认的模型 seg = pkuseg.pkuseg() # 测试默认模型 text = '今天天气真好' seg_list = seg.cut(text) print(seg_list) # 微调模型 train_data = SegDataset(['今天天气真好', '我爱北京天安门']) model = SegModel(num_labels=3) train(model, train_data, num_epochs=10, batch_size=2, learning_rate=1e-3) # 测试微调后的模型 text = '今天天气真好' seg_list = predict(model, text) print(seg_list) ```
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

水哥很水

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值