PyTorch Transfer Learning + ResNet50 for 12-Class Cat Classification

I previously wrote an article on the 12-class cat classification task that laid out the overall pipeline, but the actual results were poor. This article fine-tunes a pretrained model instead, which raises the accuracy from 0.3 to 0.93. The overall workflow follows the earlier ResNet cat-classification article; only the parts that differ are given here.

The code structure borrows from, and learns from, a nicely written ResNet example (see the original article).

Transfer Learning

There are two ways to do transfer learning:

  1. Fine-tuning: download a model that has already been trained, fine-tune it on the local dataset, and update all of its parameters.
  2. Feature extraction: use the pretrained network as a fixed feature extractor, attach a new fully connected layer, and train only that layer's parameters (see the sketch after this list).
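
This article uses the first approach, so every layer is updated during training. For reference, here is a minimal sketch of the second approach under the same 12-class setup (model_fe and optimizer_fe are illustrative names, not part of the original code): freeze all pretrained parameters and optimize only the new fully connected layer.

# feature-extractor variant (sketch): freeze the backbone, train only the new fc layer
import torch
import torch.nn as nn
from torchvision import models

model_fe = models.resnet50(pretrained=True)
for param in model_fe.parameters():
    param.requires_grad = False          # freeze every pretrained weight

num_ftrs = model_fe.fc.in_features
model_fe.fc = nn.Linear(num_ftrs, 12)    # new layer; its parameters require grad by default

# only the fc parameters are handed to the optimizer
optimizer_fe = torch.optim.SGD(model_fe.fc.parameters(), lr=0.001, momentum=0.9)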

1. Data

  • Dataset definition
# Dataset
import os
import cv2
import torch
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
import numpy as np
import random
from PIL import Image


class myData(Dataset):
    def __init__(self, kind):
        super(myData, self).__init__()
        self.mode = kind
        # note: Resize is applied after ToTensor, so it runs on tensors, which
        # requires a torchvision version that accepts tensor inputs for Resize
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize((224, 224)),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # scale pixels to roughly [-1, 1]
        ])

        if kind == 'test':
            self.imgs = self.load_origin_data()

        elif kind == 'train':
            self.imgs, self.labels = self.load_origin_data()
            # self.imgs, self.labels = self.enlarge_dataset(kind, self.imgs, self.labels, 0.5)
            print('train size:')
            print(len(self.imgs))

        else:
            self.imgs, self.labels = self.load_origin_data()

    def __getitem__(self, index):
        if self.mode == 'test':
            sample = self.transform(self.imgs[index])
            return sample
        else:
            sample = self.transform(self.imgs[index])
            return sample, torch.tensor(self.labels[index])

    def __len__(self):
        return len(self.imgs)

    def load_origin_data(self):
        filelist = './data/%s_split_list.txt' % self.mode

        imgs = []
        labels = []
        data_dir = os.getcwd()
        if self.mode == 'train' or self.mode == 'val':
            with open(filelist) as flist:
                lines = [line.strip() for line in flist]
                if self.mode == 'train':
                    np.random.shuffle(lines)
                for line in lines:
                    img_path, label = line.split('&')
                    img_path = os.path.join(data_dir, img_path)
                    try:
                        # read the file as a raw uint8 byte buffer (works for non-ASCII paths),
                        # decode with OpenCV (BGR) and convert to RGB before wrapping in PIL
                        img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), 1)
                        img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                        imgs.append(img)
                        labels.append(int(label))
                    except Exception:
                        print(img_path)
                        continue
                return imgs, labels
        elif self.mode == 'test':
            full_lines = os.listdir('data/test/')
            lines = [line.strip() for line in full_lines]
            for img_path in lines:
                img_path = os.path.join(data_dir, "data/test/", img_path)
                img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), 1)
                img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                imgs.append(img)
            return imgs


    def load_data(self, mode, shuffle, color_jitter, rotate):
        '''
        Legacy loader kept from the earlier article; it relies on a process_image()
        helper that is not defined in this file and is not used in this version.
        :return : img, label
        img: (channel, w, h)
        '''
        filelist = './data/%s_split_list.txt' % mode

        imgs = []
        labels = []
        data_dir = os.getcwd()
        if mode == 'train' or mode == 'val':
            with open(filelist) as flist:
                lines = [line.strip() for line in flist]
                if shuffle:
                    np.random.shuffle(lines)

                for line in lines:
                    img_path, label = line.split('&')
                    img_path = os.path.join(data_dir, img_path)
                    try:
                        img, label = process_image((img_path, label), mode, color_jitter, rotate)
                        imgs.append(img)
                        labels.append(label)
                    except Exception:
                        continue
                return imgs, labels

        elif mode == 'test':
            full_lines = os.listdir('data/test/')
            lines = [line.strip() for line in full_lines]
            for img_path in lines:
                img_path = os.path.join(data_dir, "data/test/", img_path)
                img = process_image((img_path, 0), mode, color_jitter, rotate)
                imgs.append(img)
            return imgs

# dataset
# img_datasets = {x: myData(x) for x in ['train', 'val']}
# dataset_sizes = {x: len(img_datasets[x]) for x in ['train', 'val']}
# test_datasets = {'test': myData('test')}
# test_size = {'test': len(test_datasets)}


First, there are a few changes to the data part: train, val and test now all go through the same Dataset class.

img_datasets = {x: myData(x) for x in ['train', 'val', 'test']}
dataset_sizes = {x: len(img_datasets[x]) for x in ['train', 'val', 'test']}

# the datasets are ready; now build the dataloaders
train_loader = DataLoader(
    dataset=img_datasets['train'],
    batch_size=batches,   # `batches` is the batch size, defined elsewhere in the full script
    shuffle=True
)

val_loader = DataLoader(
    dataset=img_datasets['val'],
    batch_size=1,
    shuffle=False
)

test_loader = DataLoader(
    dataset=img_datasets['test'],
    batch_size=1,
    shuffle=False
)

dataloaders = {
    'train': train_loader,
    'val': val_loader,
    'test': test_loader
}
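
As a quick sanity check (a minimal illustrative sketch, not part of the original script; images and labels are illustrative names), one batch from the train loader should come out with shape (batches, 3, 224, 224) for the images and (batches,) for the labels:

# sanity check: inspect one batch from the train loader (illustrative only)
images, labels = next(iter(dataloaders['train']))
print(images.shape)   # torch.Size([batches, 3, 224, 224])
print(labels.shape)   # torch.Size([batches])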

2. Training

Tricks worth noting:

  1. Keep a copy of the best model weights during training: in each epoch, update the copy whenever the validation accuracy improves.
  2. Each epoch contains both a training phase and a validation phase; note how this is written.
# train
import copy

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):   # arguments: model, loss criterion, optimizer, learning-rate schedule, number of epochs

    best_model_wts = copy.deepcopy(model.state_dict())    # deep-copy the current weights (wts = weights); replaced whenever a better model appears
    best_acc = 0.0                                        # best accuracy so far, used to decide whether to replace best_model_wts

    for epoch in range(num_epochs):      # loop over epochs
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))

        for phase in ['train', 'val']:   # each epoch contains a training phase and a validation phase
            if phase == 'train':
                model.train()
            else:
                model.eval()
                # unlike training, evaluation disables dropout and uses the running statistics
                # for batch normalization, so the two modes must be distinguished

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):   # context manager that enables gradient tracking only in the training phase
                    outputs = model(inputs)            # forward pass: compute the outputs from the inputs
                    _, preds = torch.max(outputs, 1)   # the index of the maximum along dimension 1 is the prediction
                    loss = criterion(outputs, labels)  # compare the outputs with the labels

                    if phase == 'train':
                        loss.backward()     # backpropagate to compute the update for every weight and bias
                        optimizer.step()    # apply the updates to the model

                # same bookkeeping for train and val
                running_loss += loss.item() * inputs.size(0)         # accumulate the loss over all batches in this epoch
                running_corrects += torch.sum(preds == labels.data)  # count the correctly classified samples
            if phase == 'train':    # after all training batches of this epoch, adjust the learning rate
                scheduler.step()    # step the learning-rate schedule

            epoch_loss = running_loss / dataset_sizes[phase]               # epoch loss = total loss divided by the number of samples
            epoch_acc = running_corrects.double() / dataset_sizes[phase]   # epoch accuracy

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(         # print the phase, loss and accuracy
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:                 # in the val phase, if this epoch's accuracy beats the best so far
                best_acc = epoch_acc                                    # record the new best accuracy
                best_model_wts = copy.deepcopy(model.state_dict())      # and keep a copy of the current weights

    print('Best val Acc: {:4f}'.format(best_acc))

    # load the best weights back into the model
    model.load_state_dict(best_model_wts)
    return model

3. Transfer Learning

All parameters in every layer of the model are trained on the target domain.

Load the pretrained model with pretrained=True, change the output dimension of the fully connected layer, and then train the whole residual network.

# transfer learning: fine-tune all layers
import torch.nn as nn
from torch.optim import lr_scheduler

model_ft = models.resnet50(pretrained=True)     # load the ImageNet-pretrained ResNet50
num_ftrs = model_ft.fc.in_features              # input dimension of the original fc layer
model_ft.fc = nn.Linear(num_ftrs, 12)           # replace the fc layer with a 12-class output
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

optimizer_ft = torch.optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)   # all parameters are optimized

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.1)    # decay the lr by 10x every 5 epochs

model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=15)    # train the model

4. Saving the Model

Nothing special here: evaluate the fine-tuned model on the validation set, then save its state dict with the accuracy encoded in the filename.

def vali(M, dataset):
    M.eval()
    with torch.no_grad():
        correct = 0
        for (data, target) in val_loader:   # note: this always iterates val_loader; `dataset` is only used for its length
            data, target = data.to(device), target.to(device)

            pred = M(data)
            _, id = torch.max(pred, 1)
            correct += torch.sum(id == target.data)
        print("test accu: %.03f%%" % (100 * correct / len(dataset)))   # this is in fact the validation accuracy
    return (100 * correct / len(dataset)).item()

test_accu = int(vali(model_ft, img_datasets['val']) * 100)   # e.g. 93.43 becomes 9343, used in the filename

model_name = 'val_{}.pkl'.format(test_accu)

os.makedirs("./myModels", exist_ok=True)   # make sure the save directory exists
torch.save(model_ft.state_dict(), os.path.join("./myModels", model_name))

5. Loading the Model

# load the model
model_ft = models.resnet50(pretrained=True)   # the pretrained weights will be overwritten by the loaded state dict
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 12)   # remember to change the output dimension to 12
model_ft = model_ft.to(device)          # remember to move the model to the GPU so it matches the saved parameters

model_ft.load_state_dict(torch.load("./myModels/val_9343.pkl"))
vali(model_ft, img_datasets['val'])

Output:

test accu: 93.433%
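
The test_loader built earlier is not used in the snippets above. A minimal sketch of how it could be used for inference looks like the following (the predictions list is illustrative; everything else comes from the code above, with the test loader's batch size of 1):

# inference on the unlabeled test set (illustrative sketch)
model_ft.eval()
predictions = []
with torch.no_grad():
    for data in test_loader:          # the test dataset yields images only, no labels
        data = data.to(device)
        output = model_ft(data)
        _, pred = torch.max(output, 1)
        predictions.append(pred.item())   # batch_size is 1 for the test loader
print(len(predictions))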