ResNet18+图像二分类+pytorch

LINL631

已于 2023-07-26 15:39:55 修改

阅读量1.6k

点赞数 2

文章标签：分类 pytorch python 深度学习 cnn

于 2023-07-26 15:23:39 首次发布

本文链接：https://blog.csdn.net/L_IN_L/article/details/131939035

版权

ResNet18+图像二分类+pytorch

项目背景

最近在检查项目的时候发现，有部分图像通过 OpenCV 打开并保存后，通道顺序变成了 BGR，而 Windows 系统打开查看时默认按 RGB 显示，所以会造成一定的影响

在这里插入图片描述

可以在上图看到这种现象，所以我训练了一个简单的二分类模型来对 BGR 和 RGB 图像进行分类，使用的是 PyTorch 框架；考虑到轻量化，所以选用了 ResNet18 模型

项目大纲

在这里插入图片描述

数据处理

对于这种简单的二分类模型，数据集的处理相对较为简单。

我采用的是类似coco数据集的方式来对路径进行管理：（BGR-detection/bgr-detection/data/bgr-data.yaml）

# ├── BGR-detection
#   └── dataset
#     └── image/train
#     └── label/train



# Dataset root; the train/test/label entries below are relative to it.
path: BGR-detection/dataset # dataset root dir
# Training images.
train: image/train
# Test images.
test:  image/test
# Label files for the training images.
label: label/train
val: # left empty: no validation split is configured


# Classes
# Class index -> class name.
names:
  0: BGR
  1: RGB

训练集（BGR-detection/dataset/image/train）里存放BGR和RGB图片：
在这里插入图片描述

我是使用txt文件来存放他们的标签的：
在这里插入图片描述

当然，如果你的图片也按相应的类别命名，也可以参考如下的方式来获取标签：
在这里插入图片描述

这种方式通过读取图片的名称信息来赋标签

下面的各部分代码通过模块化的方式来编写，便于后期的管理和调整。

数据读取/加载

数据加载模块：（BGR-detection/bgr-detection/utils/dataLoader.py)

import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import os
from PIL import Image
import yaml
from model import ResNet18

# 读取.yaml
def read_yaml_config(config_file):
    """Load the dataset YAML config and resolve the paths used for training.

    Args:
        config_file: Path to a YAML file that provides the keys ``path``,
            ``train``, ``label`` and ``names``.

    Returns:
        A tuple ``(data_root, train_path, label_path, class_names)`` where
        ``train_path`` and ``label_path`` are ``data_root`` joined with the
        ``train`` and ``label`` entries, and ``class_names`` is the value of
        ``names`` as parsed by YAML.

    Raises:
        KeyError: If one of the required keys is missing from the config.
    """
    # Decode explicitly as UTF-8 so that non-ASCII characters in the config
    # (for example in comments) do not depend on the platform's default locale.
    with open(config_file, "r", encoding="utf-8") as file:
        data = yaml.safe_load(file)

    data_root = data['path']
    train_path = os.path.join(data_root, data['train'])
    label_path = os.path.join(data_root, data['label'])
    class_names = data['names']

    return data_root, train_path, label_path, class_names

#数据集标准化
def load_dataset(data_root, train_path, label_path, class_names, batch_size):
    """Build the shuffled training ``DataLoader``.

    Every regular file in ``train_path`` is treated as an image; its label is
    read from ``<label_path>/<image name without extension>.txt``, which must
    contain a single integer.

    Args:
        data_root: Dataset root directory. Accepted for interface
            compatibility; not used by this function.
        train_path: Directory containing the training images.
        label_path: Directory containing the per-image ``.txt`` label files.
        class_names: Class mapping from the config; stored on the dataset but
            not otherwise used here.
        batch_size: Number of samples per batch.

    Returns:
        A ``DataLoader`` over the training set with ``shuffle=True`` that
        yields ``(image, label)`` batches.
    """
    # Resize to 224x224, convert to a tensor, then normalize each channel.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    class CustomDataset(Dataset):
        """Image dataset whose labels live in sibling ``.txt`` files."""

        def __init__(self, data_dir, label_dir, class_names, transform=None):
            self.data_dir = data_dir
            self.label_dir = label_dir
            self.class_names = class_names
            self.transform = transform
            # Keep regular files only: a sub-directory inside the image folder
            # would otherwise make Image.open fail in __getitem__. Sorting
            # makes the index -> file mapping deterministic.
            self.images = sorted(
                name for name in os.listdir(data_dir)
                if os.path.isfile(os.path.join(data_dir, name))
            )

        def __len__(self):
            return len(self.images)

        def __getitem__(self, idx):
            image_name = self.images[idx]

            image_path = os.path.join(self.data_dir, image_name)
            label_path = os.path.join(self.label_dir, f"{os.path.splitext(image_name)[0]}.txt")

            # Image.open is lazy and keeps the file open; convert() loads the
            # pixels, after which the context manager closes the handle.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert('RGB')

            with open(label_path, 'r') as label_file:
                label = int(label_file.read().strip())

            if self.transform:
                image = self.transform(image)

            return image, label

    train_dataset = CustomDataset(data_dir=train_path, label_dir=label_path, class_names=class_names, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    return train_loader

定义模型

线性层的输出神经元个数对应要分类的类别数量：(BGR-detection/bgr-detection/model/ResNet18.py)

import torch
import torchvision
from torch import nn


"""ResNet18(2,3)"""
class ResidualBlock(nn.Module):
    """Two 3x3 conv + BatchNorm layers with a skip connection.

    The first convolution applies ``stride``; the second always uses stride 1.
    When the block changes the spatial size or the channel count, the skip
    path is a 1x1 convolution followed by BatchNorm so that both branches have
    matching shapes; otherwise the input is passed through unchanged.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the block.
        stride: Stride of the first convolution (and of the 1x1 projection).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=3, stride=1, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        shapes_match = stride == 1 and in_channels == out_channels
        if shapes_match:
            self.identity = nn.Identity()
        else:
            # Project the input so it can be added to the main branch.
            projection_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)
            projection_norm = nn.BatchNorm2d(out_channels)
            self.identity = nn.Sequential(projection_conv, projection_norm)

    def forward(self, x):
        """Return ``relu(main_branch(x) + skip(x))``."""
        shortcut = self.identity(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += shortcut
        return self.relu(out)

class ResNet18(nn.Module):
    """ResNet18-style classifier assembled as a single ``nn.Sequential``.

    Layout of ``self.net``: a 3x3 stride-1 convolution stem with BatchNorm and
    ReLU (no max-pooling), eight ``ResidualBlock`` modules going from 64 to
    512 channels, global average pooling, flatten, and a linear head with
    ``num_classes`` outputs.

    Args:
        num_classes: Number of output neurons of the final linear layer.
        in_channels: Number of channels of the input images.
    """

    def __init__(self, num_classes, in_channels=3):
        super().__init__()

        # (in_channels, out_channels, stride) of each residual block, in order.
        block_specs = [
            (64, 64, 1), (64, 64, 1),
            (64, 128, 2), (128, 128, 1),
            (128, 256, 2), (256, 256, 1),
            (256, 512, 2), (512, 512, 1),
        ]

        layers = [
            nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        ]
        for block_in, block_out, block_stride in block_specs:
            layers.append(ResidualBlock(block_in, block_out, stride=block_stride))
        layers.append(nn.AdaptiveAvgPool2d((1, 1)))
        layers.append(nn.Flatten())
        layers.append(nn.Linear(512, num_classes))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the whole network and return the class logits."""
        logits = self.net(x)
        return logits

模型训练

定义训练过程：（BGR-detection/bgr-detection/utils/trainresnet.py)

import os
import shutil
from tqdm import tqdm
import torch
import torch.nn.init as init
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

from model import ResNet18




#显卡信息
def GPUinfo():
    """Print the PyTorch version, the CUDA device count and each device's properties.

    Nothing is returned; the three pieces of information are written to
    standard output, one per line.
    """
    device_count = torch.cuda.device_count()
    device_properties = [torch.cuda.get_device_properties(i) for i in range(device_count)]
    print(torch.__version__)
    print("Devices:%d" % device_count)
    print(device_properties)


#模型参数初始化
def initialize_model_params(model):
    """Re-initialize the parameters of ``model`` in place.

    Conv2d and Linear weights are drawn with ``kaiming_uniform_`` and their
    biases (when present) are zeroed; BatchNorm2d weights are set to 1 and
    biases to 0. Other module types are left untouched.
    """
    weighted_layer_types = (torch.nn.Conv2d, torch.nn.Linear)
    for module in model.modules():
        if isinstance(module, weighted_layer_types):
            init.kaiming_uniform_(module.weight)
            if module.bias is not None:
                module.bias.data.zero_()
            continue
        if isinstance(module, torch.nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()


#训练
def train_model(data_root, train_path, label_path, class_names, lr, epochs, batch_size, net, train_loader,
                criterion, optimizer, scheduler, test_loader=None,
                runs_dir="BGR-detection/bgr-detection/runs"):
    """Train ``net`` and save checkpoints and training curves under ``runs_dir``.

    The model is moved to CUDA when available (otherwise CPU) and its
    parameters are re-initialized with ``initialize_model_params`` before
    training, so any weights already present in ``net`` are overwritten.
    A new ``exp<N>`` sub-directory is created for every call; after each epoch
    ``latest_model.pth`` is written there, and ``best_model.pth`` is a copy of
    the checkpoint with the highest evaluation accuracy so far. When training
    finishes, ``training_plot.png`` with the loss and accuracy curves is saved
    in the same directory.

    Args:
        data_root, train_path, label_path, class_names, lr, batch_size:
            Accepted for interface compatibility; not used by this function.
        epochs: Number of training epochs.
        net: Model to train.
        train_loader: Iterable of ``(images, labels)`` training batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to ``net``'s parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        test_loader: Optional iterable of evaluation batches. Defaults to
            ``train_loader``, which matches the previous behaviour of
            evaluating on the training data.
        runs_dir: Directory that holds the ``exp<N>`` run folders.
    """
    GPUinfo()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net.to(device)
    initialize_model_params(net)

    eval_loader = train_loader if test_loader is None else test_loader

    exp_dir = _create_experiment_dir(runs_dir)
    latest_path = os.path.join(exp_dir, "latest_model.pth")
    best_path = os.path.join(exp_dir, "best_model.pth")

    train_losses, test_losses = [], []
    train_accuracies, test_accuracies = [], []

    # Start below any reachable accuracy so best_model.pth is always written
    # after the first epoch, even when the accuracy is 0%.
    best_acc = float("-inf")
    for epoch in range(epochs):

        net.train()
        total_loss = 0.0
        correct_train = 0
        total_train = 0
        num_batches = 0
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}, Training")
        for num_batches, (images, labels) in enumerate(progress_bar, start=1):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            _, predicted = outputs.max(1)
            total_train += labels.size(0)
            correct_train += predicted.eq(labels).sum().item()

            # Running mean over the batches processed so far in this epoch.
            progress_bar.set_postfix(loss=total_loss / num_batches,
                                     accuracy=100. * correct_train / max(total_train, 1))

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        train_loss = total_loss / max(num_batches, 1)
        train_accuracy = 100. * correct_train / max(total_train, 1)
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)

        # Adjust the learning rate once per epoch, after the optimizer steps.
        scheduler.step()

        test_loss, test_accuracy = _evaluate(
            net, eval_loader, criterion, device, desc=f"Epoch {epoch+1}/{epochs}, Testing")
        test_losses.append(test_loss)
        test_accuracies.append(test_accuracy)

        print(f"Epoch {epoch+1}/{epochs}, Loss: {train_loss:.4f}, "
              f"Train Accuracy: {train_accuracy:.2f}%, Test Accuracy: {test_accuracy:.2f}%")

        # Always keep the latest weights; copy them when they are the best so far.
        torch.save(net.state_dict(), latest_path)
        if test_accuracy > best_acc:
            best_acc = test_accuracy
            shutil.copyfile(latest_path, best_path)

    epoch_axis = range(1, epochs + 1)
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.plot(epoch_axis, train_losses, label='Train Loss')
    plt.plot(epoch_axis, test_losses, label='Test Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(epoch_axis, train_accuracies, label='Train Accuracy')
    plt.plot(epoch_axis, test_accuracies, label='Test Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy (%)')
    plt.legend()

    plt.savefig(os.path.join(exp_dir, "training_plot.png"))
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close()


def _create_experiment_dir(runs_dir):
    """Create and return the first unused ``<runs_dir>/exp<N>`` directory (N >= 1)."""
    os.makedirs(runs_dir, exist_ok=True)
    exp_num = 1
    while os.path.exists(os.path.join(runs_dir, f"exp{exp_num}")):
        exp_num += 1
    exp_dir = os.path.join(runs_dir, f"exp{exp_num}")
    os.makedirs(exp_dir)
    return exp_dir


def _evaluate(net, loader, criterion, device, desc):
    """Return ``(mean batch loss, accuracy in %)`` of ``net`` on ``loader`` in eval mode."""
    net.eval()
    loss_sum = 0.0
    correct = 0
    seen = 0
    num_batches = 0
    with torch.no_grad():
        for num_batches, (images, labels) in enumerate(tqdm(loader, desc=desc), start=1):
            images, labels = images.to(device), labels.to(device)

            outputs = net(images)
            loss_sum += criterion(outputs, labels).item()
            _, predicted = outputs.max(1)
            seen += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    return loss_sum / max(num_batches, 1), 100. * correct / max(seen, 1)

主程序训练：(BGR-detection/bgr-detection/train.py)

import torch
from torch import nn
from torch.optim.lr_scheduler import StepLR

from model.ResNet18 import ResNet18
from utils.dataLoader import load_dataset
from utils.dataLoader import read_yaml_config
from utils.trainresnet import train_model




def main():
    """Configure and launch ResNet18 training for the BGR/RGB classifier."""
    # Hyper-parameters.
    learning_rate, num_epochs, batch_size = 0.01, 5, 32

    # Resolve dataset paths and class names from the YAML config.
    config_file = "BGR-detection/bgr-detection/data/bgr-data.yaml"
    data_root, train_path, label_path, class_names = read_yaml_config(config_file)
    train_loader_d = load_dataset(data_root, train_path, label_path, class_names, batch_size)

    # Two output classes, three-channel input images.
    net_d = ResNet18(num_classes=2, in_channels=3)

    criterion_d = nn.CrossEntropyLoss()
    optimizer_d = torch.optim.SGD(net_d.parameters(), lr=learning_rate, momentum=0.9)
    # Multiply the learning rate by 0.1 every 3 epochs.
    scheduler_d = StepLR(optimizer_d, step_size=3, gamma=0.1)

    train_model(
        data_root,
        train_path,
        label_path,
        class_names,
        lr=learning_rate,
        epochs=num_epochs,
        batch_size=batch_size,
        net=net_d,
        train_loader=train_loader_d,
        criterion=criterion_d,
        optimizer=optimizer_d,
        scheduler=scheduler_d,
    )


if __name__ == "__main__":
    main()

检测

因为我的需求是对一整个文件夹中的图像进行分类，并将结果分类存放，所以没有设置过多的应用场景，大家可以根据自身需求修改：(BGR-detection/bgr-detection/detection.py)

import os
import shutil
from tqdm import tqdm  
from PIL import Image
import torch
import torchvision.transforms as transforms

from model.ResNet18 import ResNet18
from utils.trainresnet import GPUinfo


# Inference-time preprocessing: resize to a fixed size, convert to a tensor,
# then normalize each channel with the mean/std below.
_INPUT_SIZE = (224, 224)
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.Resize(_INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])


#模型加载
def load_model(model_path):
    """Load a trained two-class ResNet18 checkpoint for inference.

    Args:
        model_path: Path to a ``state_dict`` file written with ``torch.save``.

    Returns:
        A tuple ``(model, device)``: the model in eval mode, placed on CUDA
        when available and on CPU otherwise, and the device it lives on.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ResNet18(num_classes=2, in_channels=3).to(device)
    # map_location remaps the stored tensors onto the selected device, so a
    # checkpoint saved on a GPU machine can also be loaded on a CPU-only one.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model.eval()
    return model, device

#将测试结果保存到 ./test/result 路径下
def create_result_dir(result_path):
    """Create and return the next numbered ``result<N>`` directory.

    ``N`` is one more than the largest number found among the existing
    sub-directories of ``result_path`` named ``result<digits>`` (or 1 when
    there are none). ``result_path`` itself is created when it is missing.

    Args:
        result_path: Parent directory that holds the ``result<N>`` folders.

    Returns:
        The path of the newly created directory.
    """
    # A fresh checkout has no result directory yet; listing it would fail.
    os.makedirs(result_path, exist_ok=True)

    prefix = "result"
    max_num = 0
    for entry in os.listdir(result_path):
        if not entry.startswith(prefix):
            continue
        suffix = entry[len(prefix):]
        # Skip names such as "result_backup" or "result" whose suffix is not a
        # plain number instead of crashing in int().
        if not suffix.isdecimal():
            continue
        if not os.path.isdir(os.path.join(result_path, entry)):
            continue
        max_num = max(max_num, int(suffix))

    new_result_dir = os.path.join(result_path, f"result{max_num + 1}")
    os.makedirs(new_result_dir, exist_ok=True)

    return new_result_dir

#逐张检测，将检测结果与标签对照后分类
def batch_detect(model, device, data_path, result_path):
    """Classify every file in ``data_path`` and copy it into a per-class folder.

    A new ``result<N>`` directory is created under ``result_path`` with two
    sub-directories: files predicted as class 0 are copied to ``result_1`` and
    all other files to ``result_2``. The source files are left in place.

    Args:
        model: Classifier returning one row of class scores per input image.
        device: Device on which ``model`` lives; inputs are moved to it.
        data_path: Directory containing the images to classify.
        result_path: Parent directory for the numbered result folders.
    """
    new_result_dir = create_result_dir(result_path)
    result_1_dir = os.path.join(new_result_dir, "result_1")
    result_2_dir = os.path.join(new_result_dir, "result_2")

    os.makedirs(result_1_dir, exist_ok=True)
    os.makedirs(result_2_dir, exist_ok=True)

    # Only regular files can be opened as images; skip sub-directories.
    image_list = [
        name for name in os.listdir(data_path)
        if os.path.isfile(os.path.join(data_path, name))
    ]
    for image_file in tqdm(image_list, desc="Processing"):
        image_path = os.path.join(data_path, image_file)
        # Image.open is lazy and keeps the file open; convert() loads the
        # pixels, after which the context manager closes the handle.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        # Add a batch dimension of size 1 before feeding the model.
        image_tensor = transform(image).unsqueeze(0).to(device)

        with torch.no_grad():
            output = model(image_tensor)
            _, predicted = output.max(1)

        target_dir = result_1_dir if predicted.item() == 0 else result_2_dir
        shutil.copy(image_path, os.path.join(target_dir, image_file))


def main():
    """Load the trained checkpoint and classify every image in the test folder."""
    GPUinfo()

    # Checkpoint to use, images to classify, and where to put the results.
    model_path = "BGR-detection/bgr-detection/runs/exp10/best_model.pth"
    data_path = "BGR-detection/dataset/image/test"
    result_path = "BGR-detection/bgr-detection/test/result"

    model, device = load_model(model_path)
    batch_detect(model, device, data_path, result_path)


if __name__ == "__main__":
    main()