ResNet18+图像二分类+pytorch

ResNet18+图像二分类+pytorch

项目背景

最近在检查项目的时候发现有部分图像通过OpenCV打开并保存后,通道顺序被存成了BGR,而Windows系统查看图片时默认按RGB显示,所以会造成一定的影响

在这里插入图片描述

可以在上图看到这种现象,所以我训练了一个简单的二分类模型来对BGR和RGB图像进行分类,使用的是PyTorch框架,考虑到轻量化,所以选用了ResNet18模型

项目大纲

在这里插入图片描述

数据处理

对于这种简单的二分类模型,数据集的处理相对较为简单。

我采用的是类似coco数据集的方式来对路径进行管理:(BGR-detection/bgr-detection/data/bgr-data.yaml)

# ├── BGR-detection
#   └── dataset
#     └── image/train
#     └── label/train



path: BGR-detection/dataset # dataset root dir
train: image/train
test:  image/test
label: label/train
val: #


# Classes
names:
  0: BGR
  1: RGB


训练集(BGR-detection/dataset/image/train)里存放BGR和RGB图片:
在这里插入图片描述

我是使用txt文件来存放他们的标签的:
在这里插入图片描述

当然,如果你的图片是按照相应类别来命名的,也可以参考如下的方式来获取标签:
在这里插入图片描述

这种方式通过读取图片的名称信息来赋标签

下面的各部分代码通过模块化的方式来编写,便于后期的管理和调整

数据读取/加载

数据加载模块:(BGR-detection/bgr-detection/utils/dataLoader.py)

import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import os
from PIL import Image
import yaml
from model import ResNet18

def read_yaml_config(config_file):
    """Load the dataset YAML config and resolve the paths used for training.

    Args:
        config_file: Path to a YAML file that provides the keys ``path``
            (dataset root directory), ``train`` and ``label`` (sub-paths
            joined onto the root) and ``names`` (the class mapping).

    Returns:
        A tuple ``(data_root, train_path, label_path, class_names)``.

    Raises:
        KeyError: If one of the keys listed above is missing from the file.
    """
    # Decode explicitly as UTF-8: the platform default encoding (e.g. GBK on
    # a Chinese Windows install) cannot be relied on for a config that holds
    # non-ASCII comments or paths.
    with open(config_file, "r", encoding="utf-8") as file:
        data = yaml.safe_load(file)

    data_root = data['path']
    train_path = os.path.join(data_root, data['train'])
    label_path = os.path.join(data_root, data['label'])
    class_names = data['names']

    return data_root, train_path, label_path, class_names

def load_dataset(data_root, train_path, label_path, class_names, batch_size):
    """Build the shuffled training ``DataLoader``.

    Every image is resized to 224x224, converted to a tensor and normalised
    per channel. The label of ``<name>.<ext>`` is read as a single integer
    from ``<label_path>/<name>.txt``.

    Args:
        data_root: Dataset root directory (not used here; kept so existing
            callers keep working).
        train_path: Directory containing the training images.
        label_path: Directory containing one ``.txt`` label file per image.
        class_names: Class mapping; stored on the dataset but not otherwise
            used by it.
        batch_size: Number of samples per batch.

    Returns:
        A ``DataLoader`` over the training set with ``shuffle=True``.
    """
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Only entries with one of these suffixes are treated as images, so that
    # sub-directories and stray files (Thumbs.db, .DS_Store, ...) do not make
    # __getitem__ fail in the middle of an epoch.
    image_exts = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp")

    class CustomDataset(Dataset):
        """Image/label pairs read from two parallel directories."""

        def __init__(self, data_dir, label_dir, class_names, transform=None):
            self.data_dir = data_dir
            self.label_dir = label_dir
            self.class_names = class_names
            self.transform = transform
            # Sorted so the index -> file mapping is reproducible across
            # runs; os.listdir() returns entries in arbitrary order.
            self.images = sorted(
                name for name in os.listdir(data_dir)
                if name.lower().endswith(image_exts)
                and os.path.isfile(os.path.join(data_dir, name))
            )

        def __len__(self):
            return len(self.images)

        def __getitem__(self, idx):
            image_name = self.images[idx]

            image_path = os.path.join(self.data_dir, image_name)
            label_path = os.path.join(self.label_dir, f"{os.path.splitext(image_name)[0]}.txt")

            # The context manager closes the file handle as soon as the
            # pixel data has been converted.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert('RGB')

            with open(label_path, 'r', encoding='utf-8') as label_file:
                label = int(label_file.read().strip())

            if self.transform is not None:
                image = self.transform(image)

            return image, label

    train_dataset = CustomDataset(data_dir=train_path, label_dir=label_path, class_names=class_names, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    return train_loader




定义模型

线性层的输出神经元个数对应要分类的类别数量:(BGR-detection/bgr-detection/model/ResNet18.py)

import torch
import torchvision
from torch import nn


"""ResNet18(2,3)"""
class ResidualBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm stages and a shortcut.

    The shortcut is a 1x1 conv + batch-norm projection when the block changes
    the spatial stride or the channel count, and a plain identity otherwise.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the block.
        stride: Stride of the first convolution (and of the projection).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        # Attribute names and their registration order are left untouched:
        # they determine the state_dict keys of saved checkpoints.
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=3, stride=1, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        shape_changes = stride != 1 or in_channels != out_channels
        self.identity = (
            nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
            if shape_changes
            else nn.Identity()
        )

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        shortcut = self.identity(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += shortcut
        return self.relu(out)

class ResNet18(nn.Module):
    """ResNet-18 style classifier assembled as one ``nn.Sequential``.

    Layout: a 3x3 stride-1 conv stem with batch-norm and ReLU (no max-pool),
    eight ``ResidualBlock`` modules going 64 -> 128 -> 256 -> 512 channels,
    adaptive average pooling to 1x1, flatten, and a linear layer with
    ``num_classes`` outputs.

    Args:
        num_classes: Size of the final linear layer's output.
        in_channels: Number of channels of the input images.
    """

    def __init__(self, num_classes, in_channels=3):
        super().__init__()

        # (in_channels, out_channels, stride) for each residual block, in order.
        block_plan = (
            (64, 64, 1),
            (64, 64, 1),
            (64, 128, 2),
            (128, 128, 1),
            (128, 256, 2),
            (256, 256, 1),
            (256, 512, 2),
            (512, 512, 1),
        )

        stem = [
            nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        ]
        body = [ResidualBlock(c_in, c_out, stride=step) for c_in, c_out, step in block_plan]
        head = [
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(512, num_classes),
        ]

        # Positions inside the Sequential are part of the checkpoint key names
        # (net.0.weight, net.3.conv1.weight, ...), so the order must not change.
        self.net = nn.Sequential(*stem, *body, *head)

    def forward(self, x):
        """Run ``x`` through the whole network and return the linear layer's output."""
        return self.net(x)

模型训练

定义训练过程:(BGR-detection/bgr-detection/utils/trainresnet.py)

import os
import shutil
from tqdm import tqdm
import torch
import torch.nn.init as init
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

from model import ResNet18




def GPUinfo():
    """Print the torch version, the CUDA device count and each device's properties.

    Three lines are printed: ``torch.__version__``, ``Devices:<count>`` and
    the list of per-device property objects (an empty list when no CUDA
    device is visible). Nothing is returned.
    """
    ng = torch.cuda.device_count()
    infos = [torch.cuda.get_device_properties(i) for i in range(ng)]
    print(torch.__version__)
    print("Devices:%d" % ng)
    print(infos)


def initialize_model_params(model):
    """Re-initialise the parameters of ``model`` in place.

    ``Conv2d`` and ``Linear`` weights are drawn with Kaiming-uniform
    initialisation and their biases (when present) are zeroed;
    ``BatchNorm2d`` layers get weight 1 and bias 0. Other modules are left
    as they are.
    """
    kaiming_layers = (torch.nn.Conv2d, torch.nn.Linear)

    for layer in model.modules():
        if isinstance(layer, torch.nn.BatchNorm2d):
            layer.weight.data.fill_(1)
            layer.bias.data.zero_()
            continue

        if not isinstance(layer, kaiming_layers):
            continue

        init.kaiming_uniform_(layer.weight)
        if layer.bias is not None:
            layer.bias.data.zero_()


def train_model(data_root, train_path, label_path, class_names, lr, epochs, batch_size, net, train_loader,
                criterion, optimizer, scheduler, test_loader=None,
                runs_dir="BGR-detection/bgr-detection/runs"):
    """Train ``net``, evaluate it after every epoch, and save checkpoints and curves.

    A fresh ``exp<N>`` directory is created under ``runs_dir``. After every
    epoch the current weights are written to ``latest_model.pth`` and, when
    the evaluation accuracy improves, copied to ``best_model.pth``. When all
    epochs are done the loss/accuracy curves are saved as
    ``training_plot.png`` in the same directory.

    Note that the model's parameters are re-initialised at the start, so any
    weights ``net`` held before the call are discarded.

    Args:
        data_root, train_path, label_path, class_names, lr, batch_size:
            Not used by the training loop; accepted so existing callers keep
            working.
        epochs: Number of passes over ``train_loader``.
        net: Model to train; moved to CUDA when available, otherwise CPU.
        train_loader: Iterable of ``(images, labels)`` training batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to ``net``'s parameters.
        scheduler: LR scheduler, stepped once per epoch.
        test_loader: Optional loader used for the per-epoch evaluation. When
            omitted, evaluation runs on ``train_loader``; the reported "test"
            metrics are then measured on training data and say nothing about
            generalisation.
        runs_dir: Directory under which the ``exp<N>`` folder is created.
    """
    GPUinfo()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net.to(device)
    initialize_model_params(net)

    eval_loader = train_loader if test_loader is None else test_loader

    # Pick the first unused runs/exp<N> directory for this run's artefacts.
    os.makedirs(runs_dir, exist_ok=True)
    exp_num = 1
    while os.path.exists(os.path.join(runs_dir, f"exp{exp_num}")):
        exp_num += 1
    exp_dir = os.path.join(runs_dir, f"exp{exp_num}")
    os.makedirs(exp_dir)
    latest_path = os.path.join(exp_dir, "latest_model.pth")
    best_path = os.path.join(exp_dir, "best_model.pth")

    train_losses, test_losses = [], []
    train_accuracies, test_accuracies = [], []

    # Start below any reachable accuracy so best_model.pth is always written
    # after the first epoch, even if that epoch scores 0%.
    best_acc = float("-inf")
    for epoch in range(epochs):

        net.train()
        total_loss = 0.0
        correct_train = 0
        total_train = 0
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}, Training")
        for batch_idx, (images, labels) in enumerate(progress_bar, start=1):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            _, predicted = outputs.max(1)
            total_train += labels.size(0)
            correct_train += predicted.eq(labels).sum().item()

            # Running mean over the batches seen so far in this epoch.
            progress_bar.set_postfix(loss=total_loss / batch_idx,
                                     accuracy=100. * correct_train / total_train)

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        train_loss = total_loss / max(len(train_loader), 1)
        train_accuracy = 100. * correct_train / max(total_train, 1)
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)

        # Learning-rate schedule advances once per epoch.
        scheduler.step()

        net.eval()
        eval_loss_sum = 0.0
        total_test = 0
        correct_test = 0
        with torch.no_grad():
            for images, labels in tqdm(eval_loader, desc=f"Epoch {epoch+1}/{epochs}, Testing"):
                images, labels = images.to(device), labels.to(device)

                outputs = net(images)
                # Accumulate the loss of the evaluation pass itself instead
                # of reusing the training-loop total.
                eval_loss_sum += criterion(outputs, labels).item()
                _, predicted = outputs.max(1)
                total_test += labels.size(0)
                correct_test += predicted.eq(labels).sum().item()

        test_loss = eval_loss_sum / max(len(eval_loader), 1)
        test_accuracy = 100. * correct_test / max(total_test, 1)
        test_losses.append(test_loss)
        test_accuracies.append(test_accuracy)

        print(f"Epoch {epoch+1}/{epochs}, Loss: {train_loss:.4f}, "
              f"Train Accuracy: {train_accuracy:.2f}%, Test Accuracy: {test_accuracy:.2f}%")

        # Always keep the latest weights; keep a copy of the best-scoring ones.
        torch.save(net.state_dict(), latest_path)
        if test_accuracy > best_acc:
            best_acc = test_accuracy
            shutil.copyfile(latest_path, best_path)

    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.plot(range(1, epochs + 1), train_losses, label='Train Loss')
    plt.plot(range(1, epochs + 1), test_losses, label='Test Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(range(1, epochs + 1), train_accuracies, label='Train Accuracy')
    plt.plot(range(1, epochs + 1), test_accuracies, label='Test Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy (%)')
    plt.legend()

    plt.savefig(os.path.join(exp_dir, "training_plot.png"))
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close()



主程序训练:(BGR-detection/bgr-detection/train.py)

import torch
from torch import nn
from torch.optim.lr_scheduler import StepLR

from model.ResNet18 import ResNet18
from utils.dataLoader import load_dataset
from utils.dataLoader import read_yaml_config
from utils.trainresnet import train_model




if __name__ == "__main__":
    # Hyper-parameters for this run.
    batch_size = 32
    num_epochs = 5
    learning_rate = 0.01

    # Resolve the dataset locations from the YAML config.
    config_file = "BGR-detection/bgr-detection/data/bgr-data.yaml"
    data_root, train_path, label_path, class_names = read_yaml_config(config_file)

    loader = load_dataset(data_root, train_path, label_path, class_names, batch_size)

    # Two-class task on 3-channel images.
    model = ResNet18(num_classes=2, in_channels=3)

    loss_fn = nn.CrossEntropyLoss()
    sgd = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
    # Multiply the learning rate by 0.1 every 3 epochs.
    lr_schedule = StepLR(sgd, step_size=3, gamma=0.1)

    # Run training.
    train_model(
        data_root,
        train_path,
        label_path,
        class_names,
        lr=learning_rate,
        epochs=num_epochs,
        batch_size=batch_size,
        net=model,
        train_loader=loader,
        criterion=loss_fn,
        optimizer=sgd,
        scheduler=lr_schedule,
    )

检测

因为我的需求是对一整个文件夹中的图像进行分类,并将结果分类存放,所以没有设置过多的应用场景,大家可以根据自身需求修改:(BGR-detection/bgr-detection/detection.py)

import os
import shutil
from tqdm import tqdm  
from PIL import Image
import torch
import torchvision.transforms as transforms

from model.ResNet18 import ResNet18
from utils.trainresnet import GPUinfo


# Preprocessing applied to every image before inference: resize to 224x224,
# convert to a tensor, then normalise each channel with the mean/std below.
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]

_preprocess_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD),
]
transform = transforms.Compose(_preprocess_steps)


def load_model(model_path):
    """Build a 2-class ResNet18, load saved weights into it and set eval mode.

    Args:
        model_path: Path to a ``state_dict`` file written by ``torch.save``.

    Returns:
        A tuple ``(model, device)``; ``device`` is CUDA when available and
        CPU otherwise, and the model has been moved to it.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ResNet18(num_classes=2, in_channels=3).to(device)
    # map_location remaps the stored tensors onto the device chosen above, so
    # a checkpoint saved on a GPU machine also loads on a CPU-only machine.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    return model, device

def create_result_dir(result_path):
    """Create and return the next ``result<N>`` directory under ``result_path``.

    ``N`` is one more than the largest number among the existing
    sub-directories named exactly ``result<digits>``; it is 1 when there are
    none. ``result_path`` itself is created when it does not exist yet.

    Args:
        result_path: Parent directory that holds the numbered result folders.

    Returns:
        Path of the newly created ``result<N>`` directory.
    """
    import re  # local import: keeps this function independent of the file's import block

    # A first run must not fail just because the parent folder is missing.
    os.makedirs(result_path, exist_ok=True)

    # Only "result" followed by digits counts. Names such as "result_old" or
    # "results" are ignored rather than being passed to int().
    numbered = re.compile(r"result([0-9]+)")
    max_num = 0
    for entry in os.listdir(result_path):
        match = numbered.fullmatch(entry)
        if match and os.path.isdir(os.path.join(result_path, entry)):
            max_num = max(max_num, int(match.group(1)))

    new_result_dir = os.path.join(result_path, f"result{max_num + 1}")
    os.makedirs(new_result_dir, exist_ok=True)

    return new_result_dir

def batch_detect(model, device, data_path, result_path):
    """Classify every image in ``data_path`` and copy it into a per-class folder.

    A new ``result<N>`` directory is created under ``result_path`` with two
    sub-folders: images predicted as class 0 are copied to ``result_1``, all
    others to ``result_2``. Images are processed one at a time and no label
    files are consulted; the split is based on the prediction alone.

    Sub-directories inside ``data_path`` are skipped, and files that cannot
    be opened as an image are reported and skipped instead of aborting the
    whole run.

    Args:
        model: Network in eval mode, already on ``device``.
        device: Device the input tensors are moved to.
        data_path: Directory containing the images to classify.
        result_path: Parent directory for the numbered result folders.

    Returns:
        Path of the ``result<N>`` directory that was created.
    """
    new_result_dir = create_result_dir(result_path)
    result_1_dir = os.path.join(new_result_dir, "result_1")
    result_2_dir = os.path.join(new_result_dir, "result_2")

    os.makedirs(result_1_dir, exist_ok=True)
    os.makedirs(result_2_dir, exist_ok=True)

    image_list = sorted(os.listdir(data_path))
    for image_file in tqdm(image_list, desc="Processing"):
        image_path = os.path.join(data_path, image_file)
        if not os.path.isfile(image_path):
            continue

        try:
            # The context manager closes the file handle once the pixels
            # have been converted.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert("RGB")
        except OSError as err:
            # tqdm.write prints without breaking the progress bar.
            tqdm.write(f"Skipping {image_path}: {err}")
            continue

        image_tensor = transform(image).unsqueeze(0).to(device)

        with torch.no_grad():
            output = model(image_tensor)
            _, predicted = output.max(1)

        target_dir = result_1_dir if predicted.item() == 0 else result_2_dir
        shutil.copy(image_path, os.path.join(target_dir, image_file))

    return new_result_dir


if __name__ == "__main__":
    GPUinfo()

    # Where the classified copies go, which images to classify, and which
    # checkpoint to load.
    result_path = "BGR-detection/bgr-detection/test/result"
    data_path = "BGR-detection/dataset/image/test"
    model_path = "BGR-detection/bgr-detection/runs/exp10/best_model.pth"

    net, target_device = load_model(model_path)
    batch_detect(net, target_device, data_path, result_path)

运行示例

运行前:

请确保你的路径设置正确,

请确保你们数据和标签相对应(避免浪费时间训练一个无用的模型),

请确保各模块代码被放置在正确位置并且被正确的调用

运行示例如下:
在这里插入图片描述

我是在服务器上运行的,所以用的是Linux命令,在IDE中运行同理

每一次训练结果和测试结果都会被默认保存:
在这里插入图片描述

在这里插入图片描述

我的训练集不大,就800张图片,包含了两个类别,机器是3090的显卡,训练了35个epochs,用时一个小时左右,准确率可以保证在95以上

torch版本信息:

在这里插入图片描述

Author

代码放到了github上了:https://github.com/LINL12/BGR-detection/tree/master

因为是不常见任务,所以数据集我就不放上来了(估计你们也不需要),需要的话再私信我

Design by LINL

  • 2
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 5
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值