ResNet18+图像二分类+pytorch
项目背景
最近在检查项目的时候发现,有部分图像通过OpenCV打开并保存后被存成了BGR通道顺序,而Windows系统打开查看时默认按RGB显示,所以会造成一定的影响
可以在上图看到这种现象,所以我训练了一个简单的二分类模型来对BGR和RGB图像进行分类,使用的是pytorch框架,考虑到轻量化,所以选用了ResNet18模型
项目大纲
数据处理
对于这种简单的二分类模型,数据集的处理相对较为简单。
我采用的是类似coco数据集的方式来对路径进行管理:(BGR-detection/bgr-detection/data/bgr-data.yaml)
# Directory layout expected by this config:
# ├── BGR-detection
#     └── dataset
#         ├── image/train
#         └── label/train
path: BGR-detection/dataset  # dataset root dir
train: image/train  # training images, relative to `path`
test: image/test  # test images, relative to `path`
label: label/train  # per-image label .txt files, relative to `path`
val:  # left empty
# Classes (class index -> class name); the entries must be nested under
# `names`, otherwise `names` parses as null.
names:
  0: BGR
  1: RGB
训练集(BGR-detection/dataset/image/train)里存放BGR和RGB图片:
我是使用txt文件来存放他们的标签的:
当然,如果你的图片是按照相应类别来命名的,也可以参考如下的方式来获取标签:
这种方式通过读取图片的名称信息来赋标签
下面的各部分代码通过模块化的方式来编写,便于后期的管理和调整。
数据读取/加载
数据加载模块:(BGR-detection/bgr-detection/utils/dataLoader.py)
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import os
from PIL import Image
import yaml
from model import ResNet18
# Read the dataset .yaml config
def read_yaml_config(config_file):
    """Load the dataset YAML config and resolve the training paths.

    Args:
        config_file: Path to a YAML file whose top-level mapping provides
            the keys ``path``, ``train``, ``label`` and ``names``.

    Returns:
        A tuple ``(data_root, train_path, label_path, class_names)``.
        ``train_path`` and ``label_path`` are the ``train`` and ``label``
        entries joined onto ``path``; ``class_names`` is the ``names`` value
        exactly as parsed from the file.

    Raises:
        KeyError: If one of the required keys is missing from the mapping.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # locale encoding (the config may contain non-ASCII text).
    with open(config_file, "r", encoding="utf-8") as file:
        data = yaml.safe_load(file)

    data_root = data['path']
    train_path = os.path.join(data_root, data['train'])
    label_path = os.path.join(data_root, data['label'])
    class_names = data['names']
    return data_root, train_path, label_path, class_names
# Dataset preprocessing and loading
def load_dataset(data_root, train_path, label_path, class_names, batch_size):
    """Build a shuffled ``DataLoader`` over the training images.

    Every entry listed in ``train_path`` is treated as an image and is paired
    with a text file of the same base name (``<name>.txt``) in ``label_path``;
    the content of that file is parsed as a single integer label.

    Args:
        data_root: Dataset root directory (not read by this function).
        train_path: Directory containing the training images.
        label_path: Directory containing the per-image label ``.txt`` files.
        class_names: Class names from the config; stored on the dataset only.
        batch_size: Number of samples per batch.

    Returns:
        A ``DataLoader`` yielding ``(image_tensor, label)`` batches with
        ``shuffle=True``.
    """
    # Resize to 224x224, convert to a tensor, then normalise per channel.
    preprocessing_steps = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    transform = transforms.Compose(preprocessing_steps)

    # Custom dataset
    class CustomDataset(Dataset):
        """Image dataset whose labels live in side-car ``.txt`` files."""

        def __init__(self, data_dir, label_dir, class_names, transform=None):
            self.data_dir = data_dir
            self.label_dir = label_dir
            self.class_names = class_names
            self.transform = transform
            # Every directory entry is treated as one sample.
            self.images = os.listdir(data_dir)

        def __len__(self):
            return len(self.images)

        def __getitem__(self, idx):
            file_name = self.images[idx]
            stem = os.path.splitext(file_name)[0]
            image_file = os.path.join(self.data_dir, file_name)
            label_file_path = os.path.join(self.label_dir, f"{stem}.txt")

            # Always hand the transform a 3-channel image.
            picture = Image.open(image_file).convert('RGB')
            with open(label_file_path, 'r') as handle:
                target = int(handle.read().strip())

            if self.transform:
                picture = self.transform(picture)
            return picture, target

    # Data loader
    dataset = CustomDataset(
        data_dir=train_path,
        label_dir=label_path,
        class_names=class_names,
        transform=transform,
    )
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)
定义模型
线性层的输出神经元个数对应要分类的类别数量:(BGR-detection/bgr-detection/model/ResNet18.py)
import torch
import torchvision
from torch import nn
"""ResNet18(2,3)"""
class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers with a skip connection.

    When the block changes the spatial stride or the channel count, the skip
    path is a 1x1 convolution followed by batch-norm so both branches have
    matching shapes; otherwise the input is passed through unchanged.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the block.
        stride: Stride of the first convolution (and of the 1x1 skip
            convolution when one is used).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        # Main branch: conv -> bn -> relu -> conv -> bn
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=3, stride=1, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Skip branch: project only when the shapes would otherwise differ.
        shape_changes = stride != 1 or in_channels != out_channels
        if not shape_changes:
            self.identity = nn.Identity()
        else:
            self.identity = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        """Return ``relu(main_branch(x) + skip_branch(x))``."""
        shortcut = self.identity(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += shortcut
        return self.relu(out)
class ResNet18(nn.Module):
    """ResNet18-style classifier built from eight ``ResidualBlock`` modules.

    The stem is a single 3x3, stride-1 convolution (no max-pooling), followed
    by the residual blocks, global average pooling and one linear layer.

    Args:
        num_classes: Number of output units of the final linear layer.
        in_channels: Number of channels of the input images.
    """

    def __init__(self, num_classes, in_channels=3):
        super().__init__()
        # (in_channels, out_channels, stride) of each residual block, in order.
        block_specs = [
            (64, 64, 1),
            (64, 64, 1),
            (64, 128, 2),
            (128, 128, 1),
            (128, 256, 2),
            (256, 256, 1),
            (256, 512, 2),
            (512, 512, 1),
        ]

        # Layers are created in network order: stem, residual blocks, head.
        stem = [
            nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        ]
        blocks = [
            ResidualBlock(block_in, block_out, stride=block_stride)
            for block_in, block_out, block_stride in block_specs
        ]
        head = [
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(512, num_classes),
        ]
        self.net = nn.Sequential(*stem, *blocks, *head)

    def forward(self, x):
        """Return the class logits produced by the sequential network."""
        return self.net(x)
模型训练
定义训练过程:(BGR-detection/bgr-detection/utils/trainresnet.py)
import os
import shutil
from tqdm import tqdm
import torch
import torch.nn.init as init
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from model import ResNet18
# GPU information
def GPUinfo():
    """Print the PyTorch version, the CUDA device count and device properties.

    Three lines are written to stdout: ``torch.__version__``, ``Devices:<n>``
    and the list of ``torch.cuda.get_device_properties`` results (one entry
    per device counted by ``torch.cuda.device_count()``).

    Returns:
        None.
    """
    ng = torch.cuda.device_count()
    infos = [torch.cuda.get_device_properties(i) for i in range(ng)]
    print(torch.__version__)
    print(f"Devices:{ng}")
    print(infos)
# Model parameter initialisation
def initialize_model_params(model):
    """Re-initialise the conv, linear and batch-norm layers of ``model`` in place.

    ``Conv2d`` and ``Linear`` weights are drawn with ``kaiming_uniform_`` and
    their biases (when present) are zeroed; ``BatchNorm2d`` weights are set to
    one and biases to zero. Other module types are left untouched.

    Args:
        model: Module whose sub-modules (as returned by ``model.modules()``)
            are visited.

    Returns:
        None.
    """
    kaiming_layer_types = (torch.nn.Conv2d, torch.nn.Linear)
    for layer in model.modules():
        if isinstance(layer, torch.nn.BatchNorm2d):
            init.ones_(layer.weight)
            init.zeros_(layer.bias)
            continue
        if not isinstance(layer, kaiming_layer_types):
            continue
        init.kaiming_uniform_(layer.weight)
        if layer.bias is not None:
            init.zeros_(layer.bias)
# Training
def train_model(data_root, train_path, label_path, class_names, lr, epochs, batch_size, net, train_loader,
                criterion, optimizer, scheduler, test_loader=None):
    """Train ``net``, evaluate it after every epoch and save checkpoints and curves.

    Outputs go to a new ``BGR-detection/bgr-detection/runs/exp<N>`` directory
    (the first ``N`` that does not exist yet): ``latest_model.pth`` after
    every epoch, ``best_model.pth`` whenever the evaluation accuracy improves,
    and ``training_plot.png`` with the loss / accuracy curves at the end.

    Args:
        data_root, train_path, label_path, class_names, lr, batch_size:
            Accepted for interface compatibility; not read by this function
            (the learning rate in effect is the one held by ``optimizer``).
        epochs: Number of training epochs.
        net: Model to train. It is moved to the selected device and its
            parameters are re-initialised before training starts.
        train_loader: Iterable of ``(images, labels)`` training batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        test_loader: Optional iterable of held-out ``(images, labels)``
            batches used for the per-epoch evaluation. When ``None`` the
            evaluation runs on ``train_loader``, so the reported "test"
            metrics then describe the training data.

    Returns:
        None.
    """
    GPUinfo()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net.to(device)
    initialize_model_params(net)

    eval_loader = train_loader if test_loader is None else test_loader

    # Save the models and the result curves under ./runs/exp<N>
    runs_root = "BGR-detection/bgr-detection/runs"
    os.makedirs(runs_root, exist_ok=True)
    exp_num = 1
    while os.path.exists(f"{runs_root}/exp{exp_num}"):
        exp_num += 1
    exp_dir = f"{runs_root}/exp{exp_num}"
    os.makedirs(exp_dir)

    train_losses, test_losses = [], []
    train_accuracies, test_accuracies = [], []
    best_acc = 0.0

    for epoch in range(epochs):
        # ---- training pass ----
        net.train()
        total_loss = 0
        correct_train = 0
        total_train = 0
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}, Training")
        for batches_seen, (images, labels) in enumerate(progress_bar, start=1):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            _, predicted = outputs.max(1)
            total_train += labels.size(0)
            correct_train += predicted.eq(labels).sum().item()
            # Running mean over the batches processed so far.
            progress_bar.set_postfix(loss=total_loss / batches_seen,
                                     accuracy=100. * correct_train / total_train)

        train_loss = total_loss / len(train_loader)
        train_accuracy = 100. * correct_train / total_train
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)

        # Learning-rate adjustment
        scheduler.step()

        # ---- evaluation pass ----
        net.eval()
        total_test = 0
        correct_test = 0
        eval_loss_sum = 0.0
        with torch.no_grad():
            for images, labels in tqdm(eval_loader, desc=f"Epoch {epoch+1}/{epochs}, Testing"):
                images, labels = images.to(device), labels.to(device)
                outputs = net(images)
                # Measure the loss in eval mode instead of reusing the
                # training loss accumulated above.
                eval_loss_sum += criterion(outputs, labels).item()
                _, predicted = outputs.max(1)
                total_test += labels.size(0)
                correct_test += predicted.eq(labels).sum().item()

        test_loss = eval_loss_sum / len(eval_loader)
        test_accuracy = 100. * correct_test / total_test
        test_losses.append(test_loss)
        test_accuracies.append(test_accuracy)

        print(f"Epoch {epoch+1}/{epochs}, Loss: {train_loss:.4f}, "
              f"Train Accuracy: {train_accuracy:.2f}%, Test Accuracy: {test_accuracy:.2f}%")

        # Save the latest model, and keep a copy of the best one so far
        torch.save(net.state_dict(), f"{exp_dir}/latest_model.pth")
        if test_accuracy > best_acc:
            best_acc = test_accuracy
            shutil.copyfile(f"{exp_dir}/latest_model.pth", f"{exp_dir}/best_model.pth")

    # Loss and accuracy curves, one point per epoch
    epoch_axis = range(1, epochs + 1)
    fig = plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.plot(epoch_axis, train_losses, label='Train Loss')
    plt.plot(epoch_axis, test_losses, label='Test Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.subplot(1, 2, 2)
    plt.plot(epoch_axis, train_accuracies, label='Train Accuracy')
    plt.plot(epoch_axis, test_accuracies, label='Test Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy (%)')
    plt.legend()
    plt.savefig(f"{exp_dir}/training_plot.png")
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
主程序训练:(BGR-detection/bgr-detection/train.py)
import torch
from torch import nn
from torch.optim.lr_scheduler import StepLR
from model.ResNet18 import ResNet18
from utils.dataLoader import load_dataset
from utils.dataLoader import read_yaml_config
from utils.trainresnet import train_model
if __name__ == "__main__":
    # Hyper-parameters
    learning_rate = 0.01
    num_epochs = 5
    batch_size = 32

    # Dataset description -> (data_root, train_path, label_path, class_names)
    config_file = "BGR-detection/bgr-detection/data/bgr-data.yaml"
    dataset_config = read_yaml_config(config_file)
    train_loader_d = load_dataset(*dataset_config, batch_size)

    # Two-class task on 3-channel images
    net_d = ResNet18(num_classes=2, in_channels=3)
    criterion_d = nn.CrossEntropyLoss()
    optimizer_d = torch.optim.SGD(net_d.parameters(), lr=learning_rate, momentum=0.9)
    # Multiply the learning rate by 0.1 every 3 epochs
    scheduler_d = StepLR(optimizer_d, step_size=3, gamma=0.1)

    # Train
    train_model(
        *dataset_config,
        lr=learning_rate,
        epochs=num_epochs,
        batch_size=batch_size,
        net=net_d,
        train_loader=train_loader_d,
        criterion=criterion_d,
        optimizer=optimizer_d,
        scheduler=scheduler_d,
    )
检测
因为我的需求是对一整个文件夹中的图像进行分类,并将结果分类存放,所以没有设置过多的应用场景,大家可以根据自身需求修改:(BGR-detection/bgr-detection/detection.py)
import os
import shutil
from tqdm import tqdm
from PIL import Image
import torch
import torchvision.transforms as transforms
from model.ResNet18 import ResNet18
from utils.trainresnet import GPUinfo
# Preprocessing applied to every image before inference: resize to 224x224,
# convert to a tensor, then normalise each channel with the mean / std below.
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]
_PREPROCESSING_STEPS = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD),
]
transform = transforms.Compose(_PREPROCESSING_STEPS)
# Model loading
def load_model(model_path):
    """Build a two-class ``ResNet18`` and load trained weights into it.

    Args:
        model_path: Path to a ``state_dict`` checkpoint saved with
            ``torch.save``.

    Returns:
        A tuple ``(model, device)``: the model in eval mode on the selected
        device, and that device (CUDA when available, otherwise CPU).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ResNet18(num_classes=2, in_channels=3).to(device)
    # Remap the stored tensors onto the device in use, so a checkpoint saved
    # on a GPU machine can still be loaded on a CPU-only machine.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model.eval()
    return model, device
# Save the detection results under a new numbered directory in result_path
def create_result_dir(result_path):
    """Create and return the next ``result<N>`` directory under ``result_path``.

    ``N`` is one more than the largest number among the existing
    sub-directories named ``result<digits>`` (or 1 when there are none).
    Entries that merely start with ``result`` but do not end in a number
    (for example ``result_old``) are ignored.

    Args:
        result_path: Root directory for results; created if it is missing.

    Returns:
        The path of the newly created directory.
    """
    prefix = "result"
    # On the first run the results root may not exist yet; listing it would
    # raise FileNotFoundError.
    os.makedirs(result_path, exist_ok=True)

    used_numbers = [0]
    for entry in os.listdir(result_path):
        if not entry.startswith(prefix):
            continue
        suffix = entry[len(prefix):]
        # Only purely numeric suffixes can be converted with int().
        if suffix.isdecimal() and os.path.isdir(os.path.join(result_path, entry)):
            used_numbers.append(int(suffix))

    new_result_dir = os.path.join(result_path, f"{prefix}{max(used_numbers) + 1}")
    os.makedirs(new_result_dir, exist_ok=True)
    return new_result_dir
# Classify the images one by one and copy each into the directory of its predicted class
def batch_detect(model, device, data_path, result_path):
    """Classify every image file in ``data_path`` and sort copies by prediction.

    A new numbered result directory is created under ``result_path``. Images
    predicted as class index 0 are copied into its ``result_1``
    sub-directory, all others into ``result_2``. No ground-truth labels are
    read; the split is based on the model output only.

    Args:
        model: Classifier called on a ``(1, C, H, W)`` tensor produced by the
            module-level ``transform``.
        device: Device the input tensors are moved to before inference.
        data_path: Directory containing the images to classify.
        result_path: Root directory under which the results are written.

    Returns:
        None.
    """
    new_result_dir = create_result_dir(result_path)
    result_1_dir = os.path.join(new_result_dir, "result_1")
    result_2_dir = os.path.join(new_result_dir, "result_2")
    os.makedirs(result_1_dir, exist_ok=True)
    os.makedirs(result_2_dir, exist_ok=True)

    # Sorted for a reproducible processing order; sub-directories are skipped
    # because they cannot be opened as images.
    image_list = sorted(
        name for name in os.listdir(data_path)
        if os.path.isfile(os.path.join(data_path, name))
    )

    for image_file in tqdm(image_list, desc="Processing"):
        image_path = os.path.join(data_path, image_file)
        try:
            # The context manager closes the underlying file handle once the
            # pixel data has been converted.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert("RGB")
        except OSError as err:
            # A non-image or corrupt file should not abort the whole batch.
            tqdm.write(f"Skipping unreadable image {image_path}: {err}")
            continue

        image_tensor = transform(image).unsqueeze(0).to(device)
        with torch.no_grad():
            output = model(image_tensor)
        _, predicted = output.max(1)

        target_dir = result_1_dir if predicted.item() == 0 else result_2_dir
        shutil.copy(image_path, os.path.join(target_dir, image_file))
if __name__ == "__main__":
    GPUinfo()

    # Checkpoint to load, folder of images to classify, and results root
    model_path = "BGR-detection/bgr-detection/runs/exp10/best_model.pth"
    data_path = "BGR-detection/dataset/image/test"
    result_path = "BGR-detection/bgr-detection/test/result"

    model, device = load_model(model_path)
    batch_detect(model=model, device=device, data_path=data_path, result_path=result_path)
运行示例
运行前:
请确保你的路径设置正确,
请确保你的数据和标签相对应(避免浪费时间训练一个无用的模型),
请确保各模块代码被放置在正确位置并且被正确的调用
运行示例如下:
我是在服务器上运行的,所以是Linux命令,在编译器上运行同理
每一次训练结果和测试结果都会被默认保存:
我的训练集不大,就800张图片,包含了两个类别,机器是3090的显卡,训练了35个epochs,用时一个小时左右,准确率可以保证在95%以上
torch版本信息:
Author
代码放到了github上了:https://github.com/LINL12/BGR-detection/tree/master
因为是不常见任务,所以数据集我就不放上来了(估计你们也不需要),需要的话再私信我
Design by LINL