目标检测
单目标检测
数据准备
数据1:检测目标
数据2:背景图(负样本)
数据3:检测目标+背景图(正样本)
将“检测目标+背景图”作为正样本、纯背景图作为负样本,把“图中是否包含目标”当作二分类问题来训练模型
处理步骤
- 打开背景图
- 设置背景图大小
- 打开检测目标
- 随机缩放检测目标
- 检测目标以四通道方式粘贴到背景图上
代码实现:全卷积
优势
- 提高计算效率
- 灵活:输入图像尺寸不同时,网络结构不需要额外的代码调整
代码附录
数据集
import glob
import os.path
import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
# Preprocessing applied to every image in YMDataset.__getitem__: ToTensor only
# (value normalisation and HWC -> CHW channel swap).
t = transforms.Compose([
transforms.ToTensor()
])
# Directory that YMDataset globs for sample images. Hard-coded, machine-specific
# Windows path; the box/class label is encoded in each file name.
path = r'E:\git_ai_coder\05-od\20240228\images\sample'
class YMDataset(Dataset):
    """Single-object detection samples whose label is encoded in the file name.

    Each file is expected to be named ``<name>.<x1>.<y1>.<x2>.<y2>.<cls>.<ext>``:
    the five dot-separated fields after the first one are the box corners and
    the class flag.

    Args:
        root: Directory containing the sample images. Defaults to the
            module-level ``path``.
        img_size: Value the box coordinates are divided by to normalise them.
            Defaults to 300, the value used originally.

    Raises:
        ValueError: If a file name does not carry five numeric label fields.
    """

    def __init__(self, root=None, img_size=300):
        super().__init__()
        if root is None:
            root = path
        self.img_size = img_size
        self.data = []
        # Sorted so that sample indices are reproducible between runs.
        for img_path in sorted(glob.glob(os.path.join(root, '*'))):
            # basename() copes with the platform's separator, unlike split('\\').
            infos = os.path.basename(img_path).split('.')
            # Fields 1..5 are: x1 y1 x2 y2 cls
            fields = infos[1:6]
            try:
                if len(fields) != 5:
                    raise ValueError(f'expected 5 label fields, found {len(fields)}')
                values = [float(field) for field in fields]
            except ValueError as err:
                raise ValueError(
                    f'cannot parse label from file name {img_path!r}: {err}'
                ) from err
            label = torch.tensor(values, dtype=torch.float32)
            self.data.append((label, img_path))

    def __len__(self):
        """Return the number of samples found under the root directory."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(loc_label, cls_label, image_tensor)`` for one sample.

        ``loc_label`` holds the four box coordinates divided by ``img_size``;
        ``cls_label`` is a one-element tensor with the class flag.
        """
        label, img_path = self.data[index]
        # Normalise the coordinates.
        loc_label = label[:4] / self.img_size
        # Slice (not index) so the class label keeps a length-1 dimension.
        cls_label = label[4:]
        # The context manager releases the file handle once the pixels are read;
        # convert('RGB') guarantees three channels even for RGBA/greyscale files.
        with Image.open(img_path) as img_pil:
            img_norm = t(img_pil.convert('RGB'))
        return loc_label, cls_label, img_norm
if __name__ == '__main__':
    # Quick manual check: print the sample count and the first sample.
    sample_set = YMDataset()
    print(len(sample_set))
    print(sample_set[0])
模型
全连接(卷积骨干网络 + 全连接输出层)
class Net(nn.Module):
    """Convolutional backbone with two fully-connected, sigmoid-activated heads.

    The backbone is eight Conv-BatchNorm-ReLU-MaxPool stages, each halving the
    spatial size. Both heads flatten the backbone output into ``Linear(512, k)``,
    so the backbone must produce a 512x1x1 feature map (input side of 256..511
    pixels).

    ``forward`` returns a tensor of shape ``(N, 5)``: ``x1 y1 x2 y2 cls``.
    """

    # (in_channels, out_channels) of each backbone stage, in order.
    _STAGES = (
        (3, 64), (64, 64),
        (64, 128), (128, 128),
        (128, 256), (256, 256),
        (256, 512), (512, 512),
    )

    def __init__(self):
        super().__init__()
        self.backbone = self._build_backbone()
        # Separate heads for localisation (4 values) and classification (1 value).
        # A single Linear(512, 5) head would be the alternative layout.
        self.out_loc_layer = self._build_head(4)
        self.out_cls_layer = self._build_head(1)

    @classmethod
    def _build_backbone(cls):
        """Assemble the eight Conv-BN-ReLU-MaxPool stages into one Sequential."""
        layers = []
        for in_ch, out_ch in cls._STAGES:
            layers.extend((
                nn.Conv2d(in_ch, out_ch, 3, padding=1, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ))
        return nn.Sequential(*layers)

    @staticmethod
    def _build_head(out_features):
        """Flatten -> Linear(512, out_features) -> Sigmoid."""
        return nn.Sequential(
            nn.Flatten(),
            nn.Linear(512, out_features),
            # The original author notes the sigmoid makes training converge faster.
            nn.Sigmoid(),
        )

    def forward(self, x):
        features = self.backbone(x)
        loc = self.out_loc_layer(features)
        cls_prob = self.out_cls_layer(features)
        # [[x1 y1 x2 y2]] + [[cls]] --> [[x1 y1 x2 y2 cls]]
        return torch.cat((loc, cls_prob), dim=1)
全卷积(卷积骨干网络 + 1×1 卷积输出层)
class Net2(nn.Module):
    """Fully-convolutional variant: 1x1 convolutions replace the linear heads.

    The backbone is eight Conv-BatchNorm-ReLU-MaxPool stages, each halving the
    spatial size. Every head is ``Conv2d(512, k, 1) -> Flatten -> Sigmoid``.

    ``forward`` concatenates the flattened localisation and classification
    outputs along dim 1. When the backbone output is 1x1 this is ``(N, 5)``
    laid out as ``x1 y1 x2 y2 cls``; for a larger feature map the result is
    all flattened localisation values followed by all classification values.
    """

    # (in_channels, out_channels) of each backbone stage, in order.
    _STAGES = (
        (3, 64), (64, 64),
        (64, 128), (128, 128),
        (128, 256), (256, 256),
        (256, 512), (512, 512),
    )

    def __init__(self):
        super().__init__()
        self.backbone = self._build_backbone()
        # Option 1: one combined 5-channel head. forward() does not call it, but
        # it stays registered, so its parameters remain part of state_dict().
        self.out_layer = self._build_head(5)
        # Option 2 (used by forward): separate localisation / classification heads.
        self.out_loc_layer = self._build_head(4)
        self.out_cls_layer = self._build_head(1)

    @classmethod
    def _build_backbone(cls):
        """Assemble the eight Conv-BN-ReLU-MaxPool stages into one Sequential."""
        layers = []
        for in_ch, out_ch in cls._STAGES:
            layers.extend((
                nn.Conv2d(in_ch, out_ch, 3, padding=1, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ))
        return nn.Sequential(*layers)

    @staticmethod
    def _build_head(out_channels):
        """Conv2d(512, out_channels, 1) -> Flatten -> Sigmoid."""
        return nn.Sequential(
            nn.Conv2d(512, out_channels, 1),
            nn.Flatten(),
            # The original author notes the sigmoid makes training converge faster.
            nn.Sigmoid(),
        )

    def forward(self, x):
        features = self.backbone(x)
        loc = self.out_loc_layer(features)
        cls_prob = self.out_cls_layer(features)
        # [[x1 y1 x2 y2]] + [[cls]] --> [[x1 y1 x2 y2 cls]]
        return torch.cat((loc, cls_prob), dim=1)
训练
"""
目标检测训练脚本
分类(二分类):损失函数使用二分类交叉熵 BCELoss
回归(边界框坐标):损失函数使用均方差损失 MSELoss
"""
import os.path
import torch.nn
import tqdm
from PIL import Image, ImageDraw
from torchvision import transforms
from dataset import YMDataset
from torch.utils.data import DataLoader
from net import Net, Net2
# Checkpoint file: loaded in Trainner.__init__ when it exists and overwritten by
# Trainner.train at the end of every epoch.
model_path = 'weights/best_fcnn2.pt'
class Trainner:
    """Train and evaluate the single-object detector.

    The network is expected to output five values per image: four box
    coordinates (x1, y1, x2, y2, scaled by ``1 / IMG_SIZE``) followed by one
    sigmoid class probability. Localisation is trained with MSE loss and
    classification with BCE loss; the two losses are summed.
    """

    # Factor the dataset divides box coordinates by; used to scale predictions back.
    IMG_SIZE = 300
    # Probability above which a prediction is reported as class 1.
    CLS_THRESHOLD = 0.5

    def __init__(self):
        # Data. NOTE: the test loader is built from the same dataset as the
        # training loader, so test() measures fit, not generalisation.
        train_set = YMDataset()
        test_set = YMDataset()
        self.train_loader = DataLoader(train_set, batch_size=10, shuffle=True)
        self.test_loader = DataLoader(test_set, batch_size=1, shuffle=False)

        # Network: Net2 is the fully-convolutional model; Net() is the alternative.
        net = Net2()
        if os.path.exists(model_path):
            # The model stays on the CPU, so map any saved tensors there too.
            net.load_state_dict(torch.load(model_path, map_location='cpu'))
            # Report success only after loading actually succeeded.
            print('loading model parameters successfully')
        self.net = net

        # Loss functions.
        self.loc_loss_fn = torch.nn.MSELoss()
        # BCELoss does not apply a sigmoid itself; the network's heads already do.
        self.cls_loss_fn = torch.nn.BCELoss()
        # Optimiser.
        self.opt = torch.optim.Adam(net.parameters())

    def train(self, epoch):
        """Run one training epoch, print the mean loss and save a checkpoint."""
        sum_loss = 0.0
        self.net.train()
        for loc_label, cls_label, img in tqdm.tqdm(self.train_loader, desc='train', total=len(self.train_loader)):
            pred_out = self.net(img)
            pred_loc_out = pred_out[:, :4]
            pred_cls_out = pred_out[:, 4:]

            loc_loss = self.loc_loss_fn(pred_loc_out, loc_label)
            cls_loss = self.cls_loss_fn(pred_cls_out, cls_label)
            loss = cls_loss + loc_loss

            self.opt.zero_grad()
            loss.backward()
            self.opt.step()
            sum_loss += loss.item()

        # max(..., 1) avoids a ZeroDivisionError when the loader is empty.
        avg_loss = sum_loss / max(len(self.train_loader), 1)
        print(f'train: epoch:{epoch}\t avg_loss:{avg_loss}')

        # torch.save does not create missing directories.
        os.makedirs(os.path.dirname(model_path) or '.', exist_ok=True)
        torch.save(self.net.state_dict(), model_path)

    def test(self, epoch):
        """Print the predicted class and box (scaled by IMG_SIZE) for each test sample."""
        self.net.eval()
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for loc_label, cls_label, img in self.test_loader:
                pred_out = self.net(img)
                pred_loc_out = pred_out[:, :4]
                pred_cls_out = pred_out[:, 4:]
                # Uncomment to visualise label vs. prediction:
                # self.show_image(img, loc_label, pred_loc_out)

                # The class output is a sigmoid probability: threshold it.
                # int() on the raw value would truncate almost everything to 0.
                pred_cls = int(pred_cls_out[0].item() > self.CLS_THRESHOLD)
                pred_loc = (pred_loc_out[0] * self.IMG_SIZE).to(torch.int32).tolist()
                print(f'test: epoch:{epoch} cls:{pred_cls}\t'
                      f'loc:{pred_loc}')

    def show_image(self, img, box, boxes):
        """Show the first image with its label box (red) and predicted box (green).

        The boxes are drawn only when their IoU exceeds 0.5.

        Args:
            img: Image batch; only ``img[0]`` is shown.
            box: Label boxes; only ``box[0]`` is used.
            boxes: Predicted boxes of shape ``(N, 4)``; only ``boxes[0]`` is used.
        """
        # Tensor -> PIL image.
        img_pil = transforms.ToPILImage()(img[0])
        iou_val = self.bbox_iou(box[0], boxes)[0].item()
        if iou_val > 0.5:
            # Plain Python floats: PIL drawing calls do not take tensors.
            label_box = (box[0].detach() * self.IMG_SIZE).tolist()
            pred_box = (boxes[0].detach() * self.IMG_SIZE).tolist()
            draw = ImageDraw.Draw(img_pil)
            draw.rectangle(tuple(label_box), outline='red', width=2)
            draw.rectangle(tuple(pred_box), outline='green', width=2)
        img_pil.show()

    def bbox_iou(self, box, boxes):
        """Return the IoU between one box and each row of ``boxes``.

        Args:
            box: Tensor ``(4,)`` as ``x1 y1 x2 y2``.
            boxes: Tensor ``(N, 4)`` in the same format.

        Returns:
            Tensor ``(N,)`` of IoU values; 0 where the union area is zero.
        """
        box_area = (box[2] - box[0]) * (box[3] - box[1])
        boxes_areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

        # Corners of the intersection rectangle.
        l_x = torch.maximum(box[0], boxes[:, 0])
        l_y = torch.maximum(box[1], boxes[:, 1])
        r_x = torch.minimum(box[2], boxes[:, 2])
        r_y = torch.minimum(box[3], boxes[:, 3])

        # A negative extent means the boxes do not overlap on that axis.
        w = torch.clamp(r_x - l_x, min=0)
        h = torch.clamp(r_y - l_y, min=0)
        inter_area = w * h

        union_area = box_area + boxes_areas - inter_area
        # The clamp keeps degenerate (zero-area) boxes from producing NaN.
        return inter_area / torch.clamp(union_area, min=1e-12)

    def run(self):
        """Alternate training and evaluation for 100 epochs."""
        for epoch in range(100):
            self.train(epoch)
            self.test(epoch)
if __name__ == '__main__':
    # Script entry point: build the trainer and run the full train/test loop.
    Trainner().run()