目标检测项目：设定正负样本实现单目标检测

最新推荐文章于 2024-07-21 12:40:56 发布

饭碗、碗碗香

最新推荐文章于 2024-07-21 12:40:56 发布

阅读量326

点赞数 2

分类专栏：目标检测、计算机视觉、人工智能　文章标签：目标检测、人工智能、计算机视觉

本文链接：https://blog.csdn.net/weixin_44240478/article/details/138867769

版权

计算机视觉同时被 3 个专栏收录

5 篇文章 0 订阅

订阅专栏

目标检测

3 篇文章 0 订阅

订阅专栏

人工智能

1 篇文章 0 订阅

订阅专栏

目标检测

单目标检测

数据准备

数据1：检测目标

数据2：背景图（负样本）

数据3：检测目标+背景图（正样本）

将“检测目标+背景图”作为正样本、纯背景图作为负样本，把“图中是否包含目标”当作二分类问题来训练模型

处理步骤

打开背景图
设置背景图大小
打开检测目标
随机缩放检测目标
检测目标以四通道方式粘贴到背景图上

代码实现：全卷积

优势

提高计算效率
灵活，不同尺寸图像，不需要额外代码调整

代码附录

数据集

import glob
import os.path

import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

# Directory that holds the sample images read by the dataset below.
path = r'E:\git_ai_coder\05-od\20240228\images\sample'

# Preprocessing applied to every image: PIL image -> float tensor in CHW layout.
t = transforms.Compose([transforms.ToTensor()])


class YMDataset(Dataset):
    """Single-object detection samples whose labels are encoded in the file name.

    Every file in the sample directory is expected to be named
    ``<prefix>.<x1>.<y1>.<x2>.<y2>.<cls>.<ext>``: the five dot-separated
    fields after the first one are the box corners (in pixels) and the
    class flag.

    Args:
        root: Directory to scan. ``None`` (the default) uses the
            module-level ``path``.
        img_size: Divisor used to normalise the pixel coordinates.
            Defaults to 300, the value this file has always used.
    """

    def __init__(self, root=None, img_size=300):
        super().__init__()
        sample_dir = path if root is None else root
        self.img_size = img_size
        # List of (label tensor [x1, y1, x2, y2, cls], image path).
        self.data = []
        # Sorted so the sample order does not depend on the file system.
        for img_path in sorted(glob.glob(os.path.join(sample_dir, '*'))):
            # basename() honours the platform separator; splitting on '\\'
            # only isolates the file name on Windows.
            infos = os.path.basename(img_path).split('.')
            # Fields 1..5 of the file name: x1 y1 x2 y2 cls
            label = infos[1:6]
            # np.float64 replaces the np.float_ alias removed in NumPy 2.0.
            label = torch.tensor(np.array(label, dtype=np.float64), dtype=torch.float32)
            self.data.append((label, img_path))

    def __len__(self):
        """Return the number of samples found in the directory."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(loc_label, cls_label, image_tensor)`` for one sample.

        ``loc_label`` holds the four box coordinates divided by
        ``img_size``; ``cls_label`` is a one-element tensor with the class
        flag; the image is converted by the module-level transform ``t``.
        """
        label, img_path = self.data[index]
        # Normalise the coordinates.
        loc_label = label[:4] / self.img_size
        # Slice (not index) so the class label keeps a trailing dimension.
        cls_label = label[4:]
        # The context manager closes the file handle once the pixels are read;
        # convert('RGB') guarantees three channels regardless of the file mode.
        with Image.open(img_path) as img_pil:
            img_norm = t(img_pil.convert('RGB'))
        return loc_label, cls_label, img_norm


if __name__ == '__main__':
    # Smoke test: report how many samples were found, then show the first one.
    samples = YMDataset()
    print(len(samples))
    print(samples[0])

模型

全连接

class Net(nn.Module):
    """Convolutional backbone with fully connected output heads.

    The backbone is eight ``Conv -> BatchNorm -> ReLU -> MaxPool(2)`` stages,
    each halving the spatial size and ending with 512 channels. Two separate
    heads (box and class) flatten the features and apply ``Linear`` followed
    by ``Sigmoid``; the alternative of one shared 5-unit head is not used.
    Because the linear layers take exactly 512 inputs, the backbone output
    must be 512 x 1 x 1.
    """

    def __init__(self):
        super().__init__()
        # (in_channels, out_channels) of the eight backbone stages.
        stage_channels = (
            (3, 64), (64, 64),
            (64, 128), (128, 128),
            (128, 256), (256, 256),
            (256, 512), (512, 512),
        )
        stages = []
        for c_in, c_out in stage_channels:
            stages.extend([
                nn.Conv2d(c_in, c_out, 3, padding=1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ])
        self.backbone = nn.Sequential(*stages)

        # Box head: four values squashed into (0, 1) by the sigmoid
        # (the original author notes this makes training converge faster).
        self.out_loc_layer = nn.Sequential(nn.Flatten(), nn.Linear(512, 4), nn.Sigmoid())
        # Class head: a single probability-like value.
        self.out_cls_layer = nn.Sequential(nn.Flatten(), nn.Linear(512, 1), nn.Sigmoid())

    def forward(self, x):
        """Return a ``(batch, 5)`` tensor laid out as ``[x1, y1, x2, y2, cls]``."""
        features = self.backbone(x)
        box = self.out_loc_layer(features)
        score = self.out_cls_layer(features)
        # Concatenate [[x1 y1 x2 y2]] and [[cls]] along the feature axis.
        return torch.cat((box, score), dim=1)

全卷积

class Net2(nn.Module):
    """Fully convolutional variant: 1x1 convolutions replace the linear heads.

    The backbone is eight ``Conv -> BatchNorm -> ReLU -> MaxPool(2)`` stages
    ending with 512 channels. Each head is a 1x1 convolution followed by
    global average pooling, flatten and sigmoid.

    The pooling step has no parameters, so checkpoints saved before it was
    added still load (the state-dict keys are unchanged). On a 1x1 feature
    map it is the identity, so outputs for such inputs are unchanged. On
    larger feature maps it keeps the head width at 4 and 1 values instead of
    growing with the spatial size, which is what lets the network accept
    bigger images.
    """

    def __init__(self):
        super().__init__()
        # (in_channels, out_channels) of the eight backbone stages.
        stage_channels = (
            (3, 64), (64, 64),
            (64, 128), (128, 128),
            (128, 256), (256, 256),
            (256, 512), (512, 512),
        )
        stages = []
        for c_in, c_out in stage_channels:
            stages.extend([
                nn.Conv2d(c_in, c_out, 3, padding=1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ])
        self.backbone = nn.Sequential(*stages)

        # Option 1: one shared head producing all five values. It is not used
        # by forward(); it is kept so existing checkpoints, which contain its
        # weights, still load.
        self.out_layer = self._head(5)

        # Option 2 (the one forward() uses): separate box and class heads.
        # The sigmoid keeps outputs in (0, 1); the original author notes it
        # makes training converge faster.
        self.out_loc_layer = self._head(4)
        self.out_cls_layer = self._head(1)

    @staticmethod
    def _head(out_channels):
        """Build a head: 1x1 conv -> global average pool -> flatten -> sigmoid."""
        return nn.Sequential(
            nn.Conv2d(512, out_channels, 1),
            # Collapse any H x W map to 1 x 1 so Flatten always yields
            # exactly out_channels values per sample.
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return a ``(batch, 5)`` tensor laid out as ``[x1, y1, x2, y2, cls]``."""
        x = self.backbone(x)

        out_loc = self.out_loc_layer(x)
        out_cls = self.out_cls_layer(x)
        # Concatenate [[x1 y1 x2 y2]] and [[cls]] --> [[x1 y1 x2 y2 cls]]
        out = torch.cat((out_loc, out_cls), dim=1)
        return out

训练

"""
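Training script for single-object detection.

Classification head: binary problem (object present vs. background only),
trained with the binary cross-entropy loss ``BCELoss``.
Regression head (box coordinates): trained with the mean squared error
loss ``MSELoss``.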
"""
import os.path

import torch.nn
import tqdm
from PIL import Image, ImageDraw
from torchvision import transforms

from dataset import YMDataset
from torch.utils.data import DataLoader
from net import Net, Net2
# Checkpoint file: loaded at start-up when it exists and overwritten with the
# current weights at the end of every training epoch.
model_path = 'weights/best_fcnn2.pt'


class Trainner:
    """Train and evaluate the single-object detector.

    The network predicts ``[x1, y1, x2, y2, cls]`` per image. Box coordinates
    are trained with ``MSELoss`` and the class flag with ``BCELoss``; the two
    losses are summed with equal weight.
    """

    # Side length (pixels) used to map normalised coordinates back to the image.
    IMG_SIZE = 300
    # Class probability above which a prediction is reported as positive.
    CLS_THRESHOLD = 0.5

    def __init__(self):
        # Data. NOTE: training and evaluation read the same dataset, so the
        # test output is not a held-out measurement.
        train_set = YMDataset()
        test_set = YMDataset()
        self.train_loader = DataLoader(train_set, batch_size=10, shuffle=True)
        self.test_loader = DataLoader(test_set, batch_size=1, shuffle=False)
        # Network (Net() is the fully connected alternative).
        net = Net2()
        if os.path.exists(model_path):
            net.load_state_dict(torch.load(model_path))
            # Report success only after the weights have actually been loaded.
            print('loading model parameters successfully')
        self.net = net
        # Loss functions.
        self.loc_loss_fn = torch.nn.MSELoss()
        # BCELoss does not apply a sigmoid itself; the network heads do.
        self.cls_loss_fn = torch.nn.BCELoss()
        # Optimiser.
        self.opt = torch.optim.Adam(net.parameters())

    def train(self, epoch):
        """Run one training epoch, print the mean loss and save the weights."""
        sum_loss = 0
        self.net.train()
        for loc_label, cls_label, img in tqdm.tqdm(self.train_loader, desc='train', total=len(self.train_loader)):
            pred_out = self.net(img)
            pred_loc_out = pred_out[:, :4]
            pred_cls_out = pred_out[:, 4:]
            loc_loss = self.loc_loss_fn(pred_loc_out, loc_label)
            cls_loss = self.cls_loss_fn(pred_cls_out, cls_label)
            loss = cls_loss + loc_loss
            self.opt.zero_grad()
            loss.backward()
            self.opt.step()
            sum_loss += loss.item()

        avg_loss = sum_loss / len(self.train_loader)
        print(f'train: epoch:{epoch}\t avg_loss:{avg_loss}')
        # torch.save does not create missing directories.
        os.makedirs(os.path.dirname(model_path) or '.', exist_ok=True)
        torch.save(self.net.state_dict(), model_path)

    def test(self, epoch):
        """Print the predicted class (0/1) and pixel box for every test sample."""
        self.net.eval()
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            for loc_label, cls_label, img in self.test_loader:
                pred_out = self.net(img)
                pred_loc_out = pred_out[:, :4]
                pred_cls_out = pred_out[:, 4:]
                # To display the image: self.show_image(img, loc_label, pred_loc_out)
                # The class output is a sigmoid probability; int() would
                # truncate it to 0, so threshold it instead.
                pred_cls = int(pred_cls_out[0].item() > self.CLS_THRESHOLD)
                pred_box = (pred_loc_out[0] * self.IMG_SIZE).to(torch.int32).tolist()
                print(f'test: epoch:{epoch} cls:{pred_cls}\t'
                      f'loc:{pred_box}')

    def show_image(self, img, box, boxes):
        """Show the first image with label (red) and predicted (green) boxes.

        Nothing is shown unless the IoU between ``box[0]`` and ``boxes[0]``
        exceeds 0.5.

        Args:
            img: Batch of image tensors; only ``img[0]`` is displayed.
            box: Batch of normalised label boxes ``[x1, y1, x2, y2]``.
            boxes: Batch of normalised predicted boxes, same layout.
        """
        # Tensor -> PIL image.
        img_pil = transforms.ToPILImage()(img[0])
        iou_val = self.bbox_iou(box[0], boxes)
        if iou_val[0].item() > 0.5:
            # Plain Python floats are what ImageDraw expects.
            label_box = (box[0] * self.IMG_SIZE).tolist()
            pred_box = (boxes[0] * self.IMG_SIZE).tolist()
            draw = ImageDraw.Draw(img_pil)
            draw.rectangle(tuple(label_box), outline='red', width=2)
            draw.rectangle(tuple(pred_box), outline='green', width=2)
            img_pil.show()

    def bbox_iou(self, box, boxes):
        """Return the IoU of one box against each row of ``boxes``.

        Args:
            box: Tensor ``[x1, y1, x2, y2]``.
            boxes: Tensor of shape ``(n, 4)`` in the same layout.

        Returns:
            Tensor of ``n`` IoU values. Pairs whose union area is zero (for
            example two all-zero boxes) yield 0 instead of NaN.
        """
        box_area = (box[2] - box[0]) * (box[3] - box[1])
        boxes_areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        # Corners of the intersection rectangle.
        l_x = torch.maximum(box[0], boxes[:, 0])
        l_y = torch.maximum(box[1], boxes[:, 1])
        r_x = torch.minimum(box[2], boxes[:, 2])
        r_y = torch.minimum(box[3], boxes[:, 3])
        # Negative extents mean the boxes do not overlap.
        w = (r_x - l_x).clamp(min=0)
        h = (r_y - l_y).clamp(min=0)
        inter_area = w * h
        union_area = box_area + boxes_areas - inter_area
        return torch.where(union_area > 0, inter_area / union_area, torch.zeros_like(union_area))

    def run(self):
        """Alternate training and evaluation for 100 epochs."""
        for epoch in range(100):
            self.train(epoch)
            self.test(epoch)


if __name__ == '__main__':
    # Script entry point: build the trainer and run the whole schedule.
    Trainner().run()