目标检测项目:设定正负样本实现单目标检测

目标检测

单目标检测

数据准备

数据1:检测目标

数据2:背景图(负样本)

数据3:检测目标+背景图(正样本)

将“检测目标+背景图”作为正样本、纯背景图作为负样本,把“图中是否存在目标”当作二分类问题来训练模型

处理步骤

  1. 打开背景图
  2. 设置背景图大小
  3. 打开检测目标
  4. 随机缩放检测目标
  5. 检测目标以四通道(RGBA,带透明通道)方式粘贴到背景图上

代码实现:全卷积

优势

  1. 提高计算效率
  2. 灵活:输入不同尺寸的图像时,不需要额外调整代码

代码附录

数据集

import glob
import os.path

import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

# Directory holding the sample images (machine-specific absolute path).
path = r'E:\git_ai_coder\05-od\20240228\images\sample'
# Image preprocessing pipeline; its only step is ToTensor.
t = transforms.Compose([transforms.ToTensor()])


class YMDataset(Dataset):
    """Image dataset whose labels are encoded in each file name.

    A file name is split on ``.``; fields 1 to 5 are read as
    ``x1 y1 x2 y2 cls``. Field 0 and the trailing extension are ignored.

    Args:
        root: Directory containing the sample images. Defaults to the
            module-level ``path``.
        img_size: Divisor used to normalise the box coordinates
            (presumably the side length of the square sample images).
    """

    def __init__(self, root=None, img_size=300):
        super().__init__()
        # Resolve the default lazily so the class can be defined without it.
        self.root = path if root is None else root
        self.img_size = img_size
        self.data = []
        # Sort so that indices are reproducible; glob order is unspecified.
        for img_path in sorted(glob.glob(os.path.join(self.root, '*'))):
            # basename works with both '/' and '\\' separators.
            fields = os.path.basename(img_path).split('.')[1:6]
            if len(fields) != 5:
                raise ValueError(f'cannot parse label from file name: {img_path}')
            try:
                # np.float_ was removed in NumPy 2.0; float64 is the same type.
                values = np.array(fields, dtype=np.float64)
            except ValueError as err:
                raise ValueError(f'cannot parse label from file name: {img_path}') from err
            label = torch.tensor(values, dtype=torch.float32)
            self.data.append((label, img_path))

    def __len__(self):
        """Return the number of samples found in the directory."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(loc_label, cls_label, image_tensor)`` for one sample.

        ``loc_label`` holds the four box coordinates divided by ``img_size``;
        ``cls_label`` is a one-element tensor with the class flag.
        """
        label, img_path = self.data[index]
        # Normalise the coordinates.
        loc_label = label[:4] / self.img_size
        cls_label = label[4:]
        # Close the file handle promptly and force 3 channels, which is what
        # the first convolution of the networks expects.
        with Image.open(img_path) as img_pil:
            img_norm = t(img_pil.convert('RGB'))
        return loc_label, cls_label, img_norm


if __name__ == '__main__':
    # Quick manual check: print the sample count and the first item.
    ds = YMDataset()
    print(len(ds))
    print(ds[0])

模型

全连接

class Net(nn.Module):
    """CNN backbone with two fully connected heads.

    The backbone is eight Conv-BatchNorm-ReLU-MaxPool stages. Each head
    flattens the features and applies ``Linear(512, k)`` plus a sigmoid, so
    the backbone must deliver exactly 512 values per sample (a 1x1 map).

    ``forward`` returns a ``(N, 5)`` tensor: four box values followed by the
    class score. A single ``Linear(512, 5)`` head would be the alternative.
    """

    def __init__(self):
        super().__init__()
        # Channel width before/after each of the eight stages.
        widths = (3, 64, 64, 128, 128, 256, 256, 512, 512)
        stages = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stages.extend((
                nn.Conv2d(c_in, c_out, 3, padding=1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ))
        self.backbone = nn.Sequential(*stages)

        # Box head; the original author notes that the sigmoid makes
        # training converge faster.
        self.out_loc_layer = nn.Sequential(nn.Flatten(), nn.Linear(512, 4), nn.Sigmoid())
        # Class head, also squashed by a sigmoid.
        self.out_cls_layer = nn.Sequential(nn.Flatten(), nn.Linear(512, 1), nn.Sigmoid())

    def forward(self, x):
        """Return ``[x1, y1, x2, y2, cls]`` per sample, concatenated on dim 1."""
        features = self.backbone(x)
        loc = self.out_loc_layer(features)
        cls_score = self.out_cls_layer(features)
        return torch.cat((loc, cls_score), dim=1)

全卷积

class Net2(nn.Module):
    """Fully convolutional variant: 1x1 convolutions replace the linear heads.

    The backbone is eight Conv-BatchNorm-ReLU-MaxPool stages. Each head is a
    1x1 convolution followed by global average pooling, flatten and sigmoid.
    The pooling is an identity when the feature map is already 1x1, and for
    larger maps it keeps the output at ``(N, 5)`` instead of
    ``(N, 5 * H * W)``. It has no parameters and sits after the convolution,
    so state_dict keys are unchanged.

    ``forward`` returns a ``(N, 5)`` tensor: four box values followed by the
    class score.
    """

    def __init__(self):
        super().__init__()
        # Channel width before/after each of the eight stages.
        widths = (3, 64, 64, 128, 128, 256, 256, 512, 512)
        stages = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]
        self.backbone = nn.Sequential(*stages)

        # Variant 1: one combined 5-channel head. forward() does not use it;
        # it is kept so checkpoints that contain its weights still load.
        self.out_layer = self._make_head(5)

        # Variant 2: separate box and class heads (used by forward()). The
        # original author notes that the sigmoid makes training converge
        # faster.
        self.out_loc_layer = self._make_head(4)
        self.out_cls_layer = self._make_head(1)

    @staticmethod
    def _make_head(out_channels):
        """Build a 1x1-conv head that always yields ``(N, out_channels)``."""
        return nn.Sequential(
            nn.Conv2d(512, out_channels, 1),
            # Collapse any remaining spatial extent to 1x1.
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``[x1, y1, x2, y2, cls]`` per sample, concatenated on dim 1."""
        x = self.backbone(x)
        out_loc = self.out_loc_layer(x)
        out_cls = self.out_cls_layer(x)
        # [[x1 y1 x2 y2]] + [[cls]] --> [[x1 y1 x2 y2 cls]]
        return torch.cat((out_loc, out_cls), dim=1)

训练

"""
目标检测
分类:二分类
损失函数:二分类损失函数BCELoss
回归(坐标)
均方差损失函数
MSELoss
"""
import os.path

import torch.nn
import tqdm
from PIL import Image, ImageDraw
from torchvision import transforms

from dataset import YMDataset
from torch.utils.data import DataLoader
from net import Net, Net2
# Checkpoint file: loaded on start-up if present, rewritten after every epoch.
model_path = 'weights/best_fcnn2.pt'


class Trainner:
    """Train and evaluate the single-object detector.

    The network output is split into four box values (trained with MSE) and
    one class score (trained with BCE); the two losses are summed.
    """

    # Factor that maps normalised box values back to pixels; the labels are
    # divided by the same number in the dataset.
    IMG_SIZE = 300
    # Sigmoid score at or above which a sample is reported as class 1.
    CLS_THRESHOLD = 0.5

    def __init__(self):
        # Data. NOTE(review): train and test loaders read the same dataset,
        # so the test output is not a held-out evaluation.
        train_set = YMDataset()
        test_set = YMDataset()
        self.train_loader = DataLoader(train_set, batch_size=10, shuffle=True)
        self.test_loader = DataLoader(test_set, batch_size=1, shuffle=False)
        # Network: resume from the checkpoint when one exists.
        net = Net2()
        if os.path.exists(model_path):
            net.load_state_dict(torch.load(model_path))
            # Report success only after the weights have been loaded.
            print('loading model parameters successfully')
        self.net = net
        # Losses. BCELoss has no built-in sigmoid; the network applies it.
        self.loc_loss_fn = torch.nn.MSELoss()
        self.cls_loss_fn = torch.nn.BCELoss()
        # Optimiser.
        self.opt = torch.optim.Adam(net.parameters())

    def train(self, epoch):
        """Run one training epoch, print the mean loss and save the weights."""
        sum_loss = 0
        self.net.train()
        for loc_label, cls_label, img in tqdm.tqdm(self.train_loader, desc='train', total=len(self.train_loader)):
            pred_out = self.net(img)
            pred_loc_out = pred_out[:, :4]
            pred_cls_out = pred_out[:, 4:]
            loc_loss = self.loc_loss_fn(pred_loc_out, loc_label)
            cls_loss = self.cls_loss_fn(pred_cls_out, cls_label)
            loss = cls_loss + loc_loss
            self.opt.zero_grad()
            loss.backward()
            self.opt.step()
            sum_loss += loss.item()

        # Guard against an empty loader so the epoch summary cannot crash.
        avg_loss = sum_loss / max(len(self.train_loader), 1)
        print(f'train: epoch:{epoch}\t avg_loss:{avg_loss}')
        # torch.save does not create missing directories.
        weights_dir = os.path.dirname(model_path)
        if weights_dir:
            os.makedirs(weights_dir, exist_ok=True)
        torch.save(self.net.state_dict(), model_path)

    def test(self, epoch):
        """Print the predicted class and pixel box for every test sample."""
        self.net.eval()
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for loc_label, cls_label, img in self.test_loader:
                pred_out = self.net(img)
                pred_loc_out = pred_out[:, :4]
                pred_cls_out = pred_out[:, 4:]
                # To visualise, call:
                #   self.show_image(img, loc_label, pred_loc_out)
                # The score is a sigmoid output in (0, 1); int() on it would
                # truncate to 0, so threshold it instead.
                pred_cls = int(pred_cls_out[0].item() >= self.CLS_THRESHOLD)
                pred_box = (pred_loc_out[0] * self.IMG_SIZE).to(torch.int32).tolist()
                print(f'test: epoch:{epoch} cls:{pred_cls}\t'
                      f'loc:{pred_box}')

    def show_image(self, img, box, boxes):
        """Show the first image with label (red) and prediction (green) boxes.

        The image is displayed only when the IoU between ``box[0]`` and
        ``boxes[0]`` exceeds 0.5.

        Args:
            img: Image batch; only ``img[0]`` is drawn.
            box: Label boxes, normalised ``x1 y1 x2 y2``; ``box[0]`` is used.
            boxes: Predicted boxes, normalised; ``boxes[0]`` is used.
        """
        # Tensor -> PIL image.
        img_pil = transforms.ToPILImage()(img[0])
        boxes = boxes.detach()
        iou_val = self.bbox_iou(box[0], boxes)
        if iou_val[0] > 0.5:
            # Hand plain Python floats to PIL rather than tensors.
            label_box = (box[0] * self.IMG_SIZE).tolist()
            pred_box = (boxes[0] * self.IMG_SIZE).tolist()
            draw = ImageDraw.Draw(img_pil)
            draw.rectangle(tuple(label_box), outline='red', width=2)
            draw.rectangle(tuple(pred_box), outline='green', width=2)
            img_pil.show()

    def bbox_iou(self, box, boxes):
        """Return the IoU of one box against each row of ``boxes``.

        Args:
            box: Tensor of four values ``x1 y1 x2 y2``.
            boxes: Tensor of shape ``(K, 4)`` in the same format.

        Returns:
            Tensor of shape ``(K,)``.
        """
        box_area = (box[2] - box[0]) * (box[3] - box[1])
        boxes_areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        l_x = torch.maximum(box[0], boxes[:, 0])
        l_y = torch.maximum(box[1], boxes[:, 1])
        r_x = torch.minimum(box[2], boxes[:, 2])
        r_y = torch.minimum(box[3], boxes[:, 3])
        # clamp keeps dtype and device of the operands; disjoint boxes get 0.
        w = torch.clamp(r_x - l_x, min=0)
        h = torch.clamp(r_y - l_y, min=0)
        inter_area = w * h
        iou_val = inter_area / (box_area + boxes_areas - inter_area)
        return iou_val

    def run(self, epochs=100):
        """Alternate training and testing for ``epochs`` epochs."""
        for epoch in range(epochs):
            self.train(epoch)
            self.test(epoch)


if __name__ == '__main__':
    # Script entry point: build the trainer and start the train/test loop.
    Trainner().run()
  • 2
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

饭碗、碗碗香

感谢壮士的慷慨解囊!

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值