手写--单目标检测

最新推荐文章于 2023-10-07 08:50:58 发布

默执_

最新推荐文章于 2023-10-07 08:50:58 发布

阅读量195

点赞数

分类专栏：Pytorch框架 文章标签：目标检测、计算机视觉、深度学习

本文链接：https://blog.csdn.net/qq_42102546/article/details/129657403

版权

Pytorch框架专栏收录该内容

30 篇文章 4 订阅

订阅专栏

该文介绍了如何使用PyTorch进行目标检测任务，包括读取自定义数据集，构建包含多个损失函数的网络模型，以及进行训练。数据集分为训练和测试集，网络模型由多个卷积层组成，用于预测目标存在、坐标和类别。训练过程中，结合BCEWithLogitsLoss、MSELoss和CrossEntropyLoss进行优化。

摘要由CSDN通过智能技术生成

做3件事：

读取数据
搭建网络
训练代码

在目标检测任务中，通常需要同时优化3个损失：1. 图片中是否存在检测目标（二分类）；2. 目标框左上角与右下角的坐标位置（回归）；3. 目标属于哪个类别（多分类）。

读取数据代码：

from torch.utils.data import Dataset
import os
import cv2
import numpy as np
import torch


class MyDataset(Dataset):
    """Single-object detection dataset read from image files and label files.

    Images are listed from ``data/images/<split>/`` and labels from
    ``data/labels/<split>/`` (relative to the current working directory).
    Only the file paths are stored at construction time; files are read
    lazily in ``__getitem__``.

    Each label file's first line is parsed as six whitespace-separated
    fields: ``label x1 y1 x2 y2 class``.
    """

    def __init__(self, is_train=True):
        """Collect image and label paths for the train or the test split.

        Args:
            is_train: when true use the ``train`` directories, otherwise
                the ``test`` directories.
        """
        self.train = "data/images/train/"
        self.test = "data/images/test/"

        self.train_lab = "data/labels/train/"
        self.test_lab = "data/labels/test/"

        if is_train:
            img_dir, lab_dir = self.train, self.train_lab
        else:
            img_dir, lab_dir = self.test, self.test_lab

        # os.listdir returns entries in arbitrary order. Both listings are
        # sorted so that index i of the images and index i of the labels line
        # up (assumes an image and its label share the same file stem).
        self.dataset = [os.path.join(img_dir, name) for name in sorted(os.listdir(img_dir))]
        self.lab = [os.path.join(lab_dir, name) for name in sorted(os.listdir(lab_dir))]

    def __len__(self):
        """Return the number of image paths collected for this split."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            A 4-tuple ``(image, label, box, cls)``: the image as a float32
            array in channel-first (C, H, W) layout scaled by 1/255, the
            first label field as float32, the four coordinates divided by
            300 as a float32 array, and the class index as ``int``.

        Raises:
            FileNotFoundError: if the image cannot be read by OpenCV.
        """
        img_path = self.dataset[index]
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"cannot read image: {img_path}")
        img = img / 255

        # OpenCV yields H, W, C; convolution layers expect C, H, W.
        new_img = torch.tensor(img).permute(2, 0, 1)

        # The context manager closes the label file even if parsing fails.
        with open(self.lab[index], "r") as fo:
            # split() without an argument tolerates the trailing newline and
            # repeated spaces between fields.
            label, str_1, str_2, str_3, str_4, str_class = fo.readline().split()

        # Coordinates are divided by 300, presumably the image side length in
        # pixels (TODO confirm against the dataset).
        return np.float32(new_img), np.float32(label),\
               np.float32([int(str_1) / 300, int(str_2) / 300, int(str_3) / 300, int(str_4) / 300]),\
               int(str_class)


if __name__ == '__main__':
    # Smoke test: build the training split and dump every sample to stdout.
    samples = MyDataset()
    for sample in samples:
        print(sample)

搭建网络

from torch import nn
import torch


class Mynet(nn.Module):
    """Small fully-convolutional network with three prediction heads.

    A shared convolutional backbone feeds three 3x3 convolution heads:

    * ``label_layer``    -> 1 channel, object-present logit
    * ``position_layer`` -> 4 channels, x1 y1 x2 y2 box coordinates
    * ``sort_layer``     -> 20 channels, class logits

    For a 300x300 input the backbone output is 3x3, so each head produces a
    1x1 map that ``forward`` squeezes away.
    """

    def __init__(self):
        super(Mynet, self).__init__()
        self.layers = nn.Sequential(
            nn.Conv2d(3, 16, 3),
            nn.ReLU(),
            nn.MaxPool2d(3),

            nn.Conv2d(16, 22, 3),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(22, 32, 5),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, 5),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 82, 3),
            nn.ReLU(),

            nn.Conv2d(82, 128, 3),
            nn.ReLU(),

            nn.Conv2d(128, 25, 3),
            nn.ReLU(),
        )

        # Object / background head. It emits a raw logit: no ReLU here,
        # because a ReLU would clamp the logit to >= 0 and sigmoid(logit)
        # could then never fall below 0.5. The Sequential wrapper is kept so
        # parameter names ("label_layer.0.weight", ...) stay the same.
        self.label_layer = nn.Sequential(
            nn.Conv2d(25, 1, 3),
        )

        # x1 y1 x2 y2 box coordinates; kept non-negative by the ReLU.
        self.position_layer = nn.Sequential(
            nn.Conv2d(25, 4, 3),
            nn.ReLU(),
        )

        # Multi-class head. It also emits raw logits (no ReLU) so that a
        # softmax / cross-entropy over them is not distorted by clamping.
        self.sort_layer = nn.Sequential(
            nn.Conv2d(25, 20, 3),
        )

    def forward(self, x):
        """Run the backbone and the three heads.

        Args:
            x: image batch in N, C, H, W layout with 3 channels.

        Returns:
            ``(label, position, sort)``. For 300x300 inputs the shapes are
            ``(N,)``, ``(N, 4)`` and ``(N, 20)``.
        """
        out = self.layers(x)

        # squeeze(dim) removes that dimension only when its size is 1 and
        # leaves the tensor unchanged otherwise. Dimensions are dropped from
        # the last one backwards so the remaining indices stay valid.
        label = self.label_layer(out)
        label = label.squeeze(3).squeeze(2).squeeze(1)

        position = self.position_layer(out)
        position = position.squeeze(3).squeeze(2)

        sort = self.sort_layer(out)
        sort = sort.squeeze(3).squeeze(2)

        return label, position, sort


if __name__ == '__main__':
    # Shape check on a random batch of three 300x300 RGB images.
    net = Mynet()
    x = torch.randn(3, 3, 300, 300)

    # One forward pass is enough to report the shape of all three outputs
    # (label, position, sort), printed in that order.
    for out in net(x):
        print(out.shape)

训练代码

from net import Mynet
from data import MyDataset
from torch.utils.data import DataLoader
# from torch.utils.tensorboard import SummaryWriter
from torch import nn, optim
import torch
import datetime
import os

# Use the GPU when CUDA is usable, otherwise fall back to the CPU so the
# script still runs on machines without a CUDA device.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'


class Train:
    """Training driver: builds the data loader, model, optimizer and losses.

    Calling the instance runs the training loop and writes one checkpoint
    per epoch into the ``p/`` directory.
    """

    def __init__(self, weight_path):
        """Set up training state.

        Args:
            weight_path: path of a saved state dict. It is loaded into the
                model when the file exists; otherwise training starts from
                freshly initialised weights.
        """
        self.train_dataset = MyDataset(is_train=True)

        # Batches of 50 samples, reshuffled every epoch.
        self.train_dataLoader = DataLoader(self.train_dataset, batch_size=50, shuffle=True)

        self.net = Mynet().to(DEVICE)

        if os.path.exists(weight_path):
            # map_location lets a checkpoint saved on another device be
            # restored onto the device used for this run.
            self.net.load_state_dict(torch.load(weight_path, map_location=DEVICE))

        self.opt = optim.Adam(self.net.parameters())

        # Binary object/background loss (applies the sigmoid itself).
        self.label_loss_fun = nn.BCEWithLogitsLoss()
        # Box coordinate regression loss.
        self.positon_loss_fun = nn.MSELoss()
        # Multi-class loss (applies the log-softmax itself).
        self.sort_loss_fun = nn.CrossEntropyLoss()

    def __call__(self, epochs=1000):
        """Train the model and save a checkpoint after every epoch.

        Args:
            epochs: number of passes over the training set (default 1000).
        """
        # Create the checkpoint directory up front so the first save cannot
        # fail after a full epoch of work.
        os.makedirs('p', exist_ok=True)
        self.net.train()

        for epoch in range(epochs):
            for i, (img, label, position, sort) in enumerate(self.train_dataLoader):

                img, label, position, sort = img.to(DEVICE), label.to(DEVICE), position.to(DEVICE), sort.to(DEVICE)

                out_label, out_position, out_sort = self.net(img)

                label_loss = self.label_loss_fun(out_label, label)
                position_loss = self.positon_loss_fun(out_position, position)
                sort_loss = self.sort_loss_fun(out_sort, sort)

                # Weighted sum of the three objectives; the box regression
                # term carries the largest weight.
                train_loss = 0.2 * label_loss + position_loss * 0.6 + 0.2 * sort_loss

                self.opt.zero_grad()
                train_loss.backward()
                self.opt.step()

                if i % 10 == 0:
                    print(f'第{i}次  train_loss = {train_loss.item()}')
            print(f"第{epoch}轮")
            # Timestamp made filename-safe by replacing ' ', ':' and '.'.
            date_time = str(datetime.datetime.now()).replace(' ', '-').replace(':', '_').replace('.', '_')
            torch.save(self.net.state_dict(), f'p/{date_time}-{epoch}.pt')


if __name__ == '__main__':
    # Resume from this checkpoint when it exists, then run the training loop.
    trainer = Train(weight_path="2023-03-19-20_54_33_272814-17.pt")
    trainer()

测试代码

from net import Mynet
import cv2
import torch


def cv_show(neme, img):
    """Show ``img`` in a window titled ``neme`` and close it after a key press.

    Args:
        neme: window title.
        img: image to display.
    """
    # waitKey takes a delay in milliseconds; 0 waits until any key is pressed.
    wait_for_key_ms = 0

    cv2.imshow(neme, img)
    cv2.waitKey(wait_for_key_ms)
    # Tear down every OpenCV window, not just the one opened above.
    cv2.destroyAllWindows()


# Inference demo: load a checkpoint on the CPU, predict on one image and
# draw the predicted box.
model = Mynet()
model.load_state_dict(torch.load('p/1.pt', map_location=torch.device('cpu')))
model.eval()

img = cv2.imread("1.jpg")
if img is None:
    # cv2.imread returns None instead of raising when the file is unreadable.
    raise FileNotFoundError("cannot read image: 1.jpg")
img = cv2.resize(img, (300, 300))

# H, W, C uint8 -> 1, C, H, W float scaled by 1/255.
img_data = torch.tensor(img).permute(2, 0, 1)
img_data = torch.unsqueeze(img_data, dim=0) / 255

# No gradients are needed for a single prediction.
with torch.no_grad():
    rst = model(img_data)
label = torch.sigmoid(rst[0])
sort = torch.softmax(rst[2], dim=1)

# Scale the predicted coordinates by 300, the side length of the resized image.
rst_1 = rst[1] * 300
# OpenCV drawing functions require integer pixel coordinates, so the float
# predictions are rounded and converted to plain ints.
x1, y1, x2, y2 = (int(round(v)) for v in rst_1.cpu().detach().numpy()[0].tolist())

# Draw a rectangle: image, top-left, bottom-right, colour (B, G, R), thickness.
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 3)
cv_show('neme', img)

参考：github源码地址：https://github.com/qfs1980398040/pytorch-Single-target-detection-Minions
参考：原始数据集链接：https://pan.baidu.com/s/1AVifpTWr29BEfQYGhuEd_w
提取码：p5rj