PyTorch Implementation of YOLO V3 (Overfitting Version)


Code layout:
(image: project directory structure)

1. cfg.py

# Configuration file
IMG_HEIGHT = 416
IMG_WIDTH = 416
CLASS_NUM = 4

# These anchor boxes were obtained by k-means clustering on the COCO dataset
ANCHORS_GROUP_KMEANS = {  # anchor boxes (priors) from k-means clustering
    52: [[10, 13], [16, 30], [33, 23]],  # boxes clustered for the 52x52 feature map
    26: [[30, 61], [62, 45], [59, 119]],
    13: [[116, 90], [156, 198], [373, 326]]}

ANCHORS_GROUP = {  # hand-picked anchors, set from experience
    13: [[360, 360], [360, 180], [180, 360]],
    26: [[180, 180], [180, 90], [90, 180]],
    52: [[90, 90], [90, 45], [45, 90]]}

ANCHORS_GROUP_AREA = {  # areas of the anchors
    13: [x * y for x, y in ANCHORS_GROUP[13]],
    26: [x * y for x, y in ANCHORS_GROUP[26]],
    52: [x * y for x, y in ANCHORS_GROUP[52]],
}

if __name__ == '__main__':

    for feature_size, anchors in ANCHORS_GROUP.items():
        print(feature_size)  # 13 26 52
        print(anchors)
    for feature_size, anchor_area in ANCHORS_GROUP_AREA.items():
        print(feature_size)
        print(anchor_area)
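
The COCO anchors above come from running k-means on the width/height of the ground-truth boxes. A minimal sketch of that clustering, using the common 1 - IOU distance (this helper is illustrative and not part of the original project):

import numpy as np

def kmeans_anchors(wh, k=9, iters=100):
    """Cluster (w, h) pairs with k-means under a 1 - IOU distance."""
    anchors = wh[np.random.choice(len(wh), k, replace=False)].copy()
    for _ in range(iters):
        # IOU between every box and every anchor, assuming aligned top-left corners
        inter = (np.minimum(wh[:, None, 0], anchors[None, :, 0]) *
                 np.minimum(wh[:, None, 1], anchors[None, :, 1]))
        union = (wh[:, None, 0] * wh[:, None, 1] +
                 anchors[None, :, 0] * anchors[None, :, 1] - inter)
        assign = np.argmax(inter / union, axis=1)  # nearest anchor = highest IOU
        # move each anchor to the mean size of its cluster (assumes no cluster goes empty)
        anchors = np.array([wh[assign == i].mean(axis=0) for i in range(k)])
    return anchors

# usage sketch: wh = np.array([[w1, h1], [w2, h2], ...], dtype=float) from the training labels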

2. dataset.py

import torch
from torch.utils.data import Dataset, DataLoader
import torchvision
import numpy as np
import cfg
import os
from PIL import Image
import math
from torchvision import transforms

LABEL_FILE_PATH = "data2/label.txt"
IMG_BASE_DIR = "data2"
# LABEL_FILE_PATH = "data/person_label.txt"
# IMG_BASE_DIR = "data"

transform = torchvision.transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])


def one_hot(cls_num, i):
    # one-hot encode class index i over cls_num classes
    b = np.zeros(cls_num)
    b[i] = 1.
    return b


class MyDataset(Dataset):

    def __init__(self):
        with open(LABEL_FILE_PATH) as f:
            self.dataset = f.readlines()  # read all label lines

    def __len__(self):
        return len(self.dataset)  # number of samples

    def __getitem__(self, index):
        labels = {}
        line = self.dataset[index]  # one full line, e.g.: images/21.jpg 0 18 45 258 264 1 258 99 290 250

        strs = line.strip().split()  # ['images/25.jpg', '3', '74', '142', '357', '323']
        # print(strs)

        _img_data = Image.open(os.path.join(IMG_BASE_DIR, strs[0]))  # open the image file
        img_data = transform(_img_data)

        # _boxes = np.array(float(x) for x in strs[1:])
        # parse the numeric label values
        _boxes = np.array(list(map(float, strs[1:])))  # [0.0, 2.0, 49.0, 344.0, 261.0, 1.0, 103.0, 76.0, 496.0, 303.0]
        # print(_boxes)

        # split into one 5-value group (cls, cx, cy, w, h) per object
        boxes = np.split(_boxes, len(_boxes) // 5)
        # [array([  0.,   2.,  49., 344., 261.]), array([  1., 103.,  76., 496., 303.])]

        for feature_size, anchors in cfg.ANCHORS_GROUP.items():  # hand-set anchors
            # print(feature_size)  # 13
            # print(anchors)  # [[360, 360], [360, 180], [180, 360]]
            # print(cfg.CLASS_NUM)

            # create zero tensors for the 13/26/52 feature maps; cells that contain an object center get filled in below
            labels[feature_size] = np.zeros(shape=(feature_size, feature_size, 3, 5 + cfg.CLASS_NUM))
            # 3 = three anchors per scale
            # print(labels)

            for box in boxes:  # iterate over each object's label box
                cls, cx, cy, w, h = box  # 1.0 256.0 308.0 513.0 617.0
                # print(cls, cx, cy, w, h)

                # split the object center into fractional and integer parts; the network learns the fractional offset
                cx_offset, cx_index = math.modf(cx * feature_size / cfg.IMG_WIDTH)  # equivalent to cx / 32 at the 13x13 scale
                # print(feature_size,'---- ',cy,cy*feature_size)
                cy_offset, cy_index = math.modf(cy * feature_size / cfg.IMG_HEIGHT)

                for i, anchor in enumerate(anchors):  # loop over the three anchors at this scale
                    # print(i)  # 0
                    # print(anchor)  # [360, 360]

                    anchor_area = cfg.ANCHORS_GROUP_AREA[feature_size][i]  # area of the i-th anchor
                    # print(anchor_area)  # 129600

                    p_w, p_h = w / anchor[0], h / anchor[1]  # ground-truth width/height over anchor width/height
                    p_area = w * h  # area of the ground-truth box

                    # used as the confidence target; the anchor and the ground-truth box share a center (concentric),
                    # so this area ratio filters out anchors that match the box poorly
                    iou = min(p_area, anchor_area) / max(p_area, anchor_area)
                    # print(iou)
                    # print(*one_hot(cfg.CLASS_NUM, int(cls)))  # 0.0 0.0 0.0 1.0

                    labels[feature_size][int(cy_index), int(cx_index), i] = np.array(
                        [iou, cx_offset, cy_offset, np.log(p_w), np.log(p_h),
                         *one_hot(cfg.CLASS_NUM, int(cls))])
                    # print(labels)  # dims are H, W, 3; the last axis holds the 5 + CLASS_NUM target values

        # print(labels[13].shape)  # (13, 13, 3, 9)
        # print(labels[26].shape)  # (26, 26, 3, 9)
        # print(labels[52].shape)  # (52, 52, 3, 9)
        return labels[13], labels[26], labels[52], img_data


if __name__ == '__main__':

    # x = one_hot(10, 2)
    # print(x)

    data = MyDataset()
    dataloader = DataLoader(data, 2, shuffle=True)
    for i, x in enumerate(dataloader):
        print("====")
        # print(x[0].shape)  # torch.Size([2, 13, 13, 3, 9])
        # print(x[1].shape)  # torch.Size([2, 26, 26, 3, 9])
        # print(x[2].shape)  # torch.Size([2, 52, 52, 3, 9])
        # print(x[3].shape)  # torch.Size([2, 3, 416, 416])
    # for target_13, target_26, target_52, img_data in dataloader:
        # print(target_13.shape)  # torch.Size([2, 13, 13, 3, 9])
        # print(target_26.shape)  # torch.Size([2, 26, 26, 3, 9])
        # print(target_52.shape)  # torch.Size([2, 52, 52, 3, 9])
        # print(img_data.shape)  # torch.Size([2, 3, 416, 416])
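
To make the label encoding concrete, here is a small round-trip sketch using the same formulas as __getitem__: the center is split into a cell index plus a fractional offset, and width/height are stored as log ratios against the anchor (sample numbers taken from the comments above):

import math

cx, w, anchor_w = 256.0, 513.0, 360.0            # sample values from the comments above
cx_offset, cx_index = math.modf(cx * 13 / 416)   # 13x13 scale, stride 32
p_w = math.log(w / anchor_w)                     # stored as a log ratio

# inverse (what _parse in detector.py computes; there it also applies
# sigmoid/exp because it starts from raw network outputs):
cx_back = (cx_index + cx_offset) * 32            # -> 256.0
w_back = anchor_w * math.exp(p_w)                # -> 513.0
print(cx_back, w_back)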

3. model.py

import torch
import torch.nn.functional as F
from FRN import FRN

# Upsampling layer, nearest-neighbor interpolation
class UpsampleLayer(torch.nn.Module):
    def __init__(self):
        super(UpsampleLayer, self).__init__()

    def forward(self, x):  # nearest-neighbor interpolation is fast
        return F.interpolate(x, scale_factor=2, mode='nearest')


# Convolution block: Conv2d + BatchNorm + LeakyReLU
class ConvolutionalLayer(torch.nn.Module):

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias=False):
        super(ConvolutionalLayer, self).__init__()

        self.sub_module = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=bias),
            torch.nn.BatchNorm2d(out_channels),
            torch.nn.LeakyReLU(0.1),
            # FRN(out_channels)

        )

    def forward(self, x):
        return self.sub_module(x)


# Residual block
class ResidualLayer(torch.nn.Module):

    def __init__(self, in_channels):
        super(ResidualLayer, self).__init__()

        self.sub_module = torch.nn.Sequential(
            ConvolutionalLayer(in_channels, in_channels // 2, 1, 1, 0),
            ConvolutionalLayer(in_channels // 2, in_channels, 3, 1, 1),
        )

    def forward(self, x):
        return x + self.sub_module(x)


# Downsampling layer (stride-2 convolution)
class DownsamplingLayer(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(DownsamplingLayer, self).__init__()

        self.sub_module = torch.nn.Sequential(
            ConvolutionalLayer(in_channels, out_channels, 3, 2, 1)
        )

    def forward(self, x):
        return self.sub_module(x)


# Convolutional set (alternating 1x1 and 3x3 convolutions)
class ConvolutionalSet(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ConvolutionalSet, self).__init__()

        self.sub_module = torch.nn.Sequential(
            ConvolutionalLayer(in_channels, out_channels, 1, 1, 0),
            ConvolutionalLayer(out_channels, in_channels, 3, 1, 1),

            ConvolutionalLayer(in_channels, out_channels, 1, 1, 0),
            ConvolutionalLayer(out_channels, in_channels, 3, 1, 1),

            ConvolutionalLayer(in_channels, out_channels, 1, 1, 0),
        )

    def forward(self, x):
        return self.sub_module(x)


# Main network
class MainNet(torch.nn.Module):

    def __init__(self):
        super(MainNet, self).__init__()

        self.trunk_52 = torch.nn.Sequential(
            ConvolutionalLayer(3, 32, 3, 1, 1),
            DownsamplingLayer(32, 64),
            ResidualLayer(64),
            DownsamplingLayer(64, 128),
            ResidualLayer(128),
            ResidualLayer(128),
            DownsamplingLayer(128, 256),
            ResidualLayer(256),
            ResidualLayer(256),
            ResidualLayer(256),
            ResidualLayer(256),
            ResidualLayer(256),
            ResidualLayer(256),
            ResidualLayer(256),
            ResidualLayer(256),
        )

        self.trunk_26 = torch.nn.Sequential(
            DownsamplingLayer(256, 512),
            ResidualLayer(512),
            ResidualLayer(512),
            ResidualLayer(512),
            ResidualLayer(512),
            ResidualLayer(512),
            ResidualLayer(512),
            ResidualLayer(512),
            ResidualLayer(512),
        )

        self.trunk_13 = torch.nn.Sequential(
            DownsamplingLayer(512, 1024),
            ResidualLayer(1024),
            ResidualLayer(1024),
            ResidualLayer(1024),
            ResidualLayer(1024)
        )

        self.convset_13 = torch.nn.Sequential(
            ConvolutionalSet(1024, 512)
        )

        self.detetion_13 = torch.nn.Sequential(
            ConvolutionalLayer(512, 1024, 3, 1, 1),
            torch.nn.Conv2d(1024, 27, 1, 1, 0)  # 3 * (1 + 4 + 4) = 27
        )  # three anchors per scale; each predicts confidence, center x/y, width, height, and CLASS_NUM = 4 classes

        self.up_26 = torch.nn.Sequential(
            ConvolutionalLayer(512, 256, 1, 1, 0),
            UpsampleLayer()
        )

        self.convset_26 = torch.nn.Sequential(
            ConvolutionalSet(768, 256)
        )

        self.detetion_26 = torch.nn.Sequential(
            ConvolutionalLayer(256, 512, 3, 1, 1),
            torch.nn.Conv2d(512, 27, 1, 1, 0)
        )

        self.up_52 = torch.nn.Sequential(
            ConvolutionalLayer(256, 128, 1, 1, 0),
            UpsampleLayer()
        )

        self.convset_52 = torch.nn.Sequential(
            ConvolutionalSet(384, 128)
        )

        self.detetion_52 = torch.nn.Sequential(
            ConvolutionalLayer(128, 256, 3, 1, 1),
            torch.nn.Conv2d(256, 27, 1, 1, 0)
        )

    def forward(self, x):
        h_52 = self.trunk_52(x)
        h_26 = self.trunk_26(h_52)
        h_13 = self.trunk_13(h_26)
        convset_out_13 = self.convset_13(h_13)
        detetion_out_13 = self.detetion_13(convset_out_13)
        up_out_26 = self.up_26(convset_out_13)
        route_out_26 = torch.cat((up_out_26, h_26), dim=1)

        convset_out_26 = self.convset_26(route_out_26)
        detetion_out_26 = self.detetion_26(convset_out_26)
        up_out_52 = self.up_52(convset_out_26)
        route_out_52 = torch.cat((up_out_52, h_52), dim=1)

        convset_out_52 = self.convset_52(route_out_52)
        detetion_out_52 = self.detetion_52(convset_out_52)
        return detetion_out_13, detetion_out_26, detetion_out_52


# Quick shape test
if __name__ == '__main__':
    net = MainNet()
    x = torch.randn([2, 3, 416, 416], dtype=torch.float32)
    y_13, y_26, y_52 = net(x)
    print(y_13.shape)  # torch.Size([2, 27, 13, 13])
    print(y_26.shape)  # torch.Size([2, 27, 26, 26])
    print(y_52.shape)  # torch.Size([2, 27, 52, 52])

    print(y_13.permute([0, 2, 3, 1]).shape)  # torch.Size([2, 13, 13, 27])
    print(y_13.permute([0, 2, 3, 1]).reshape(-1, 13, 13, 3, 9).shape)  # torch.Size([2, 13, 13, 3, 9])

4. trainer.py

import dataset
from model import *
import torch
from torch.utils.data import DataLoader
import os


# Loss function
def loss_fn(output, target, alpha):
    conf_loss_fn = torch.nn.BCEWithLogitsLoss()
    coord_loss_fn = torch.nn.MSELoss()
    cls_loss_fn = torch.nn.CrossEntropyLoss()

    # [N,C,H,W]-->>[N,H,W,C]
    output = output.permute(0, 2, 3, 1)
    # [N,C,H,W]-->>[N,H,W,3,15]
    output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)
    output = output.cpu().double()

    # print(target.shape)  # torch.Size([2, 13, 13, 3, 9])
    mask_obj = target[..., 0] > 0  # mask of cells whose confidence target (IOU) is positive, filtering out background
    # print(mask_obj.shape)  # torch.Size([2, 13, 13, 3])
    output_obj = output[mask_obj]
    # print(output.shape)  # torch.Size([2, 13, 13, 3, 9])
    # print(output_obj.shape)  # torch.Size([9, 9])
    target_obj = target[mask_obj]
    # print(target_obj.shape)  # torch.Size([9, 9])
    # print(output_obj[:, 0].shape)  # torch.Size([9])
    # print(target_obj[:, 0].shape)  # torch.Size([9])
    # print(output_obj[:, 1:5].shape)  # torch.Size([9, 4])
    # print(target_obj[:, 1:5].shape)  # torch.Size([9, 4])
    # print(output_obj[:, 5:].shape)  # torch.Size([9, 4])
    # print(target_obj[:, 5:].shape)  # torch.Size([9, 4])

    loss_obj_conf = conf_loss_fn(output_obj[:, 0], target_obj[:, 0])
    loss_obj_coord = coord_loss_fn(output_obj[:, 1:5], target_obj[:, 1:5])

    target_obj = torch.argmax(target_obj[:, 5:], dim=1)
    loss_obj_cls = cls_loss_fn(output_obj[:, 5:], target_obj)

    loss_obj = loss_obj_conf + loss_obj_coord + loss_obj_cls

    mask_noobj = target[..., 0] == 0  # cells without objects only need their confidence trained
    output_noobj = output[mask_noobj]
    # print(output_noobj.shape)  # torch.Size([1008, 9])

    target_noobj = target[mask_noobj]
    # print(target_noobj.shape)  # torch.Size([1008, 9])
    # print(output_noobj[:, 0].shape)  # torch.Size([1008])
    # print(target_noobj[:, 0].shape)  # torch.Size([1008])

    loss_noobj = conf_loss_fn(output_noobj[:, 0], target_noobj[:, 0])
    loss = alpha * loss_obj + (1 - alpha) * loss_noobj  # weight the scarce positive cells more heavily, otherwise recall suffers

    return loss
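
To make the mask indexing above concrete, here is a toy check (a sketch, with shapes taken from the comments): a boolean mask over the first four axes selects rows of the last axis.

import torch

target = torch.zeros(2, 13, 13, 3, 9)
target[0, 6, 6, 1, 0] = 0.8              # one "object" cell with confidence target 0.8
output = torch.randn(2, 13, 13, 3, 9)

mask_obj = target[..., 0] > 0            # boolean mask, shape [2, 13, 13, 3]
print(output[mask_obj].shape)            # torch.Size([1, 9]) -> only the object cell survives
print(target[mask_obj].shape)            # torch.Size([1, 9])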


if __name__ == '__main__':
    save_path = "models/net_yolo.pth3"
    myDataset = dataset.MyDataset()
    train_loader = DataLoader(myDataset, batch_size=2, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = MainNet().to(device)

    if os.path.exists(save_path):
        net.load_state_dict(torch.load(save_path))
    else:
        print("NO Param")

    net.train()
    opt = torch.optim.Adam(net.parameters())

    epoch = 0
    while True:
        for target_13, target_26, target_52, img_data in train_loader:
            # print(target_13.shape)  # torch.Size([2, 13, 13, 3, 9])

            img_data = img_data.to(device)
            output_13, output_26, output_52 = net(img_data)
            # print(output_13.shape)  # torch.Size([2, 45, 13, 13])

            loss_13 = loss_fn(output_13, target_13, 0.9)
            loss_26 = loss_fn(output_26, target_26, 0.9)
            loss_52 = loss_fn(output_52, target_52, 0.9)
            loss = loss_13 + loss_26 + loss_52

            opt.zero_grad()
            loss.backward()
            opt.step()
            if epoch % 10 == 0:
                torch.save(net.state_dict(), save_path)

                print('save epoch: {}'.format(epoch))

            print("loss:", loss.item())

        epoch += 1

5. detector.py

from model import *
import cfg
import torch
import numpy as np
import PIL.Image as pimg
import PIL.ImageDraw as draw
from PIL import ImageFont
import tool
from torchvision import transforms
from Test_files.Convert_square import trans_square
from Test_files.padding_pixel import padding_pixel, padding_pixel2


class Detector(torch.nn.Module):

    def __init__(self, save_path):
        super(Detector, self).__init__()

        self.net = MainNet().cuda()
        self.net.load_state_dict(torch.load(save_path))
        self.net.eval()

    # torch.randn(3, 3, 416, 416), 0.3, cfg.ANCHORS_GROUP
    def forward(self, input, thresh, anchors):
        output_13, output_26, output_52 = self.net(input)
        # print(output_13.shape)  # torch.Size([3, 27, 13, 13])

        idxs_13, vecs_13 = self._filter(output_13, thresh)
        boxes_13 = self._parse(idxs_13, vecs_13, 32, anchors[13])
        # take the selected indices and their output vectors (9 values each) and decode them; 32 is the stride back to the original image

        idxs_26, vecs_26 = self._filter(output_26, thresh)
        boxes_26 = self._parse(idxs_26, vecs_26, 16, anchors[26])

        idxs_52, vecs_52 = self._filter(output_52, thresh)
        boxes_52 = self._parse(idxs_52, vecs_52, 8, anchors[52])

        return torch.cat([boxes_13, boxes_26, boxes_52], dim=0)  # concatenate the boxes from all three scales

    def _filter(self, output, thresh):

        output = output.permute(0, 2, 3, 1)  # torch.Size([3, 13, 13, 27])

        output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)
        # print(output.shape)  # torch.Size([3, 13, 13, 3, 9])

        # print(output[..., 0].shape)  # torch.Size([3, 13, 13, 3])
        mask = torch.sigmoid(output[..., 0]) > thresh  # mask of predictions whose confidence exceeds the threshold
        # print(mask.shape)  # torch.Size([3, 13, 13, 3])

        idxs = mask.nonzero()  # indices of the selected (nonzero) elements
        # print(idxs.shape)  # torch.Size([14, 4])

        vecs = output[mask]  # use the mask to select the output vectors
        # print(np.shape(vecs))  # torch.Size([14, 9])

        return idxs, vecs

    def _parse(self, idxs, vecs, t, anchors):
        if len(idxs) == 0:
            return torch.randn(0, 6).cuda()
        else:
            anchors = torch.tensor(anchors, dtype=torch.float32).cuda()
            # print(idxs.shape)  # torch.Size([14, 4])  N,H,W,3
            a = idxs[:, 3]  # anchor index (0, 1, or 2)
            # print(a.shape)  # torch.Size([14])

            # confidence = vecs[:, 0]
            # squash the confidence into (0, 1)
            confidence = torch.sigmoid(vecs[:, 0])
            # print(confidence.shape)  # torch.Size([14])

            _classify = vecs[:, 5:]
            # print(_classify.shape)  # torch.Size([14, 4])

            classify = torch.argmax(_classify, dim=1).float()
            # print(classify.shape)  # torch.Size([14])

            # idxs layout: N, H, W, anchor;  network output vecs: iou, cx_offset, cy_offset, log(p_w), log(p_h), classes
            cy = (idxs[:, 1].float() + torch.sigmoid(vecs[:, 2])) * t
            # print(cy.shape)  # torch.Size([14])

            cx = (idxs[:, 2].float() + torch.sigmoid(vecs[:, 1])) * t
            # print(cx.shape)  # torch.Size([14])

            w = anchors[a, 0] * torch.exp(vecs[:, 3])

            h = anchors[a, 1] * torch.exp(vecs[:, 4])
            x1 = cx - w / 2
            y1 = cy - h / 2
            x2 = x1 + w
            y2 = y1 + h
            # print(confidence)
            out = torch.stack([confidence, x1, y1, x2, y2, classify], dim=1)

            return out


if __name__ == '__main__':
    save_path = "models/net_yolo.pth3"
    dicts = {"0.0": "人", "1.0": "老虎", "2.0": "狮子", "3.0": "熊猫"}
    font_path = "simsun.ttc"
    font = ImageFont.truetype(font_path, size=20)
    detector = Detector(save_path)
    # a = torch.randn(3, 3, 416, 416).cuda()
    # y = detector(a, 0.3, cfg.ANCHORS_GROUP)
    # print(y.shape)
    # exit()

    img1 = pimg.open(r'data2\images\01.jpg')  # the input image can be any size
    w, h = img1.size
    merge_img, paste_coord = trans_square(img1)  # pad the image into a square

    w1, h1 = merge_img.size
    resize_img = merge_img.resize((416, 416))
    w2, h2 = resize_img.size
    scale = w2 / w1  # width of the resized image over the width of the padded square image
    # print(scale)

    # img = np.array(img) / 255
    # img = torch.Tensor(img)
    # img = img.unsqueeze(0)
    # img = img.permute(0, 3, 1, 2)

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    img = transform(resize_img)
    img = img.unsqueeze(0)

    # print(np.shape(img))  # torch.Size([1, 3, 416, 416])

    img = img.cuda()

    out_value = detector(img, 0.3, cfg.ANCHORS_GROUP)
    # print(out_value)  # torch.Size([12, 6])  confidence, x1, y1, x2, y2, classify
    # print(out_value[..., -1])  # tensor([0., 0., 1., 1., 0., 0., 0., 1., 1., 1., 0., 1.], )

    boxes = []

    for j in range(4):  # one pass per class
        classify_mask = (out_value[..., -1] == j)  # mask selecting the boxes of class j
        # print(classify_mask)

        _boxes = out_value[classify_mask]  # use the class mask to select that class's boxes
        # print(_boxes)

        _boxes = _boxes.cpu()
        boxes.append(tool.nms(_boxes))  # NMS within each class

    # for box in boxes:
    #     try:
    #         img_draw = draw.ImageDraw(img1)
    #         c, x1, y1, x2, y2 = box[0, 0:5]
    #         # print(c, x1, y1, x2, y2)
    #         img_draw.rectangle((x1, y1, x2, y2))
    #     except:
    #         continue

    # iterate over the per-class NMS results (one entry per class)
    count = 1

    # print(boxes)
    for box in boxes:
        # iterate over every box of this class
        # print(box)
        for _box in box:
            # print(_box)
            # exit()
            # try:
            img_draw = draw.ImageDraw(merge_img)

            confidence = round(_box[0].item(), 2)
            # x1 = _box[1].item() / scale
            # y1 = _box[2].item() / scale
            # x2 = _box[3].item() / scale
            # y2 = _box[4].item() / scale

            x1 = max(0, _box[1].item() / scale)
            y1 = max(0, _box[2].item() / scale)
            x2 = min(w1, _box[3].item() / scale)
            y2 = min(h1, _box[4].item() / scale)
            cls = _box[5].item()
            cls = dicts[str(cls)]  # map the class id to its display name

            print(cls, confidence, x1, y1, x2, y2)

            img_draw.rectangle((x1, y1, x2, y2), outline='red', width=2)
            padding_pixel2(merge_img, x1, y1, 80, 20)
            img_draw.text((x1, y1), cls, fill=(0, 0, 0), font=font)
            img_draw.text((x1+40, y1), str(confidence), fill=(0, 0, 0), font=font)
            # except:
            #     continue
    merge_img2 = merge_img.crop((paste_coord[0], paste_coord[1], paste_coord[0]+w, paste_coord[1]+h))
    # merge_img2.save("./Save_images2/{}.jpg".format(count))

    merge_img2.show()
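
trans_square and padding_pixel2 are imported from Test_files, which is not listed in this post. A minimal sketch of what they appear to do, inferred from how they are called above (the bodies below are assumptions, not the original code):

from PIL import Image, ImageDraw

def trans_square(img):
    """Paste img centered onto a square canvas; return (canvas, paste_xy)."""
    w, h = img.size
    side = max(w, h)
    canvas = Image.new("RGB", (side, side), (0, 0, 0))
    paste_coord = ((side - w) // 2, (side - h) // 2)
    canvas.paste(img, paste_coord)  # paste_coord is cropped against later
    return canvas, paste_coord

def padding_pixel2(img, x1, y1, w, h):
    """Draw a white w x h patch at (x1, y1) so the label text stays readable."""
    ImageDraw.Draw(img).rectangle((x1, y1, x1 + w, y1 + h), fill=(255, 255, 255))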



6. Utils

6.1 tool.py

import numpy as np
import torch


def ious(box, boxes, isMin=False):
    # box layout: [confidence, x1, y1, x2, y2, cls], so coordinates start at index 1
    box_area = (box[3] - box[1]) * (box[4] - box[2])
    area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 4] - boxes[:, 2])
    xx1 = torch.max(box[1], boxes[:, 1])
    yy1 = torch.max(box[2], boxes[:, 2])
    xx2 = torch.min(box[3], boxes[:, 3])
    yy2 = torch.min(box[4], boxes[:, 4])

    w = torch.clamp(xx2 - xx1, min=0)
    h = torch.clamp(yy2 - yy1, min=0)

    inter = w * h

    ovr2 = inter / (box_area + area - inter)

    return ovr2


def nms(boxes, thresh=0.3, isMin=True):

    if boxes.shape[0] == 0:
        return np.array([])

    _boxes = boxes[(-boxes[:, 0]).argsort()]
    r_boxes = []

    while _boxes.shape[0] > 1:
        a_box = _boxes[0]
        b_boxes = _boxes[1:]
        r_boxes.append(a_box)

        index = np.where(ious(a_box, b_boxes, isMin) < thresh)
        _boxes = b_boxes[index]
    if _boxes.shape[0] > 0:
        r_boxes.append(_boxes[0])

    return torch.stack(r_boxes)


if __name__ == '__main__':
    # a = np.array([1,1,11,11])
    # bs = np.array([[1,1,10,10],[11,11,20,20]])
    # print(iou(a,bs))

    bs = torch.tensor([[1, 1, 10, 10, 40,8], [1, 1, 9, 9, 10,9], [9, 8, 13, 20, 15,3], [6, 11, 18, 17, 13,2]])
    # print(bs[:,3].argsort())
    print(nms(bs))

6.2 parse_xml.py

from xml.dom.minidom import parse
import os
import traceback
from PIL import Image

dir_path = r"D:\PycharmProjects(2)\YOLO v3\data2"
xml_path = r"D:\PycharmProjects(2)\YOLO v3\data2\outputs2"

label_filename = os.path.join(dir_path, "label.txt")
dicts = {"人": 0, "老虎": 1, "狮子": 2, "熊猫": 3}

try:
    label_file = open(label_filename, "w")

    count = 0
    for filename in os.listdir(xml_path):
        try:
            dom = parse(os.path.join(xml_path, filename))  # parse the xml file
            root = dom.documentElement

            img_name = root.getElementsByTagName("path")[0].childNodes[0].data  # D:\PycharmProjects(2)\YOLO v3\data2\images\01.jpg

            item = root.getElementsByTagName("item")

            label_file.write("images2/{0}.jpg ".format(str(count+1).zfill(2)))
            for box in item:

                cls_name = box.getElementsByTagName("name")[0].childNodes[0].data  # class name of this object
                value = dicts[cls_name]

                x1 = int(box.getElementsByTagName("xmin")[0].childNodes[0].data)  # x1 coordinate
                y1 = int(box.getElementsByTagName("ymin")[0].childNodes[0].data)
                x2 = int(box.getElementsByTagName("xmax")[0].childNodes[0].data)
                y2 = int(box.getElementsByTagName("ymax")[0].childNodes[0].data)
                cx = int(x1 + (x2 - x1) / 2)
                cy = int(y1 + (y2 - y1) / 2)
                w = x2 - x1
                h = y2 - y1

                label_file.write("{0} {1} {2} {3} {4} " .format(
                    value, cx, cy, w, h
                ))

            label_file.write("\n")

            count += 1
        except Exception as e:
            traceback.print_exc()

finally:
    label_file.close()
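
Each line written to label.txt holds the image path followed by one "class cx cy w h" group per object, e.g. the sample line quoted in dataset.py:

images/21.jpg 0 18 45 258 264 1 258 99 290 250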

6.3 FRN.py

import torch
import torch.nn as nn
import numpy as np


class FRN(nn.Module):
    def __init__(self, num_features, eps=1e-6, learnable_eps=False):
        super().__init__()
        shape = (1, num_features, 1, 1)
        # print(shape)  # (1, 16, 1, 1)
        # print(torch.ones(*shape) * eps)

        self.eps = nn.Parameter(torch.ones(*shape) * eps, requires_grad=True)
        if not learnable_eps:
            self.eps.requires_grad_(False)
        self.gamma = nn.Parameter(torch.Tensor(*shape), requires_grad=True)
        self.beta = nn.Parameter(torch.Tensor(*shape), requires_grad=True)
        self.tau = nn.Parameter(torch.Tensor(*shape), requires_grad=True)
        self.reset_parameters()

    def forward(self, x):  # x = torch.rand(10, 16, 224, 224)
        avg_dims = tuple(range(2, x.dim()))  # range(2,4)=2,3
        # print(np.shape(avg_dims))  # (2,)
        # print(np.shape(x))  # torch.Size([10, 16, 224, 224])

        nu2 = torch.pow(x, 2).mean(dim=avg_dims, keepdim=True)
        # print(nu2.shape)  # torch.Size([10, 16, 1, 1])

        # nu2 = torch.pow(x, 2).mean(dim=(2,3), keepdim=True)
        # x = x * torch.rsqrt(nu2 + torch.abs(self.eps))
        x = x / torch.sqrt(nu2 + torch.abs(self.eps))
        # print(x.shape)  # torch.Size([10, 16, 224, 224])

        return torch.max(self.gamma * x + self.beta, self.tau)

    def reset_parameters(self):
        nn.init.ones_(self.gamma)
        nn.init.ones_(self.beta)
        nn.init.ones_(self.tau)


if __name__ == '__main__':
    x = torch.rand(10, 16, 224, 224)
    frn = FRN(16)
    frn(x)

    # print(frn(x))
    print(frn(x).shape)  # torch.Size([10, 16, 224, 224])
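
For reference, the FRN layer above computes, per channel over the spatial dimensions (with the thresholded linear unit folded in):

ν² = mean over H,W of x²,   x̂ = x / √(ν² + |ε|),   y = max(γ·x̂ + β, τ)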

7. Results

(images: detection results)
