Code files:
1: cfg.py
# Configuration file
IMG_HEIGHT = 416
IMG_WIDTH = 416
CLASS_NUM = 4
"anchor box是对coco数据集聚类获得"
ANCHORS_GROUP_KMEANS = { # k-means聚类得到的先验框(建议框)
52: [[10, 13], [16, 30], [33, 23]], # 在52尺寸特征图上聚类出来的框
26: [[30, 61], [62, 45], [59, 119]],
13: [[116, 90], [156, 198], [373, 326]]}
ANCHORS_GROUP = {  # anchor boxes set manually from experience
13: [[360, 360], [360, 180], [180, 360]],
26: [[180, 180], [180, 90], [90, 180]],
52: [[90, 90], [90, 45], [45, 90]]}
ANCHORS_GROUP_AREA = {  # areas of the anchor boxes
13: [x * y for x, y in ANCHORS_GROUP[13]],
26: [x * y for x, y in ANCHORS_GROUP[26]],
52: [x * y for x, y in ANCHORS_GROUP[52]],
}
if __name__ == '__main__':
for feature_size, anchors in ANCHORS_GROUP.items():
print(feature_size) # 13 26 52
print(anchors)
for feature_size, anchor_area in ANCHORS_GROUP_AREA.items():
print(feature_size)
print(anchor_area)
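For a custom dataset, anchors like ANCHORS_GROUP_KMEANS can be re-estimated by clustering the labelled box sizes. Below is a minimal sketch using plain k-means on (w, h) pairs with scikit-learn; the official COCO anchors were clustered with an IoU-based distance instead, and the box list here is made up purely for illustration:

import numpy as np
from sklearn.cluster import KMeans

# (w, h) of every labelled box, already scaled to the 416x416 input; illustrative values only
box_wh = np.array([[33, 52], [210, 180], [95, 140], [360, 300], [48, 60], [150, 310]])

kmeans = KMeans(n_clusters=3, n_init=10, random_state=0).fit(box_wh)
anchors = sorted(kmeans.cluster_centers_.astype(int).tolist(), key=lambda wh: wh[0] * wh[1])
print(anchors)  # three (w, h) anchors, smallest first, usable for one detection scale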
2: dataset.py
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision
import numpy as np
import cfg
import os
from PIL import Image
import math
from torchvision import transforms
LABEL_FILE_PATH = "data2/label.txt"
IMG_BASE_DIR = "data2"
# LABEL_FILE_PATH = "data/person_label.txt"
# IMG_BASE_DIR = "data"
transform = torchvision.transforms.Compose([
transforms.ToTensor(),
transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])
def one_hot(cls_num, i):
b = np.zeros(cls_num)
b[i] = 1.
return b
class MyDataset(Dataset):
def __init__(self):
with open(LABEL_FILE_PATH) as f:
self.dataset = f.readlines()  # read all label lines
def __len__(self):
return len(self.dataset)  # number of samples
def __getitem__(self, index):
labels = {}
line = self.dataset[index]  # one full label line, e.g.: images/21.jpg 0 18 45 258 264 1 258 99 290 250
strs = line.strip().split()  # ['images/25.jpg', '3', '74', '142', '357', '323']
# print(strs)
_img_data = Image.open(os.path.join(IMG_BASE_DIR, strs[0]))  # open the image for this sample
img_data = transform(_img_data)
# _boxes = np.array(float(x) for x in strs[1:])
# parse the numeric label values
_boxes = np.array(list(map(float, strs[1:]))) # [0.0, 2.0, 49.0, 344.0, 261.0, 1.0, 103.0, 76.0, 496.0, 303.0]
# print(_boxes)
# split into one array per box: (cls, cx, cy, w, h)
boxes = np.split(_boxes, len(_boxes) // 5)
# [array([ 0., 2., 49., 344., 261.]), array([ 1., 103., 76., 496., 303.])]
for feature_size, anchors in cfg.ANCHORS_GROUP.items():  # the manually set anchor boxes
# print(feature_size) # 13
# print(anchors) # [[360, 360], [360, 180], [180, 360]]
# print(cfg.CLASS_NUM)
# create zero target tensors for the 13, 26 and 52 feature maps; cells that contain an object centre are filled in below
labels[feature_size] = np.zeros(shape=(feature_size, feature_size, 3, 5 + cfg.CLASS_NUM))
# 3: each scale has three anchor boxes
# print(labels)
for box in boxes:  # iterate over every ground-truth box
cls, cx, cy, w, h = box # 1.0 256.0 308.0 513.0 617.0
# print(cls, cx, cy, w, h)
# split the object centre into a fractional offset and an integer cell index; the network learns the fractional part
cx_offset, cx_index = math.modf(cx * feature_size / cfg.IMG_WIDTH)  # e.g. cx / 32 when feature_size is 13
# print(feature_size,'---- ',cy,cy*feature_size)
cy_offset, cy_index = math.modf(cy * feature_size / cfg.IMG_HEIGHT)
for i, anchor in enumerate(anchors):  # loop over the three anchor boxes of this scale
# print(i) # 0
# print(anchor) # [360, 360]
anchor_area = cfg.ANCHORS_GROUP_AREA[feature_size][i]  # area of this anchor box
# print(anchor_area) # 129600
p_w, p_h = w / anchor[0], h / anchor[1]  # ground-truth width/height divided by anchor width/height
p_area = w * h  # area of the ground-truth box
# used as the confidence target: anchor and ground-truth box share the same centre, so this area ratio acts like an IoU of concentric boxes and penalises anchors that fit the box poorly
iou = min(p_area, anchor_area) / max(p_area, anchor_area)
# print(iou)
# print(*one_hot(cfg.CLASS_NUM, int(cls))) # 0.0 0.0 0.0 1.0
labels[feature_size][int(cy_index), int(cx_index), i] = np.array(
[iou, cx_offset, cy_offset, np.log(p_w), np.log(p_h),
*one_hot(cfg.CLASS_NUM, int(cls))])
# print(labels)  # the first dims are H, W, 3; the last dim holds the 5 + CLASS_NUM = 9 target values
# print(labels[13].shape) # (13, 13, 3, 9)
# print(labels[26].shape) # (26, 26, 3, 9)
# print(labels[52].shape) # (52, 52, 3, 9)
return labels[13], labels[26], labels[52], img_data
if __name__ == '__main__':
# x = one_hot(10, 2)
# print(x)
data = MyDataset()
dataloader = DataLoader(data, 2, shuffle=True)
for i, x in enumerate(dataloader):
print("====")
# print(x[0].shape) # torch.Size([2, 13, 13, 3, 9])
# print(x[1].shape) # torch.Size([2, 26, 26, 3, 9])
# print(x[2].shape) # torch.Size([2, 52, 52, 3, 9])
# print(x[3].shape) # torch.Size([2, 3, 416, 416])
# for target_13, target_26, target_52, img_data in dataloader:
# print(target_13.shape) # torch.Size([2, 13, 13, 3, 9])
# print(target_26.shape) # torch.Size([2, 26, 26, 3, 9])
# print(target_52.shape) # torch.Size([2, 52, 52, 3, 9])
# print(img_data.shape) # torch.Size([2, 3, 416, 416])
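To make the target encoding in __getitem__ concrete, here is a small worked example; the box values are chosen for illustration and are not taken from the dataset. A box with centre (256, 308), width 160 and height 200, matched against the [180, 180] anchor of the 13x13 scale:

import math
import numpy as np

cx, cy, w, h = 256.0, 308.0, 160.0, 200.0      # illustrative ground-truth box on a 416x416 image
feature_size, img_size = 13, 416
anchor_w, anchor_h = 180, 180                   # one of the 13x13 anchors in cfg.ANCHORS_GROUP

cx_offset, cx_index = math.modf(cx * feature_size / img_size)   # 0.0 and 8.0  -> cell column 8
cy_offset, cy_index = math.modf(cy * feature_size / img_size)   # 0.625 and 9.0 -> cell row 9
tw, th = np.log(w / anchor_w), np.log(h / anchor_h)             # -0.118 and 0.105
iou = min(w * h, anchor_w * anchor_h) / max(w * h, anchor_w * anchor_h)  # 32000 / 32400 ≈ 0.988
print(cx_index, cy_index, cx_offset, cy_offset, tw, th, iou)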
3: model.py
import torch
import torch.nn.functional as F
from FRN import FRN
# Upsampling layer (nearest-neighbour interpolation)
class UpsampleLayer(torch.nn.Module):
def __init__(self):
super(UpsampleLayer, self).__init__()
def forward(self, x):  # nearest-neighbour interpolation is fast
return F.interpolate(x, scale_factor=2, mode='nearest')
# Convolution block: Conv2d + BatchNorm + LeakyReLU
class ConvolutionalLayer(torch.nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias=False):
super(ConvolutionalLayer, self).__init__()
self.sub_module = torch.nn.Sequential(
torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=bias),
torch.nn.BatchNorm2d(out_channels),
torch.nn.LeakyReLU(0.1),
# FRN(out_channels)
)
def forward(self, x):
return self.sub_module(x)
# Residual block
class ResidualLayer(torch.nn.Module):
def __init__(self, in_channels):
super(ResidualLayer, self).__init__()
self.sub_module = torch.nn.Sequential(
ConvolutionalLayer(in_channels, in_channels // 2, 1, 1, 0),
ConvolutionalLayer(in_channels // 2, in_channels, 3, 1, 1),
)
def forward(self, x):
return x + self.sub_module(x)
# Downsampling layer (stride-2 convolution)
class DownsamplingLayer(torch.nn.Module):
def __init__(self, in_channels, out_channels):
super(DownsamplingLayer, self).__init__()
self.sub_module = torch.nn.Sequential(
ConvolutionalLayer(in_channels, out_channels, 3, 2, 1)
)
def forward(self, x):
return self.sub_module(x)
# Convolutional set (alternating 1x1 and 3x3 convolutions)
class ConvolutionalSet(torch.nn.Module):
def __init__(self, in_channels, out_channels):
super(ConvolutionalSet, self).__init__()
self.sub_module = torch.nn.Sequential(
ConvolutionalLayer(in_channels, out_channels, 1, 1, 0),
ConvolutionalLayer(out_channels, in_channels, 3, 1, 1),
ConvolutionalLayer(in_channels, out_channels, 1, 1, 0),
ConvolutionalLayer(out_channels, in_channels, 3, 1, 1),
ConvolutionalLayer(in_channels, out_channels, 1, 1, 0),
)
def forward(self, x):
return self.sub_module(x)
# Main network: Darknet-53 style backbone with three detection heads
class MainNet(torch.nn.Module):
def __init__(self):
super(MainNet, self).__init__()
self.trunk_52 = torch.nn.Sequential(
ConvolutionalLayer(3, 32, 3, 1, 1),
DownsamplingLayer(32, 64),
ResidualLayer(64),
DownsamplingLayer(64, 128),
ResidualLayer(128),
ResidualLayer(128),
DownsamplingLayer(128, 256),
ResidualLayer(256),
ResidualLayer(256),
ResidualLayer(256),
ResidualLayer(256),
ResidualLayer(256),
ResidualLayer(256),
ResidualLayer(256),
ResidualLayer(256),
)
self.trunk_26 = torch.nn.Sequential(
DownsamplingLayer(256, 512),
ResidualLayer(512),
ResidualLayer(512),
ResidualLayer(512),
ResidualLayer(512),
ResidualLayer(512),
ResidualLayer(512),
ResidualLayer(512),
ResidualLayer(512),
)
self.trunk_13 = torch.nn.Sequential(
DownsamplingLayer(512, 1024),
ResidualLayer(1024),
ResidualLayer(1024),
ResidualLayer(1024),
ResidualLayer(1024)
)
self.convset_13 = torch.nn.Sequential(
ConvolutionalSet(1024, 512)
)
self.detetion_13 = torch.nn.Sequential(
ConvolutionalLayer(512, 1024, 3, 1, 1),
torch.nn.Conv2d(1024, 27, 1, 1, 0)  # 3 * (1 + 4 + 4) = 27
)  # three anchor boxes per scale; each predicts confidence, centre point, width, height and the 4 class scores
self.up_26 = torch.nn.Sequential(
ConvolutionalLayer(512, 256, 1, 1, 0),
UpsampleLayer()
)
self.convset_26 = torch.nn.Sequential(
ConvolutionalSet(768, 256)
)
self.detetion_26 = torch.nn.Sequential(
ConvolutionalLayer(256, 512, 3, 1, 1),
torch.nn.Conv2d(512, 27, 1, 1, 0)
)
self.up_52 = torch.nn.Sequential(
ConvolutionalLayer(256, 128, 1, 1, 0),
UpsampleLayer()
)
self.convset_52 = torch.nn.Sequential(
ConvolutionalSet(384, 128)
)
self.detetion_52 = torch.nn.Sequential(
ConvolutionalLayer(128, 256, 3, 1, 1),
torch.nn.Conv2d(256, 27, 1, 1, 0)
)
def forward(self, x):
h_52 = self.trunk_52(x)
h_26 = self.trunk_26(h_52)
h_13 = self.trunk_13(h_26)
convset_out_13 = self.convset_13(h_13)
detetion_out_13 = self.detetion_13(convset_out_13)
up_out_26 = self.up_26(convset_out_13)
route_out_26 = torch.cat((up_out_26, h_26), dim=1)
convset_out_26 = self.convset_26(route_out_26)
detetion_out_26 = self.detetion_26(convset_out_26)
up_out_52 = self.up_52(convset_out_26)
route_out_52 = torch.cat((up_out_52, h_52), dim=1)
convset_out_52 = self.convset_52(route_out_52)
detetion_out_52 = self.detetion_52(convset_out_52)
return detetion_out_13, detetion_out_26, detetion_out_52
# Test the network
if __name__ == '__main__':
net = MainNet()
x = torch.randn([2, 3, 416, 416], dtype=torch.float32)
# forward pass
y_13, y_26, y_52 = net(x)
print(y_13.shape)  # torch.Size([2, 27, 13, 13])
print(y_26.shape)  # torch.Size([2, 27, 26, 26])
print(y_52.shape)  # torch.Size([2, 27, 52, 52])
print(y_13.permute([0, 2, 3, 1]).shape)  # torch.Size([2, 13, 13, 27])
print(y_13.permute([0, 2, 3, 1]).reshape(-1, 13, 13, 3, 9).shape)  # torch.Size([2, 13, 13, 3, 9])
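The detection heads hard-code 27 output channels. A hedged refactor (the OUT_CHANNELS name is mine, not from the original code) derives that number from cfg so the heads automatically follow CLASS_NUM:

import cfg

# 3 anchors per scale, each predicting confidence + cx + cy + w + h + one score per class
OUT_CHANNELS = 3 * (5 + cfg.CLASS_NUM)  # 27 when CLASS_NUM = 4
# torch.nn.Conv2d(1024, OUT_CHANNELS, 1, 1, 0) would then replace the hard-coded 27 above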
4: trainer.py
import dataset
from model import *
import torch
from torch.utils.data import DataLoader
import os
# Loss function
def loss_fn(output, target, alpha):
conf_loss_fn = torch.nn.BCEWithLogitsLoss()
coord_loss_fn = torch.nn.MSELoss()
cls_loss_fn = torch.nn.CrossEntropyLoss()
# [N,C,H,W]-->>[N,H,W,C]
output = output.permute(0, 2, 3, 1)
# [N,H,W,C]-->>[N,H,W,3,5+CLASS_NUM]
output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)
output = output.cpu().double()  # the numpy-based dataset yields float64 CPU targets, so match dtype and device
# print(target.shape) # torch.Size([2, 13, 13, 3, 9])
mask_obj = target[..., 0] > 0  # mask of cells whose confidence (IoU) target is positive, i.e. cells that contain an object; background is filtered out
# print(mask_obj.shape) # torch.Size([2, 13, 13, 3])
output_obj = output[mask_obj]
# print(output.shape) # torch.Size([2, 13, 13, 3, 9])
# print(output_obj.shape) # torch.Size([9, 9])
target_obj = target[mask_obj]
# print(target_obj.shape) # torch.Size([9, 9])
# print(output_obj[:, 0].shape) # torch.Size([9])
# print(target_obj[:, 0].shape) # torch.Size([9])
# print(output_obj[:, 1:5].shape) # torch.Size([9, 4])
# print(target_obj[:, 1:5].shape) # torch.Size([9, 4])
# print(output_obj[:, 5:].shape) # torch.Size([9, 4])
# print(target_obj[:, 5:].shape) # torch.Size([9, 4])
loss_obj_conf = conf_loss_fn(output_obj[:, 0], target_obj[:, 0])
loss_obj_coord = coord_loss_fn(output_obj[:, 1:5], target_obj[:, 1:5])
target_obj = torch.argmax(target_obj[:, 5:], dim=1)
loss_obj_cls = cls_loss_fn(output_obj[:, 5:], target_obj)
loss_obj = loss_obj_conf + loss_obj_coord + loss_obj_cls
mask_noobj = target[..., 0] == 0  # for cells without an object only the confidence is trained
output_noobj = output[mask_noobj]
# print(output_noobj.shape) # torch.Size([1008, 9])
target_noobj = target[mask_noobj]
# print(target_noobj.shape) # torch.Size([1008, 9])
# print(output_noobj[:, 0].shape) # torch.Size([1008])
# print(target_noobj[:, 0].shape) # torch.Size([1008])
loss_noobj = conf_loss_fn(output_noobj[:, 0], target_noobj[:, 0])
loss = alpha * loss_obj + (1 - alpha) * loss_noobj  # weight the scarce positive samples more heavily, otherwise recall stays low
return loss
if __name__ == '__main__':
save_path = "models/net_yolo.pth3"
myDataset = dataset.MyDataset()
train_loader = DataLoader(myDataset, batch_size=2, shuffle=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = MainNet().to(device)
if os.path.exists(save_path):
net.load_state_dict(torch.load(save_path))
else:
print("NO Param")
net.train()
opt = torch.optim.Adam(net.parameters())
epoch = 0
while True:
for target_13, target_26, target_52, img_data in train_loader:
# print(target_13.shape) # torch.Size([2, 13, 13, 3, 9])
img_data = img_data.to(device)
output_13, output_26, output_52 = net(img_data)
# print(output_13.shape) # torch.Size([2, 45, 13, 13])
loss_13 = loss_fn(output_13, target_13, 0.9)
loss_26 = loss_fn(output_26, target_26, 0.9)
loss_52 = loss_fn(output_52, target_52, 0.9)
loss = loss_13 + loss_26 + loss_52
opt.zero_grad()
loss.backward()
opt.step()
if epoch % 10 == 0:
torch.save(net.state_dict(), save_path)
print('save epoch: {}'.format(epoch))
print("loss:", loss.item())
epoch += 1
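The key trick in loss_fn is boolean-mask indexing: mask_obj has shape [N, H, W, 3], and indexing a [N, H, W, 3, 9] tensor with it flattens the selected cells into a [K, 9] matrix. A minimal standalone sketch of that behaviour, using made-up tensors:

import torch

target = torch.zeros(2, 13, 13, 3, 9)
target[0, 5, 7, 1, 0] = 0.9          # pretend one cell/anchor holds an object (confidence target 0.9)
output = torch.randn(2, 13, 13, 3, 9)

mask_obj = target[..., 0] > 0        # [2, 13, 13, 3] boolean mask
print(output[mask_obj].shape)        # torch.Size([1, 9]) - only the positive cell survives
print(output[~mask_obj].shape)       # torch.Size([1013, 9]) - all the background cells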
5: detector.py
from model import *
import cfg
import torch
import numpy as np
import PIL.Image as pimg
import PIL.ImageDraw as draw
from PIL import ImageFont
import tool
from torchvision import transforms
from Test_files.Convert_square import trans_square
from Test_files.padding_pixel import padding_pixel, padding_pixel2
class Detector(torch.nn.Module):
def __init__(self, save_path):
super(Detector, self).__init__()
self.net = MainNet().cuda()
self.net.load_state_dict(torch.load(save_path))
self.net.eval()
# torch.randn(3, 3, 416, 416), 0.3, cfg.ANCHORS_GROUP
def forward(self, input, thresh, anchors):
output_13, output_26, output_52 = self.net(input)
# print(output_13.shape) # torch.Size([3, 27, 13, 13])
idxs_13, vecs_13 = self._filter(output_13, thresh)
boxes_13 = self._parse(idxs_13, vecs_13, 32, anchors[13])
# take the indices and output vectors (9 values) that passed the threshold, then decode them; 32 is the stride back to the 416x416 input
idxs_26, vecs_26 = self._filter(output_26, thresh)
boxes_26 = self._parse(idxs_26, vecs_26, 16, anchors[26])
idxs_52, vecs_52 = self._filter(output_52, thresh)
boxes_52 = self._parse(idxs_52, vecs_52, 8, anchors[52])
return torch.cat([boxes_13, boxes_26, boxes_52], dim=0)  # concatenate the boxes from the three scales
def _filter(self, output, thresh):
output = output.permute(0, 2, 3, 1) # torch.Size([3, 13, 13, 27])
output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)
# print(output.shape) # torch.Size([3, 13, 13, 3, 9])
# print(output[..., 0].shape) # torch.Size([3, 13, 13, 3])
mask = torch.sigmoid(output[..., 0]) > thresh  # mask of cells whose confidence exceeds the threshold
# print(mask.shape) # torch.Size([3, 13, 13, 3])
idxs = mask.nonzero()  # indices [n, h, w, anchor] of the selected cells
# print(idxs.shape) # torch.Size([14, 4])
vecs = output[mask]  # use the mask to select the corresponding output vectors
# print(np.shape(vecs)) # torch.Size([14, 9])
return idxs, vecs
def _parse(self, idxs, vecs, t, anchors):
if len(idxs) == 0:
return torch.randn(0, 6).cuda()
else:
anchors = torch.tensor(anchors, dtype=torch.float32).cuda()
# print(idxs.shape) # torch.Size([14, 4]) N,H,W,3
a = idxs[:, 3]  # anchor index (0, 1 or 2)
# print(a.shape) # torch.Size([14])
# confidence = vecs[:, 0]
# "压缩置信度值到0-1之间"
confidence = torch.sigmoid(vecs[:, 0])
# print(confidence.shape) # torch.Size([14])
_classify = vecs[:, 5:]
# print(_classify.shape) # torch.Size([14, 4])
classify = torch.argmax(_classify, dim=1).float()
# print(classify.shape) # torch.Size([14])
# idxs: [n, h, w, anchor]; vecs: confidence, cx_offset, cy_offset, log(p_w), log(p_h), class scores
cy = (idxs[:, 1].float() + torch.sigmoid(vecs[:, 2])) * t
# print(cy.shape) # torch.Size([14])
cx = (idxs[:, 2].float() + torch.sigmoid(vecs[:, 1])) * t
# print(cx.shape) # torch.Size([14])
w = anchors[a, 0] * torch.exp(vecs[:, 3])
h = anchors[a, 1] * torch.exp(vecs[:, 4])
x1 = cx - w / 2
y1 = cy - h / 2
x2 = x1 + w
y2 = y1 + h
# print(confidence)
out = torch.stack([confidence, x1, y1, x2, y2, classify], dim=1)
return out
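# Worked example of the decoding above (the numbers are illustrative only, not from the post):
# for the 13x13 head t = 32, and suppose one selected index is [n=0, h=6, w=7, a=2],
# so anchors[2] = [180, 360], with raw outputs vecs = [conf, 0.2, -0.4, 0.1, -0.3, ...]:
#   cx = (7 + sigmoid(0.2)) * 32 ≈ 241.6        cy = (6 + sigmoid(-0.4)) * 32 ≈ 204.8
#   w  = 180 * exp(0.1) ≈ 198.9                 h  = 360 * exp(-0.3) ≈ 266.7
#   x1 = cx - w / 2 ≈ 142.1    y1 = cy - h / 2 ≈ 71.5    x2 = x1 + w    y2 = y1 + h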
if __name__ == '__main__':
save_path = "models/net_yolo.pth3"
dicts = {"0.0": "人", "1.0": "老虎", "2.0": "狮子", "3.0": "熊猫"}
font_path = "simsun.ttc"
font = ImageFont.truetype(font_path, size=20)
detector = Detector(save_path)
# a = torch.randn(3, 3, 416, 416).cuda()
# y = detector(a, 0.3, cfg.ANCHORS_GROUP)
# print(y.shape)
# exit()
img1 = pimg.open(r'data2\images\01.jpg')  # input image of arbitrary size
w, h = img1.size
merge_img, paste_coord = trans_square(img1)  # pad the image to a square
w1, h1 = merge_img.size
resize_img = merge_img.resize((416, 416))
w2, h2 = resize_img.size
scale = w2 / w1  # width of the resized image over the width of the squared image
# print(scale)
# img = np.array(img) / 255
# img = torch.Tensor(img)
# img = img.unsqueeze(0)
# img = img.permute(0, 3, 1, 2)
transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])
img = transform(resize_img)
img = img.unsqueeze(0)
# print(np.shape(img)) # torch.Size([1, 3, 416, 416])
img = img.cuda()
out_value = detector(img, 0.3, cfg.ANCHORS_GROUP)
# print(out_value) # torch.Size([12, 6]) confidence, x1, y1, x2, y2, classify
# print(out_value[..., -1]) # tensor([0., 0., 1., 1., 0., 0., 0., 1., 1., 1., 0., 1.], )
boxes = []
for j in range(cfg.CLASS_NUM):  # loop once per class
classify_mask = (out_value[..., -1] == j)  # mask for the current class (0, 1, 2 or 3)
# print(classify_mask)
_boxes = out_value[classify_mask]  # select the boxes of this class
# print(_boxes)
_boxes = _boxes.cpu()
boxes.append(tool.nms(_boxes))  # NMS within each class
# for box in boxes:
# try:
# img_draw = draw.ImageDraw(img1)
# c, x1, y1, x2, y2 = box[0, 0:5]
# # print(c, x1, y1, x2, y2)
# img_draw.rectangle((x1, y1, x2, y2))
# except:
# continue
# iterate over the per-class box lists left after NMS
count = 1
# print(boxes)
for box in boxes:
# iterate over every box of this class
# print(box)
for _box in box:
# print(_box)
# exit()
# try:
img_draw = draw.ImageDraw(merge_img)
confidence = round(_box[0].item(), 2)
# x1 = _box[1].item() / scale
# y1 = _box[2].item() / scale
# x2 = _box[3].item() / scale
# y2 = _box[4].item() / scale
x1 = max(0, _box[1].item() / scale)
y1 = max(0, _box[2].item() / scale)
x2 = min(w1, _box[3].item() / scale)
y2 = min(h1, _box[4].item() / scale)
cls = _box[5].item()
cls = dicts[str(cls)]  # look up the class name string
print(cls, confidence, x1, y1, x2, y2)
img_draw.rectangle((x1, y1, x2, y2), outline='red', width=2)
padding_pixel2(merge_img, x1, y1, 80, 20)
img_draw.text((x1, y1), cls, fill=(0, 0, 0), font=font)
img_draw.text((x1+40, y1), str(confidence), fill=(0, 0, 0), font=font)
# except:
# continue
merge_img2 = merge_img.crop((paste_coord[0], paste_coord[1], paste_coord[0]+w, paste_coord[1]+h))
# merge_img2.save("./Save_images2/{}.jpg".format(count))
merge_img2.show()
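trans_square and padding_pixel2 come from helper modules that are not listed in this post. Below is a rough sketch of what trans_square is assumed to do here (paste the image onto a square canvas and return the canvas plus the paste coordinates); the real helper may differ, so treat the function name and padding colour as assumptions:

from PIL import Image

def trans_square_sketch(img):
    """Hypothetical stand-in for trans_square: centre img on a square canvas,
    return (square_img, (paste_x, paste_y))."""
    w, h = img.size
    side = max(w, h)
    canvas = Image.new("RGB", (side, side), (127, 127, 127))  # grey padding
    paste_coord = ((side - w) // 2, (side - h) // 2)
    canvas.paste(img, paste_coord)
    return canvas, paste_coord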
6: Utils
6.1 tool.py
import numpy as np
import torch
def ious(box, boxes, isMin = False):
box_area = (box[3] - box[1]) * (box[4] - box[2])
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 4] - boxes[:, 2])
xx1 = torch.max(box[1], boxes[:, 1])
yy1 = torch.max(box[2], boxes[:, 2])
xx2 = torch.min(box[3], boxes[:, 3])
yy2 = torch.min(box[4], boxes[:, 4])
w = torch.clamp(xx2 - xx1, min=0)
h = torch.clamp(yy2 - yy1, min=0)
inter = w * h
ovr2 = inter / (box_area + area - inter)
return ovr2
def nms(boxes, thresh=0.3, isMin = True):
if boxes.shape[0] == 0:
return np.array([])
_boxes = boxes[(-boxes[:, 0]).argsort()]
r_boxes = []
while _boxes.shape[0] > 1:
a_box = _boxes[0]
b_boxes = _boxes[1:]
r_boxes.append(a_box)
index = np.where(ious(a_box, b_boxes,isMin) < thresh)
_boxes = b_boxes[index]
if _boxes.shape[0] > 0:
r_boxes.append(_boxes[0])
return torch.stack(r_boxes)
if __name__ == '__main__':
# a = np.array([1,1,11,11])
# bs = np.array([[1,1,10,10],[11,11,20,20]])
# print(iou(a,bs))
bs = torch.tensor([[1, 1, 10, 10, 40,8], [1, 1, 9, 9, 10,9], [9, 8, 13, 20, 15,3], [6, 11, 18, 17, 13,2]])
# print(bs[:,3].argsort())
print(nms(bs))
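A quick numeric check of ious(), e.g. appended to the __main__ block above: boxes carry [confidence, x1, y1, x2, y2, ...], so coordinates are read from indices 1-4. For [_, 1, 1, 11, 11] against [_, 1, 1, 6, 11] the intersection is 5 * 10 = 50 and the union is 100 + 50 - 50 = 100, so IoU = 0.5:

box = torch.tensor([0.9, 1., 1., 11., 11.])
others = torch.tensor([[0.8, 1., 1., 6., 11.]])
print(ious(box, others))  # tensor([0.5000])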
6.2 parse_xml.py
from xml.dom.minidom import parse
import os
import traceback
from PIL import Image
dir_path = r"D:\PycharmProjects(2)\YOLO v3\data2"
xml_path = r"D:\PycharmProjects(2)\YOLO v3\data2\outputs2"
label_filename = os.path.join(dir_path, "label.txt")
dicts = {"人": 0, "老虎": 1, "狮子": 2, "熊猫": 3}
try:
label_file = open(label_filename, "w")
count = 0
for filename in os.listdir(xml_path):
try:
dom = parse(os.path.join(xml_path, filename))  # parse the xml annotation file
root = dom.documentElement
img_name = root.getElementsByTagName("path")[0].childNodes[0].data # D:\PycharmProjects(2)\YOLO v3\data2\images\01.jpg
item = root.getElementsByTagName("item")
label_file.write("images2/{0}.jpg ".format(str(count+1).zfill(2)))
for box in item:
cls_name = box.getElementsByTagName("name")[0].childNodes[0].data  # class name of this box
value = dicts[cls_name]
x1 = int(box.getElementsByTagName("xmin")[0].childNodes[0].data)  # x1 coordinate
y1 = int(box.getElementsByTagName("ymin")[0].childNodes[0].data)
x2 = int(box.getElementsByTagName("xmax")[0].childNodes[0].data)
y2 = int(box.getElementsByTagName("ymax")[0].childNodes[0].data)
cx = int(x1 + (x2 - x1) / 2)
cy = int(y1 + (y2 - y1) / 2)
w = x2 - x1
h = y2 - y1
label_file.write("{0} {1} {2} {3} {4} " .format(
value, cx, cy, w, h
))
label_file.write("\n")
count += 1
except Exception as e:
traceback.print_exc()
finally:
label_file.close()
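Each line written by this script is "<image path> <cls> <cx> <cy> <w> <h> ..." with one 5-tuple per box, which is exactly what MyDataset.__getitem__ splits apart. A small sanity check, assuming the file ends up at data2/label.txt as dataset.py expects:

with open("data2/label.txt") as f:
    for line in f:
        parts = line.split()
        assert (len(parts) - 1) % 5 == 0, line    # every box contributes exactly 5 numbers
        print(parts[0], (len(parts) - 1) // 5, "boxes")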
6.3 FRN.py
import torch
import torch.nn as nn
import numpy as np
class FRN(nn.Module):
def __init__(self, num_features, eps=1e-6, learnable_eps=False):
super().__init__()
shape = (1, num_features, 1, 1)
# print(shape) # (1, 16, 1, 1)
# print(torch.ones(*shape) * eps)
self.eps = nn.Parameter(torch.ones(*shape) * eps, requires_grad=True)
if not learnable_eps:
self.eps.requires_grad_(False)
self.gamma = nn.Parameter(torch.Tensor(*shape), requires_grad=True)
self.beta = nn.Parameter(torch.Tensor(*shape), requires_grad=True)
self.tau = nn.Parameter(torch.Tensor(*shape), requires_grad=True)
self.reset_parameters()
def forward(self, x): # x = torch.rand(10, 16, 224, 224)
avg_dims = tuple(range(2, x.dim())) # range(2,4)=2,3
# print(np.shape(avg_dims)) # (2,)
# print(np.shape(x)) # torch.Size([10, 16, 224, 224])
nu2 = torch.pow(x, 2).mean(dim=avg_dims, keepdim=True)
# print(nu2.shape) # torch.Size([10, 16, 1, 1])
# nu2 = torch.pow(x, 2).mean(dim=(2,3), keepdim=True)
# x = x * torch.rsqrt(nu2 + torch.abs(self.eps))
x = x / torch.sqrt(nu2 + torch.abs(self.eps))
# print(x.shape) # torch.Size([10, 16, 224, 224])
return torch.max(self.gamma * x + self.beta, self.tau)
def reset_parameters(self):
nn.init.ones_(self.gamma)
nn.init.zeros_(self.beta)  # the FRN paper initialises gamma to 1 and beta, tau to 0
nn.init.zeros_(self.tau)
if __name__ == '__main__':
x = torch.rand(10, 16, 224, 224)
frn = FRN(16)
frn(x)
# print(frn(x))
print(frn(x).shape) # torch.Size([10, 16, 224, 224])
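A minimal numerical check of what FRN computes, matching the forward() above: y = max(gamma * x / sqrt(mean(x^2 over H, W) + |eps|) + beta, tau). It can be appended to the __main__ block:

x = torch.rand(2, 16, 8, 8)
frn = FRN(16)
nu2 = x.pow(2).mean(dim=(2, 3), keepdim=True)
manual = torch.max(frn.gamma * x / torch.sqrt(nu2 + frn.eps.abs()) + frn.beta, frn.tau)
print(torch.allclose(frn(x), manual))  # True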
7: Result images