本文对YOLOv3的源码进行阅读。因为本人是小白,可能有理解不到位的地方,请见谅。源码fork自eriklindernoren/PyTorch-YOLOv3,如需下载,请移步GitHub自行搜索。
本文介绍utils.py。
from __future__ import division
import math
import time
import tqdm#进度条
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt#用于绘图
import matplotlib.patches as patches
# Bring a tensor from the GPU (or whatever device it lives on) back to the CPU.
def to_cpu(tensor):
    """Detach ``tensor`` from the autograd graph and place it on the CPU.

    Args:
        tensor: A ``torch.Tensor`` on any device.

    Returns:
        The detached tensor in CPU memory. The result never requires grad.
    """
    # detach() first so that no gradient history is carried over to the CPU side.
    return tensor.detach().cpu()
# Load the object class names (e.g. the 80 categories listed in data/coco.names).
# Per the original author's note, this is used by test.py, detect.py and train.py.
def load_classes(path):
    """Load class labels from the text file at ``path``.

    The file is expected to contain one class name per line.

    Args:
        path: Path of the class-name file.

    Returns:
        list[str]: The class names in file order.
    """
    # The context manager closes the handle even if reading raises.
    with open(path, "r") as fp:
        names = fp.read().split("\n")
    # split() yields a trailing "" when the file ends with a newline; drop only
    # that entry, so a file without a final newline keeps its last class name.
    if names[-1] == "":
        names.pop()
    return names
# Custom weight initialiser. Per the original author's note it is used by
# train.py as model.apply(weights_init_normal), which calls it once for every
# submodule of the model.
def weights_init_normal(m):
    """Initialise the parameters of a single submodule in place.

    The module is selected by its class name:

    * names containing ``"Conv"``: weights drawn from N(0.0, 0.02);
    * names containing ``"BatchNorm2d"``: weights drawn from N(1.0, 0.02)
      and biases set to 0.

    Any other module is left untouched.

    Args:
        m: One submodule of the network.
    """
    classname = m.__class__.__name__
    if "Conv" in classname:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif "BatchNorm2d" in classname:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        # Only the batch-norm bias is reset; convolution biases are not touched.
        torch.nn.init.constant_(m.bias.data, 0.0)
# The network predicts boxes relative to the square input image (e.g. 416x416,
# i.e. current_dim=416); this maps them back onto the original image size.
# Per the original author's note, this is used by detect.py.
def rescale_boxes(boxes, current_dim, original_shape):
    """Rescale bounding boxes to the original image shape.

    Args:
        boxes: 2-D array/tensor whose columns 0..3 are read as x1, y1, x2, y2
            in the padded ``current_dim`` x ``current_dim`` image. It is
            modified in place.
        current_dim: Side length of the square image the boxes refer to.
        original_shape: ``(height, width)`` of the original image.

    Returns:
        The same ``boxes`` object, with columns 0..3 rescaled.
    """
    orig_h, orig_w = original_shape
    # Factor between the original image's longer side and the square input.
    scale = current_dim / max(original_shape)
    # The amount of padding that was added along each axis (only the shorter
    # side was padded, so one of the two is always 0).
    pad_x = max(orig_h - orig_w, 0) * scale
    pad_y = max(orig_w - orig_h, 0) * scale
    # Image height and width after the padding is removed.
    unpad_h = current_dim - pad_y
    unpad_w = current_dim - pad_x
    # Shift by half the padding (floored), then stretch to the original size.
    boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w
    boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h
    boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w
    boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h
    return boxes
# Convert (center x, center y, width, height) boxes into top-left /
# bottom-right corner coordinates. Per the original author's note, this is used
# by non_max_suppression further down in this file and by test.py.
def xywh2xyxy(x):
    """Convert boxes from ``(cx, cy, w, h)`` to ``(x1, y1, x2, y2)``.

    Args:
        x: Tensor whose last dimension holds ``cx, cy, w, h`` in positions
            0..3. Any number of leading dimensions is allowed.

    Returns:
        A new tensor of the same shape with positions 0..3 of the last
        dimension replaced by ``x1, y1, x2, y2``. ``x`` itself is not modified.
    """
    # Start from a copy rather than an uninitialised buffer so that any entries
    # past index 3 hold the input values instead of arbitrary memory.
    y = x.clone()
    half_w = x[..., 2] / 2
    half_h = x[..., 3] / 2
    y[..., 0] = x[..., 0] - half_w
    y[..., 1] = x[..., 1] - half_h
    y[..., 2] = x[..., 0] + half_w
    y[..., 3] = x[..., 1] + half_h
    return y
#------------------------------------------以下三个函数为性能指标计算---------------------
#计算每个类的预测的精度,被test.py引用
#输入为:真阳性、置信度、预测的类别、真实值类别的列表
#输出为:统计precision, recall, AP, f1, ap_class指标
def ap_per_class(tp, conf, pred_cls, target_cls):
""" Compute the average precision, given the recall and precision curves.
Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
# Arguments
tp: True positives (list).
conf: Objectness value from 0-1 (list).
pred_cls: Predicted object classes (list).
target_cls: True object classes (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# Sort by objectness
i = np.argsort(-conf)
tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
# Find unique classes
unique_classes = np.unique(target_cls)
# Create Precision-Recall curve and compute AP for each class
ap, p, r = [], [], []
for c in tqdm