YOLOv3代码阅读笔记之utils.py（第六篇）

本文链接：https://blog.csdn.net/weixin_37718439/article/details/104239760

本文是作者阅读YOLOv3源码的笔记，主要探讨utils.py文件。作者作为初学者，可能会有不足之处，源码来源于eriklindernoren的PyTorch-YOLOv3 GitHub仓库。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

本文是阅读YOLOv3源码时的笔记。本人是初学者，可能有理解不到位的地方，请见谅。源码fork自eriklindernoren/PyTorch-YOLOv3，如需下载，请前往GitHub自行搜索。
本文介绍utils.py。

from __future__ import division
import math
import time
import tqdm#进度条
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt#用于绘图
import matplotlib.patches as patches

# Bring a tensor from the GPU back to the CPU.
def to_cpu(tensor):
    """Return a copy of ``tensor`` that is cut from autograd and lives on the CPU.

    The tensor is detached first, so the result carries no gradient history,
    and is then transferred to host memory.
    """
    detached = tensor.detach()
    return detached.cpu()

# Load the object class names (e.g. the 80 categories in data/coco.names);
# used by test.py, detect.py and train.py.
def load_classes(path):
    """Load class labels from the text file at ``path``.

    The file is expected to hold one class name per line.

    Args:
        path: Path of the class-name file.

    Returns:
        A list of the lines of the file, in order, without line terminators.
        An empty file yields an empty list.
    """
    # The context manager guarantees the handle is closed even if read() fails.
    with open(path, "r") as fp:
        # splitlines() keeps the last name whether or not the file ends with a
        # newline; slicing off the final split("\n") element would drop it
        # when the trailing newline is missing.
        names = fp.read().splitlines()
    return names

# Custom weight initialiser, used by train.py:
# model.apply(weights_init_normal) runs it on every submodule of the model.
def weights_init_normal(m):
    """Initialise the parameters of a single submodule ``m`` in place.

    The decision is made on the module's class name:

    * names containing ``"Conv"``: weights drawn from N(0.0, 0.02);
    * names containing ``"BatchNorm2d"``: weights drawn from N(1.0, 0.02)
      and biases set to zero;
    * anything else is left untouched.
    """
    layer_type = type(m).__name__

    if "Conv" in layer_type:
        # Convolution weights: zero-mean normal distribution.
        torch.nn.init.normal_(m.weight.data, mean=0.0, std=0.02)
        return

    if "BatchNorm2d" in layer_type:
        # Batch-norm scale is centred on 1, shift starts at 0.
        torch.nn.init.normal_(m.weight.data, mean=1.0, std=0.02)
        torch.nn.init.constant_(m.bias.data, val=0.0)

# The network predicts boxes in the coordinates of its square input (e.g.
# 416x416, i.e. current_dim=416); this maps them back onto the original image.
# Used by detect.py.
def rescale_boxes(boxes, current_dim, original_shape):
    """Rescale bounding boxes to the original image shape, in place.

    Args:
        boxes: 2-D array/tensor; columns 0 and 2 are treated as x coordinates
            and columns 1 and 3 as y coordinates. Other columns are untouched.
        current_dim: Side length of the square image the boxes refer to.
        original_shape: ``(height, width)`` of the original image.

    Returns:
        The same ``boxes`` object, with its first four columns overwritten.
    """
    orig_h, orig_w = original_shape
    # Size ratio between the square input and the longer original side.
    ratio = current_dim / max(original_shape)
    # Total padding that was added along each axis; only the shorter side of
    # the original image receives any.
    pad_x = max(orig_h - orig_w, 0) * ratio
    pad_y = max(orig_w - orig_h, 0) * ratio

    # (column, padding on that axis, original extent on that axis)
    column_specs = (
        (0, pad_x, orig_w),
        (1, pad_y, orig_h),
        (2, pad_x, orig_w),
        (3, pad_y, orig_h),
    )
    for col, pad, orig_extent in column_specs:
        # Extent of the actual image content once the padding is removed.
        unpadded = current_dim - pad
        # Shift past the leading half of the padding, normalise, then scale up.
        boxes[:, col] = ((boxes[:, col] - pad // 2) / unpadded) * orig_extent
    return boxes

# Convert (centre x, centre y, width, height) boxes to (top-left, bottom-right)
# corner coordinates; used by non_max_suppression and test.py.
def xywh2xyxy(x):
    """Convert boxes from centre/size form to corner form.

    Args:
        x: Tensor whose last dimension starts with
            ``(center_x, center_y, width, height)``.

    Returns:
        A new tensor with the same shape, dtype and device as ``x`` whose
        first four channels are ``(x1, y1, x2, y2)``. Any further channels
        are copied from ``x`` unchanged. ``x`` itself is not modified.
    """
    # Start from a copy rather than an uninitialised buffer, so channels
    # beyond the first four hold the input's values instead of garbage.
    y = x.clone()
    half_w = x[..., 2] / 2
    half_h = x[..., 3] / 2
    y[..., 0] = x[..., 0] - half_w
    y[..., 1] = x[..., 1] - half_h
    y[..., 2] = x[..., 0] + half_w
    y[..., 3] = x[..., 1] + half_h
    return y

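#------------------------------------------ The following three functions compute performance metrics ---------------------
# Computes the prediction precision for each class; used by test.py.
# Inputs: lists of true positives, confidences, predicted classes and ground-truth classes.
# Outputs: the precision, recall, AP, f1 and ap_class statistics.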
def ap_per_class(tp, conf, pred_cls, target_cls):
    """ Compute the average precision, given the recall and precision curves.
    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
    # Arguments
        tp:    True positives (list).
        conf:  Objectness value from 0-1 (list).
        pred_cls: Predicted object classes (list).
        target_cls: True object classes (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """

    # Sort by objectness
    i = np.argsort(-conf)
    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]

    # Find unique classes
    unique_classes = np.unique(target_cls)

    # Create Precision-Recall curve and compute AP for each class
    ap, p, r = [], [], []
    for c in tqdm