YoloV5源码注释解读(ultralytics版本)(detect.py)

朴公英不会飞
已于 2023-03-22 20:02:19 修改
阅读量1.2k
点赞数 4
文章标签： YOLO 计算机视觉 深度学习
于 2023-03-22 19:51:22 首次发布
本文链接：https://blog.csdn.net/qq_43234191/article/details/129714937
版权
近期在读源码，ultralytics版本应该是GitHub上最热门的YOLOv5实现，功能也非常强大，因此对其进行了注释学习。
下面是ultralytics版YOLOv5推理文件detect.py的注释解读。通过学习可以发现，其中有很多平时没有注意到的功能，对实际项目会有非常大的帮助。
简单总结，detect.py按照执行顺序进行解读：
第一部分：导入包(系统包或自定义包)
第二部分：程序入口，执行程序
第三部分：Yolov5参数设置(命令行传参解析参数或代码更改参数)
第四部分：执行main函数(检查环境,执行run函数)
第五部分：执行run函数(推理)(又分为5小节)
参考链接：Yolov5(ultralytics版本detect.py讲解)
该视频讲得很细，本文在其基础上加入了一些自己的理解，整理如下。由于官方代码仍在更新，内容可能与最新版本有出入。
以下是整体代码注释版：
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
# 此部分为官网指导使用命令行进行推理的命令
"""
Run YOLOv5 detection inference on images, videos, directories, globs, YouTube, webcam, streams, etc.

Usage - sources:
    $ python detect.py --weights yolov5s.pt --source 0                               # webcam
                                                     img.jpg                         # image
                                                     vid.mp4                         # video
                                                     screen                          # screenshot
                                                     path/                           # directory
                                                     list.txt                        # list of images
                                                     list.streams                    # list of streams
                                                     'path/*.jpg'                    # glob
                                                     'https://youtu.be/Zgi9g1ksQHc'  # YouTube
                                                     'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream

Usage - formats:
    $ python detect.py --weights yolov5s.pt                 # PyTorch
                                 yolov5s.torchscript        # TorchScript
                                 yolov5s.onnx               # ONNX Runtime or OpenCV DNN with --dnn
                                 yolov5s_openvino_model     # OpenVINO
                                 yolov5s.engine             # TensorRT
                                 yolov5s.mlmodel            # CoreML (macOS-only)
                                 yolov5s_saved_model        # TensorFlow SavedModel
                                 yolov5s.pb                 # TensorFlow GraphDef
                                 yolov5s.tflite             # TensorFlow Lite
                                 yolov5s_edgetpu.tflite     # TensorFlow Edge TPU
                                 yolov5s_paddle_model       # PaddlePaddle
"""
#-----------------------------------------------------------------------------------------------------------------------------#

# 第一部分：导包 #################################################################################################################
#-----------------------------------------------------------------------------------------------------------------------------#
import argparse                             # 解析命令行
import os                                   # 操作系统接口库
os.environ["GIT_PYTHON_REFRESH"] = "quiet"  # 为解决报错增加的
import platform                             # 系统环境存在win、linux、类unix情况，使用platform模块可以判断当前的系统环境
import sys                                  # sys模块是与python解释器交互的一个接口，sys模块提供了许多函数和变量来处理Python运行时环境的不同部分
from pathlib import Path                    # 面向对象的方式来与文件系统交互
import torch                                # torch包可以用来构建和训练神经网络，它是一个基于Python的科学计算库，支持GPU加速的张量计算和深度学习。

# Root-path setup; ROOT is used below as the base for default argument paths.
FILE = Path(__file__).resolve()  # absolute path of this script
ROOT = FILE.parent  # directory containing this script, treated as the YOLOv5 root
if str(ROOT) not in sys.path:
    # Make the project's own packages (models/, utils/) importable.
    sys.path.append(str(ROOT))
# Re-express ROOT relative to the current working directory
# (e.g. '.' when the script is launched from its own folder).
ROOT = Path(os.path.relpath(ROOT, start=Path.cwd()))

# 加载自定义模块
# 这些模块均从该yolov5项目中不同文件夹中加载，从而实现不同作用

# DetectMultiBackend 不同深度学习框架选择
from models.common import DetectMultiBackend

# IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp', 'pfm'  # include image suffixes
# VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv'  # include video suffixes
# LoadImages：图片或视频加载； LoadScreenshots：屏幕截图加载； LoadStreams：网络流加载(RTSP, RTMP, HTTP streams)
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams

# LOGGER：日志打印 Profile：计时上下文管理器(统计各阶段耗时) check_file：文件检查(必要时下载) check_img_size：图片尺寸检查 check_imshow:图像展示环境检查
# check_requirements：requirements.txt环境依赖检查 colorstr:为字符串添加颜色/样式 cv2:opencv库
# increment_path:路径增量  non_max_suppression:NMS(非极大值抑制) print_args：参数信息打印 scale_boxes：检测框尺度调整
# strip_optimizer：去除权重文件中的优化器信息  xyxy2xywh:坐标转换 (xmin,ymin,xmax,ymax) to (x_center,y_center,width,height)
from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
                           increment_path, non_max_suppression, print_args, scale_boxes, strip_optimizer, xyxy2xywh)

# Annotator:绘图工具 colors:颜色实例 save_one_box：保存标记框
from utils.plots import Annotator, colors, save_one_box

# select_device：选择cpu or GPU  smart_inference_mode: torch装饰器
from utils.torch_utils import select_device, smart_inference_mode

# 第五部分：执行run ############################################################################################################
#-----------------------------------------------------------------------------------------------------------------------------
@smart_inference_mode()
def run(
        weights=ROOT / 'yolov5s.pt',  # model path or triton URL
        source=ROOT / 'data/images',  # file/dir/URL/glob/screen/0(webcam)
        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
        imgsz=(640, 640),  # inference size (height, width)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
        vid_stride=1,  # video frame-rate stride
):
    """Run detection on ``source`` and optionally display/save the annotated results.

    The function proceeds in five stages: classify the input source, create the
    output directory, load the model, build the matching data loader, then loop
    over the data doing pre-processing, inference, NMS and result handling.

    Args:
        weights: Model path (or list of paths, as produced by the CLI's ``nargs='+'``) or triton URL.
        source: File, directory, URL, glob, ``screen`` or a numeric webcam index.
        data: dataset.yaml path passed through to ``DetectMultiBackend``.
        imgsz: Inference size (height, width).
        conf_thres: Confidence threshold passed to ``non_max_suppression``.
        iou_thres: IoU threshold passed to ``non_max_suppression``.
        max_det: Maximum detections kept per image.
        device: Device string handed to ``select_device`` ('' lets it choose).
        view_img: Show each annotated frame in an OpenCV window.
        save_txt: Append one line per detection to ``<save_dir>/labels/<stem>.txt``.
        save_conf: Also write the confidence at the end of each label line.
        save_crop: Save every detected box as a crop under ``<save_dir>/crops/<class name>/``.
        nosave: Do not save annotated images/videos.
        classes: Optional list of class indices passed to ``non_max_suppression`` as a filter.
        agnostic_nms: Passed to ``non_max_suppression`` (class-agnostic NMS).
        augment: Passed to the model call (augmented inference).
        visualize: When truthy, a per-input directory is created and passed to the model call.
        update: Call ``strip_optimizer`` on the (first) weights file when done.
        project: Parent directory for results.
        name: Run sub-directory name inside ``project``.
        exist_ok: Forwarded to ``increment_path`` (reuse an existing ``project/name``).
        line_thickness: Bounding-box line width in pixels.
        hide_labels: Draw boxes without any label text.
        hide_conf: Draw the class name without the confidence value.
        half: Request FP16 inference from ``DetectMultiBackend``.
        dnn: Request OpenCV DNN for ONNX inference from ``DetectMultiBackend``.
        vid_stride: Video frame-rate stride forwarded to the stream/image loaders.

    Returns:
        None. Results are logged, shown and/or written under ``project/name``.
    """
    # ---- Stage 1: classify the input source ------------------------------------------------
    source = str(source)

    # Save annotated output unless disabled; a '.txt' source (list of inputs) is never saved.
    save_img = not nosave and not source.endswith('.txt')

    # True when the suffix (without the dot) is a known image/video extension.
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)

    # True when the source starts with a streaming/web scheme (case-insensitive).
    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Stream mode: numeric camera index, a '.streams' list, or a URL that is not a media file.
    webcam = source.isnumeric() or source.endswith('.streams') or (is_url and not is_file)

    # Screen-capture mode.
    screenshot = source.lower().startswith('screen')

    # A URL that points at a media file is fetched first.
    if is_url and is_file:
        source = check_file(source)  # download

    # ---- Stage 2: output directories -------------------------------------------------------
    # i.e. runs/exp --> runs/exp2, runs/exp3, ... unless exist_ok is set.
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # ---- Stage 3: load model ---------------------------------------------------------------
    device = select_device(device)

    # DetectMultiBackend picks the inference backend from the weights name, e.g.
    #   *.pt (PyTorch), *.torchscript, *.onnx (ONNX Runtime, or OpenCV DNN with dnn=True),
    #   *_openvino_model, *.engine (TensorRT), *.mlmodel (CoreML), *_saved_model, *.pb,
    #   *.tflite, *_edgetpu.tflite, *_paddle_model
    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)

    # Model stride, class names, and a flag telling whether the backend is PyTorch.
    stride, names, pt = model.stride, model.names, model.pt

    # Validate the inference size against the model stride
    # (presumably rounds to a stride multiple — confirm in utils.general).
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # ---- Stage 4: data loader --------------------------------------------------------------
    bs = 1  # batch_size
    if webcam:
        # Streams are always displayed when the environment supports cv2.imshow.
        view_img = check_imshow(warn=True)
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
        bs = len(dataset)  # one batch slot per stream
    elif screenshot:
        dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
    else:
        # Images, videos, directories and globs.
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)

    # Per-batch-slot output video path and writer; filled lazily when a video/stream is saved.
    vid_path, vid_writer = [None] * bs, [None] * bs

    # ---- Stage 5: inference loop -----------------------------------------------------------
    model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))  # warmup

    # seen: number of processed images; windows: display windows already created;
    # dt: timers for pre-processing, inference and NMS.
    seen, windows, dt = 0, [], (Profile(), Profile(), Profile())

    # Each item: source path(s), pre-processed image, original image(s), video capture, log prefix.
    for path, im, im0s, vid_cap, s in dataset:
        # Pre-process
        with dt[0]:
            im = torch.from_numpy(im).to(model.device)
            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
            im /= 255  # 0 - 255 to 0.0 - 1.0
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim

        # Inference
        # NOTE(review): the original annotation reports a prediction of shape [1, 18900, 85]
        # for data/images/bus.jpg with COCO weights (85 = x, y, w, h, objectness, 80 classes)
        # — not verified here.
        with dt[1]:
            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
            pred = model(im, augment=augment, visualize=visualize)

        # NMS: each surviving row is read below as [xmin, ymin, xmax, ymax, confidence, class].
        with dt[2]:
            pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

        # Second-stage classifier (optional)
        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

        # Process predictions; i indexes the image within the batch, det holds its detections.
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
                s += f'{i}: '
            else:
                # frame is the current video frame index when the loader exposes one, else 0.
                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
            p = Path(p)  # to Path

            save_path = str(save_dir / p.name)  # im.jpg
            # Label file stem; video/stream frames get a '_<frame>' suffix.
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
            s += '%gx%g ' % im.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            # Crops must come from an un-annotated copy, so copy before drawing.
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0, line_width=line_thickness, example=str(names))

            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, 5].unique():
                    n = (det[:, 5] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        # Corner format -> centre/size format, normalised by image width/height.
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        # Line layout: class x_center y_center width height [confidence]
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(f'{txt_path}.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                    if save_crop:
                        # save_crop implies the branch above ran, so c is the current class.
                        save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Stream results
            im0 = annotator.result()  # image with boxes drawn
            if view_img:
                if platform.system() == 'Linux' and p not in windows:
                    windows.append(p)
                    cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
                    cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)

        # Print time (inference-only)
        LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")

    # Close any writer that is still open so the last output video is finalized before
    # returning (previously only superseded writers were released).
    for writer in vid_writer:
        if isinstance(writer, cv2.VideoWriter):
            writer.release()

    # Print results
    # max(seen, 1) avoids a ZeroDivisionError when the loader yielded nothing.
    t = tuple(x.t / max(seen, 1) * 1E3 for x in dt)  # speeds per image
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        # The CLI supplies a list (nargs='+'), but direct callers and this function's own
        # default supply a single str/Path, for which weights[0] is wrong or raises.
        first_weights = weights[0] if isinstance(weights, (list, tuple)) else weights
        strip_optimizer(first_weights)  # update model (to fix SourceChangeWarning)


# 第三部分：parse_opt传参数 ##################################################################################################
# --------------------------------------------------------------------------------------------------------------------------
def parse_opt():
    """Parse the detect.py command line and return the resulting options namespace.

    Option names correspond to the keyword parameters of ``run`` (dashes become
    underscores), which is what allows ``main`` to forward them with ``**vars(opt)``.

    Returns:
        argparse.Namespace: the parsed options. ``imgsz`` given as a single value
        ``[s]`` is expanded to ``[s, s]``. All options are printed via ``print_args``.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    def add_switches(*switches):
        # Boolean flags: False by default, True only when present on the command line.
        for flag, help_text in switches:
            add(flag, action='store_true', help=help_text)

    # Weights file(s); several may be given.
    add('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path or triton URL')
    # Inference source: image, directory, video, stream, webcam index, ...
    add('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob/screen/0(webcam)')
    # Dataset yaml. NOTE(review): run() itself defaults to 'data/coco128.yaml' — confirm which is intended.
    add('--data', type=str, default=ROOT / 'data/coco.yaml', help='(optional) dataset.yaml path')
    # Inference size; one value means a square image (expanded after parsing).
    add('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    add('--conf-thres', type=float, default=0.25, help='confidence threshold')
    add('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
    add('--max-det', type=int, default=1000, help='maximum detections per image')
    # Empty string leaves the device choice to select_device().
    add('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    add_switches(
        ('--view-img', 'show results'),
        ('--save-txt', 'save results to *.txt'),
        ('--save-conf', 'save confidences in --save-txt labels'),
        ('--save-crop', 'save cropped prediction boxes'),
        ('--nosave', 'do not save images/videos'),
    )
    # Restrict detection to the listed class indices.
    add('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    add_switches(
        ('--agnostic-nms', 'class-agnostic NMS'),
        ('--augment', 'augmented inference'),
        ('--visualize', 'visualize features'),
        ('--update', 'update all models'),
    )
    # Results go to <project>/<name>; the name is incremented unless --exist-ok is given.
    add('--project', default=ROOT / 'runs/detect', help='save results to project/name')
    add('--name', default='exp', help='save results to project/name')
    add_switches(('--exist-ok', 'existing project/name ok, do not increment'))
    add('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    add_switches(
        ('--hide-labels', 'hide labels'),
        ('--hide-conf', 'hide confidences'),
        ('--half', 'use FP16 half-precision inference'),
        ('--dnn', 'use OpenCV DNN for ONNX inference'),
    )
    add('--vid-stride', type=int, default=1, help='video frame-rate stride')

    opt = parser.parse_args()
    if len(opt.imgsz) == 1:
        opt.imgsz = opt.imgsz * 2  # expand, e.g. [640] -> [640, 640]
    print_args(vars(opt))  # log every parsed option
    return opt

# 第四部分：执行main(opt)，检查环境，执行run函数 #################################################################################
# --------------------------------------------------------------------------------------------------------------------------
def main(opt):
    """Check the environment requirements, then run detection with the parsed options.

    Args:
        opt: Options namespace as returned by ``parse_opt``; every attribute is
            forwarded to ``run`` as a keyword argument of the same name.
    """
    skipped_packages = ('tensorboard', 'thop')
    check_requirements(exclude=skipped_packages)

    # vars() turns the namespace into a {name: value} dict; ** unpacks it into keyword arguments.
    run_kwargs = vars(opt)
    run(**run_kwargs)

# 第二部分：程序入口 #############################################################################################################
# -----------------------------------------------------------------------------------------------------------------------------
if __name__ == '__main__':
    # Script entry point: parse the command line, then hand the options to main().
    main(parse_opt())