yolo v3 的 yolo 层计算代码解析

最新推荐文章于 2024-09-04 00:10:59 发布

hfq0219

最新推荐文章于 2024-09-04 00:10:59 发布

阅读量6k

点赞数 7

分类专栏：图像处理、神经网络、机器学习、yolo

本文链接：https://blog.csdn.net/hfq0219/article/details/90141698

版权

本文深入解析 Yolo v3 的 [yolo] 层，从初始化到前向传播、误差反向传播，详述预测边界框计算和误差计算过程，探讨如何筛选置信度超过阈值的预测框。

摘要由CSDN通过智能技术生成

文章目录

1. [yolo] 层
2. 主要函数
3. 结尾

1. [yolo] 层

yolo v3 用 [yolo] 层来计算损失函数以及预测分类和边界框回归，前面经过 darknet-53 的基础网络提取特征，又经过一些特征融合，就得到了3个不同尺度的 yolo 层，分别预测大、中、小物体。主要代码在 ./src/yolo_layer.c。

[yolo]
mask = 0,1,2 #该层使用 anchors 列表中的哪几个锚盒（按下标选取）；0,1,2 对应最小的三个锚盒，用于预测小物体
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326 #锚盒，聚类算法得到
classes=80 #Coco 80种类别
num=9 #锚盒对数
jitter=.3 #数据增强
ignore_thresh = .7 #忽略阈值：预测框与任一真实框的最大 IoU 超过该值时，不计算该预测框的无目标置信度损失
truth_thresh = 1 #预测框与真实框的 IoU 超过该值时直接当作正样本计算损失；由于 IoU 不会大于 1，设为 1 相当于不启用该规则
random=1 #随机调整训练网络大小，从 320x320 ~ 608x608

2. 主要函数

--layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes);
--void resize_yolo_layer(layer *l, int w, int h);
--box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride);
--float delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride);
--void delta_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat);
--static int entry_index(layer l, int batch, int location, int entry);
--void forward_yolo_layer(const layer l, network net);
--void backward_yolo_layer(const layer l, network net);
--void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative);
--int yolo_num_detections(layer l, float thresh);
--void avg_flipped_yolo(layer l);
--int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets);

① `make_yolo_layer` 完成 yolo 层初始化操作

layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes)
{
   
    int i;
    layer l = {
   0}; //定义一个层
    l.type = YOLO; //层类型设为 YOLO

    l.n = n; //该层每个 grid 预测的框的个数，yolov3.cfg 为3
    l.total = total; //总 anchors 的对数，yolov3.cfg 为9
    l.batch = batch;
    l.h = h;
    l.w = w;
    l.c = n*(classes + 4 + 1); //输入和输出相等，yolo 层是最后一层，不需要把输出传递到下一层。每个 grid 预测 n 个 box，预测 x,y,w,h,置信度和80种类别
    l.out_w = l.w;
    l.out_h = l.h;
    l.out_c = l.c;
    l.classes = classes;
    l.cost = calloc(1, sizeof(float)); //误差代价分配空间
    l.biases = calloc(total*2, sizeof(float)); //保存 anchor 的大小
    if(mask) l.mask = mask; //l.mask 里保存了 [yolo] 配置里 “mask = 0,1,2” 的数值
    else{
   
        l.mask = calloc(n, sizeof(int));
        for(i = 0; i < n; ++i){
   
            l.mask[i] = i;
        }
    }
    l.bias_updates = calloc(n*2, sizeof(float));
    l.outputs = h*w*n*(classes + 4 + 1);
    l.inputs = l.outputs;
    l.truths = 90*(4 + 1); //每张图片最多保存90个标签
    l.delta = calloc(batch*l.outputs, sizeof(float)); //反向传播误差导数
    l.output = calloc(batch*l.outputs, sizeof(float)); //预测结果
    for(i = 0; i < total*2; ++i){
   
        l.biases[i] = .5; //如果未指定 anchors，默认设置为0.5，否则在 ./src/parser.c 里会把 l.biases 的值设为 anchors 的大小
    }

    l.forward = forward_yolo_layer; //函数指针，前向和后向计算
    l.backward = backward_yolo_layer;
#ifdef GPU //cuda 加速
    l.forward_gpu = forward_yolo_layer_gpu;
    l.backward_gpu = backward_yolo_layer_gpu;
    l.output_gpu = cuda_make_array(l.output, batch*l.outputs);
    l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs)