【darknet源码解析-19】region_layer.h 和 region_layer.c 解析

最新推荐文章于 2021-01-14 15:02:07 发布

caicaiatnbu

最新推荐文章于 2021-01-14 15:02:07 发布

阅读量1.4k

点赞数

分类专栏： darknet源码解析

本文链接：https://blog.csdn.net/caicaiatnbu/article/details/102923953

版权

本系列为 darknet 源码解析，本次解析 src/region_layer.h 与 src/region_layer.c 两个文件。region_layer 主要实现了 yolo v2 的最后一层（输出为 13*13*125），是 yolo v2 这篇论文的核心部分。

在阅读本节源码之前，请先了解 13*13*125 的输出在逻辑上是怎样的存储形式（在物理存储上是一维数组），以及 yolov2 中 bbox 的 [x, y, w, h] 是如何表示的。本节只解析了 yolov2 训练阶段的源码，inference 阶段未进行解析；配套的 cfg 文件为 cfg/yolov2-voc.cfg。

yolov2 对每个预测 box 的 [x, y] 和 confidence 进行逻辑回归，对类别进行 softmax 回归；

region_layer.h 的定义如下（注意：下面贴出的代码实际上是 reorg_layer.h 的内容——头文件保护宏为 REORG_LAYER_H，声明的也都是 reorg 层的函数）：

// Public interface of the reorg layer.
// NOTE(review): the surrounding text introduces this listing as region_layer.h,
// but the include guard and every declaration below belong to the reorg layer.
// Confirm which header was meant to be shown.
#ifndef REORG_LAYER_H
#define REORG_LAYER_H

#include "image.h"
#include "cuda.h"
#include "layer.h"
#include "network.h"

// Construct a YOLOv2 reorg layer.
layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra);
// Presumably updates the layer for a new input width/height; the definition is
// not shown here, so confirm against the implementation.
void resize_reorg_layer(layer *l, int w, int h);
// Forward pass of the YOLOv2 reorg layer.
void forward_reorg_layer(const layer l, network net);
// Backward pass of the YOLOv2 reorg layer.
void backward_reorg_layer(const layer l, network net);

// GPU counterparts of the forward/backward passes, only declared when GPU is defined.
#ifdef GPU
void forward_reorg_layer_gpu(layer l, network net);
void backward_reorg_layer_gpu(layer l, network net);
#endif

#endif

region_layer.c 详细解释如下：

//
// Created by wdong on 19-11-7.
//


#include "region_layer.h"
#include "activations.h"
#include "blas.h"
#include "box.h"
#include "cuda.h"
#include "utils.h"

#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>

/**
 * yolov2中的region层
 * @param batch 一个batch中包含图片的张数
 * @param w 输入特征图的宽度
 * @param h 输入特征图的高度
 * @param n 一个cell预测多少个bbox
 * @param classes 网络需要识别的物体类数
 * @param coords 一个bbox包含的[x, y, w, h]
 * @return
 */
layer make_region_layer(int batch, int w, int h, int n, int classes, int coords)
{
    layer l = {0};
    l.type = REGION; // 层类别

    l.n = n; // 一个cell预测多少个bbox
    l.batch = batch; // 一个batch中包含图片的张数
    l.h = h; // 输入图片的宽度
    l.w = w; // 输入图片的宽度
    l.c = n*(classes + coords + 1); // 输入图片的通道数 n*(20 + 4 + 1)
    l.out_w = l.w; // 输出图片的宽度
    l.out_h = l.h; // 输出图片的高度
    l.out_c = l.c; // 输出图片的通道数
    l.classes = classes; // 网络需要识别的物体类数
    l.coords = coords; // 一个bbox包含的[x, y, w, h]
    l.cost = calloc(1, sizeof(float)); // region层的总损失
    l.biases = calloc(n*2, sizeof(float)); // 存储bbox的Anchor box的[w,h]
    l.bias_updates = calloc(n*2, sizeof(float)); //存储bbox的Anchor box的[w,h]的更新值
    l.outputs = h*w*n*(classes + coords + 1); // reorg层对应输入图片的输出元素个数，reorg层输入输出元素个数不发生变化
    l.inputs = l.outputs; // reorg层一张输入图片的元素个数
    l.truths = 30*(l.coords + 1); // GT: 30*(4+1) 存储30个bbox的信息，这里是假设图片中GT bbox的数量是小于30的，这里是写死的；此处与yolov1 处是不同的；
    l.delta = calloc(batch*l.outputs, sizeof(float)); // region层误差项（包含整个batch的）
    l.output = calloc(batch*l.outputs, sizeof(float)); // region层所有输出（包含整个batch的）
    int i;
    // 存储bbox的Anchor box的[w,h]的初始化,在src/parse.c中parse_yolo函数会加载cfg中Anchor尺寸
    for(i = 0; i < n*2; ++i){
        l.biases[i] = .5;
    }

    l.forward = forward_region_layer; // reorg层前向传播
    l.backward = backward_region_layer; // reorg层反向传播
#ifdef GPU
    l.forward_gpu = forward_region_layer_gpu;
    l.backward_gpu = backward_region_layer_gpu;
    l.output_gpu = cuda_make_array(l.output, batch*l.outputs);
    l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs);
#endif

    fprintf(stderr, "detection\n");
    srand(0);

    return l;
}

/*
 * Resize a float buffer to `count` elements, aborting the program if the
 * allocation fails. The result of realloc goes through a temporary so the
 * caller's pointer is never replaced by NULL on failure. A NULL result for a
 * zero-sized request is passed through unchanged.
 */
static float *region_realloc_floats(float *buf, size_t count)
{
    float *resized = realloc(buf, count*sizeof(float));
    if(!resized && count > 0){
        fprintf(stderr, "resize_region_layer: realloc of %zu floats failed\n", count);
        exit(EXIT_FAILURE);
    }
    return resized;
}

/*
 * Adapt an existing region layer to a new input width/height.
 *
 * Updates w, h, outputs and inputs, then resizes the host output/delta buffers
 * to batch*outputs floats. When GPU is defined the device buffers are freed
 * and recreated from the resized host buffers.
 *
 * @param l layer to resize (modified in place)
 * @param w new input width
 * @param h new input height
 */
void resize_region_layer(layer *l, int w, int h)
{
    l->w = w;
    l->h = h;

    l->outputs = h*w*l->n*(l->classes + l->coords + 1);
    l->inputs = l->outputs;

    // Compute the element count in size_t so batch*outputs is not multiplied as int.
    size_t total = (size_t)l->batch*(size_t)l->outputs;

    l->output = region_realloc_floats(l->output, total);
    l->delta = region_realloc_floats(l->delta, total);

#ifdef GPU
    cuda_free(l->delta_gpu);
    cuda_free(l->output_gpu);

    l->delta_gpu =     cuda_make_array(l->delta, l->batch*l->outputs);
    l->output_gpu =    cuda_make_array(l->output, l->batch*l->outputs);
#endif
}

/*
 * Decode one predicted box from the raw output array.
 *
 * The four raw values are read from x at index, index + stride,
 * index + 2*stride and index + 3*stride. The first two are offset by the cell
 * position (i, j) and divided by w and h. The last two go through exp(), are
 * multiplied by biases[2*n] and biases[2*n+1], and are divided by w and h.
 *
 * Typical call:
 *   get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h);
 */
box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride)
{
    const float *raw = x + index;       // first of the four values for this box
    const float *prior = biases + 2*n;  // [w, h] multipliers for box n

    float raw_x = raw[0*stride];
    float raw_y = raw[1*stride];
    float raw_w = raw[2*stride];
    float raw_h = raw[3*stride];

    box out;
    out.h = exp(raw_h) * prior[1] / h;
    out.w = exp(raw_w) * prior[0] / w;
    out.y = (j + raw_y) / h;
    out.x = (i + raw_x) / w;
    return out;
}


/*
 * Read a box from four floats spaced `stride` elements apart, in the order
 * x, y, w, h. Any other members of box are zero-initialised.
 *
 * Typical call:
 *   float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1)
 */
box float_to_box(float *f, int stride)
{
    box out = {0};
    const float *cursor = f;

    out.x = *cursor;
    cursor += stride;
    out.y = *cursor;
    cursor += stride;
    out.w = *cursor;
    cursor += stride;
    out.h = *cursor;

    return out;
}

// delta_region_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, l.delta, .01, l.w*l.h);
float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale, int stride)
{
    // 获得第j*w+i个cell第n个bbox在当前特征图上位置和宽高
    box pred = get_region_box(x, biases, n, index, i, j, w, h, stride);
    float iou = box_iou(pred, truth); // 计算pred bbox 与 GT bbox的IOU【前12800GT boox

最低0.47元/天解锁文章

caicaiatnbu

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
【darknet源码解析-19】region_layer.h 和 region_layer.c 解析

本系列为darknet源码解析，本次解析src/region_layer.h 与 src/region_layer.c 两个。region_layer主要完成了yolo v2最后一层13*13*125，是yolo v2这篇论文的核心部分。在阅读本节源码之前，请先了解一下 13*13*125 是什么样子的逻辑存储形式，在物体存储是一维数组；以及yolov2中bbox的[x, y, w, h]是如...
复制链接

扫一扫

专栏目录