本系列为 darknet 源码解析,本次解析 src/region_layer.h 与 src/region_layer.c 两个文件。region_layer 实现了 yolo v2 的最后一层(输出为 13*13*125),是 yolo v2 这篇论文的核心部分。
在阅读本节源码之前,请先了解一下 13*13*125 的逻辑存储形式(其物理存储是一维数组),以及 yolov2 中 bbox 的 [x, y, w, h] 是如何表示的。本节只解析了 yolov2 训练阶段的源码,inference 阶段未进行解析;配套的 cfg 文件为 cfg/yolov2-voc.cfg。
yolov2 对每个预测box的[x,y],confidence进行逻辑回归,类别进行softmax回归;
region_layer.h 的定义如下(注意:下面贴出的内容实际上是 reorg_layer.h 的声明,其头文件保护宏与函数名均为 reorg,与 region_layer.h 并不一致):
#ifndef REORG_LAYER_H
#define REORG_LAYER_H
#include "image.h"
#include "cuda.h"
#include "layer.h"
#include "network.h"
// NOTE(review): the include guard and every prototype below name the *reorg*
// layer, not the region layer implemented in the accompanying .c listing.
// Construct a reorg layer.
layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse, int flatten, int extra);
// Update a reorg layer for a new input width/height.
void resize_reorg_layer(layer *l, int w, int h);
// Forward pass of the reorg layer.
void forward_reorg_layer(const layer l, network net);
// Backward pass of the reorg layer.
void backward_reorg_layer(const layer l, network net);
#ifdef GPU
// GPU variants of the forward/backward passes (only declared when GPU is defined).
void forward_reorg_layer_gpu(layer l, network net);
void backward_reorg_layer_gpu(layer l, network net);
#endif
#endif
region_layer.c 详细解释如下:
//
// Created by wdong on 19-11-7.
//
#include "region_layer.h"
#include "activations.h"
#include "blas.h"
#include "box.h"
#include "cuda.h"
#include "utils.h"
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>
/**
* yolov2中的region层
* @param batch 一个batch中包含图片的张数
* @param w 输入特征图的宽度
* @param h 输入特征图的高度
* @param n 一个cell预测多少个bbox
* @param classes 网络需要识别的物体类数
* @param coords 一个bbox包含的[x, y, w, h]
* @return
*/
layer make_region_layer(int batch, int w, int h, int n, int classes, int coords)
{
layer l = {0};
l.type = REGION; // 层类别
l.n = n; // 一个cell预测多少个bbox
l.batch = batch; // 一个batch中包含图片的张数
l.h = h; // 输入图片的宽度
l.w = w; // 输入图片的宽度
l.c = n*(classes + coords + 1); // 输入图片的通道数 n*(20 + 4 + 1)
l.out_w = l.w; // 输出图片的宽度
l.out_h = l.h; // 输出图片的高度
l.out_c = l.c; // 输出图片的通道数
l.classes = classes; // 网络需要识别的物体类数
l.coords = coords; // 一个bbox包含的[x, y, w, h]
l.cost = calloc(1, sizeof(float)); // region层的总损失
l.biases = calloc(n*2, sizeof(float)); // 存储bbox的Anchor box的[w,h]
l.bias_updates = calloc(n*2, sizeof(float)); //存储bbox的Anchor box的[w,h]的更新值
l.outputs = h*w*n*(classes + coords + 1); // reorg层对应输入图片的输出元素个数,reorg层输入输出元素个数不发生变化
l.inputs = l.outputs; // reorg层一张输入图片的元素个数
l.truths = 30*(l.coords + 1); // GT: 30*(4+1) 存储30个bbox的信息,这里是假设图片中GT bbox的数量是小于30的,这里是写死的;此处与yolov1 处是不同的;
l.delta = calloc(batch*l.outputs, sizeof(float)); // region层误差项(包含整个batch的)
l.output = calloc(batch*l.outputs, sizeof(float)); // region层所有输出(包含整个batch的)
int i;
// 存储bbox的Anchor box的[w,h]的初始化,在src/parse.c中parse_yolo函数会加载cfg中Anchor尺寸
for(i = 0; i < n*2; ++i){
l.biases[i] = .5;
}
l.forward = forward_region_layer; // reorg层前向传播
l.backward = backward_region_layer; // reorg层反向传播
#ifdef GPU
l.forward_gpu = forward_region_layer_gpu;
l.backward_gpu = backward_region_layer_gpu;
l.output_gpu = cuda_make_array(l.output, batch*l.outputs);
l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs);
#endif
fprintf(stderr, "detection\n");
srand(0);
return l;
}
/**
 * Resize the region layer for a new input width/height.
 *
 * Recomputes the per-image element counts and reallocates the host
 * output/delta buffers (and, in GPU builds, the device buffers).
 *
 * The realloc results are checked before they replace the old pointers:
 * assigning realloc's result straight back would lose the old buffer and
 * leave a NULL pointer behind on failure. On allocation failure the process
 * prints a message and exits.
 *
 * @param l layer to resize (modified in place)
 * @param w new input width
 * @param h new input height
 */
void resize_region_layer(layer *l, int w, int h)
{
    l->w = w;
    l->h = h;

    l->outputs = h*w*l->n*(l->classes + l->coords + 1);
    l->inputs = l->outputs;

    /* do the byte-count arithmetic in size_t so batch*outputs cannot wrap in int */
    size_t bytes = (size_t)l->batch * (size_t)l->outputs * sizeof(float);

    float *new_output = realloc(l->output, bytes);
    if(!new_output && bytes){
        fprintf(stderr, "resize_region_layer: realloc of output buffer failed\n");
        exit(EXIT_FAILURE);
    }
    l->output = new_output;

    float *new_delta = realloc(l->delta, bytes);
    if(!new_delta && bytes){
        fprintf(stderr, "resize_region_layer: realloc of delta buffer failed\n");
        exit(EXIT_FAILURE);
    }
    l->delta = new_delta;

#ifdef GPU
    /* device buffers are simply dropped and recreated from the host ones */
    cuda_free(l->delta_gpu);
    cuda_free(l->output_gpu);

    l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs);
    l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);
#endif
}
/*
 * Decode one predicted box from the raw layer output.
 * Example call from this file:
 *   get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h);
 *
 * x      : layer output array
 * biases : prior sizes, stored as [w, h] pairs; pair n is used
 * index  : offset of the box's first coordinate inside x
 * i, j   : column / row of the cell
 * w, h   : feature-map width / height used for normalisation
 * stride : distance in x between consecutive coordinates of the same box
 *
 * Returns the box with centre (cell offset + prediction) / size and
 * width/height exp(prediction) * prior / size.
 */
box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h, int stride)
{
    float raw_x = x[index];
    float raw_y = x[index + stride];
    float raw_w = x[index + 2*stride];
    float raw_h = x[index + 3*stride];
    const float *prior = biases + 2*n;
    box b;

    b.x = (i + raw_x) / w;
    b.y = (j + raw_y) / h;
    b.w = exp(raw_w) * prior[0] / w;
    b.h = exp(raw_h) * prior[1] / h;

    return b;
}
/*
 * Read four floats, `stride` elements apart, into a box as x, y, w, h.
 * Example call from this file:
 *   float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1)
 */
box float_to_box(float *f, int stride)
{
    box b = {0};
    float *cursor = f;

    b.x = *cursor;
    cursor += stride;
    b.y = *cursor;
    cursor += stride;
    b.w = *cursor;
    cursor += stride;
    b.h = *cursor;

    return b;
}
/*
 * Write the coordinate error terms of one predicted box into delta and
 * return the IOU between that prediction and the target box.
 * Example call from this file:
 *   delta_region_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, l.delta, .01, l.w*l.h);
 *
 * The target box is converted into the same parameterisation the layer
 * predicts (offset inside cell (i, j), log of size relative to prior n);
 * each delta entry is scale * (target - prediction).
 */
float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale, int stride)
{
    /* decoded prediction, only needed for the returned IOU */
    box pred = get_region_box(x, biases, n, index, i, j, w, h, stride);
    float iou = box_iou(pred, truth);

    /* targets expressed in the network's raw coordinates */
    float target[4];
    target[0] = (truth.x*w - i);
    target[1] = (truth.y*h - j);
    target[2] = log(truth.w*w / biases[2*n]);
    target[3] = log(truth.h*h / biases[2*n + 1]);

    int k;
    for(k = 0; k < 4; ++k){
        int at = index + k*stride;
        delta[at] = scale * (target[k] - x[at]);
    }

    return iou;
}
/*
 * Write error terms for n extra per-box values (those beyond the four box
 * coordinates): delta[index + k*stride] = scale * (truth[k] - x[index + k*stride]).
 *
 * NOTE(review): scale is declared int while the caller in this file passes
 * l.mask_scale; a fractional value would be truncated - confirm intent.
 */
void delta_region_mask(float *truth, float *x, int n, int index, float *delta, int stride, int scale)
{
    int k = 0;
    int at = index;

    while(k < n){
        delta[at] = scale*(truth[k] - x[at]);
        at += stride;
        ++k;
    }
}
/*
 * Write the class error terms for one predicted box into delta and add the
 * score of the true class to *avg_cat.
 * Example call from this file:
 *   delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax);
 *
 * output/delta : layer output and error arrays
 * index        : offset of the box's first class score
 * class        : ground-truth class id
 * classes      : number of classes (flat case)
 * hier         : class hierarchy, or NULL for flat classification
 * scale        : multiplier applied to every error term
 * stride       : distance between consecutive class scores of one box
 * avg_cat      : accumulator for the true-class score
 * tag          : callers in this file pass !l.softmax
 */
void delta_region_class(float *output, float *delta, int index, int class, int classes, tree *hier, float scale, int stride, float *avg_cat, int tag)
{
    int k;

    if(!hier){
        /* flat classification */
        if (tag && delta[index]){
            /* a delta was already written for this box: only refresh the
               entry of the true class and leave the others untouched */
            delta[index + stride*class] = scale * (1 - output[index + stride*class]);
            return;
        }
        for(k = 0; k < classes; ++k){
            int at = index + stride*k;
            float target = (k == class) ? 1 : 0;
            delta[at] = scale * (target - output[at]);
            if(k == class) *avg_cat += output[at];   /* score given to the true class */
        }
        return;
    }

    /* hierarchical classification (per the original author's notes this path
       is only taken for YOLO9000-style configs): climb from the true class
       to the root through hier->parent */
    float pred = 1;
    for(; class >= 0; class = hier->parent[class]){
        int hit = index + stride*class;
        int g = hier->group[class];
        int offset = hier->group_offset[g];

        pred *= output[hit];
        /* every member of the group gets target 0 ... */
        for(k = 0; k < hier->group_size[g]; ++k){
            int at = index + stride*(offset + k);
            delta[at] = scale * (0 - output[at]);
        }
        /* ... then the node on the true path is overwritten with target 1 */
        delta[hit] = scale * (1 - output[hit]);
    }
    *avg_cat += pred;
}
float logit(float x)
{
return log(x/(1.-x));
}
/* Returns 1 when x is NaN (the only value that compares unequal to itself), otherwise 0. */
float tisnan(float x)
{
    if(x == x){
        return 0;
    }
    return 1;
}
/*
 * Flat offset into l.output / l.delta of one value of one box.
 * Example call from this file:
 *   int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0);
 *
 * batch    : image index inside the batch
 * location : box_number*l.w*l.h + cell_index
 * entry    : which per-box value (0..coords-1 coordinates, coords = objectness,
 *            coords+1.. = class scores, as used by the callers in this file)
 */
int entry_index(layer l, int batch, int location, int entry)
{
    int area = l.w*l.h;                         /* cells per channel plane */
    int per_box = l.coords + l.classes + 1;     /* planes owned by one box */
    int box_n = location / area;
    int cell = location % area;

    return batch*l.outputs + box_n*area*per_box + entry*area + cell;
}
/**
 * Forward pass of the region layer.
 *
 * Copies net.input into l.output, applies the activations (only when GPU is
 * not defined), zeroes l.delta and returns if net.train is not set.
 * In training it fills l.delta with the error terms for objectness, box
 * coordinates and classes, stores pow(mag_array(l.delta), 2) in *l.cost and
 * prints running statistics.
 *
 * @param l   the region layer
 * @param net the network; net.input, net.truth, net.train and net.seen are read
 */
void forward_region_layer(const layer l, network net)
{
int i,j,b,t,n;
// l.output = net.input for the whole batch
memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float));
#ifndef GPU
for (b = 0; b < l.batch; ++b){
for(n = 0; n < l.n; ++n){
int index = entry_index(l, b, n*l.w*l.h, 0);
activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); // logistic activation on the first two entries (x, y) of box n
index = entry_index(l, b, n*l.w*l.h, l.coords);
// objectness gets a logistic activation unless l.background is set
if(!l.background) activate_array(l.output + index, l.w*l.h, LOGISTIC);
index = entry_index(l, b, n*l.w*l.h, l.coords + 1);
// class scores get a logistic activation only when neither softmax variant is used
if(!l.softmax && !l.softmax_tree) activate_array(l.output + index, l.classes*l.w*l.h, LOGISTIC);
}
}
if (l.softmax_tree){
int i;
int count = l.coords + 1;
// one softmax per group of the class hierarchy
for (i = 0; i < l.softmax_tree->groups; ++i) {
int group_size = l.softmax_tree->group_size[i];
softmax_cpu(net.input + count, group_size, l.batch, l.inputs, l.n*l.w*l.h, 1, l.n*l.w*l.h, l.temperature, l.output + count);
count += group_size;
}
} else if (l.softmax){ // flat softmax over the class scores
int index = entry_index(l, 0, 0, l.coords + !l.background);
softmax_cpu(net.input + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output + index);
}
#endif
memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); // clear the error terms of the whole batch
if(!net.train) return; // nothing else to do at inference time
float avg_iou = 0;
float recall = 0;
float avg_cat = 0;
float avg_obj = 0;
float avg_anyobj = 0;
int count = 0;
int class_count = 0;
*(l.cost) = 0; // reset the layer loss
for (b = 0; b < l.batch; ++b) { // for every image in the batch
if(l.softmax_tree){ // hierarchy case (per the original author's notes: YOLO9000 only)
int onlyclass = 0;
for(t = 0; t < 30; ++t){
box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1);
if(!truth.x) break;
int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords];
float maxp = 0;
int maxi = 0;
// NOTE(review): coordinates above 100000 look like a sentinel for a label
// that carries only a class and no box - confirm against the data loader
if(truth.x > 100000 && truth.y > 100000){
// pick the box whose objectness * hierarchy probability of `class` is largest
for(n = 0; n < l.n*l.w*l.h; ++n){
int class_index = entry_index(l, b, n, l.coords + 1);
int obj_index = entry_index(l, b, n, l.coords);
float scale = l.output[obj_index];
l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]);
float p = scale*get_hierarchy_probability(l.output + class_index, l.softmax_tree, class, l.w*l.h);
if(p > maxp){
maxp = p;
maxi = n;
}
}
int class_index = entry_index(l, b, maxi, l.coords + 1);
int obj_index = entry_index(l, b, maxi, l.coords);
delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax);
// the if/else below has no lasting effect: the next statement zeroes the entry unconditionally
if(l.output[obj_index] < .3) l.delta[obj_index] = l.object_scale * (.3 - l.output[obj_index]);
else l.delta[obj_index] = 0;
l.delta[obj_index] = 0;
++class_count;
onlyclass = 1;
break;
}
}
if(onlyclass) continue;
} // if(l.softmax_tree)
for (j = 0; j < l.h; ++j) {
for (i = 0; i < l.w; ++i) { // every cell, row j / column i
for (n = 0; n < l.n; ++n) { // every box n of that cell
int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); // offset of box n of cell j*w+i
// decode the predicted box (normalised by the feature-map size)
box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h);
float best_iou = 0; // largest IOU with any ground-truth box
for(t = 0; t < 30; ++t){ // at most 30 ground-truth boxes per image
// read ground-truth box t into a box struct for the IOU computation
box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1);
if(!truth.x) break; // x == 0 marks the end of the ground-truth list
float iou = box_iou(pred, truth); // IOU between the prediction and ground-truth box t
if (iou > best_iou) {
best_iou = iou; // keep the maximum
}
}
int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, l.coords); // offset of the objectness of this box
avg_anyobj += l.output[obj_index]; // accumulate objectness over all boxes (printed as "No Obj")
// start by treating every predicted box as background: target objectness 0
l.delta[obj_index] = l.noobject_scale * (0 - l.output[obj_index]);
if(l.background) l.delta[obj_index] = l.noobject_scale * (1 - l.output[obj_index]); // only when l.background is set
if (best_iou > l.thresh) { // overlaps a ground truth well enough: no objectness penalty
l.delta[obj_index] = 0;
}
if(*(net.seen) < 12800){ // presumably net.seen counts images processed so far - confirm
// while *net.seen < 12800 every box is pulled towards its own prior
box truth = {0}; // prior box of box n in cell (j, i)
truth.x = (i + .5)/l.w; // centre of the cell, normalised by the feature-map width
truth.y = (j + .5)/l.h;
truth.w = l.biases[2*n]/l.w;
truth.h = l.biases[2*n+1]/l.h;
// write scale .01 coordinate error terms towards that prior into l.delta
delta_region_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, l.delta, .01, l.w*l.h);
}
}
}
}
for(t = 0; t < 30; ++t){ // for every ground-truth box
// read ground-truth box t into a box struct
box truth = float_to_box(net.truth + t*(l.coords + 1) + b*l.truths, 1);
if(!truth.x) break; // x == 0 marks the end of the ground-truth list
float best_iou = 0; // largest IOU found below
int best_n = 0; // box index achieving it
i = (truth.x * l.w); // cell containing the centre of ground-truth box t
j = (truth.y * l.h);
box truth_shift = truth;
truth_shift.x = 0; // move the ground truth to the origin so only the shape is compared
truth_shift.y = 0;
for(n = 0; n < l.n; ++n){ // every box of cell (j, i)
int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); // offset of box n of cell j*w+i
// decode the predicted box
box pred = get_region_box(l.output, l.biases, n, box_index, i, j, l.w, l.h, l.w*l.h);
if(l.bias_match){ // compare against the prior shape instead of the predicted shape
pred.w = l.biases[2*n]/l.w; // prior width, normalised by the feature-map width
pred.h = l.biases[2*n+1]/l.h; // prior height, normalised by the feature-map height
}
pred.x = 0; // move the candidate to the origin as well
pred.y = 0;
float iou = box_iou(pred, truth_shift); // shape-only IOU
if (iou > best_iou){
best_iou = iou; // remember the best IOU
best_n = n; // and which box achieved it
}
}
int box_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 0); // offset of the responsible box
// write the coordinate error terms for the responsible box; the returned
// value is the IOU between its actual prediction and the ground truth
float iou = delta_region_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, l.delta, l.coord_scale * (2 - truth.w*truth.h), l.w*l.h);
if(l.coords > 4){
int mask_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 4);
delta_region_mask(net.truth + t*(l.coords + 1) + b*l.truths + 5, l.output, l.coords - 4, mask_index, l.delta, l.w*l.h, l.mask_scale);
}
// count the box as recalled when its IOU exceeds .5
if(iou > .5) recall += 1;
avg_iou += iou;
// offset of the objectness of the responsible box
int obj_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords);
avg_obj += l.output[obj_index]; // accumulate objectness of responsible boxes (printed as "Obj")
// the responsible box contains an object: target objectness 1
l.delta[obj_index] = l.object_scale * (1 - l.output[obj_index]); // overwritten below when l.rescore or l.background is set
if (l.rescore) {
// with rescore the target objectness is the IOU instead of 1
l.delta[obj_index] = l.object_scale * (iou - l.output[obj_index]);
}
if(l.background){ // with background the target is 0
l.delta[obj_index] = l.object_scale * (0 - l.output[obj_index]);
}
// class id of the ground-truth box
int class = net.truth[t*(l.coords + 1) + b*l.truths + l.coords];
if (l.map) class = l.map[class]; // optional class remapping (per the original author's notes: YOLO9000 only)
// offset of the class scores of the responsible box
int class_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, l.coords + 1);
// write the class error terms for the responsible box
delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat, !l.softmax);
++count; // one more positive sample
++class_count; // also incremented in the softmax_tree class-only branch above, so it can exceed count
}
}
// loss = mag_array(l.delta)^2; presumably the sum of squared error terms if mag_array returns the L2 norm - confirm
*(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count);
}
/**
 * Backward pass of the region layer (CPU).
 *
 * The body is empty: the only code is commented out. The error terms in
 * l.delta are produced by forward_region_layer.
 *
 * @param l   the region layer
 * @param net the network
 */
void backward_region_layer(const layer l, network net)
{
/*
int b;
int size = l.coords + l.classes + 1;
for (b = 0; b < l.batch*l.n; ++b){
int index = (b*size + 4)*l.w*l.h;
gradient_array(l.output + index, l.w*l.h, LOGISTIC, l.delta + index);
}
axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1);
*/
}
/*
 * Map detection boxes from network-input coordinates back to the original
 * image.
 *
 * The image (w x h) is assumed to have been fitted into the network input
 * (netw x neth) with its aspect ratio preserved and centred; new_w x new_h is
 * the size it occupies there. Each box is shifted and rescaled accordingly.
 * When `relative` is 0 the result is additionally multiplied by w / h.
 *
 * dets : array of n detections, modified in place
 */
void correct_region_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative)
{
    int new_w = 0;
    int new_h = 0;
    int k;

    /* choose the dimension that limits the fit */
    if (((float)netw/w) < ((float)neth/h)) {
        new_w = netw;
        new_h = (h * netw)/w;
    } else {
        new_h = neth;
        new_w = (w * neth)/h;
    }

    for (k = 0; k < n; ++k){
        box b = dets[k].bbox;

        /* remove the padding offset, then undo the shrink factor */
        b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw);
        b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth);
        b.w *= (float)netw/new_w;
        b.h *= (float)neth/new_h;

        if(relative == 0){
            /* caller wants values scaled by the image size */
            b.x *= w;
            b.y *= h;
            b.w *= w;
            b.h *= h;
        }

        dets[k].bbox = b;
    }
}
/*
 * Convert the layer output of the first batch entry into detections.
 *
 * l           : region layer whose l.output is read (and modified when l.batch == 2)
 * w, h        : image size passed on to correct_region_boxes
 * netw, neth  : network input size passed on to correct_region_boxes
 * thresh      : objectness / probability values not above this are stored as 0
 * map         : optional class remapping used only in the softmax_tree branch
 * tree_thresh : threshold passed to hierarchy_top_prediction
 * relative    : passed on to correct_region_boxes
 * dets        : output array, indexed by n*l.w*l.h + cell; must hold l.w*l.h*l.n entries
 */
void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets)
{
int i,j,n,z;
float *predictions = l.output;
// With a batch of two, the second entry is mirrored left-right, the z == 0
// plane is negated, and the result is averaged into the first entry.
// NOTE(review): presumably the second image is a horizontally flipped copy
// of the first - confirm with the caller.
if (l.batch == 2) {
float *flip = l.output + l.outputs;
for (j = 0; j < l.h; ++j) {
for (i = 0; i < l.w/2; ++i) {
for (n = 0; n < l.n; ++n) {
for(z = 0; z < l.classes + l.coords + 1; ++z){
// NOTE(review): this offset formula (z outermost, then n) differs from the
// layout computed by entry_index (n outermost, then entry) - confirm
int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i;
int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1);
float swap = flip[i1];
flip[i1] = flip[i2];
flip[i2] = swap;
if(z == 0){
flip[i1] = -flip[i1];
flip[i2] = -flip[i2];
}
}
}
}
}
for(i = 0; i < l.outputs; ++i){
l.output[i] = (l.output[i] + flip[i])/2.;
}
}
for (i = 0; i < l.w*l.h; ++i){
// cell i -> row / column
int row = i / l.w;
int col = i % l.w;
for(n = 0; n < l.n; ++n){
int index = n*l.w*l.h + i;
// start with all class probabilities at 0
for(j = 0; j < l.classes; ++j){
dets[index].prob[j] = 0;
}
int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords);
int box_index = entry_index(l, 0, n*l.w*l.h + i, 0);
int mask_index = entry_index(l, 0, n*l.w*l.h + i, 4);
// objectness; fixed to 1 when l.background is set
float scale = l.background ? 1 : predictions[obj_index];
dets[index].bbox = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h);
dets[index].objectness = scale > thresh ? scale : 0;
// copy the extra per-box values (entries 4 .. l.coords-1) when a mask buffer is present
if(dets[index].mask){
for(j = 0; j < l.coords - 4; ++j){
dets[index].mask[j] = l.output[mask_index + j*l.w*l.h];
}
}
int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + !l.background);
if(l.softmax_tree){
hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0, l.w*l.h);
if(map){
// NOTE(review): 200 is hard-coded; assumes map has at least 200 entries
// and dets[index].prob can hold 200 values - confirm
for(j = 0; j < 200; ++j){
int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + map[j]);
float prob = scale*predictions[class_index];
dets[index].prob[j] = (prob > thresh) ? prob : 0;
}
} else {
// this j shadows the outer loop variable j for the rest of the branch
int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh, l.w*l.h);
dets[index].prob[j] = (scale > thresh) ? scale : 0;
}
} else {
// flat classes: only filled in when the objectness passed the threshold
if(dets[index].objectness){
for(j = 0; j < l.classes; ++j){
int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + j);
float prob = scale*predictions[class_index];
dets[index].prob[j] = (prob > thresh) ? prob : 0;
}
}
}
}
}
// map all boxes back to image coordinates
correct_region_boxes(dets, l.w*l.h*l.n, w, h, netw, neth, relative);
}
#ifdef GPU
/*
 * GPU forward pass of the region layer.
 *
 * Copies net.input_gpu into l.output_gpu, applies the activations on the
 * device and then:
 *   - when not training, or when l.onlyforward is set, pulls the activated
 *     output to l.output and returns;
 *   - otherwise pulls the activated output into net.input, runs the CPU
 *     forward_region_layer (which copies net.input into l.output and fills
 *     l.delta) and pushes l.delta to l.delta_gpu.
 *
 * A second `if(!net.train) return;` that followed the CPU call was removed:
 * net is passed by value and the function has already returned above when
 * net.train is 0, so that check could never fire.
 */
void forward_region_layer_gpu(const layer l, network net)
{
    copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1);
    int b, n;
    for (b = 0; b < l.batch; ++b){
        for(n = 0; n < l.n; ++n){
            /* first two coordinate entries (x, y) */
            int index = entry_index(l, b, n*l.w*l.h, 0);
            activate_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC);
            /* extra per-box values beyond the four box coordinates */
            if(l.coords > 4){
                index = entry_index(l, b, n*l.w*l.h, 4);
                activate_array_gpu(l.output_gpu + index, (l.coords - 4)*l.w*l.h, LOGISTIC);
            }
            /* objectness */
            index = entry_index(l, b, n*l.w*l.h, l.coords);
            if(!l.background) activate_array_gpu(l.output_gpu + index, l.w*l.h, LOGISTIC);
            /* class scores, only when no softmax variant handles them */
            index = entry_index(l, b, n*l.w*l.h, l.coords + 1);
            if(!l.softmax && !l.softmax_tree) activate_array_gpu(l.output_gpu + index, l.classes*l.w*l.h, LOGISTIC);
        }
    }
    if (l.softmax_tree){
        int index = entry_index(l, 0, 0, l.coords + 1);
        softmax_tree(net.input_gpu + index, l.w*l.h, l.batch*l.n, l.inputs/l.n, 1, l.output_gpu + index, *l.softmax_tree);
    } else if (l.softmax) {
        int index = entry_index(l, 0, 0, l.coords + !l.background);
        softmax_gpu(net.input_gpu + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index);
    }
    if(!net.train || l.onlyforward){
        cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs);
        return;
    }

    /* training: the error terms are computed on the host */
    cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs);
    forward_region_layer(l, net);
    /* l.output is not pushed back; only the error terms go to the device */
    cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs);
}
/*
 * GPU backward pass of the region layer.
 *
 * Applies the LOGISTIC gradient to the error terms of the entries that were
 * logistic-activated in the GPU forward pass (x/y, any extra values beyond
 * the four box coordinates, and objectness unless l.background is set), then
 * adds l.delta_gpu into net.delta_gpu.
 */
void backward_region_layer_gpu(const layer l, network net)
{
    int img, box_n;
    int area = l.w*l.h;

    for (img = 0; img < l.batch; ++img){
        for(box_n = 0; box_n < l.n; ++box_n){
            int at;

            /* x and y */
            at = entry_index(l, img, box_n*area, 0);
            gradient_array_gpu(l.output_gpu + at, 2*area, LOGISTIC, l.delta_gpu + at);

            /* extra per-box values, if any */
            if(l.coords > 4){
                at = entry_index(l, img, box_n*area, 4);
                gradient_array_gpu(l.output_gpu + at, (l.coords - 4)*area, LOGISTIC, l.delta_gpu + at);
            }

            /* objectness */
            at = entry_index(l, img, box_n*area, l.coords);
            if(!l.background){
                gradient_array_gpu(l.output_gpu + at, area, LOGISTIC, l.delta_gpu + at);
            }
        }
    }

    /* net.delta_gpu += l.delta_gpu */
    axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1);
}
#endif
/* Set the objectness output of every box of the first batch entry to 0. */
void zero_objectness(layer l)
{
    int area = l.w*l.h;
    int box_n, cell;

    for(box_n = 0; box_n < l.n; ++box_n){
        for (cell = 0; cell < area; ++cell){
            l.output[entry_index(l, 0, box_n*area + cell, l.coords)] = 0;
        }
    }
}
完。