这篇博客主要介绍 multibox_loss_layer。它是 SSD 中比较关键的部分:内部创建了两个子 layer,分别计算定位(loc)loss 和置信度(conf)loss;此外还用到了 FindMatches、MineHardExamples、EncodeLocPrediction、EncodeConfPrediction 等比较重要的函数(其中一部分定义在 bbox_util 中,后面会介绍)。
代码:
#include <algorithm>
#include <map>
#include <utility>
#include <vector>

#include "caffe/layers/multibox_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {
-
-
// layer setup,在这个函数里面还分别新建了两个layer用于loc回归和conf loss的计算
-
template <
typename Dtype>
-
void MultiBoxLossLayer<Dtype>::LayerSetUp(
const
vector<Blob<Dtype>*>& bottom,
-
const
vector<Blob<Dtype>*>& top) {
-
LossLayer<Dtype>::LayerSetUp(bottom, top);
-
if (
this->layer_param_.propagate_down_size() ==
0) {
-
this->layer_param_.add_propagate_down(
true);
// 定位
-
this->layer_param_.add_propagate_down(
true);
// 分类得分
-
this->layer_param_.add_propagate_down(
false);
// prior
-
this->layer_param_.add_propagate_down(
false);
// ground truth
-
}
-
const MultiBoxLossParameter& multibox_loss_param =
-
this->layer_param_.multibox_loss_param();
-
multibox_loss_param_ =
this->layer_param_.multibox_loss_param();
// 这句话多余吧?
-
-
num_ = bottom[
0]->num();
// batch size
-
num_priors_ = bottom[
2]->height() /
4;
// 先验的个数,每个先验包含左上角和右下角的点坐标
-
// Get other parameters.
-
CHECK(multibox_loss_param.has_num_classes()) <<
“Must provide num_classes.”;
-
num_classes_ = multibox_loss_param.num_classes();
// 类别个数
-
CHECK_GE(num_classes_,
1) <<
“num_classes should not be less than 1.”;
-
share_location_ = multibox_loss_param.share_location();
// 共享类别位置预测 default = true
-
loc_classes_ = share_location_ ?
1 : num_classes_;
// 如果shared表示所有的类别同用一个location prediction,否则每一类各自预测。还不是很懂这样做的原因
-
background_label_id_ = multibox_loss_param.background_label_id();
// background的id
-
use_difficult_gt_ = multibox_loss_param.use_difficult_gt();
// 是否使用difficutlt的ground truth,这个具体是什么还有待考虑
-
mining_type_ = multibox_loss_param.mining_type();
// 这里跟老版SSD代码有些许不同
-
if (multibox_loss_param.has_do_neg_mining()) {
-
LOG(WARNING) <<
“do_neg_mining is deprecated, use mining_type instead.”;
-
do_neg_mining_ = multibox_loss_param.do_neg_mining();
// 难例挖掘 true
-
CHECK_EQ(do_neg_mining_,
-
mining_type_ != MultiBoxLossParameter_MiningType_NONE);
// MultiBoxLossParameter_MiningType_NONE变量?还不清楚具体的用法
-
}
-
do_neg_mining_ = mining_type_ != MultiBoxLossParameter_MiningType_NONE;
-
-
if (!
this->layer_param_.loss_param().has_normalization() &&
// loss normalization,出自LossParameter,默认VALID
-
this->layer_param_.loss_param().has_normalize()) {
-
normalization_ =
this->layer_param_.loss_param().normalize() ?
-
LossParameter_NormalizationMode_VALID :
-
LossParameter_NormalizationMode_BATCH_SIZE;
-
}
else {
-
normalization_ =
this->layer_param_.loss_param().normalization();
-
}
-
-
if (do_neg_mining_) {
-
CHECK(share_location_)
-
<<
“Currently only support negative mining if share_location is true.”;
-
}
-
-
vector<
int> loss_shape(
1,
1);
-
// Set up localization loss layer. // 定位loss
-
loc_weight_ = multibox_loss_param.loc_weight();
// loc weight 1.0
-
loc_loss_type_ = multibox_loss_param.loc_loss_type();
// loss 类型 SMOOTH_L1
-
// fake shape.
-
vector<
int> loc_shape(
1,
1);
// 1维
-
loc_shape.push_back(
4);
// 1,4
-
loc_pred_.Reshape(loc_shape);
// 1*2 [1,4]
-
loc_gt_.Reshape(loc_shape);
// [1,4]
-
loc_bottom_vec_.push_back(&loc_pred_);
// 存放前面的指针
-
loc_bottom_vec_.push_back(&loc_gt_);
// 存放gt的指针
-
loc_loss_.Reshape(loss_shape);
// location的loss [1,4]
-
loc_top_vec_.push_back(&loc_loss_);
// 存放top的指针
-
if (loc_loss_type_ == MultiBoxLossParameter_LocLossType_L2) {
// 新建一个层,实现对locationloss的计算
-
LayerParameter layer_param;
-
layer_param.set_name(
this->layer_param_.name() +
“_l2_loc”);
-
layer_param.set_type(
“EuclideanLoss”);
-
layer_param.add_loss_weight(loc_weight_);
-
loc_loss_layer_ = LayerRegistry<Dtype>::CreateLayer(layer_param);
-
loc_loss_layer_->SetUp(loc_bottom_vec_, loc_top_vec_);
-
}
else
if (loc_loss_type_ == MultiBoxLossParameter_LocLossType_SMOOTH_L1) {
// SMOOTH_L1,SSD是选这个
-
LayerParameter layer_param;
-
layer_param.set_name(
this->layer_param_.name() +
“_smooth_L1_loc”);
// mbox_loss_smooth_L1_loc
-
layer_param.set_type(
“SmoothL1Loss”);
-
layer_param.add_loss_weight(loc_weight_);
// 1.0
-
loc_loss_layer_ = LayerRegistry<Dtype>::CreateLayer(layer_param);
// 创建layer
-
loc_loss_layer_->SetUp(loc_bottom_vec_, loc_top_vec_);
//送入推断和gt,输出loc_loss,有一点不太清楚loc_bottom_vec_是两个地址,后面怎么弄?
-
}
else {
-
LOG(FATAL) <<
“Unknown localization loss type.”;
-
}
-
// Set up confidence loss layer.
-
// 新建一个层,实现的是对confidence loss的计算
-
conf_loss_type_ = multibox_loss_param.conf_loss_type();
// SOFTMAX
-
conf_bottom_vec_.push_back(&conf_pred_);
// conf_pred_ 是blob
-
conf_bottom_vec_.push_back(&conf_gt_);
// conf_gt_ 是blob
-
conf_loss_.Reshape(loss_shape);
// [1,4]
-
conf_top_vec_.push_back(&conf_loss_);
// 也是一维向量
-
if (conf_loss_type_ == MultiBoxLossParameter_ConfLossType_SOFTMAX) {
-
CHECK_GE(background_label_id_,
0)
-
<<
“background_label_id should be within [0, num_classes) for Softmax.”;
-
CHECK_LT(background_label_id_, num_classes_)
-
<<
“background_label_id should be within [0, num_classes) for Softmax.”;
-
LayerParameter layer_param;
-
layer_param.set_name(
this->layer_param_.name() +
“_softmax_conf”);
// mbox_loss_softmax_conf
-
layer_param.set_type(
“SoftmaxWithLoss”);
-
layer_param.add_loss_weight(Dtype(
1.));
// 1.0
-
layer_param.mutable_loss_param()->set_normalization(
-
LossParameter_NormalizationMode_NONE);
-
SoftmaxParameter* softmax_param = layer_param.mutable_softmax_param();
-
softmax_param->set_axis(
1);
-
// Fake reshape.
-
vector<
int> conf_shape(
1,
1);
-
conf_gt_.Reshape(conf_shape);
// [1]
-
conf_shape.push_back(num_classes_);
// 这两个参数没有用到
-
conf_pred_.Reshape(conf_shape);
-
conf_loss_layer_ = LayerRegistry<Dtype>::CreateLayer(layer_param);
-
conf_loss_layer_->SetUp(conf_bottom_vec_, conf_top_vec_);
-
}
else
if (conf_loss_type_ == MultiBoxLossParameter_ConfLossType_LOGISTIC) {
-
LayerParameter layer_param;
-
layer_param.set_name(
this->layer_param_.name() +
“_logistic_conf”);
-
layer_param.set_type(
“SigmoidCrossEntropyLoss”);
-
layer_param.add_loss_weight(Dtype(
1.));
-
// Fake reshape.
-
vector<
int> conf_shape(
1,
1);
-
conf_shape.push_back(num_classes_);
-
conf_gt_.Reshape(conf_shape);
-
conf_pred_.Reshape(conf_shape);
-
conf_loss_layer_ = LayerRegistry<Dtype>::CreateLayer(layer_param);
-
conf_loss_layer_->SetUp(conf_bottom_vec_, conf_top_vec_);
-
}
else {
-
LOG(FATAL) <<
“Unknown confidence loss type.”;
-
}
-
}
// layer setup 结束
-
-
template <
typename Dtype>
-
void MultiBoxLossLayer<Dtype>::Reshape(
const
vector<Blob<Dtype>*>& bottom,
-
const
vector<Blob<Dtype>*>& top) {
-
LossLayer<Dtype>::Reshape(bottom, top);
-
num_ = bottom[
0]->num();
// batch num
-
num_priors_ = bottom[
2]->height() /
4;
// 这里的blob维度还需要再仔细分析下
-
num_gt_ = bottom[
3]->height();
-
CHECK_EQ(bottom[
0]->num(), bottom[
1]->num());
-
CHECK_EQ(num_priors_ * loc_classes_ *
4, bottom[
0]->channels())
// loc_classes_共享是1,不共享就是classes数
-
<<
“Number of priors must match number of location predictions.”;
-
CHECK_EQ(num_priors_ * num_classes_, bottom[
1]->channels())
-
<<
“Number of priors must match number of confidence predictions.”;
-
}
-
-
// 预测loction bottom[0] dimension is [N*C*1*1],confidence bottom[1] dimension is [N*C*1*1]
-
// priors bottom[2] dimension is [N*1*2*W], gound truth bottom[3] dimension is [N*1*H*8]
-
template <
typename Dtype>
-
void MultiBoxLossLayer<Dtype>::Forward_cpu(
const
vector<Blob<Dtype>*>& bottom,
-
const
vector<Blob<Dtype>*>& top) {
-
const Dtype* loc_data = bottom[
0]->cpu_data();
-
const Dtype* conf_data = bottom[
1]->cpu_data();
-
const Dtype* prior_data = bottom[
2]->cpu_data();
-
const Dtype* gt_data = bottom[
3]->cpu_data();
-
// Retrieve all ground truth.
-
/*
-
message NormalizedBBox {
-
optional float xmin = 1;
-
optional float ymin = 2;
-
optional float xmax = 3;
-
optional float ymax = 4;
-
optional int32 label = 5;
-
optional bool difficult = 6;
-
optional float score = 7;
-
optional float size = 8;
-
}
-
*/
-
// Retrieve all ground truth.
-
map<
int,
vector<NormalizedBBox> > all_gt_bboxes;
//转化ground truth bounding box,存放在all_gt_bboxes
-
GetGroundTruth(gt_data, num_gt_, background_label_id_, use_difficult_gt_,
// background_label_id_=0,use_difficult_gt_=true
-
&all_gt_bboxes);
-
-
// Retrieve all prior bboxes. It is same within a batch since we assume all
-
// images in a batch are of same dimension.
-
// 把prior box 存入prior_bboxes,把variances存入prior_variances
-
vector<NormalizedBBox> prior_bboxes;
-
vector<
vector<
float> > prior_variances;
-
GetPriorBBoxes(prior_data, num_priors_, &prior_bboxes, &prior_variances);
-
-
-
// Retrieve all predictions.
-
vector<LabelBBox> all_loc_preds;
// map<int, vector<NormalizedBBox> > LabelBBox;
-
GetLocPredictions(loc_data, num_, num_priors_, loc_classes_, share_location_,
-
&all_loc_preds);
// 这里是把所有预测的box写入了all_loc_preds,这些box就是bottom[0],loc_data
-
-
// Find matches between source bboxes and ground truth bboxes.
-
vector<
map<
int,
vector<
float> > > all_match_overlaps;
-
FindMatches(all_loc_preds, all_gt_bboxes, prior_bboxes, prior_variances,
-
multibox_loss_param_, &all_match_overlaps, &all_match_indices_);
-
-
num_matches_ =
0;
-
int num_negs =
0;
-
// Sample hard negative (and positive) examples based on mining type.
-
MineHardExamples(*bottom[
1], all_loc_preds, all_gt_bboxes, prior_bboxes,
-
prior_variances, all_match_overlaps, multibox_loss_param_,
-
&num_matches_, &num_negs, &all_match_indices_,
-
&all_neg_indices_);
-
-
if (num_matches_ >=
1) {
-
// Form data to pass on to loc_loss_layer_.
-
vector<
int> loc_shape(
2);
-
loc_shape[
0] =
1;
-
loc_shape[
1] = num_matches_ *
4;
-
loc_pred_.Reshape(loc_shape);
// 地址已经存放进了loc_bottom_vec_
-
loc_gt_.Reshape(loc_shape);
-
Dtype* loc_pred_data = loc_pred_.mutable_cpu_data();
-
Dtype* loc_gt_data = loc_gt_.mutable_cpu_data();
-
EncodeLocPrediction(all_loc_preds, all_gt_bboxes, all_match_indices_,
-
prior_bboxes, prior_variances, multibox_loss_param_,
-
loc_pred_data, loc_gt_data);
-
loc_loss_layer_->Reshape(loc_bottom_vec_, loc_top_vec_);
-
loc_loss_layer_->Forward(loc_bottom_vec_, loc_top_vec_);
// 前向计算
-
}
else {
-
loc_loss_.mutable_cpu_data()[
0] =
0;
-
}
// 这里完成loc的loss前向计算
-
-
// Form data to pass on to conf_loss_layer_.
-
if (do_neg_mining_) {
// 计算positive和negative样本
-
num_conf_ = num_matches_ + num_negs;
-
}
else {
-
num_conf_ = num_ * num_priors_;
-
}
-
if (num_conf_ >=
1) {
-
// Reshape the confidence data.
-
vector<
int> conf_shape;
-
if (conf_loss_type_ == MultiBoxLossParameter_ConfLossType_SOFTMAX) {
// 选softmax
-
conf_shape.push_back(num_conf_);
-
conf_gt_.Reshape(conf_shape);
-
conf_shape.push_back(num_classes_);
-
conf_pred_.Reshape(conf_shape);
-
}
else
if (conf_loss_type_ == MultiBoxLossParameter_ConfLossType_LOGISTIC) {
-
conf_shape.push_back(
1);
-
conf_shape.push_back(num_conf_);
-
conf_shape.push_back(num_classes_);
-
conf_gt_.Reshape(conf_shape);
-
conf_pred_.Reshape(conf_shape);
-
}
else {
-
LOG(FATAL) <<
“Unknown confidence loss type.”;
-
}
-
if (!do_neg_mining_) {
-
// Consider all scores.
-
// Share data and diff with bottom[1].
-
CHECK_EQ(conf_pred_.count(), bottom[
1]->count());
-
conf_pred_.ShareData(*(bottom[
1]));
-
}
-
Dtype* conf_pred_data = conf_pred_.mutable_cpu_data();
-
Dtype* conf_gt_data = conf_gt_.mutable_cpu_data();
-
caffe_set(conf_gt_.count(), Dtype(background_label_id_), conf_gt_data);
-
EncodeConfPrediction(conf_data, num_, num_priors_, multibox_loss_param_,
-
all_match_indices_, all_neg_indices_, all_gt_bboxes,
-
conf_pred_data, conf_gt_data);
-
conf_loss_layer_->Reshape(conf_bottom_vec_, conf_top_vec_);
-
conf_loss_layer_->Forward(conf_bottom_vec_, conf_top_vec_);
-
}
else {
-
conf_loss_.mutable_cpu_data()[
0] =
0;
-
}
// 这里结束conf的loss计算
-
-
top[
0]->mutable_cpu_data()[
0] =
0;
-
if (
this->layer_param_.propagate_down(
0)) {
// true 正则化一下 loc_loss
-
Dtype normalizer = LossLayer<Dtype>::GetNormalizer(
-
normalization_, num_, num_priors_, num_matches_);
-
top[
0]->mutable_cpu_data()[
0] +=
-
loc_weight_ * loc_loss_.cpu_data()[
0] / normalizer;
-
}
-
if (
this->layer_param_.propagate_down(
1)) {
// true conf_loss
-
Dtype normalizer = LossLayer<Dtype>::GetNormalizer(
-
normalization_, num_, num_priors_, num_matches_);
-
top[
0]->mutable_cpu_data()[
0] += conf_loss_.cpu_data()[
0] / normalizer;
-
}
-
}
// 结束Forward计算
-
-
template <
typename Dtype>
-
void MultiBoxLossLayer<Dtype>::Backward_cpu(
const
vector<Blob<Dtype>*>& top,
-
const
vector<
bool>& propagate_down,
-
const
vector<Blob<Dtype>*>& bottom) {
-
-
if (propagate_down[
2]) {
-
LOG(FATAL) <<
this->type()
-
<<
” Layer cannot backpropagate to prior inputs.”;
-
}
-
if (propagate_down[
3]) {
-
LOG(FATAL) <<
this->type()
-
<<
” Layer cannot backpropagate to label inputs.”;
-
}
-
-
// Back propagate on location prediction.
-
if (propagate_down[
0]) {
// 先回传 loc_loss
-
Dtype* loc_bottom_diff = bottom[
0]->mutable_cpu_diff();
-
caffe_set(bottom[
0]->count(), Dtype(
0), loc_bottom_diff);
-
if (num_matches_ >=
1) {
-
vector<
bool> loc_propagate_down;
-
// Only back propagate on prediction, not ground truth.
-
loc_propagate_down.push_back(
true);
-
loc_propagate_down.push_back(
false);
-
loc_loss_layer_->Backward(loc_top_vec_, loc_propagate_down,
-
loc_bottom_vec_);
-
// Scale gradient.
-
Dtype normalizer = LossLayer<Dtype>::GetNormalizer(
-
normalization_, num_, num_priors_, num_matches_);
-
Dtype loss_weight = top[
0]->cpu_diff()[
0] / normalizer;
-
caffe_scal(loc_pred_.count(), loss_weight, loc_pred_.mutable_cpu_diff());
-
// Copy gradient back to bottom[0].
-
const Dtype* loc_pred_diff = loc_pred_.cpu_diff();
-
int count =
0;
-
for (
int i =
0; i < num_; ++i) {
-
for (
map<
int,
vector<
int> >::iterator it =
-
all_match_indices_[i].begin();
-
it != all_match_indices_[i].end(); ++it) {
-
const
int label = share_location_ ?
0 : it->first;
-
const
vector<
int>& match_index = it->second;
-
for (
int j =
0; j < match_index.size(); ++j) {
-
if (match_index[j] <=
-1) {
-
continue;
-
}
-
// Copy the diff to the right place.
-
int start_idx = loc_classes_ *
4 * j + label *
4;
-
caffe_copy<Dtype>(
4, loc_pred_diff + count *
4,
-
loc_bottom_diff + start_idx);
-
++count;
-
}
-
}
-
loc_bottom_diff += bottom[
0]->offset(
1);
-
}
-
}
-
}
-
-
// Back propagate on confidence prediction.
-
if (propagate_down[
1]) {
-
Dtype* conf_bottom_diff = bottom[
1]->mutable_cpu_diff();
-
caffe_set(bottom[
1]->count(), Dtype(
0), conf_bottom_diff);
-
if (num_conf_ >=
1) {
-
vector<
bool> conf_propagate_down;
-
// Only back propagate on prediction, not ground truth.
-
conf_propagate_down.push_back(
true);
-
conf_propagate_down.push_back(
false);
-
conf_loss_layer_->Backward(conf_top_vec_, conf_propagate_down,
-
conf_bottom_vec_);
-
// Scale gradient.
-
Dtype normalizer = LossLayer<Dtype>::GetNormalizer(
-
normalization_, num_, num_priors_, num_matches_);
-
Dtype loss_weight = top[
0]->cpu_diff()[
0] / normalizer;
-
caffe_scal(conf_pred_.count(), loss_weight,
-
conf_pred_.mutable_cpu_diff());
-
// Copy gradient back to bottom[1].
-
const Dtype* conf_pred_diff = conf_pred_.cpu_diff();
-
if (do_neg_mining_) {
-
int count =
0;
-
for (
int i =
0; i < num_; ++i) {
-
// Copy matched (positive) bboxes scores’ diff.
-
const
map<
int,
vector<
int> >& match_indices = all_match_indices_[i];
-
for (
map<
int,
vector<
int> >::const_iterator it =
-
match_indices.begin(); it != match_indices.end(); ++it) {
-
const
vector<
int>& match_index = it->second;
-
CHECK_EQ(match_index.size(), num_priors_);
-
for (
int j =
0; j < num_priors_; ++j) {
-
if (match_index[j] <=
-1) {
-
continue;
-
}
-
// Copy the diff to the right place.
-
caffe_copy<Dtype>(num_classes_,
-
conf_pred_diff + count * num_classes_,
-
conf_bottom_diff + j * num_classes_);
-
++count;
-
}
-
}
-
// Copy negative bboxes scores’ diff.
-
for (
int n =
0; n < all_neg_indices_[i].size(); ++n) {
-
int j = all_neg_indices_[i][n];
-
CHECK_LT(j, num_priors_);
-
caffe_copy<Dtype>(num_classes_,
-
conf_pred_diff + count * num_classes_,
-
conf_bottom_diff + j * num_classes_);
-
++count;
-
}
-
conf_bottom_diff += bottom[
1]->offset(
1);
-
}
-
}
else {
-
// The diff is already computed and stored.
-
bottom[
1]->ShareDiff(conf_pred_);
-
}
-
}
-
}
-
-
// After backward, remove match statistics.
-
all_match_indices_.clear();
-
all_neg_indices_.clear();
-
}
-
-
// Instantiate the layer template and register it under the type name
// "MultiBoxLoss".
INSTANTIATE_CLASS(MultiBoxLossLayer);
REGISTER_LAYER_CLASS(MultiBoxLoss);

}
// namespace caffe
</div>