MTCNN训练不收敛原因:
地址: https://github.com/dlunion/mtcnn
我们的训练数据标签格式:
wider face:
pos/001.jpg 1 x1 y1 x2 y2 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
part/001.jpg -1 x1 y1 x2 y2 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
neg/001.jpg 0 -1 -1 -1 -1 (x1 y1 x2 y2) -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
celebA:
landmark/001.jpg -1 -1 -1 -1 -1 pst1_x pst1_y pst2_x pst2_y pst3_x pst3_y pst4_x pst4_y pst5_x pst5_y
作者要求的
训练数据标签格式:
pos/001.jpg 1 x1 y1 x2 y2 (x1 y1 x2 y2)
pst1_x pst1_y
pst2_x pst2_y
pst3_x pst3_y
pst4_x pst4_y
pst5_x pst5_y
part/001.jpg -1 x1 y1 x2 y2 (x1 y1 x2 y2)
pst1_x pst1_y
pst2_x pst2_y
pst3_x pst3_y
pst4_x pst4_y
pst5_x pst5_y
neg/001.jpg 0 -1 -1 -1 -1 (x1 y1 x2 y2)
pst1_x pst1_y
pst2_x pst2_y
pst3_x pst3_y
pst4_x pst4_y
pst5_x pst5_y
在 "pts_loss" 层(type: "MTCNNEuclideanLoss")中,以 "label"(分类的标签)来判断是否 ignore。对于我们的训练数据标签格式:
class: ignore_label=-1, 可以正常分类;
bbox regression:
ignore_label=0, 有landmark中-1参加计算,导致loss无法收敛;
landmark:
ignore_label=0, 有part中-1参加计算,导致loss无法收敛;
解决思路:
在做 class、bbox regression、landmark 任务时,判断标签值是否全部为 -1,来作为 ignore 条件。
(注意:下面的实现是"只要该样本的回归目标中有任一分量等于 ignore_label 就跳过该样本";在上面的标签格式中 -1 总是整组出现,因此两种判断在本数据上等价。)
修改后"MTCNNEuclideanLoss.cpp"如下:
- #include <vector>
- #include "caffe/layers/mtcnn_euclidean_loss_layer.hpp"
- #include "caffe/util/math_functions.hpp"
- #include <iostream>
- using namespace std;
- namespace caffe {
- // Validates the bottom blobs and sizes the internal difference buffer.
- //
- // bottom[0] and bottom[1] must have the same per-sample element count.
- // When loss_param.ignore_label is set, exactly three bottoms are required
- // (the third one is the label blob); otherwise exactly two.
- template <typename Dtype>
- void MTCNNEuclideanLossLayer<Dtype>::Reshape(
-     const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
-   LossLayer<Dtype>::Reshape(bottom, top);
-   CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1))
-       << "Inputs must have the same dimension.";
-   const bool expects_label =
-       this->layer_param().loss_param().has_ignore_label();
-   if (expects_label) {
-     CHECK_EQ(bottom.size(), 3) << "has_ignore_label=true but not input label";
-   } else {
-     CHECK_EQ(bottom.size(), 2) << "has_ignore_label=false but input mismatch";
-   }
-   // diff_ holds bottom[0] - bottom[1], so it takes the shape of bottom[0].
-   diff_.ReshapeLike(*bottom[0]);
- }
- // CPU forward pass of the MTCNN Euclidean loss.
- //
- // bottom[0]: predictions.
- // bottom[1]: regression targets (the values compared against ignore_label).
- // bottom[2]: label blob, required only when ignore_label is set; only its
- //            num() is used here, as the number of samples to visit.
- // top[0]:    scalar loss.
- //
- // Without ignore_label the loss is sum((b0 - b1)^2) / num / 2.
- // With ignore_label, a sample is skipped when ANY component of its target
- // equals ignore_label; the remaining samples contribute
- // sum((b0 - b1)^2) / 2 each, and the total is NOT divided by the batch size.
- // (The upstream version gated on label[i] != ignore_label instead of looking
- // at the target values.)
- //
- // The target check works for any channel count; the previous hand-unrolled
- // version only handled 4 (bbox) and 10 (landmark) channels and silently
- // produced a zero loss for anything else.
- template <typename Dtype>
- void MTCNNEuclideanLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
-     const vector<Blob<Dtype>*>& top) {
-   const int count = bottom[0]->count();
-   const bool has_ignore_label =
-       this->layer_param().loss_param().has_ignore_label();
- 
-   if (!has_ignore_label) {
-     // Plain Euclidean loss over the whole batch.
-     caffe_sub(
-         count,
-         bottom[0]->cpu_data(),
-         bottom[1]->cpu_data(),
-         diff_.mutable_cpu_data());
-     const Dtype dot = caffe_cpu_dot(count, diff_.cpu_data(), diff_.cpu_data());
-     top[0]->mutable_cpu_data()[0] = dot / bottom[0]->num() / Dtype(2);
-     return;
-   }
- 
-   // Converted once so every comparison below is Dtype-vs-Dtype, which is
-   // what the implicit int -> Dtype conversion did before.
-   const Dtype ignore_value =
-       static_cast<Dtype>(this->layer_param().loss_param().ignore_label());
-   const int countLabel = bottom[2]->num();
-   const int channel = bottom[0]->channels();
-   const Dtype* b0 = bottom[0]->cpu_data();
-   const Dtype* b1 = bottom[1]->cpu_data();
-   Dtype* diff = diff_.mutable_cpu_data();
- 
-   // Skipped samples must keep a zero difference so Backward sees no gradient.
-   memset(diff, 0, sizeof(Dtype) * count);
- 
-   Dtype loss = 0;
-   for (int i = 0; i < countLabel; ++i) {
-     const Dtype* target = b1 + i * channel;
- 
-     // A sample takes part only if none of its target values is ignore_label.
-     bool use_sample = true;
-     for (int c = 0; c < channel; ++c) {
-       if (target[c] == ignore_value) {
-         use_sample = false;
-         break;
-       }
-     }
-     if (!use_sample) {
-       continue;
-     }
- 
-     Dtype* sample_diff = diff + i * channel;
-     caffe_sub(channel, b0 + i * channel, target, sample_diff);
-     loss += caffe_cpu_dot(channel, sample_diff, sample_diff) / Dtype(2);
-   }
-   top[0]->mutable_cpu_data()[0] = loss;
- }
- // CPU backward pass of the MTCNN Euclidean loss.
- //
- // Uses diff_ (= bottom[0] - bottom[1], filled by the forward pass) to write
- // the gradient into the diff of bottom[0] (sign +1) and/or bottom[1]
- // (sign -1), depending on propagate_down.
- //
- // Without ignore_label: gradient = sign * top_diff / num * diff_.
- // With ignore_label:    gradient = sign * top_diff * diff_ for samples whose
- //                       target contains no ignore_label value, 0 otherwise.
- //
- // Fix: the ignore-label branch used to divide alpha by bottom[i]->num().
- // The ignore-label forward pass sums per-sample losses without dividing by
- // the batch size, so the matching gradient must not be divided either; the
- // Backward_gpu implementation in the .cu listing already omits the division.
- // The target check also works for any channel count now (previously only
- // 4 and 10 were handled; other counts produced a zero gradient).
- template <typename Dtype>
- void MTCNNEuclideanLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
-     const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
-   const bool has_ignore_label =
-       this->layer_param().loss_param().has_ignore_label();
- 
-   if (!has_ignore_label) {
-     for (int i = 0; i < 2; ++i) {
-       if (propagate_down[i]) {
-         const Dtype sign = (i == 0) ? 1 : -1;
-         const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
-         caffe_cpu_axpby(
-             bottom[i]->count(),              // count
-             alpha,                           // alpha
-             diff_.cpu_data(),                // a
-             Dtype(0),                        // beta
-             bottom[i]->mutable_cpu_diff());  // b
-       }
-     }
-     return;
-   }
- 
-   const Dtype ignore_value =
-       static_cast<Dtype>(this->layer_param().loss_param().ignore_label());
-   const Dtype* b1 = bottom[1]->cpu_data();
-   const int countLabel = bottom[2]->num();
-   const int channels = bottom[0]->channels();
- 
-   for (int i = 0; i < 2; ++i) {
-     if (!propagate_down[i]) {
-       continue;
-     }
-     Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
-     // Skipped samples keep a zero gradient.
-     memset(bottom_diff, 0, sizeof(Dtype) * bottom[i]->count());
- 
-     const Dtype sign = (i == 0) ? 1 : -1;
-     // Not divided by num: see the function comment.
-     const Dtype alpha = sign * top[0]->cpu_diff()[0];
- 
-     for (int j = 0; j < countLabel; ++j) {
-       const Dtype* target = b1 + j * channels;
- 
-       // Same rule as the forward pass: any ignore_label value in the target
-       // excludes the whole sample.
-       bool use_sample = true;
-       for (int c = 0; c < channels; ++c) {
-         if (target[c] == ignore_value) {
-           use_sample = false;
-           break;
-         }
-       }
-       if (!use_sample) {
-         continue;
-       }
- 
-       caffe_cpu_axpby(
-           channels,                         // count
-           alpha,                            // alpha
-           diff_.cpu_data() + channels * j,  // a
-           Dtype(0),                         // beta
-           bottom_diff + channels * j);      // b
-     }
-   }
- }
- #ifdef CPU_ONLY
- STUB_GPU(MTCNNEuclideanLossLayer);
- #endif
- INSTANTIATE_CLASS(MTCNNEuclideanLossLayer);
- REGISTER_LAYER_CLASS(MTCNNEuclideanLoss);
- } // namespace caffe
相应的 "MTCNNEuclideanLoss.cu"如下:
- #include <vector>
- #include "caffe/layers/mtcnn_euclidean_loss_layer.hpp"
- #include "caffe/util/math_functions.hpp"
- #include <iostream>
- using namespace std;
- namespace caffe {
- // Validates the bottom blobs and sizes the internal difference buffer.
- // Requires three bottoms when loss_param.ignore_label is set, two otherwise.
- // NOTE(review): this repeats the Reshape definition from the .cpp listing;
- // confirm whether the .cu file really needs its own copy.
- template <typename Dtype>
- void MTCNNEuclideanLossLayer<Dtype>::Reshape(
-     const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
-   LossLayer<Dtype>::Reshape(bottom, top);
-   CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1))
-       << "Inputs must have the same dimension.";
-   const bool label_required =
-       this->layer_param().loss_param().has_ignore_label();
-   if (label_required) {
-     CHECK_EQ(bottom.size(), 3) << "has_ignore_label=true but not input label";
-   } else {
-     CHECK_EQ(bottom.size(), 2) << "has_ignore_label=false but input mismatch";
-   }
-   // The difference buffer has the same shape as bottom[0].
-   diff_.ReshapeLike(*bottom[0]);
- }
- // GPU forward pass of the MTCNN Euclidean loss.
- //
- // bottom[0]: predictions.
- // bottom[1]: regression targets (the values compared against ignore_label).
- // bottom[2]: label blob, required only when ignore_label is set; only its
- //            num() is used here, as the number of samples to visit.
- // top[0]:    scalar loss.
- //
- // Without ignore_label the loss is sum((b0 - b1)^2) / num / 2.
- // With ignore_label, a sample is skipped when ANY component of its target
- // equals ignore_label; the remaining samples contribute
- // sum((b0 - b1)^2) / 2 each, and the total is NOT divided by the batch size.
- //
- // The ignore decision is taken on the host, so the targets are read through
- // cpu_data(); the arithmetic itself uses the gpu_data() pointers.
- // The target check works for any channel count; the previous hand-unrolled
- // version only handled 4 (bbox) and 10 (landmark) channels and silently
- // produced a zero loss for anything else.
- template <typename Dtype>
- void MTCNNEuclideanLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
-     const vector<Blob<Dtype>*>& top) {
-   const int count = bottom[0]->count();
-   const bool has_ignore_label =
-       this->layer_param().loss_param().has_ignore_label();
- 
-   if (!has_ignore_label) {
-     // Plain Euclidean loss over the whole batch.
-     caffe_gpu_sub(
-         count,
-         bottom[0]->gpu_data(),
-         bottom[1]->gpu_data(),
-         diff_.mutable_gpu_data());
-     Dtype dot;
-     caffe_gpu_dot(count, diff_.gpu_data(), diff_.gpu_data(), &dot);
-     top[0]->mutable_cpu_data()[0] = dot / bottom[0]->num() / Dtype(2);
-     return;
-   }
- 
-   const Dtype ignore_value =
-       static_cast<Dtype>(this->layer_param().loss_param().ignore_label());
-   const int countLabel = bottom[2]->num();
-   const int channel = bottom[0]->channels();
-   Dtype* diff = diff_.mutable_gpu_data();
- 
-   // Skipped samples must keep a zero difference so Backward sees no gradient.
-   caffe_gpu_memset(sizeof(Dtype) * count, 0, diff);
- 
-   const Dtype* b0 = bottom[0]->gpu_data();
-   const Dtype* b1 = bottom[1]->gpu_data();
-   // Host-side view of the targets, used only for the ignore test.
-   const Dtype* b1_cpu = bottom[1]->cpu_data();
- 
-   Dtype loss = 0;
-   for (int i = 0; i < countLabel; ++i) {
-     const Dtype* target_cpu = b1_cpu + i * channel;
- 
-     // A sample takes part only if none of its target values is ignore_label.
-     bool use_sample = true;
-     for (int c = 0; c < channel; ++c) {
-       if (target_cpu[c] == ignore_value) {
-         use_sample = false;
-         break;
-       }
-     }
-     if (!use_sample) {
-       continue;
-     }
- 
-     Dtype* sample_diff = diff + i * channel;
-     caffe_gpu_sub(channel, b0 + i * channel, b1 + i * channel, sample_diff);
-     Dtype dot;
-     caffe_gpu_dot(channel, sample_diff, sample_diff, &dot);
-     loss += dot / Dtype(2);
-   }
-   top[0]->mutable_cpu_data()[0] = loss;
- }
- // GPU backward pass of the MTCNN Euclidean loss.
- //
- // Uses diff_ (= bottom[0] - bottom[1], filled by the forward pass) to write
- // the gradient into the diff of bottom[0] (sign +1) and/or bottom[1]
- // (sign -1), depending on propagate_down.
- //
- // Without ignore_label: gradient = sign * top_diff / num * diff_.
- // With ignore_label:    gradient = sign * top_diff * diff_ for samples whose
- //                       target contains no ignore_label value, 0 otherwise
- //                       (no division by num, matching the un-normalized
- //                       loss of the ignore-label forward pass).
- //
- // The ignore decision is taken on the host from bottom[1]->cpu_data().
- // The target check works for any channel count; the previous hand-unrolled
- // version only handled 4 and 10 channels and produced a zero gradient for
- // anything else.
- template <typename Dtype>
- void MTCNNEuclideanLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
-     const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
-   const bool has_ignore_label =
-       this->layer_param().loss_param().has_ignore_label();
- 
-   if (!has_ignore_label) {
-     for (int i = 0; i < 2; ++i) {
-       if (propagate_down[i]) {
-         const Dtype sign = (i == 0) ? 1 : -1;
-         const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
-         caffe_gpu_axpby(
-             bottom[i]->count(),              // count
-             alpha,                           // alpha
-             diff_.gpu_data(),                // a
-             Dtype(0),                        // beta
-             bottom[i]->mutable_gpu_diff());  // b
-       }
-     }
-     return;
-   }
- 
-   const Dtype ignore_value =
-       static_cast<Dtype>(this->layer_param().loss_param().ignore_label());
-   // Host-side view of the targets, used only for the ignore test.
-   const Dtype* b1 = bottom[1]->cpu_data();
-   const int countLabel = bottom[2]->num();
-   const int channels = bottom[0]->channels();
- 
-   for (int i = 0; i < 2; ++i) {
-     if (!propagate_down[i]) {
-       continue;
-     }
-     // Skipped samples keep a zero gradient.
-     caffe_gpu_memset(sizeof(Dtype) * bottom[i]->count(), 0,
-         bottom[i]->mutable_gpu_diff());
- 
-     const Dtype sign = (i == 0) ? 1 : -1;
-     // Loop-invariant, so computed once per bottom instead of once per sample.
-     const Dtype alpha = sign * top[0]->cpu_diff()[0];
- 
-     for (int j = 0; j < countLabel; ++j) {
-       const Dtype* target = b1 + j * channels;
- 
-       // Same rule as the forward pass: any ignore_label value in the target
-       // excludes the whole sample.
-       bool use_sample = true;
-       for (int c = 0; c < channels; ++c) {
-         if (target[c] == ignore_value) {
-           use_sample = false;
-           break;
-         }
-       }
-       if (!use_sample) {
-         continue;
-       }
- 
-       caffe_gpu_axpby(
-           channels,                                       // count
-           alpha,                                          // alpha
-           diff_.gpu_data() + channels * j,                // a
-           Dtype(0),                                       // beta
-           bottom[i]->mutable_gpu_diff() + channels * j);  // b
-     }
-   }
- }
- INSTANTIATE_LAYER_GPU_FUNCS(MTCNNEuclideanLossLayer);
- } // namespace caffe
小结:在修改 mtcnn_euclidean_loss_layer.cu 时发现,const Dtype* b1 = bottom[1]->gpu_data(); 得到的是 GPU(显存)上的指针,不能在主机端代码中直接解引用来打印或取值;需要在 CPU 端读取这些值时,改用 const Dtype* b1_cpu = bottom[1]->cpu_data(); 就行了。