Custom Caffe Layers in DispNet (2): DataAugmentation
This series of posts records my notes on the custom Caffe layers encountered while studying DispNet. This part covers the DataAugmentation layer, whose main job is to augment the input data as configured, mitigating overfitting from the data side. Updated 2018.10.25.
Detailed Functionality
Usage
Specifically, the DataAugmentation layer can be used in two ways (both invocation examples are given below):
First, the parameters are defined inside the layer. At run time the layer reads its own parameters; in this case there is one input (the data to be augmented) and there may be two outputs (the first is the augmented data, the second is the layer's parameter set).
Second, no parameters are defined inside the layer and the augmentation parameters arrive as an input. In this case there are two inputs (the first is the data to be augmented, the second carries the parameters) and one output (the augmented data).
Features
DataAugmentation supports the following kinds of augmentation:
- Spatial transform: configurable options include mirroring, rotation, zoom, and translation (specified directly, or along only the x or y direction);
- Chromatic transform: configurable options are brightness, gamma, contrast, and color;
- Effect transform: configurable options are fog size, fog amount, motion-blur angle, motion-blur size, shadow angle, shadow distance, shadow strength, and noise;
- Chromatic eigen transform.
Invocation
Invocation 1: augmentation parameters defined in the layer
layer {
  name: "DataAugmentation"
  type: "DataAugmentation"
  bottom: "input_blob"
  top: "output_blob1"
  top: "output_blob2"
  propagate_down: false
  augmentation_param {
    max_multiplier: 1
    augment_during_test: false
    recompute_mean: 1000
    mean_per_pixel: false
    translate {
      rand_type: "uniform_bernoulli"
      exp: false
      mean: 0
      spread: 0.4
      prob: 1.0
    }
    zoom {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: 0.2
      spread: 0.4
      prob: 1.0
    }
    squeeze {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: 0
      spread: 0.3
      prob: 1.0
    }
    lmult_pow {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: -0.2
      spread: 0.4
      prob: 1.0
    }
    lmult_mult {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: 0.0
      spread: 0.4
      prob: 1.0
    }
    lmult_add {
      rand_type: "uniform_bernoulli"
      exp: false
      mean: 0
      spread: 0.03
      prob: 1.0
    }
    sat_pow {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: 0
      spread: 0.4
      prob: 1.0
    }
    sat_mult {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: -0.3
      spread: 0.5
      prob: 1.0
    }
    sat_add {
      rand_type: "uniform_bernoulli"
      exp: false
      mean: 0
      spread: 0.03
      prob: 1.0
    }
    col_pow {
      rand_type: "gaussian_bernoulli"
      exp: true
      mean: 0
      spread: 0.4
      prob: 1.0
    }
    col_mult {
      rand_type: "gaussian_bernoulli"
      exp: true
      mean: 0
      spread: 0.2
      prob: 1.0
    }
    col_add {
      rand_type: "gaussian_bernoulli"
      exp: false
      mean: 0
      spread: 0.02
      prob: 1.0
    }
    ladd_pow {
      rand_type: "gaussian_bernoulli"
      exp: true
      mean: 0
      spread: 0.4
      prob: 1.0
    }
    ladd_mult {
      rand_type: "gaussian_bernoulli"
      exp: true
      mean: 0.0
      spread: 0.4
      prob: 1.0
    }
    ladd_add {
      rand_type: "gaussian_bernoulli"
      exp: false
      mean: 0
      spread: 0.04
      prob: 1.0
    }
    col_rotate {
      rand_type: "uniform_bernoulli"
      exp: false
      mean: 0
      spread: 1
      prob: 1.0
    }
    crop_width: 960
    crop_height: 256
    chromatic_eigvec: 0.51
    chromatic_eigvec: 0.56
    chromatic_eigvec: 0.65
    chromatic_eigvec: 0.79
    chromatic_eigvec: 0.01
    chromatic_eigvec: -0.62
    chromatic_eigvec: 0.35
    chromatic_eigvec: -0.83
    chromatic_eigvec: 0.44
    noise {
      rand_type: "uniform_bernoulli"
      exp: false
      mean: 0.03
      spread: 0.03
      prob: 1.0
    }
  }
}
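Each coefficient block above (translate, zoom, lmult_pow, and so on) shares the same five fields: rand_type, exp, mean, spread, and prob. The helper below sketches one plausible reading of these fields; sample_coeff is a hypothetical name, and the exact semantics should be verified against AugmentationLayerBase in the Caffe fork you use.

#include <cmath>
#include <random>
#include <string>

// Hypothetical helper illustrating one plausible reading of a block such as
// `zoom { rand_type: "uniform_bernoulli" exp: true mean: 0.2 spread: 0.4
// prob: 1.0 }`; check the layer sources for the authoritative version.
float sample_coeff(std::mt19937& rng, const std::string& rand_type,
                   bool exp_field, float mean, float spread, float prob) {
  std::bernoulli_distribution apply(prob);  // prob: chance this transform fires
  float v = 0.f;                            // 0 = "no change" before exp
  if (apply(rng)) {
    if (rand_type == "uniform_bernoulli") {
      std::uniform_real_distribution<float> u(mean - spread, mean + spread);
      v = u(rng);                           // uniform around `mean`
    } else if (rand_type == "gaussian_bernoulli") {
      std::normal_distribution<float> g(mean, spread);
      v = g(rng);                           // Gaussian with std `spread`
    }
  }
  // exp: true marks multiplicative coefficients; exp(0) = 1 leaves data unchanged
  return exp_field ? std::exp(v) : v;
}

Under this reading, zoom with exp: true and mean: 0.2 draws a multiplicative zoom factor centered on exp(0.2) ≈ 1.22, and a transform that does not fire degrades gracefully to the identity.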
Invocation 2: parameters provided by another augmentation layer
layer {
  name: "DataAugmentation"
  type: "DataAugmentation"
  bottom: "input_blob"
  bottom: "input_augmented_blob"
  top: "output_blob"
  propagate_down: false
  propagate_down: false
  augmentation_param {
    max_multiplier: 1
    augment_during_test: false
    recompute_mean: 1000
    mean_per_pixel: false
    crop_width: 960
    crop_height: 256
    chromatic_eigvec: 0.51
    chromatic_eigvec: 0.56
    chromatic_eigvec: 0.65
    chromatic_eigvec: 0.79
    chromatic_eigvec: 0.01
    chromatic_eigvec: -0.62
    chromatic_eigvec: 0.35
    chromatic_eigvec: -0.83
    chromatic_eigvec: 0.44
  }
}
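The nine chromatic_eigvec values are most naturally read as a 3x3 eigenvector matrix for PCA-style color augmentation: each RGB pixel is projected into the color eigenspace, perturbed there, and projected back. The row-major layout below is an assumption made for illustration; check the .cu implementation for the actual ordering.

// The nine chromatic_eigvec entries from the prototxt, arranged as a 3x3
// matrix (row-major layout assumed here).
const float kEigvec[3][3] = {
  { 0.51f,  0.56f,  0.65f },
  { 0.79f,  0.01f, -0.62f },
  { 0.35f, -0.83f,  0.44f },
};

// Project an RGB pixel into the color eigenspace: e = kEigvec * rgb.
void to_eigenspace(const float rgb[3], float e[3]) {
  for (int i = 0; i < 3; ++i)
    e[i] = kEigvec[i][0] * rgb[0] + kEigvec[i][1] * rgb[1] + kEigvec[i][2] * rgb[2];
}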
data_augmentation_layer.hpp
This header defines the variables and functions the layer needs. Because the layer operates on raw image data, backward computation is not supported:
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  for (int i = 0; i < propagate_down.size(); i++)
    if (propagate_down[i])
      LOG(FATAL) << "DataAugmentationLayer cannot do backward.";
  return;
}

virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  for (int i = 0; i < propagate_down.size(); i++)
    if (propagate_down[i])
      LOG(FATAL) << "DataAugmentationLayer cannot do backward.";
  return;
}
data_augmentation_layer.cpp
The augmentation itself runs only on the GPU, so the .cpp file only defines how data and parameters are read and sets the necessary shapes; the actual augmentation is implemented in the .cu file.
LayerSetUp
Reads the layer's configuration from the prototxt file.
template <typename Dtype>
void DataAugmentationLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                                              const vector<Blob<Dtype>*>& top)
{
  // TODO This won't work when applying a net to images of size different from what the net was trained on
  aug_ = this->layer_param_.augmentation_param();  // read the augmentation parameters into aug_
  this->layer_param_.set_reshape_every_iter(false);
  LOG(WARNING) << "DataAugmentationLayer only runs Reshape on setup";
  if (this->blobs_.size() > 0)
    LOG(INFO) << "Skipping data mean blob initialization";
  else {
    if (aug_.recompute_mean()) {
      LOG(INFO) << "Recompute mean";
      this->blobs_.resize(3);
      this->blobs_[1].reset(new Blob<Dtype>());
      this->layer_param_.add_param();
      this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_lr_mult(0.);
      this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_decay_mult(0.);
      this->blobs_[2].reset(new Blob<Dtype>());
      this->layer_param_.add_param();
      this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_lr_mult(0.);
      this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_decay_mult(0.);
    }
    else {
      LOG(INFO) << "Do not recompute mean";
      this->blobs_.resize(1);
    }
    this->blobs_[0].reset(new Blob<Dtype>(1, 1, 1, 1));
    // Never backpropagate
    this->param_propagate_down_.resize(this->blobs_.size(), false);
    this->layer_param_.add_param();
    this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_lr_mult(0.);
    this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_decay_mult(0.);
    // LOG(INFO) << "DEBUG: this->layer_param_.param_size()=" << this->layer_param_.param_size();
    // LOG(INFO) << "DEBUG: Writing layer_param";
    WriteProtoToTextFile(this->layer_param_, "/misc/lmbraid17/sceneflownet/dosovits/matlab/test/message.prototxt");
    // LOG(INFO) << "DEBUG: Finished writing layer_param";
  }
}
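The source never spells out what the frozen blobs created here hold; the annotation below is my reading, inferred from the shapes assigned in the Reshape code of the next section:

// Inferred roles (an assumption based on the Reshape code below):
//   blobs_[0]  shape (1,1,1,1)            -- counter for the running mean,
//                                            reset to 0 at the end of Reshape
//   blobs_[1]  shape (1,C,crop_h,crop_w)  -- accumulated per-pixel mean image
//   blobs_[2]  shape (1,C,1,1)            -- accumulated per-channel mean
// All three get lr_mult = 0 and decay_mult = 0, so the solver never updates them.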
Reshape
Sets the output shape and other necessary bookkeeping.
template <typename Dtype>
void DataAugmentationLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
                                           const vector<Blob<Dtype>*>& top)
{
  // check that the numbers of input and output blobs meet the layer's requirements
  LOG(WARNING) << "Reshape of Augmentation layer should only be called once? Check this";
  CHECK_GE(bottom.size(), 1) << "Data augmentation layer takes one or two input blobs.";
  CHECK_LE(bottom.size(), 2) << "Data augmentation layer takes one or two input blobs.";
  CHECK_GE(top.size(), 1) << "Data augmentation layer outputs one or two output blobs.";
  CHECK_LE(top.size(), 2) << "Data augmentation layer outputs one or two output blobs.";

  // read the shape of the input blob
  const int num = bottom[0]->num();
  const int channels = bottom[0]->channels();
  const int height = bottom[0]->height();
  const int width = bottom[0]->width();

  output_params_ = (top.size() > 1);
  input_params_ = (bottom.size() > 1);  // whether parameters are defined in the layer or come from another augmentation layer
  aug_ = this->layer_param_.augmentation_param();
  discount_coeff_schedule_ = this->layer_param_.coeff_schedule_param();

  // check whether crop width and height are given: if so, crop (the cropped size
  // must be no larger than the original); otherwise keep the original size
  do_cropping_ = (aug_.has_crop_width() && aug_.has_crop_height());
  if (!do_cropping_)
  {
    cropped_width_ = width;
    cropped_height_ = height;
    LOG(WARNING) << "Please enter crop size if you want to perform augmentation";
  }
  else
  {
    cropped_width_ = aug_.crop_width();
    CHECK_GE(width, cropped_width_) << "crop width greater than original";
    cropped_height_ = aug_.crop_height();
    CHECK_GE(height, cropped_height_) << "crop height greater than original";
  }

  // set the output shape
  top[0]->Reshape(num, channels, cropped_height_, cropped_width_);

  // determine the number of augmentation parameters
  AugmentationCoeff coeff;
  num_params_ = coeff.GetDescriptor()->field_count();

  // if this layer's parameters are provided by another augmentation layer, read that (second) input blob
  if (input_params_) {  // with two inputs, the parameters come from the second one
    LOG(INFO) << "Receiving " << num_params_ << " augmentation params";
    all_coeffs_.ReshapeLike(*bottom[1]);  // ReshapeLike: same dimensions as the given blob
  } else  // otherwise, create new parameters
    all_coeffs_.Reshape(num, num_params_, 1, 1);  // create

  // if a second output is requested, it is shaped to carry the parameters
  if (output_params_) {
    top[1]->ReshapeLike(all_coeffs_);
    LOG(INFO) << "Emitting " << num_params_ << " augmentation params";
  }

  // buffers holding one batch worth of transformation coefficients
  coeff_matrices_.reset(new SyncedMemory(num * sizeof(typename AugmentationLayerBase<Dtype>::tTransMat)));
  coeff_chromatic_.reset(new SyncedMemory(num * sizeof(typename AugmentationLayerBase<Dtype>::tChromaticCoeffs)));
  coeff_chromatic_eigen_.reset(new SyncedMemory(num * sizeof(typename AugmentationLayerBase<Dtype>::tChromaticEigenCoeffs)));
  coeff_effect_.reset(new SyncedMemory(num * sizeof(typename AugmentationLayerBase<Dtype>::tEffectCoeffs)));
  chromatic_eigenspace_.reset(new SyncedMemory(sizeof(typename AugmentationLayerBase<Dtype>::tChromaticEigenSpace)));

  // set up the data mean
  if (aug_.recompute_mean()) {  // if the mean is to be recomputed
    ones_.Reshape(1, 1, cropped_height_, cropped_width_);
    caffe_set(ones_.count(), Dtype(1), ones_.mutable_cpu_data());
    this->blobs_[1]->Reshape(1, channels, cropped_height_, cropped_width_);
    this->blobs_[2]->Reshape(1, channels, 1, 1);
  }
  else if (aug_.mean().size() == 3 && !aug_.mean_per_pixel())
  {
    ones_.Reshape(1, 1, cropped_height_, cropped_width_);
    caffe_set(ones_.count(), Dtype(1), ones_.mutable_cpu_data());
    LOG(INFO) << "Using predefined per-pixel mean from proto";
    pixel_rgb_mean_from_proto_.Reshape(1, 3, 1, 1);
    for (int i = 0; i < 3; i++)
      pixel_rgb_mean_from_proto_.mutable_cpu_data()[i] = aug_.mean().Get(i);
  }

  noise_.reset(new SyncedMemory(top[0]->count() / top[0]->num() * sizeof(Dtype)));
  *(this->blobs_[0]->mutable_cpu_data()) = 0;
  // LOG(INFO) << "DEBUG: Reshape done";
}
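coeff_matrices_ reserves one spatial transformation matrix per image in the batch. tTransMat in AugmentationLayerBase appears to be a 2x3 affine matrix that maps an output pixel back to its sampling position in the input; the sketch below illustrates that mapping, with the field names and the row-major layout both assumptions made for illustration.

// Minimal sketch of applying a 2x3 affine matrix (what tTransMat plausibly
// stores); field names t0..t5 and their ordering are assumed.
struct TransMat { float t0, t1, t2, t3, t4, t5; };

// Map output pixel (x, y) back to its sampling position in the input image.
inline void map_pixel(const TransMat& m, float x, float y,
                      float* src_x, float* src_y) {
  *src_x = m.t0 * x + m.t1 * y + m.t2;
  *src_y = m.t3 * x + m.t4 * y + m.t5;
}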
data_augmentation_layer.cu
Defines the computation that runs on the GPU.
Forward_gpu
First, pre-augmentation checks are performed:
- determine the input and output shapes;
- check that the input and output num agree;
- check for NaNs and abnormally large values.
Augmentation then proceeds (by design, the data is only augmented when a crop size is configured):
- if no augmentation parameters are supplied as input, a set is generated according to the layer parameters, and the chosen transforms are logged;
- the transformation matrices are computed from the parameters;
- the data is augmented with those matrices as needed.
If no crop size is configured, the input is copied directly to the output.
Finally, the augmented data is normalized by subtracting the mean. The sketch below paraphrases this control flow.
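A compilable paraphrase of the steps above, with the actual CUDA kernel launches reduced to comments:

// Paraphrased sketch of Forward_gpu; the real implementation performs these
// steps with CUDA kernels and Caffe math routines rather than comments.
void forward_gpu_sketch(bool do_cropping, bool input_params) {
  // 1. shape checks plus NaN / magnitude screening of the input blob
  if (!do_cropping) {
    // no crop configured: copy the input straight to the output and stop
    return;
  }
  if (input_params) {
    // 2a. read the coefficient vector from the second bottom blob
  } else {
    // 2b. sample fresh per-image coefficients from the layer parameters
    //     and log which transforms were chosen
  }
  // 3. convert the coefficients into per-image transformation matrices
  // 4. apply the spatial / chromatic / effect transforms on the GPU
  // 5. subtract the (running) mean from the augmented output
}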