1. GetFiller()
/**
 * @brief Factory that creates the Filler subclass selected by param.type().
 *
 * Recognized type strings: "constant", "gaussian", "positive_unitball",
 * "uniform", "xavier", "msra" and "bilinear".
 *
 * @param param Filler configuration; it is also forwarded to the constructor
 *              of the created filler.
 * @return A filler allocated with `new`. Nothing in this function releases
 *         it, so the caller is responsible for the object's lifetime.
 *
 * For any other type string the CHECK below fails with
 * "Unknown filler name: <type>".
 */
template <typename Dtype>
Filler<Dtype>* GetFiller(const FillerParameter& param) {
  const std::string& name = param.type();

  if (name == "constant") {
    return new ConstantFiller<Dtype>(param);
  }
  if (name == "gaussian") {
    return new GaussianFiller<Dtype>(param);
  }
  if (name == "positive_unitball") {
    return new PositiveUnitballFiller<Dtype>(param);
  }
  if (name == "uniform") {
    return new UniformFiller<Dtype>(param);
  }
  if (name == "xavier") {
    return new XavierFiller<Dtype>(param);
  }
  if (name == "msra") {
    return new MSRAFiller<Dtype>(param);
  }
  if (name == "bilinear") {
    return new BilinearFiller<Dtype>(param);
  }

  // No known filler matched the requested type.
  CHECK(false) << "Unknown filler name: " << param.type();
  // Only reached if the failed CHECK does not terminate the program
  // (presumably it does -- confirm against the logging library in use).
  return static_cast<Filler<Dtype>*>(NULL);
}
2. Filler::Fill()
/**
 * @brief Abstract base class of all fillers.
 *
 * A filler keeps its own copy of the FillerParameter it was constructed with
 * and exposes a single operation, Fill(), which concrete subclasses implement
 * to assign initial values to a blob.
 */
template <typename Dtype>
class Filler {
 public:
  /// Stores a copy of @p param for use by the derived class.
  explicit Filler(const FillerParameter& param) : filler_param_(param) {}

  /// Virtual so that fillers can be destroyed through a Filler pointer.
  virtual ~Filler() {}

  /// Initializes the values of @p blob; must be provided by each subclass.
  virtual void Fill(Blob<Dtype>* blob) = 0;

 protected:
  /// Configuration read by the derived fillers (value, min/max, sparse, ...).
  FillerParameter filler_param_;
};  // class Filler
3. 几个常见的参数初始化方式
// Constant initialization.
/// @brief Fills a Blob with one constant, the configured `value` parameter.
template <typename Dtype>
class ConstantFiller : public Filler<Dtype> {
 public:
  explicit ConstantFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}

  /**
   * @brief Writes filler_param_.value() into every element of @p blob.
   *
   * The blob must be non-empty (checked). The sparse option is not supported:
   * after the data has been written, a check requires it to be -1.
   */
  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* cursor = blob->mutable_cpu_data();
    const int count = blob->count();
    const Dtype value = this->filler_param_.value();
    CHECK(count);
    // Walk the CPU buffer once, storing the same constant in every slot.
    for (int remaining = count; remaining > 0; --remaining) {
      *cursor++ = value;
    }
    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
/// @brief Fills a Blob with uniformly distributed values @f$ x\sim U(a, b) @f$.
template <typename Dtype>
class UniformFiller : public Filler<Dtype> {
 public:
  explicit UniformFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}

  /**
   * @brief Overwrites @p blob with samples drawn by caffe_rng_uniform.
   *
   * The bounds are the `min` and `max` filler parameters (the original notes
   * state that the default range is [0, 1] -- confirm against the parameter
   * definition). The blob must be non-empty, and the sparse option must be
   * left at -1; the latter is checked after the data has been written.
   */
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());

    const Dtype lower = Dtype(this->filler_param_.min());
    const Dtype upper = Dtype(this->filler_param_.max());
    Dtype* const dst = blob->mutable_cpu_data();

    // Draw one uniform sample per blob element.
    caffe_rng_uniform<Dtype>(blob->count(), lower, upper, dst);

    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};
/// @brief Fills a Blob with Gaussian-distributed values drawn using the
///        configured `mean` and `std`, optionally zeroing entries at random.
template <typename Dtype>
class GaussianFiller : public Filler<Dtype> {
 public:
  explicit GaussianFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}

  /**
   * @brief Overwrites @p blob with Gaussian samples, then applies the
   *        optional sparsity mask.
   *
   * The samples come from caffe_rng_gaussian with the `mean` and `std` filler
   * parameters (the original notes state the defaults give N(0, 1) --
   * confirm against the parameter definition). When `sparse` is >= 0, each
   * element is additionally multiplied by a Bernoulli 0/1 draw whose
   * probability of being 1 is sparse / blob->shape(0). The original notes
   * state that `sparse` defaults to -1, i.e. no masking.
   */
  virtual void Fill(Blob<Dtype>* blob) {
    Dtype* data = blob->mutable_cpu_data();
    CHECK(blob->count());

    const Dtype mu = Dtype(this->filler_param_.mean());
    const Dtype sigma = Dtype(this->filler_param_.std());
    caffe_rng_gaussian<Dtype>(blob->count(), mu, sigma,
                              blob->mutable_cpu_data());

    int sparse = this->filler_param_.sparse();
    CHECK_GE(sparse, -1);
    if (sparse < 0) {
      return;  // Sparsity disabled: keep the dense Gaussian values.
    }

    // Sparse initialization is implemented for "weight" blobs; i.e. matrices.
    // These have num == channels == 1; width is number of inputs; height is
    // number of outputs. The 'sparse' variable specifies the mean number
    // of non-zero input weights for a given output.
    CHECK_GE(blob->num_axes(), 1);
    const int num_outputs = blob->shape(0);
    const Dtype keep_probability = Dtype(sparse) / Dtype(num_outputs);

    // One int per blob element holds that element's 0/1 mask value.
    const int total = blob->count();
    rand_vec_.reset(new SyncedMemory(total * sizeof(int)));
    int* keep = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
    caffe_rng_bernoulli(total, keep_probability, keep);

    // Zero out every element whose mask entry is 0.
    for (int idx = 0; idx < total; ++idx) {
      data[idx] *= keep[idx];
    }
  }

 protected:
  /// Backing storage for the Bernoulli mask used by sparse initialization.
  shared_ptr<SyncedMemory> rand_vec_;
};
/**
 * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$ where @f$ a @f$ is
 *        set inversely proportional to the number of incoming nodes,
 *        outgoing nodes, or their average.
 *
 * A Filler based on the paper [Bengio and Glorot 2010]: Understanding
 * the difficulty of training deep feedforward neural networks.
 *
 * The blob is filled by sampling uniformly from [-scale, scale], where
 * scale = sqrt(3 / n) and n is fan_in, fan_out, or their average, depending
 * on the variance_norm option. The input blob should have shape
 * (num, a, b, c) where a * b * c = fan_in and num * b * c = fan_out. Note
 * that this is currently not the case for inner product layers.
 *
 * TODO(dox): make notation in above comment consistent with rest & use LaTeX.
 */
template <typename Dtype>
class XavierFiller : public Filler<Dtype> {
 public:
  explicit XavierFiller(const FillerParameter& param)
      : Filler<Dtype>(param) {}

  /**
   * @brief Overwrites @p blob with samples from U(-scale, +scale).
   *
   * The blob must be non-empty (checked). The sparse option is not supported:
   * after the data has been written, a check requires it to be -1.
   */
  virtual void Fill(Blob<Dtype>* blob) {
    CHECK(blob->count());

    // fan_in  = count / num      (i.e. channels * height * width)
    // fan_out = count / channels
    const int fan_in = blob->count() / blob->num();
    const int fan_out = blob->count() / blob->channels();

    // Select the normalizer n; every mode other than AVERAGE and FAN_OUT
    // uses fan_in.
    Dtype n;
    switch (this->filler_param_.variance_norm()) {
      case FillerParameter_VarianceNorm_AVERAGE:
        n = (fan_in + fan_out) / Dtype(2);
        break;
      case FillerParameter_VarianceNorm_FAN_OUT:
        n = fan_out;
        break;
      default:
        n = fan_in;
        break;
    }

    // Half-width of the sampling interval: scale = sqrt(3 / n).
    const Dtype scale = sqrt(Dtype(3) / n);
    Dtype* const dst = blob->mutable_cpu_data();
    caffe_rng_uniform<Dtype>(blob->count(), -scale, scale, dst);

    CHECK_EQ(this->filler_param_.sparse(), -1)
        << "Sparsity not supported by this Filler.";
  }
};