总结一下最近的工作:LSTM layer 代码,caffe 加入新层 Attention LSTM layer
LSTM layer
关键代码如下,可结合图 1 阅读(图 1 来自博客):
namespace caffe {
/// Fills `names` with the two recurrent input blob names, in order:
/// "h_0" followed by "c_0". Any previous contents of `names` are discarded.
template <typename Dtype>
void LSTMLayer<Dtype>::RecurrentInputBlobNames(vector<string>* names) const {
  names->clear();
  names->push_back("h_0");
  names->push_back("c_0");
}
/// Fills `names` with the two recurrent output blob names, in order:
/// "h_" suffixed with the string form of T_, followed by the literal "c_T".
/// Any previous contents of `names` are discarded.
template <typename Dtype>
void LSTMLayer<Dtype>::RecurrentOutputBlobNames(vector<string>* names) const {
  names->clear();
  // The suffix is whatever int_to_str yields for T_ (presumably its decimal
  // representation -- confirm against the helper's definition).
  names->push_back("h_" + this->int_to_str(this->T_));
  names->push_back("c_T");
}
/// Fills `names` with the single output blob name "h", replacing any
/// previous contents.
template <typename Dtype>
void LSTMLayer<Dtype>::OutputBlobNames(vector<string>* names) const {
  names->assign(1, string("h"));
}
template <typename Dtype>
void LSTMLayer<Dtype>::FillUnrolledNet(NetParameter* net_param) const {
const int num_output = this->layer_param_.recurrent_param().num_output();
CHECK_GT(num_output, 0) << "num_output must be positive";
const FillerParameter& weight_filler =
this->layer_param_.recurrent_param().weight_filler();
const FillerParameter& bias_filler =
this->layer_param_.recurrent_param().bias_filler(); // 权重W和偏差Bias
// Add generic LayerParameter's (without bottoms/tops) of layer types we'll
// use to save redundant code.
LayerParameter hidden_param;
hidden_par