# Code taken from the paper "Temporal Segment Networks: Towards Good Practices for Deep Action Recognition"
####################################### start #######################################
# Name of the network defined by this file.
name: "BN-Inception"
# Training-phase input layer (only included when phase is TRAIN).
layer {
  name: "data"         # name of the input layer
  type: "VideoData"    # type of the input layer
  top: "data"          # the layer has two outputs: data and label
  top: "label"
  video_data_param {
    # Path of the training list. NOTE(review): the original note called this
    # an LMDB path, but the value is a .txt file - confirm the expected format.
    source: "data/ucf101_rgb_train_split_1.txt"
    batch_size: 32                 # original note: 32 images are read per batch
    new_length: 1                  # for RGB input: number of frames stacked into one sample
    num_segments: 3                # each video is split into three segments
    modality: RGB
    shuffle: true                  # shuffle the sample order
    name_pattern: "img_%05d.jpg"   # frame file names: "img_" + zero-padded 5-digit index + ".jpg"
  }
  # Data augmentation. The available parameters differ per data source; see
  # caffe.proto for details. Typical preprocessing steps are scaling
  # (normalisation), mean subtraction, horizontal mirroring (flip) and
  # random cropping.
  transform_param {
    crop_size: 224
    mirror: true
    fix_crop: true
    more_fix_crop: true
    multi_scale: true
    max_distort: 1
    scale_ratios: [1, .875, .75, .66]
    is_flow: false
    # The (104, 117, 123) triple is listed three times - presumably once per
    # segment (num_segments: 3); confirm against the VideoData implementation.
    mean_value: [104, 117, 123, 104, 117, 123, 104, 117, 123]
  }
  include: { phase: TRAIN }
}
# Test-phase input layer (only included when phase is TEST).
layer {
  name: "data"
  type: "VideoData"
  top: "data"
  top: "label"
  video_data_param {
    source: "data/ucf101_rgb_val_split_1.txt"   # validation list file
    batch_size: 1
    new_length: 1
    num_segments: 3
    modality: RGB
    name_pattern: "img_%05d.jpg"
  }
  # Preprocessing for evaluation: fixed-size crop, no mirroring, mean subtraction.
  transform_param {
    crop_size: 224
    mirror: false
    mean_value: [104, 117, 123, 104, 117, 123, 104, 117, 123]
  }
  include { phase: TEST }
}
####################################### input #######################################
# Input reshape layer: takes the "data" blob produced by the data layer and
# emits "reshape_data", which the following layer consumes.
layer {
  name: "reshape_data"
  type: "Reshape"
  bottom: "data"
  top: "reshape_data"
  # The dims are, in order, num, channels, height and width.
  # 0 keeps a dimension unchanged; -1 infers it from the other dimensions.
  reshape_param { shape { dim: -1 dim: 3 dim: 224 dim: 224 } }
}
####################################### conv1 #######################################
# Convolution layer: 7x7 kernel, stride 2, pad 3, 64 outputs.
layer {
  name: "conv1/7x7_s2"
  type: "Convolution"
  bottom: "reshape_data"
  top: "conv1/7x7_s2"
  # lr_mult is the learning-rate multiplier (1 = same as the global rate);
  # decay_mult is the weight-decay multiplier (0 = no decay).
  # By Caffe convention the first param block is the weights, the second the bias.
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 64      # kernel parameters
    pad: 3
    kernel_size: 7
    stride: 2
    weight_filler { type: "xavier" }                  # weights initialised with the xavier filler
    bias_filler { type: "constant" value: 0.2 }       # bias initialised with a constant (0.2 here)
  }
}
# BN layer applied to the conv1 output.
layer {
  name: "conv1/7x7_s2_bn"
  type: "BN"
  bottom: "conv1/7x7_s2"
  top: "conv1/7x7_s2_bn"
  param { lr_mult: 1 decay_mult: 0 }
  param { lr_mult: 1 decay_mult: 0 }
  bn_param {
    # Unlike the inception_3a BN layers in this file, this one is not frozen.
    frozen: false
    slope_filler { type: "constant" value: 1 }
    bias_filler { type: "constant" value: 0 }
  }
}
# Activation layer: ReLU applied in place (bottom and top are the same blob).
layer {
  name: "conv1/relu_7x7"
  type: "ReLU"
  bottom: "conv1/7x7_s2_bn"
  top: "conv1/7x7_s2_bn"
}
# Pooling layer: 3x3 max pooling with stride 2.
layer {
  name: "pool1/3x3_s2"
  type: "Pooling"
  bottom: "conv1/7x7_s2_bn"
  top: "pool1/3x3_s2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
####################################### inception_3a #######################################
# inception_3a, 1x1 branch: 1x1 convolution with 64 outputs.
# NOTE(review): the bottom "pool2/3x3_s2" is not produced by any layer visible
# in this file - the conv2/pool2 section appears to be omitted.
layer {
  name: "inception_3a/1x1"
  type: "Convolution"
  bottom: "pool2/3x3_s2"
  top: "inception_3a/1x1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 64
    kernel_size: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.2 }
  }
}
# BN layer for the 1x1 branch (frozen: true).
layer {
  name: "inception_3a/1x1_bn"
  type: "BN"
  bottom: "inception_3a/1x1"
  top: "inception_3a/1x1_bn"
  param { lr_mult: 1 decay_mult: 0 }
  param { lr_mult: 1 decay_mult: 0 }
  bn_param {
    frozen: true
    slope_filler { type: "constant" value: 1 }
    bias_filler { type: "constant" value: 0 }
  }
}
# In-place ReLU on the 1x1 branch.
layer {
  name: "inception_3a/relu_1x1"
  type: "ReLU"
  bottom: "inception_3a/1x1_bn"
  top: "inception_3a/1x1_bn"
}
# inception_3a, 3x3 branch: 1x1 reduction convolution with 64 outputs.
layer {
  name: "inception_3a/3x3_reduce"
  type: "Convolution"
  bottom: "pool2/3x3_s2"
  top: "inception_3a/3x3_reduce"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 64
    kernel_size: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.2 }
  }
}
# BN layer for the 3x3 reduction (frozen: true).
layer {
  name: "inception_3a/3x3_reduce_bn"
  type: "BN"
  bottom: "inception_3a/3x3_reduce"
  top: "inception_3a/3x3_reduce_bn"
  param { lr_mult: 1 decay_mult: 0 }
  param { lr_mult: 1 decay_mult: 0 }
  bn_param {
    frozen: true
    slope_filler { type: "constant" value: 1 }
    bias_filler { type: "constant" value: 0 }
  }
}
# In-place ReLU on the 3x3 reduction output.
layer {
  name: "inception_3a/relu_3x3_reduce"
  type: "ReLU"
  bottom: "inception_3a/3x3_reduce_bn"
  top: "inception_3a/3x3_reduce_bn"
}
# inception_3a, 3x3 branch: 3x3 convolution (pad 1) with 64 outputs.
layer {
  name: "inception_3a/3x3"
  type: "Convolution"
  bottom: "inception_3a/3x3_reduce_bn"
  top: "inception_3a/3x3"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 64
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.2 }
  }
}
# BN layer for the 3x3 convolution (frozen: true).
layer {
  name: "inception_3a/3x3_bn"
  type: "BN"
  bottom: "inception_3a/3x3"
  top: "inception_3a/3x3_bn"
  param { lr_mult: 1 decay_mult: 0 }
  param { lr_mult: 1 decay_mult: 0 }
  bn_param {
    frozen: true
    slope_filler { type: "constant" value: 1 }
    bias_filler { type: "constant" value: 0 }
  }
}
# In-place ReLU on the 3x3 branch output.
layer {
  name: "inception_3a/relu_3x3"
  type: "ReLU"
  bottom: "inception_3a/3x3_bn"
  top: "inception_3a/3x3_bn"
}
# inception_3a, double-3x3 branch: 1x1 reduction convolution with 64 outputs.
layer {
  name: "inception_3a/double_3x3_reduce"
  type: "Convolution"
  bottom: "pool2/3x3_s2"
  top: "inception_3a/double_3x3_reduce"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 64
    kernel_size: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.2 }
  }
}
# BN layer for the double-3x3 reduction (frozen: true).
layer {
  name: "inception_3a/double_3x3_reduce_bn"
  type: "BN"
  bottom: "inception_3a/double_3x3_reduce"
  top: "inception_3a/double_3x3_reduce_bn"
  param { lr_mult: 1 decay_mult: 0 }
  param { lr_mult: 1 decay_mult: 0 }
  bn_param {
    frozen: true
    slope_filler { type: "constant" value: 1 }
    bias_filler { type: "constant" value: 0 }
  }
}
# In-place ReLU on the double-3x3 reduction output.
layer {
  name: "inception_3a/relu_double_3x3_reduce"
  type: "ReLU"
  bottom: "inception_3a/double_3x3_reduce_bn"
  top: "inception_3a/double_3x3_reduce_bn"
}
# inception_3a, double-3x3 branch: first 3x3 convolution (pad 1), 96 outputs.
layer {
  name: "inception_3a/double_3x3_1"
  type: "Convolution"
  bottom: "inception_3a/double_3x3_reduce_bn"
  top: "inception_3a/double_3x3_1"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 96
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.2 }
  }
}
# BN layer for the first double-3x3 convolution (frozen: true).
layer {
  name: "inception_3a/double_3x3_1_bn"
  type: "BN"
  bottom: "inception_3a/double_3x3_1"
  top: "inception_3a/double_3x3_1_bn"
  param { lr_mult: 1 decay_mult: 0 }
  param { lr_mult: 1 decay_mult: 0 }
  bn_param {
    frozen: true
    slope_filler { type: "constant" value: 1 }
    bias_filler { type: "constant" value: 0 }
  }
}
# In-place ReLU after the first double-3x3 convolution.
layer {
  name: "inception_3a/relu_double_3x3_1"
  type: "ReLU"
  bottom: "inception_3a/double_3x3_1_bn"
  top: "inception_3a/double_3x3_1_bn"
}
# inception_3a, double-3x3 branch: second 3x3 convolution (pad 1), 96 outputs.
layer {
  name: "inception_3a/double_3x3_2"
  type: "Convolution"
  bottom: "inception_3a/double_3x3_1_bn"
  top: "inception_3a/double_3x3_2"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 96
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.2 }
  }
}
# BN layer for the second double-3x3 convolution (frozen: true).
layer {
  name: "inception_3a/double_3x3_2_bn"
  type: "BN"
  bottom: "inception_3a/double_3x3_2"
  top: "inception_3a/double_3x3_2_bn"
  param { lr_mult: 1 decay_mult: 0 }
  param { lr_mult: 1 decay_mult: 0 }
  bn_param {
    frozen: true
    slope_filler { type: "constant" value: 1 }
    bias_filler { type: "constant" value: 0 }
  }
}
# In-place ReLU after the second double-3x3 convolution.
layer {
  name: "inception_3a/relu_double_3x3_2"
  type: "ReLU"
  bottom: "inception_3a/double_3x3_2_bn"
  top: "inception_3a/double_3x3_2_bn"
}
# inception_3a, pooling branch: 3x3 average pooling, stride 1, pad 1.
layer {
  name: "inception_3a/pool"
  type: "Pooling"
  bottom: "pool2/3x3_s2"
  top: "inception_3a/pool"
  pooling_param {
    pool: AVE
    kernel_size: 3
    stride: 1
    pad: 1
  }
}
# inception_3a, pooling branch: 1x1 projection convolution with 32 outputs.
layer {
  name: "inception_3a/pool_proj"
  type: "Convolution"
  bottom: "inception_3a/pool"
  top: "inception_3a/pool_proj"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 32
    kernel_size: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.2 }
  }
}
# BN layer for the pooling projection (frozen: true).
layer {
  name: "inception_3a/pool_proj_bn"
  type: "BN"
  bottom: "inception_3a/pool_proj"
  top: "inception_3a/pool_proj_bn"
  param { lr_mult: 1 decay_mult: 0 }
  param { lr_mult: 1 decay_mult: 0 }
  bn_param {
    frozen: true
    slope_filler { type: "constant" value: 1 }
    bias_filler { type: "constant" value: 0 }
  }
}
# In-place ReLU on the pooling projection output.
layer {
  name: "inception_3a/relu_pool_proj"
  type: "ReLU"
  bottom: "inception_3a/pool_proj_bn"
  top: "inception_3a/pool_proj_bn"
}
# Concatenates the four inception_3a branch outputs into a single blob.
# The order of the bottoms is kept as in the original definition.
layer {
  name: "inception_3a/output"
  type: "Concat"
  bottom: "inception_3a/1x1_bn"           # 1x1 branch
  bottom: "inception_3a/3x3_bn"           # 3x3 branch
  bottom: "inception_3a/double_3x3_2_bn"  # double-3x3 branch
  bottom: "inception_3a/pool_proj_bn"     # pooling branch
  top: "inception_3a/output"
}
####################################### global pool #######################################
# 7x7 average pooling with stride 1.
# NOTE(review): the bottom "inception_5b/output" is not produced by any layer
# visible in this file - the later inception sections appear to be omitted.
layer {
  name: "global_pool"
  type: "Pooling"
  bottom: "inception_5b/output"
  top: "global_pool"
  pooling_param {
    pool: AVE
    kernel_size: 7
    stride: 1
  }
}
# Dropout applied in place on "global_pool" with ratio 0.8.
layer {
  name: "dropout"
  type: "Dropout"
  bottom: "global_pool"
  top: "global_pool"
  dropout_param {
    dropout_ratio: 0.8
  }
}
####################################### loss accuracy #######################################
# Fully connected layer: maps "global_pool" to 101 outputs
# (presumably one per action class of the UCF101 lists used by the data layers).
layer {
  name: "fc-action"
  type: "InnerProduct"
  bottom: "global_pool"
  top: "fc"
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 101
    weight_filler { type: "gaussian" std: 0.001 }
    bias_filler { type: "constant" value: 0 }
  }
}
# Reshapes "fc" to (-1, 1, 3, 101); the 3 matches num_segments in the data
# layers, so each row presumably holds the scores of one segment.
layer {
  name: "reshape_fc"
  type: "Reshape"
  bottom: "fc"
  top: "reshape_fc"
  reshape_param { shape { dim: [-1, 1, 3, 101] } }
}
# Segment consensus: average pooling with a 3x1 kernel, i.e. it averages over
# the axis of size 3 created by the reshape above.
layer {
  name: "segment_consensus"
  type: "Pooling"
  bottom: "reshape_fc"
  top: "pool_fusion"
  pooling_param { pool: AVE kernel_h: 3 kernel_w: 1 }
}
# Loss layer: softmax loss between "pool_fusion" and "label", with the softmax
# taken along axis 3.
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "pool_fusion"
  bottom: "label"
  top: "loss"
  softmax_param {
    axis: 3
  }
}
# Accuracy layer, evaluated along axis 3; only included in the TEST phase.
layer {
  name: "accuracy_top1"
  type: "Accuracy"
  bottom: "pool_fusion"
  bottom: "label"
  top: "accuracy"
  accuracy_param {
    axis: 3
  }
  include { phase: TEST }
}