The two-stream paper I have been reading, "Two-Stream Convolutional Networks for Action Recognition in Videos", is built on the CNN-M-2048 architecture, so I went looking online for a matching train_val.prototxt. All I could find was the deploy definition ksimonyan/VGG_CNN_M_2048_deploy.prototxt together with VGG_CNN_M_2048.caffemodel, which has already been trained on the ILSVRC-2012 dataset. Working from that deploy.prototxt and an earlier CaffeNet train_val.prototxt, I filled in my own CNN_M_2048_train_val.prototxt as follows:
name: "VGG_CNN_M_2048"
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
mirror: true
crop_size: 224
mean_file: "C:/WENTEST/caffe-master/examples/WEN/data/1210_1206/mean.binaryproto"
}
data_param {
source: "C:/WENTEST/caffe-master/examples/WEN/data/1210_1206/train_lmdb"
batch_size: 64
backend: LMDB
}
}
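# TEST data layer: same mean subtraction, 224x224 center crops, no mirroring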
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
mirror: false
crop_size: 224
mean_file: "C:/WENTEST/caffe-master/examples/WEN/data/1210_1206/mean.binaryproto"
}
data_param {
source: "C:/WENTEST/caffe-master/examples/WEN/data/1210_1206/test_lmdb"
batch_size: 50
backend: LMDB
}
}
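# conv1 through fc7 reproduce the VGG_CNN_M_2048 deploy architecture; lr_mult/decay_mult are 0,
# so these layers stay frozen and simply reuse the pre-trained ILSVRC-2012 weights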
layer {
bottom: "data"
top: "conv1"
name: "conv1"
type: "Convolution"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 96
kernel_size: 7
stride: 2
}
}
layer {
bottom: "conv1"
top: "conv1"
name: "relu1"
type: "ReLU"
}
layer {
bottom: "conv1"
top: "norm1"
name: "norm1"
type: "LRN"
lrn_param {
local_size: 5
alpha: 0.0005
beta: 0.75
k: 2
}
}
layer {
bottom: "norm1"
top: "pool1"
name: "pool1"
type: "Pooling"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
bottom: "pool1"
top: "conv2"
name: "conv2"
type: "Convolution"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 5
stride: 2
}
}
layer {
bottom: "conv2"
top: "conv2"
name: "relu2"
type: "ReLU"
}
layer {
bottom: "conv2"
top: "norm2"
name: "norm2"
type: "LRN"
lrn_param {
local_size: 5
alpha: 0.0005
beta: 0.75
k: 2
}
}
layer {
bottom: "norm2"
top: "pool2"
name: "pool2"
type: "Pooling"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
bottom: "pool2"
top: "conv3"
name: "conv3"
type: "Convolution"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
bottom: "conv3"
top: "conv3"
name: "relu3"
type: "ReLU"
}
layer {
bottom: "conv3"
top: "conv4"
name: "conv4"
type: "Convolution"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
bottom: "conv4"
top: "conv4"
name: "relu4"
type: "ReLU"
}
layer {
bottom: "conv4"
top: "conv5"
name: "conv5"
type: "Convolution"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
bottom: "conv5"
top: "conv5"
name: "relu5"
type: "ReLU"
}
layer {
bottom: "conv5"
top: "pool5"
name: "pool5"
type: "Pooling"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
bottom: "pool5"
top: "fc6"
name: "fc6"
type: "InnerProduct"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
inner_product_param {
num_output: 4096
}
}
layer {
bottom: "fc6"
top: "fc6"
name: "relu6"
type: "ReLU"
}
layer {
bottom: "fc6"
top: "fc6"
name: "drop6"
type: "Dropout"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
bottom: "fc6"
top: "fc7"
name: "fc7"
type: "InnerProduct"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
inner_product_param {
num_output: 2048
}
}
layer {
bottom: "fc7"
top: "fc7"
name: "relu7"
type: "ReLU"
}
layer {
bottom: "fc7"
top: "fc7"
name: "drop7"
type: "Dropout"
dropout_param {
dropout_ratio: 0.5
}
}
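# new classification layer: renamed from fc8 so the 1000-way ImageNet weights are not copied;
# 51 outputs, and the only layer with a non-zero learning rate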
layer {
bottom: "fc7"
top: "fc8_w"
name: "fc8_w"
type: "InnerProduct"
param {
lr_mult: 10
decay_mult: 1
}
param {
lr_mult: 20
decay_mult: 0
}
inner_product_param {
num_output: 51
# fc8_w receives no weights from the caffemodel, so initialize it explicitly
# (Caffe's default filler is constant 0)
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "fc8_w"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc8_w"
bottom: "label"
top: "loss"
}
The idea is to fine-tune only the last layer on top of the pre-trained caffemodel: all earlier layers are frozen (I set their lr_mult to 0), the final layer is renamed (fc8 becomes fc8_w) so its ImageNet weights are not copied over, and its number of outputs is changed from 1000 to 51 classes.
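For reference, below is a minimal solver.prototxt sketch that could drive this fine-tuning run; the net path, learning-rate schedule, iteration counts and snapshot prefix are my own placeholder assumptions and need to be adapted to the actual dataset:

net: "C:/WENTEST/caffe-master/examples/WEN/CNN_M_2048_train_val.prototxt"  # assumed location of the file above
test_iter: 100        # 100 iterations x batch_size 50 = 5000 test images; set to match the test LMDB
test_interval: 500
base_lr: 0.001        # with lr_mult 10/20 the effective rate on fc8_w is 0.01/0.02
lr_policy: "step"
gamma: 0.1
stepsize: 2000
momentum: 0.9
weight_decay: 0.0005
display: 50
max_iter: 10000
snapshot: 2000
snapshot_prefix: "C:/WENTEST/caffe-master/examples/WEN/cnn_m_2048_finetune"
solver_mode: GPU

Training is then started from the pre-trained weights with caffe train -solver solver.prototxt -weights VGG_CNN_M_2048.caffemodel (caffe.exe on Windows). The -weights option copies parameters by layer name, which is exactly why renaming fc8 to fc8_w keeps the original 1000-way classifier out of the new net.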