Learning Spatiotemporal Features with 3D Convolutional Networks (Tran et al., ICCV 2015)
1. create_volume_mean.sh
# Compute the mean volume over the clips listed in train_01.lst and write it
# to ucf101_train_mean.binaryproto (the mean_file used by the data layers).
# The 16 / 128 / 171 arguments match new_length / new_height / new_width there.
# NOTE(review): the "1" and the trailing "10" are presumably the sampling rate
# and the dropping rate - confirm against the tool's usage message.
GLOG_logtostderr=1 ../../build/tools/compute_volume_mean_from_list.bin \
    ../c3d_finetuning/train_01.lst \
    16 128 171 1 \
    ucf101_train_mean.binaryproto \
    10
2. train_ucf101.sh
# Start training with the solver definition; GLOG_logtostderr=1 makes glog
# write its log output to stderr.
GLOG_logtostderr=1 \
    ../../build/tools/train_net.bin \
    conv3d_ucf101_solver.prototxt
3. test_ucf101.sh
# Evaluate the iteration-60000 snapshot with the test network definition.
# NOTE(review): 1396 is presumably the number of test batches to run, and
# "GPU 0" the compute mode and device id - confirm against test_net usage.
GLOG_logtostderr=1 ../../build/tools/test_net.bin \
    conv3d_ucf101_test.prototxt \
    conv3d_ucf101_iter_60000 \
    1396 GPU 0
a. conv3d_ucf101_solver.prototxt
# --- Network definitions ---
train_net: "conv3d_ucf101_train.prototxt"
test_net: "conv3d_ucf101_test.prototxt"

# --- Evaluation schedule ---
# Every 1000 training iterations, run 100 batches through the test net.
test_interval: 1000
test_iter: 100

# --- Optimisation ---
# Momentum SGD. With the "step" policy the learning rate is multiplied by
# gamma (0.1) every stepsize (20000) iterations, starting from base_lr.
base_lr: 0.003
lr_policy: "step"
gamma: 0.1
stepsize: 20000
momentum: 0.9
weight_decay: 0.005
# Training stops after this many iterations.
max_iter: 60000

# --- Logging and snapshots ---
# Print progress every 20 iterations; save a snapshot every 1000 iterations
# under the "conv3d_ucf101" prefix.
display: 20
snapshot: 1000
snapshot_prefix: "conv3d_ucf101"

# --- Device ---
# Run on GPU number 0.
solver_mode: GPU
device_id: 0
b. conv3d_ucf101_train.prototxt
# Training network.
name: "deep_c3d_ucf101"
layers {
  # Input layer: emits clip tensors on "data" and class ids on "label".
  name: "data"
  type: VIDEO_DATA
  top: "data"
  top: "label"
  image_data_param {
    # Training clip list, shuffling enabled.
    source: "../c3d_finetuning/train_01.lst"
    shuffle: true
    # NOTE(review): use_image presumably means clips are read from image
    # frames rather than video files - confirm against the C3D guide.
    use_image: true
    # Clip geometry: 16 frames, each resized to 128 (height) x 171 (width).
    new_length: 16
    new_height: 128
    new_width: 171
    # 112 crop with mirroring enabled.
    crop_size: 112
    mirror: true
    # Mean volume file (the output name used by create_volume_mean.sh).
    mean_file: "ucf101_train_mean.binaryproto"
    batch_size: 30
    # NOTE(review): show_data looks like a debug/visualisation switch,
    # disabled here - confirm.
    show_data: 0
  }
}
# ----------- 1st layer group ---------------
layers {
  # Stage 1 convolution on the input clip: 64 filters, 3x3 spatial, 3 temporal.
  name: "conv1a"
  type: CONVOLUTION3D
  bottom: "data"
  top: "conv1a"
  # Per-blob multipliers in Caffe blob order (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 64
    # Spatial kernel / padding / stride.
    kernel_size: 3
    pad: 1
    stride: 1
    # Temporal kernel / padding.
    kernel_depth: 3
    temporal_pad: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layers {
  # ReLU applied in place on conv1a.
  name: "relu1a"
  type: RELU
  bottom: "conv1a"
  top: "conv1a"
}
layers {
  # Max pooling: 2x2 spatial window with stride 2; temporal kernel and
  # stride are both 1 in this stage.
  name: "pool1"
  type: POOLING3D
  bottom: "conv1a"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
    kernel_depth: 1
    temporal_stride: 1
  }
}
# ------------- 2nd layer group --------------
layers {
  # Stage 2 convolution on pool1: 128 filters, 3x3 spatial, 3 temporal.
  name: "conv2a"
  type: CONVOLUTION3D
  bottom: "pool1"
  top: "conv2a"
  # Per-blob multipliers in Caffe blob order (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 128
    # Spatial kernel / padding.
    kernel_size: 3
    pad: 1
    # Temporal kernel / padding.
    kernel_depth: 3
    temporal_pad: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layers {
  # ReLU applied in place on conv2a.
  name: "relu2a"
  type: RELU
  bottom: "conv2a"
  top: "conv2a"
}
layers {
  # Max pooling with window 2 and stride 2 in both space and time.
  name: "pool2"
  type: POOLING3D
  bottom: "conv2a"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
    kernel_depth: 2
    temporal_stride: 2
  }
}
# ----------------- 3rd layer group --------------
layers {
  # Stage 3 convolution on pool2: 256 filters, 3x3 spatial, 3 temporal.
  name: "conv3a"
  type: CONVOLUTION3D
  bottom: "pool2"
  top: "conv3a"
  # Per-blob multipliers in Caffe blob order (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    # Spatial kernel / padding.
    kernel_size: 3
    pad: 1
    # Temporal kernel / padding.
    kernel_depth: 3
    temporal_pad: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layers {
  # ReLU applied in place on conv3a.
  name: "relu3a"
  type: RELU
  bottom: "conv3a"
  top: "conv3a"
}
layers {
  # Max pooling with window 2 and stride 2 in both space and time.
  name: "pool3"
  type: POOLING3D
  bottom: "conv3a"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
    kernel_depth: 2
    temporal_stride: 2
  }
}
# --------- 4th layer group
layers {
  # Stage 4 convolution on pool3: 256 filters, 3x3 spatial, 3 temporal.
  name: "conv4a"
  type: CONVOLUTION3D
  bottom: "pool3"
  top: "conv4a"
  # Per-blob multipliers in Caffe blob order (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    # Spatial kernel / padding.
    kernel_size: 3
    pad: 1
    # Temporal kernel / padding.
    kernel_depth: 3
    temporal_pad: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layers {
  # ReLU applied in place on conv4a.
  name: "relu4a"
  type: RELU
  bottom: "conv4a"
  top: "conv4a"
}
layers {
  # Max pooling with window 2 and stride 2 in both space and time.
  name: "pool4"
  type: POOLING3D
  bottom: "conv4a"
  top: "pool4"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
    kernel_depth: 2
    temporal_stride: 2
  }
}
# --------------- 5th layer group --------
layers {
  # Stage 5 convolution on pool4: 256 filters, 3x3 spatial, 3 temporal.
  name: "conv5a"
  type: CONVOLUTION3D
  bottom: "pool4"
  top: "conv5a"
  # Per-blob multipliers in Caffe blob order (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    # Spatial kernel / padding.
    kernel_size: 3
    pad: 1
    # Temporal kernel / padding.
    kernel_depth: 3
    temporal_pad: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layers {
  # ReLU applied in place on conv5a.
  name: "relu5a"
  type: RELU
  bottom: "conv5a"
  top: "conv5a"
}
layers {
  # Max pooling with window 2 and stride 2 in both space and time; its
  # output feeds fc6.
  name: "pool5"
  type: POOLING3D
  bottom: "conv5a"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
    kernel_depth: 2
    temporal_stride: 2
  }
}
# ---------------- fc layers -------------
layers {
  # First fully connected layer: pool5 -> 2048 outputs.
  name: "fc6"
  type: INNER_PRODUCT
  bottom: "pool5"
  top: "fc6"
  # Per-blob multipliers in Caffe blob order (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 2048
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 1 }
  }
}
layers {
  # ReLU applied in place on fc6.
  name: "relu6"
  type: RELU
  bottom: "fc6"
  top: "fc6"
}
layers {
  # Dropout applied in place on fc6 with ratio 0.5.
  name: "drop6"
  type: DROPOUT
  bottom: "fc6"
  top: "fc6"
  dropout_param { dropout_ratio: 0.5 }
}
layers {
  # Second fully connected layer: fc6 -> 2048 outputs.
  name: "fc7"
  type: INNER_PRODUCT
  bottom: "fc6"
  top: "fc7"
  # Per-blob multipliers in Caffe blob order (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 2048
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 1 }
  }
}
layers {
  # ReLU applied in place on fc7.
  name: "relu7"
  type: RELU
  bottom: "fc7"
  top: "fc7"
}
layers {
  # Dropout applied in place on fc7 with ratio 0.5.
  name: "drop7"
  type: DROPOUT
  bottom: "fc7"
  top: "fc7"
  dropout_param { dropout_ratio: 0.5 }
}
layers {
  # Classifier layer: fc7 -> 101 scores (one per UCF101 class).
  name: "fc8"
  type: INNER_PRODUCT
  bottom: "fc7"
  top: "fc8"
  # Per-blob multipliers in Caffe blob order (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 101
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layers {
  # Training objective: softmax loss between the fc8 scores and the labels.
  name: "loss"
  type: SOFTMAX_LOSS
  bottom: "fc8"
  bottom: "label"
}
c. conv3d_ucf101_test.prototxt
# Test network.
name: "deep_c3d_ucf101"
layers {
  # Input layer: emits clip tensors on "data" and class ids on "label".
  name: "data"
  type: VIDEO_DATA
  top: "data"
  top: "label"
  image_data_param {
    # Test clip list, shuffling enabled.
    source: "../c3d_finetuning/test_01.lst"
    shuffle: true
    # NOTE(review): use_image presumably means clips are read from image
    # frames rather than video files - confirm against the C3D guide.
    use_image: true
    # Clip geometry: 16 frames, each resized to 128 (height) x 171 (width).
    new_length: 16
    new_height: 128
    new_width: 171
    # 112 crop; mirroring is disabled for evaluation.
    crop_size: 112
    mirror: false
    # Same mean volume file as the training network.
    mean_file: "ucf101_train_mean.binaryproto"
    batch_size: 30
    # NOTE(review): show_data looks like a debug/visualisation switch,
    # disabled here - confirm.
    show_data: 0
  }
}
# ----------- 1st layer group ---------------
layers {
  # Stage 1 convolution (test net): 64 filters, 3x3 spatial, 3 temporal.
  # Settings mirror the conv1a layer of the training definition.
  name: "conv1a"
  type: CONVOLUTION3D
  bottom: "data"
  top: "conv1a"
  # Per-blob multipliers in Caffe blob order (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 64
    # Spatial kernel / padding / stride.
    kernel_size: 3
    pad: 1
    stride: 1
    # Temporal kernel / padding.
    kernel_depth: 3
    temporal_pad: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layers {
  # ReLU applied in place on conv1a.
  name: "relu1a"
  type: RELU
  bottom: "conv1a"
  top: "conv1a"
}
layers {
  # Max pooling: 2x2 spatial window with stride 2; temporal kernel and
  # stride are both 1 in this stage.
  name: "pool1"
  type: POOLING3D
  bottom: "conv1a"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
    kernel_depth: 1
    temporal_stride: 1
  }
}
# ------------- 2nd layer group --------------
layers {
  # Stage 2 convolution (test net): 128 filters, 3x3 spatial, 3 temporal.
  name: "conv2a"
  type: CONVOLUTION3D
  bottom: "pool1"
  top: "conv2a"
  # Per-blob multipliers in Caffe blob order (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 128
    # Spatial kernel / padding.
    kernel_size: 3
    pad: 1
    # Temporal kernel / padding.
    kernel_depth: 3
    temporal_pad: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layers {
  # ReLU applied in place on conv2a.
  name: "relu2a"
  type: RELU
  bottom: "conv2a"
  top: "conv2a"
}
layers {
  # Max pooling with window 2 and stride 2 in both space and time.
  name: "pool2"
  type: POOLING3D
  bottom: "conv2a"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
    kernel_depth: 2
    temporal_stride: 2
  }
}
# ----------------- 3rd layer group --------------
layers {
  # Stage 3 convolution (test net): 256 filters, 3x3 spatial, 3 temporal.
  name: "conv3a"
  type: CONVOLUTION3D
  bottom: "pool2"
  top: "conv3a"
  # Per-blob multipliers in Caffe blob order (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    # Spatial kernel / padding.
    kernel_size: 3
    pad: 1
    # Temporal kernel / padding.
    kernel_depth: 3
    temporal_pad: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layers {
  # ReLU applied in place on conv3a.
  name: "relu3a"
  type: RELU
  bottom: "conv3a"
  top: "conv3a"
}
layers {
  # Max pooling with window 2 and stride 2 in both space and time.
  name: "pool3"
  type: POOLING3D
  bottom: "conv3a"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
    kernel_depth: 2
    temporal_stride: 2
  }
}
# --------- 4th layer group
layers {
  # Stage 4 convolution (test net): 256 filters, 3x3 spatial, 3 temporal.
  name: "conv4a"
  type: CONVOLUTION3D
  bottom: "pool3"
  top: "conv4a"
  # Per-blob multipliers in Caffe blob order (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    # Spatial kernel / padding.
    kernel_size: 3
    pad: 1
    # Temporal kernel / padding.
    kernel_depth: 3
    temporal_pad: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layers {
  # ReLU applied in place on conv4a.
  name: "relu4a"
  type: RELU
  bottom: "conv4a"
  top: "conv4a"
}
layers {
  # Max pooling with window 2 and stride 2 in both space and time.
  name: "pool4"
  type: POOLING3D
  bottom: "conv4a"
  top: "pool4"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
    kernel_depth: 2
    temporal_stride: 2
  }
}
# --------------- 5th layer group --------
layers {
  # Stage 5 convolution (test net): 256 filters, 3x3 spatial, 3 temporal.
  name: "conv5a"
  type: CONVOLUTION3D
  bottom: "pool4"
  top: "conv5a"
  # Per-blob multipliers in Caffe blob order (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    # Spatial kernel / padding.
    kernel_size: 3
    pad: 1
    # Temporal kernel / padding.
    kernel_depth: 3
    temporal_pad: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layers {
  # ReLU applied in place on conv5a.
  name: "relu5a"
  type: RELU
  bottom: "conv5a"
  top: "conv5a"
}
layers {
  # Max pooling with window 2 and stride 2 in both space and time; its
  # output feeds fc6.
  name: "pool5"
  type: POOLING3D
  bottom: "conv5a"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
    kernel_depth: 2
    temporal_stride: 2
  }
}
# ---------------- fc layers -------------
layers {
  # First fully connected layer (test net): pool5 -> 2048 outputs.
  name: "fc6"
  type: INNER_PRODUCT
  bottom: "pool5"
  top: "fc6"
  # Per-blob multipliers in Caffe blob order (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 2048
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 1 }
  }
}
layers {
  # ReLU applied in place on fc6.
  name: "relu6"
  type: RELU
  bottom: "fc6"
  top: "fc6"
}
layers {
  # Dropout layer on fc6 (ratio 0.5), kept so the test definition matches
  # the training one layer for layer.
  name: "drop6"
  type: DROPOUT
  bottom: "fc6"
  top: "fc6"
  dropout_param { dropout_ratio: 0.5 }
}
layers {
  # Second fully connected layer (test net): fc6 -> 2048 outputs.
  name: "fc7"
  type: INNER_PRODUCT
  bottom: "fc6"
  top: "fc7"
  # Per-blob multipliers in Caffe blob order (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 2048
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 1 }
  }
}
layers {
  # ReLU applied in place on fc7.
  name: "relu7"
  type: RELU
  bottom: "fc7"
  top: "fc7"
}
layers {
  # Dropout layer on fc7 (ratio 0.5), kept so the test definition matches
  # the training one layer for layer.
  name: "drop7"
  type: DROPOUT
  bottom: "fc7"
  top: "fc7"
  dropout_param { dropout_ratio: 0.5 }
}
layers {
  # Classifier layer (test net): fc7 -> 101 scores (one per UCF101 class).
  name: "fc8"
  type: INNER_PRODUCT
  bottom: "fc7"
  top: "fc8"
  # Per-blob multipliers in Caffe blob order (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 101
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layers {
  # Softmax over the fc8 scores, producing class probabilities on "prob".
  name: "prob"
  type: SOFTMAX
  bottom: "fc8"
  top: "prob"
}
layers {
  # Accuracy of the "prob" predictions against the ground-truth labels.
  name: "accuracy"
  type: ACCURACY
  bottom: "prob"
  bottom: "label"
  top: "accuracy"
}