1. First, prepare the training and test data: 400 training images and 100 test images. Generate the classification label files with the following script:
[root@iZwz99nu4bhzqppacs6hwbZ ~]# cat create_filelist.sh
#!/usr/bin/env sh
DATA=/root/re   # dataset root, containing train/ and test/
MY=/root        # output directory for train.txt and test.txt
echo "Create train.txt..."
rm -f $MY/train.txt
for i in 3 4 5 6 7
do
# list every class-$i image as "train/<file>.jpg <label>"; quote the pattern
# so the shell does not expand the glob before find sees it
find $DATA/train -name "${i}*.jpg" | cut -d'/' -f4-5 | sed "s/$/ $i/" >> $MY/train.txt
done
echo "Create test.txt..."
rm -f $MY/test.txt
for i in 3 4 5 6 7
do
find $DATA/test -name "${i}*.jpg" | cut -d'/' -f4-5 | sed "s/$/ $i/" >> $MY/test.txt
done
echo "All done"
2. Convert the images into LMDB, the data format Caffe consumes:
[root@iZwz99nu4bhzqppacs6hwbZ ~]# ln -s /usr/local/caffe-master/build/tools/convert_imageset /usr/bin/convert_imageset
[root@iZwz99nu4bhzqppacs6hwbZ ~]# convert_imageset -shuffle=true -resize_height=256 -resize_width=256 /root/re/ train.txt img_train_lmdb
I0529 15:25:32.079100 27138 convert_imageset.cpp:86] Shuffling data
I0529 15:25:32.080145 27138 convert_imageset.cpp:89] A total of 400 images.
I0529 15:25:32.080363 27138 db_lmdb.cpp:35] Opened lmdb img_train_lmdb
I0529 15:25:34.468282 27138 convert_imageset.cpp:153] Processed 400 files.
[root@iZwz99nu4bhzqppacs6hwbZ ~]# convert_imageset -shuffle=true -resize_height=256 -resize_width=256 /root/re/ test.txt img_test_lmdb
I0529 15:31:36.012348 27185 convert_imageset.cpp:86] Shuffling data
I0529 15:31:36.013384 27185 convert_imageset.cpp:89] A total of 100 images.
I0529 15:31:36.013618 27185 db_lmdb.cpp:35] Opened lmdb img_test_lmdb
I0529 15:31:36.599316 27185 convert_imageset.cpp:153] Processed 100 files.
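convert_imageset writes each database as a directory; a quick check that both were created:
[root@iZwz99nu4bhzqppacs6hwbZ ~]# ls img_train_lmdb img_test_lmdb    # each should contain data.mdb and lock.mdb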
3. Compute the image mean:
[root@iZwz99nu4bhzqppacs6hwbZ ~]# ln -s /usr/local/caffe-master/build/tools/compute_image_mean /usr/bin/compute_image_mean
[root@iZwz99nu4bhzqppacs6hwbZ ~]# compute_image_mean img_train_lmdb mean.binaryproto
I0529 15:35:20.221936 27203 db_lmdb.cpp:35] Opened lmdb img_train_lmdb
I0529 15:35:20.224038 27203 compute_image_mean.cpp:70] Starting iteration
I0529 15:35:20.507119 27203 compute_image_mean.cpp:101] Processed 400 files.
I0529 15:35:20.507441 27203 compute_image_mean.cpp:108] Write to mean.binaryproto
I0529 15:35:20.508962 27203 compute_image_mean.cpp:114] Number of channels: 3
I0529 15:35:20.509100 27203 compute_image_mean.cpp:119] mean_value channel [0]: 100.257
I0529 15:35:20.509266 27203 compute_image_mean.cpp:119] mean_value channel [1]: 114.453
I0529 15:35:20.509397 27203 compute_image_mean.cpp:119] mean_value channel [2]: 121.706
4. Create the model
Edit the train_val.prototxt file (the stock CaffeNet definition); make sure to change the data-layer source paths and the mean file to your own:
[root@iZwz99nu4bhzqppacs6hwbZ ~]# cat train_val.prototxt
name: "CaffeNet"
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
mirror: true
crop_size: 227
mean_file: "/root/mean.binaryproto"
}
# mean pixel / channel-wise mean instead of mean image
# transform_param {
# crop_size: 227
# mean_value: 104
# mean_value: 117
# mean_value: 123
# mirror: true
# }
data_param {
source: "/root/img_train_lmdb"
batch_size: 32
backend: LMDB
}
}
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
mirror: false
crop_size: 227
mean_file: "/root/mean.binaryproto"
}
# mean pixel / channel-wise mean instead of mean image
# transform_param {
# crop_size: 227
# mean_value: 104
# mean_value: 117
# mean_value: 123
# mirror: false
# }
data_param {
source: "/root/img_test_lmdb"
batch_size: 50
backend: LMDB
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 96
kernel_size: 11
stride: 4
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "norm1"
type: "LRN"
bottom: "pool1"
top: "norm1"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "norm1"
top: "conv2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 2
kernel_size: 5
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "norm2"
type: "LRN"
bottom: "pool2"
top: "norm2"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "norm2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "conv4"
type: "Convolution"
bottom: "conv3"
top: "conv4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "conv5"
type: "Convolution"
bottom: "conv4"
top: "conv5"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "conv5"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "fc6"
type: "InnerProduct"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 1
}
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "drop6"
type: "Dropout"
bottom: "fc6"
top: "fc6"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc7"
type: "InnerProduct"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 1
}
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "drop7"
type: "Dropout"
bottom: "fc7"
top: "fc7"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc8"
type: "InnerProduct"
bottom: "fc7"
top: "fc8"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 1000
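# NOTE: this task has only 5 classes (labels 3-7); num_output could be reduced
# to 8 (or 5 after relabeling to 0-4). 1000 is kept from the stock CaffeNet
# definition and still works because every label here is below 1000.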
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "fc8"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc8"
bottom: "label"
top: "loss"
}
Next, edit solver.prototxt. In the original tutorial the settings mean: run with a batch size of 256 for 450,000 iterations (about 90 epochs); test the network on the validation data every 1,000 iterations; start from a learning rate of 0.01 and reduce it every 100,000 iterations (about 20 epochs); display progress every 20 iterations; train with a weight_decay of 0.0005; and snapshot the current state every 10,000 iterations.
Those are the tutorial's settings; running them as-is would take a very long time, so we scale things down. For reference, 1,000 iterations at batch size 32 is roughly 80 epochs over our 400 training images, and each test pass covers test_iter × test batch size = 2 × 50 = 100 images, i.e. the whole test set:
net: "/root/train_val.prototxt" //根据自己的路径做修改
test_iter: 2 //测试的时候,输入2个batch
test_interval: 50 //每迭代50次,测试一次
base_lr: 0.001 //基础学习率,因为数据量小,0.01就会下降太快了,因此改成0.001
lr_policy: "step"
gamma: 0.1 //0.1学习率变化的比率
stepsize: 100 //每迭代100次,调整一次学习率
display: 20 //每20次迭代,显示一次
max_iter: 1000 //最大迭代1000次
momentum: 0.9 //学习的参数,不用变
weight_decay: 0.0005 //学习的参数,不用变
snapshot: 5000 //每迭代5000次显示状态
snapshot_prefix: "examples/mytest/caffenet_train" //根据自己的路径做修改
solver_mode: GPU //表示用GPU运算
5. Train the model
[root@iZwz99nu4bhzqppacs6hwbZ ~]# cat train_mycaffenet.sh
#!/usr/bin/env sh
set -e
/usr/bin/caffe train \
--solver=/root/solver.prototxt "$@"
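One way to launch it while keeping a log for later inspection (the tee pipeline and log name are my own addition, not part of the original setup):
[root@iZwz99nu4bhzqppacs6hwbZ ~]# sh train_mycaffenet.sh 2>&1 | tee caffenet_train.log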
6. Validate and test the trained model
First, measure accuracy over the test LMDB:
caffe test -model train_val.prototxt -weights caffenet_train_iter_1000.caffemodel
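caffe test runs 50 batches by default; with a test batch size of 50 that wraps several times around our 100 test images, so the caffe binary's standard -iterations flag can pin it to exactly one pass:
caffe test -model train_val.prototxt -weights caffenet_train_iter_1000.caffemodel -iterations 2    # 2 batches x 50 images = 100
The single-image classifier below additionally needs deploy.prototxt, the inference-time variant of train_val.prototxt in which the two Data layers are replaced by an Input layer and SoftmaxWithLoss by a plain Softmax (not shown in this post).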
[root@iZwz99nu4bhzqppacs6hwbZ ~]# /usr/local/caffe-master/build/examples/cpp_classification/classification.bin deploy.prototxt caffenet_train_iter_1000.caffemodel mean.binaryproto synset_words.txt /root/re/test/600.jpg
---------- Prediction for /root/re/test/600.jpg ----------
0.9997 - "flower"
0.0002 - "bus"
0.0001 - "elephant"
0.0000 - "horse"
0.0000 - "dinosaur"
Note: my label files do not start at 0 (the classes are labeled 3-7), so synset_words.txt has to line up with them: the first three lines (indices 0-2) are placeholders and the real class names occupy lines 4-8. An excerpt:
[root@iZwz99nu4bhzqppacs6hwbZ ~]# cat synset_words.txt | head -8
0
1
2
bus
dinosaur
elephant
flower
horse
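Because the label is used as a 0-based line index into synset_words.txt, label N maps to line N+1 of the file. This is easy to check; line 7 should hold the name for label 6, the flower class predicted above:
[root@iZwz99nu4bhzqppacs6hwbZ ~]# sed -n '7p' synset_words.txt    # prints "flower"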
Next steps:
1. Rewrite the classification step (classification.bin) in C/C++ and package it with the trained model, so that only an image path needs to be passed in, and only the highest-probability class is output.
2. Deploying the whole framework took a lot of time. I still don't know what some of the parameters mean, but I know how to run and use the pipeline; I will study the parameters in depth later, alongside TensorFlow.
3. Speech recognition