其实fc层可以用conv层取代
以lenet为例,将lenet.prototxt修改为lenet_conv.prototxt:第一个全连接层改为卷积层时,其kernel_size可以在加载模型时根据输入特征图的空间尺寸计算得到;其后各全连接层对应的卷积层kernel_size为1。
# LeNet rewritten as a fully-convolutional net: the two fully-connected
# layers (ip1, ip2) are replaced by equivalent Convolution layers
# (ip1_conv, ip2_conv) so weights can be transplanted via net surgery.
name: "LeNet"
# Input: batch of 64 single-channel 28x28 images (MNIST-sized).
layer {
name: "data"
type: "Input"
top: "data"
input_param { shape: { dim: 64 dim: 1 dim: 28 dim: 28 } }
}
# conv1: 20 filters of 5x5, stride 1 -> 20 x 24x24 for the 28x28 input.
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 20
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
# 2x2 max pooling, stride 2 -> 20 x 12x12.
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
# conv2: 50 filters of 5x5, stride 1 -> 50 x 8x8.
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 50
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
# 2x2 max pooling, stride 2 -> 50 x 4x4.
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
# Former fc layer "ip1": a 4x4 convolution spanning the full 4x4 spatial
# extent of pool2, so its output is 500 x 1x1 — numerically identical to
# the original 500-unit InnerProduct layer.
layer {
name: "ip1_conv"
type: "Convolution"
bottom: "pool2"
top: "ip1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 500
stride:1
kernel_size:4
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "ip1"
top: "ip1"
}
# Former fc layer "ip2": a 1x1 convolution over the 500 x 1x1 map,
# producing the 10 class scores.
layer {
name: "ip2_conv"
type: "Convolution"
bottom: "ip1"
top: "ip2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 10
stride:1
kernel_size:1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
# Softmax over the 10 outputs (inference only; no loss layer here).
layer {
name: "prob"
type: "Softmax"
bottom: "ip2"
top: "prob"
}
运行下面的转换脚本:
#-*-coding:utf-8-*-
# --------------------------------------------------------
# convert_fc2fullconv
# Copyright (c) 2018 Tsinghua
# Licensed under company License
# Written by Dezan Zhao
# --------------------------------------------------------
import numpy as np
import math
import sys

# Directory containing the pycaffe bindings (the caffe checkout's python/ dir).
caffe_root = '../../python'
# Fix: the original inserted caffe_root + 'python', i.e. '../../pythonpython',
# which is not a valid path; insert caffe_root itself so `import caffe` resolves.
sys.path.insert(0, caffe_root)
import caffe

# Names of the fully-connected layers in the source net definition.
params = ['ip1', 'ip2']
# Names of the matching convolution layers in the converted definition;
# kept in the same order as `params` so they can be zipped together.
params_full_conv = ['ip1_conv', 'ip2_conv']
def convert_full_conv(model_define,model_weight,model_define_fc,model_weight_fc):
    '''
    @brief: copy fully-connected layer weights into the matching
            convolution layers of a fully-convolutional net definition.
    @param: model_define, src_prototxt (net with fc layers)
    @param: model_weight, src_model (trained caffemodel)
    @param: model_define_fc, dst_prototxt (fc layers rewritten as conv)
    @param: model_weight_fc, dst_model (path the converted weights are saved to)
    @return: the converted caffe.Net
    '''
    # Load the original net to read the fc weights and biases.
    net = caffe.Net(model_define, model_weight, caffe.TEST)
    fc_params = {pr: (net.params[pr][0].data, net.params[pr][1].data) for pr in params}
    # Load the conv-only definition with the same weights file; the renamed
    # layers (ip1_conv/ip2_conv) do not match by name, so they keep their
    # filler initialization until overwritten below.
    net_fc = caffe.Net(model_define_fc, model_weight, caffe.TEST)
    conv_params = {pr: (net_fc.params[pr][0].data, net_fc.params[pr][1].data) for pr in params_full_conv}
    # print() call form works under both Python 2 (parenthesized expression
    # statement) and Python 3, unlike the original print statements.
    for fc in params:
        print('{} weights are {} dimensional and biases are {} dimensional'.format(fc, fc_params[fc][0].shape, fc_params[fc][1].shape))
    for conv in params_full_conv:
        print('{} weights are {} dimensional and biases are {} dimensional'.format(conv, conv_params[conv][0].shape, conv_params[conv][1].shape))
    # An fc weight matrix (out, in) and the equivalent conv kernel
    # (out, c, h, w) hold the same numbers in the same order, so a flat
    # copy performs the reshape implicitly.
    for pr, pr_conv in zip(params, params_full_conv):
        conv_params[pr_conv][0].flat = fc_params[pr][0].flat  # flat unrolls the arrays
        conv_params[pr_conv][1][...] = fc_params[pr][1]
    net_fc.save(model_weight_fc)
    print('convert done!')
    return net_fc
if __name__ == '__main__':
    # Original prototxt (net definition with fully-connected layers).
    # Renamed from `file` to avoid shadowing the Python builtin.
    src_proto = 'lenet.prototxt'
    # Copy of the original prototxt with each fc layer rewritten as a
    # Convolution layer and renamed params -> params_full_conv.
    conv_proto = 'lenet_conv.prototxt'
    # Trained weights for the original definition.
    src_model = 'lenet_iter_10000.caffemodel'
    # Output path for the converted (fully-convolutional) weights.
    conv_model = './lenet_conv.caffemodel'
    convert_full_conv(src_proto, src_model, conv_proto, conv_model)
具体参考官方文档
http://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/net_surgery.ipynb