1、mxnet model 参数输出打印的代码
import mxnet as mx

# Load epoch 0 of the 'mobilenet_v2' checkpoint: the symbol (network graph)
# plus the two parameter dictionaries.
sym, arg_params, aux_params = mx.model.load_checkpoint('mobilenet_v2', 0)

# arg_params is a dict mapping parameter name -> NDArray; list every name,
# then dump one weight tensor as an example.
for name in arg_params:
    print(name)
print(arg_params['conv1_weight'])
arg_params是一个字典结构 {‘参数名’:参数}
symbol 是训练网络需要的网络结构
## 加载模型,加载参数代码
# Load a checkpoint, truncate the graph at the 'fc1' output, and bind a
# frozen (inference-only) teacher module.
sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
all_layers = sym.get_internals()
sym = all_layers['fc1_output']  # sub-graph from the input up to layer 'fc1'
data_shapes = [('data', (args.batch_size, 3, 112, 112))]  # teacher model only needs data, no label
# NOTE(review): `sym_high`, `ctx`, `t_arg_params`, `t_aux_params` are defined
# elsewhere — presumably the same objects as `sym` / `arg_params` /
# `aux_params` loaded above; confirm against the full script.
t_module = mx.module.Module(symbol=sym_high, context=ctx, label_names=[])
t_module.bind(data_shapes=data_shapes, for_training=False, grad_req='null')  # grad_req='null': no gradients
t_module.set_params(arg_params=t_arg_params, aux_params=t_aux_params)
t_model=t_module
## 加载 模型,得到模型参数,运行模型得到输出
class Embedding:
    """Wrap an MXNet checkpoint as a feature-embedding extractor.

    Loads the symbol and parameters, truncates the graph at the 'fc1'
    output, and binds a single-image (1, 3, 112, 112) inference module
    on one GPU.
    """

    def __init__(self, prefix, epoch, ctx_id=0):
        # prefix/epoch identify the checkpoint files; ctx_id selects the GPU.
        print('loading', prefix, epoch)
        ctx = mx.gpu(ctx_id)
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        all_layers = sym.get_internals()
        sym = all_layers['fc1_output']  # keep only the graph up to layer 'fc1'
        image_size = (112, 112)
        self.image_size = image_size
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)  # rebuild the module
        model.bind(for_training=False, data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])  # bind for inference
        model.set_params(arg_params, aux_params)  # load the checkpoint weights
        self.model = model

    def get(self, rimg):
        """Run one image through the network and return the flattened
        'fc1' feature vector.

        rimg: BGR image array — assumes shape (112, 112, 3) uint8, as
        produced by cv2.imread — TODO confirm against callers.
        """
        img = rimg#cv2.imread()
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # img_flip = np.fliplr(img)
        img = np.transpose(img, (2, 0, 1)) # 3*112*112, RGB
        # img_flip = np.transpose(img_flip, (2, 0, 1))
        input_blob = np.zeros((1, 3, self.image_size[1], self.image_size[0]), dtype=np.uint8)
        input_blob[0] = img
        # input_blob[1] = img_flip
        data = mx.nd.array(input_blob)  # wrap as NDArray input
        db = mx.io.DataBatch(data=(data,))
        self.model.forward(db, is_train=False)
        feat = self.model.get_outputs()[0].asnumpy()
        feat = feat.reshape([-1, feat.shape[0]])#* feat.shape[1]]) #512 shape
        feat = feat.flatten()
        return feat
我们看一下resnet18输出的sym, arg_params, aux_params是什么
两个都是权重,分开了
arg_params : dict of str to NDArray
Model parameter, dict of name to NDArray of net’s weights.
看打印结果是 卷积全连接的权重参数
aux_params : dict of str to NDArray
Model parameter, dict of name to NDArray of net’s auxiliary states.
看打印的结果,是bn 层的权重参数
sym, arg_params, aux_params = mx.model.load_checkpoint("resnet18",0)
# <Symbol fc1>
for k,v in arg_params.items():
... print (k,v.shape)
...
stage1_unit1_sc_weight (64, 64, 1, 1)
stage2_unit1_bn2_gamma (128,)
stage2_unit1_sc_weight (128, 64, 1, 1)
stage1_unit1_bn1_gamma (64,)
stage3_unit1_bn1_gamma (128,)
bn_data_beta (3,)
stage3_unit2_conv2_weight (256, 256, 3, 3)
stage1_unit2_bn2_beta (64,)
stage3_unit2_bn1_gamma (256,)
stage2_unit2_bn1_gamma (128,)
stage3_unit2_bn2_gamma (256,)
stage2_unit2_bn1_beta (128,)
stage4_unit1_conv1_weight (512, 256, 3, 3)
stage2_unit1_bn1_beta (64,)
bn_data_gamma (3,)
stage4_unit2_bn1_gamma (512,)
stage3_unit1_conv2_weight (256, 256, 3, 3)
stage2_unit1_conv1_weight (128, 64, 3, 3)
stage3_unit1_sc_weight (256, 128, 1, 1)
fc1_bias (1000,)
stage3_unit2_conv1_weight (256, 256, 3, 3)
stage1_unit1_bn2_beta (64,)
stage2_unit2_conv2_weight (128, 128, 3, 3)
stage3_unit2_bn1_beta (256,)
stage1_unit2_conv2_weight (64, 64, 3, 3)
stage4_unit2_conv2_weight (512, 512, 3, 3)
stage4_unit2_bn2_beta (512,)
stage2_unit1_bn1_gamma (64,)
stage3_unit1_conv1_weight (256, 128, 3, 3)
bn1_beta (512,)
stage4_unit1_bn1_gamma (256,)
stage1_unit2_bn2_gamma (64,)
stage1_unit1_conv1_weight (64, 64, 3, 3)
stage4_unit1_conv2_weight (512, 512, 3, 3)
stage3_unit1_bn1_beta (128,)
stage2_unit2_bn2_beta (128,)
stage3_unit2_bn2_beta (256,)
stage1_unit1_bn2_gamma (64,)
stage2_unit2_bn2_gamma (128,)
stage4_unit1_bn2_beta (512,)
stage1_unit2_bn1_gamma (64,)
stage4_unit2_conv1_weight (512, 512, 3, 3)
bn1_gamma (512,)
stage4_unit1_sc_weight (512, 256, 1, 1)
stage4_unit1_bn1_beta (256,)
stage4_unit2_bn1_beta (512,)
bn0_gamma (64,)
stage1_unit1_conv2_weight (64, 64, 3, 3)
stage4_unit2_bn2_gamma (512,)
conv0_weight (64, 3, 7, 7)
stage2_unit1_conv2_weight (128, 128, 3, 3)
stage2_unit1_bn2_beta (128,)
stage3_unit1_bn2_beta (256,)
fc1_weight (1000, 512)
stage4_unit1_bn2_gamma (512,)
stage3_unit1_bn2_gamma (256,)
bn0_beta (64,)
stage1_unit2_conv1_weight (64, 64, 3, 3)
stage1_unit1_bn1_beta (64,)
stage2_unit2_conv1_weight (128, 128, 3, 3)
stage1_unit2_bn1_beta (64,)
>>> for k,v in aux_params.items():
... print(k,v.shape)
...
bn_data_moving_var (3,)
stage1_unit1_bn1_moving_var (64,)
stage4_unit1_bn1_moving_var (256,)
stage4_unit2_bn2_moving_mean (512,)
stage3_unit1_bn2_moving_var (256,)
bn0_moving_mean (64,)
stage2_unit2_bn2_moving_mean (128,)
stage1_unit2_bn2_moving_var (64,)
stage3_unit2_bn2_moving_mean (256,)
stage2_unit1_bn2_moving_mean (128,)
stage4_unit1_bn1_moving_mean (256,)
stage4_unit1_bn2_moving_var (512,)
stage4_unit1_bn2_moving_mean (512,)
stage3_unit2_bn1_moving_mean (256,)
stage3_unit1_bn1_moving_mean (128,)
stage3_unit2_bn1_moving_var (256,)
stage1_unit2_bn1_moving_var (64,)
stage1_unit2_bn2_moving_mean (64,)
bn1_moving_mean (512,)
stage4_unit2_bn2_moving_var (512,)
stage1_unit1_bn1_moving_mean (64,)
stage1_unit1_bn2_moving_var (64,)
stage4_unit2_bn1_moving_var (512,)
stage3_unit2_bn2_moving_var (256,)
stage3_unit1_bn2_moving_mean (256,)
stage2_unit1_bn1_moving_var (64,)
bn_data_moving_mean (3,)
bn1_moving_var (512,)
stage4_unit2_bn1_moving_mean (512,)
stage2_unit2_bn1_moving_mean (128,)
stage3_unit1_bn1_moving_var (128,)
bn0_moving_var (64,)
stage1_unit1_bn2_moving_mean (64,)
stage2_unit1_bn1_moving_mean (64,)
stage1_unit2_bn1_moving_mean (64,)
stage2_unit1_bn2_moving_var (128,)
stage2_unit2_bn1_moving_var (128,)
stage2_unit2_bn2_moving_var (128,)
>>>
2、了解mxnet 模型和caffe 模型参数的对应关系,
1、在mxnet字典中,存有caffe不需要的后缀,_weight _bias
2、需要确认caffe的参数保存顺序 [0]是weight [1]是bias,
3、 [0]权重,[1]偏置,data:参数,diff:梯度
4、mxnet 模型有两个参数字典,一个是aux_params,一个是arg_params,一定不要搞混了,两个都是模型的参数,区别在哪里呢?
前者是辅助参数类似,bn 层的bn_moving_mean,bn_moving_var, 对应caffe bn 层的均值方差参数, 后者是args_param, 例如bn_gamma,bn_beta,对应caffe的bn_scale参数,两个学习到权重参数, args_param,存储的是所有学习的权重参数,例如
pre_fc1_bias
pre_fc1_weight
conv0_weight
conv0_bias
对应caffe时,参数名字去掉后缀 _bias、_weight
caffe bn层
caffe bn层
layer {
bottom: "conv1"
top: "conv1"
name: "bn_conv1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: false
}
}
/* 参数解答use_global_stats:如果为真,则使用保存的均值和方差,否则采用滑动平均计算新的均值和方差。该参数缺省的时候,如果是测试阶段则等价为真,如果是训练阶段则等价为假。
moving_average_fraction :滑动平均的衰减系数,默认为0.999
eps:分母附加值,防止除以方差时出现除0操作,默认为1e-5(不同框架采用的默认值不一样),
caffe自带的BatchNorm层,与scale和在一起完成batchnorm,batchnorm层该层也是有参数的*/
layer {
bottom: "conv1"
top: "conv1"
name: "scale_conv1"
type: "Scale"
scale_param {
bias_term: true  # 是否使用偏置(prototxt 注释需用 #,不能用 //)
}
}
使用参考代码,arcface 人脸模型转换caffe, https://github.com/search?q=MxNet2Caffe-mobilefacenet,
https://blog.csdn.net/junqing_wu/article/details/88339514
Mxnet2Caffe mxnet模型转换caffe该工程也很好
2.1 mxnet bn 层,caffe 转换
转换之前先打印每层对应的参数名字,用来比较
bn0_beta arg_params 对应caffe bn_scale [1]
bn0_gamma arg_params 对应caffe bn_scale [0]
bn0_moving_mean aux_params 对应caffe bn层 [0]
bn0_moving_var aux_params 对应caffe bn层 [1]
上面四个参数对应caffe 的 bn bn_scale 层
fc1 = mx.sym.BatchNorm(data=key_i, fix_gamma=True, eps=2e-5, momentum=0.9, name='fc1')
fix_gamma=False 区别,mxnet 参数赋值给caffe[0]
net.params[key_caffe][0].data.flat = arg_params[key_i].asnumpy().flat
fix_gamma=True 区别,mxnet 参数gamma参数不用,caffe【0】等于1
net.params[key_caffe][0].data.flat = 1
打印参数代码
def mxnet_get_model(ctx, image_size, model_str, layer):
    """Load an MXNet checkpoint described by model_str ('prefix,epoch'),
    truncate the graph at <layer>_output, and bind it for single-image
    inference on the given context.

    The raw parameter dicts are also attached to the module as
    .arg_params / .aux_params so callers can inspect weights directly.
    """
    _vec = model_str.split(',')
    assert len(_vec)==2
    prefix = _vec[0]
    epoch = int(_vec[1])
    print('loading',prefix, epoch)
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    all_layers = sym.get_internals()
    # print(all_layers)
    sym = all_layers[layer+'_output']  # sub-graph ending at the requested layer
    model = mx.mod.Module(symbol=sym, context=ctx, label_names = None)
    model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
    model.set_params(arg_params, aux_params)
    # Expose the raw parameter dicts for weight extraction/conversion.
    model.aux_params = aux_params
    model.arg_params = arg_params
    return model
def mxnet_get_weights(img, mx_model, last_layer, layer):
    """Load a checkpointed MXNet model, print all parameter names, and
    extract example weight arrays from aux_params / arg_params.

    img        : BGR image array (converted but otherwise unused here).
    mx_model   : 'prefix,epoch' checkpoint string for mxnet_get_model.
    last_layer : layer name at which the symbol graph is truncated.
    layer      : unused — shadowed below by the hard-coded demo names.

    Returns (w0, w2): the 'fc1_moving_mean' aux array and the
    'fc1_gamma' arg array, as numpy arrays.
    """
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.transpose(img, (2, 0, 1))  # HWC -> CHW
    ctx = mx.cpu(0)  # mx.gpu(0)
    image_size = (112, 112)
    model = mxnet_get_model(ctx, image_size, mx_model, last_layer)
    # extract weights.
    # Fix: dict_keys objects cannot be concatenated with '+' on Python 3;
    # build lists and sort. Sorted order is not network order, but it does
    # group a layer's parameters next to each other.
    all_keys = sorted(list(model.arg_params.keys()) + list(model.aux_params.keys()))
    for k in all_keys:
        # These are the checkpoint parameter names — they differ from the
        # layer names used when the network symbol was defined.
        print(k)
    layer = "fc1_moving_mean"  # names must match the printed checkpoint names
    w0 = model.aux_params[layer].asnumpy()
    layer = "fc1_gamma"
    w2 = model.arg_params[layer].asnumpy()
    # Original demo discarded these; return them so the function is usable.
    return w0, w2
mxnet to caffe 局部代码
elif '_gamma' in key_i and 'relu' not in key_i:  # BatchNorm gamma -> Caffe Scale layer blob [0]
    key_caffe = key_i.replace('_gamma','_scale')
    net.params[key_caffe][0].data.flat = arg_params[key_i].asnumpy().flat
    # From: fc1 = mx.sym.BatchNorm(data=key_i, fix_gamma=True, eps=2e-5, momentum=0.9, name='fc1')
    # fix_gamma=True is used only for the last BN layer ('fc1'); all other BN
    # layers use fix_gamma=False, so only fc1's gamma is a constant 1 in Caffe.
    if "fc1_gamma" in key_i:
        net.params[key_caffe][0].data.flat = 1
........
## last bn fc
elif '_moving_mean' in key_i:
    # Caffe BatchNorm layer holds mean/var (from MXNet aux_params); the
    # companion Scale layer holds gamma/beta (from arg_params).
    key_caffe = key_i.replace('_moving_mean', '')
    net.params[key_caffe][0].data.flat = aux_params[key_i].asnumpy().flat
    # Blob [2] is Caffe BatchNorm's scale factor; 1 means the stored
    # mean/var are used as-is.
    net.params[key_caffe][2].data[...] = 1
2.2 mxnet prelu 转换 caffe prelu
bn_scale 层有gamma,beta 都赋值给caffe 的同一个层,权重赋值分别对应【0】,【1】索引处
prelu 只有有gamma,没有其他参数,对应caffe 该层也只有一个参数,
relu0_gamma 对应caffe 层参数名字是 relu0
bn0_gamma,bn0_beta 对应caffe 层参数名字是 bn0 顺序存放两个模型参数【0】,【1】
elif '_gamma' in key_i: # for prelu: single 'gamma' (slope) blob -> Caffe PReLU blob [0]
    key_caffe = key_i.replace('_gamma','')
    # PReLU has exactly one learnable blob in Caffe.
    assert (len(net.params[key_caffe]) == 1)
    net.params[key_caffe][0].data.flat = arg_params[key_i].asnumpy().flat
2.3 mxnet conv,fc 对应caffe
卷积和全连接层的mxnet参数都是,conv0_weight ,pre_fc1_bias
pre_fc1_weight
对应caffe层名字去除后缀 conv0,pre_fc1
这些参数赋值都是 arg_params
caffe 层的[0] [1] 分别代表权重
elif '_weight' in key_i:  # Conv / FC weight -> Caffe blob [0]
    key_caffe = key_i.replace('_weight','')
    if 'fc1' in key_i: #pre_fc1_weight
        # NOTE(review): maps any fc1 weight to the Caffe layer 'pre_fc1';
        # presumably handles both 'fc1_weight' and 'pre_fc1_weight' — confirm.
        key_caffe = 'pre_fc1'
    print net.params[key_caffe][0].data.shape  # Python 2 print statement
    net.params[key_caffe][0].data.flat = arg_params[key_i].asnumpy().flat
elif '_bias' in key_i:  # Conv / FC bias -> Caffe blob [1]
    key_caffe = key_i.replace('_bias','')
    net.params[key_caffe][1].data.flat = arg_params[key_i].asnumpy().flat