Part 3: Converting an MXNet model directly to Caffe by parsing layers, without ONNX (ArcFace, fix_gamma=True/False)

1. Code to print an MXNet model's parameters

import mxnet as mx
sym, arg_params, aux_params = mx.model.load_checkpoint('mobilenet_v2', 0)
for param in arg_params:
    print(param)
print(arg_params['conv1_weight'])

arg_params is a dict of {'parameter name': NDArray}.
sym (the Symbol) is the network structure needed to train and run the network.


## Loading the model and its parameters
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    all_layers = sym.get_internals()
    sym = all_layers['fc1_output']  # the sub-network from the input up to this layer
    data_shapes = [('data', (args.batch_size, 3, 112, 112))]  # the teacher model only needs data, no label
    t_module = mx.module.Module(symbol=sym, context=ctx, label_names=[])
    t_module.bind(data_shapes=data_shapes, for_training=False, grad_req='null')
    t_module.set_params(arg_params=arg_params, aux_params=aux_params)
    t_model = t_module
## Loading the model, getting its parameters, and running it to get the output
import cv2
import numpy as np
import mxnet as mx

class Embedding:
    def __init__(self, prefix, epoch, ctx_id=0):
        print('loading', prefix, epoch)
        ctx = mx.gpu(ctx_id)
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        all_layers = sym.get_internals()
        sym = all_layers['fc1_output'] # take the output of a specific layer
        image_size = (112, 112)
        self.image_size = image_size
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None) # rebuild the module on the truncated symbol
        model.bind(for_training=False, data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])    # bind the input shape
        model.set_params(arg_params, aux_params)  # load the parameters
        self.model = model

    def get(self, rimg):
        img = rimg  # a BGR image, e.g. from cv2.imread()
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # img_flip = np.fliplr(img)
        img = np.transpose(img, (2, 0, 1))  # 3*112*112, RGB
        # img_flip = np.transpose(img_flip, (2, 0, 1))
        input_blob = np.zeros((1, 3, self.image_size[1], self.image_size[0]), dtype=np.uint8)
        input_blob[0] = img
        # input_blob[1] = img_flip
        data = mx.nd.array(input_blob)  # wrap the batch as an NDArray
        db = mx.io.DataBatch(data=(data,))
        self.model.forward(db, is_train=False)
        feat = self.model.get_outputs()[0].asnumpy()
        feat = feat.flatten()  # a 512-d embedding for typical ArcFace models
        return feat
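
A minimal usage sketch of the class above (the checkpoint prefix 'model' and the image path are placeholders):

import cv2

embedding = Embedding('model', 0)             # loads model-symbol.json / model-0000.params
img = cv2.imread('face_112x112.jpg')          # must already be an aligned 112x112 face
feat = embedding.get(img)
print(feat.shape)                             # (512,) for typical ArcFace models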
    

Let's look at what the sym, arg_params, aux_params of a ResNet-18 checkpoint contain.

Both dicts hold weights; they are simply split in two.
arg_params : dict of str to NDArray
Model parameter, dict of name to NDArray of net's weights.
Judging from the printout, these are the convolution and fully connected weight parameters.
aux_params : dict of str to NDArray
Model parameter, dict of name to NDArray of net's auxiliary states.
Judging from the printout, these are the BN layers' statistics (moving mean and variance).

sym, arg_params, aux_params = mx.model.load_checkpoint("resnet18",0)
# <Symbol fc1>

>>> for k, v in arg_params.items():
...     print(k, v.shape)
...
stage1_unit1_sc_weight (64, 64, 1, 1)
stage2_unit1_bn2_gamma (128,)
stage2_unit1_sc_weight (128, 64, 1, 1)
stage1_unit1_bn1_gamma (64,)
stage3_unit1_bn1_gamma (128,)
bn_data_beta (3,)
stage3_unit2_conv2_weight (256, 256, 3, 3)
stage1_unit2_bn2_beta (64,)
stage3_unit2_bn1_gamma (256,)
stage2_unit2_bn1_gamma (128,)
stage3_unit2_bn2_gamma (256,)
stage2_unit2_bn1_beta (128,)
stage4_unit1_conv1_weight (512, 256, 3, 3)
stage2_unit1_bn1_beta (64,)
bn_data_gamma (3,)
stage4_unit2_bn1_gamma (512,)
stage3_unit1_conv2_weight (256, 256, 3, 3)
stage2_unit1_conv1_weight (128, 64, 3, 3)
stage3_unit1_sc_weight (256, 128, 1, 1)
fc1_bias (1000,)
stage3_unit2_conv1_weight (256, 256, 3, 3)
stage1_unit1_bn2_beta (64,)
stage2_unit2_conv2_weight (128, 128, 3, 3)
stage3_unit2_bn1_beta (256,)
stage1_unit2_conv2_weight (64, 64, 3, 3)
stage4_unit2_conv2_weight (512, 512, 3, 3)
stage4_unit2_bn2_beta (512,)
stage2_unit1_bn1_gamma (64,)
stage3_unit1_conv1_weight (256, 128, 3, 3)
bn1_beta (512,)
stage4_unit1_bn1_gamma (256,)
stage1_unit2_bn2_gamma (64,)
stage1_unit1_conv1_weight (64, 64, 3, 3)
stage4_unit1_conv2_weight (512, 512, 3, 3)
stage3_unit1_bn1_beta (128,)
stage2_unit2_bn2_beta (128,)
stage3_unit2_bn2_beta (256,)
stage1_unit1_bn2_gamma (64,)
stage2_unit2_bn2_gamma (128,)
stage4_unit1_bn2_beta (512,)
stage1_unit2_bn1_gamma (64,)
stage4_unit2_conv1_weight (512, 512, 3, 3)
bn1_gamma (512,)
stage4_unit1_sc_weight (512, 256, 1, 1)
stage4_unit1_bn1_beta (256,)
stage4_unit2_bn1_beta (512,)
bn0_gamma (64,)
stage1_unit1_conv2_weight (64, 64, 3, 3)
stage4_unit2_bn2_gamma (512,)
conv0_weight (64, 3, 7, 7)
stage2_unit1_conv2_weight (128, 128, 3, 3)
stage2_unit1_bn2_beta (128,)
stage3_unit1_bn2_beta (256,)
fc1_weight (1000, 512)
stage4_unit1_bn2_gamma (512,)
stage3_unit1_bn2_gamma (256,)
bn0_beta (64,)
stage1_unit2_conv1_weight (64, 64, 3, 3)
stage1_unit1_bn1_beta (64,)
stage2_unit2_conv1_weight (128, 128, 3, 3)
stage1_unit2_bn1_beta (64,)
>>> for k, v in aux_params.items():
...     print(k, v.shape)
...
bn_data_moving_var (3,)
stage1_unit1_bn1_moving_var (64,)
stage4_unit1_bn1_moving_var (256,)
stage4_unit2_bn2_moving_mean (512,)
stage3_unit1_bn2_moving_var (256,)
bn0_moving_mean (64,)
stage2_unit2_bn2_moving_mean (128,)
stage1_unit2_bn2_moving_var (64,)
stage3_unit2_bn2_moving_mean (256,)
stage2_unit1_bn2_moving_mean (128,)
stage4_unit1_bn1_moving_mean (256,)
stage4_unit1_bn2_moving_var (512,)
stage4_unit1_bn2_moving_mean (512,)
stage3_unit2_bn1_moving_mean (256,)
stage3_unit1_bn1_moving_mean (128,)
stage3_unit2_bn1_moving_var (256,)
stage1_unit2_bn1_moving_var (64,)
stage1_unit2_bn2_moving_mean (64,)
bn1_moving_mean (512,)
stage4_unit2_bn2_moving_var (512,)
stage1_unit1_bn1_moving_mean (64,)
stage1_unit1_bn2_moving_var (64,)
stage4_unit2_bn1_moving_var (512,)
stage3_unit2_bn2_moving_var (256,)
stage3_unit1_bn2_moving_mean (256,)
stage2_unit1_bn1_moving_var (64,)
bn_data_moving_mean (3,)
bn1_moving_var (512,)
stage4_unit2_bn1_moving_mean (512,)
stage2_unit2_bn1_moving_mean (128,)
stage3_unit1_bn1_moving_var (128,)
bn0_moving_var (64,)
stage1_unit1_bn2_moving_mean (64,)
stage2_unit1_bn1_moving_mean (64,)
stage1_unit2_bn1_moving_mean (64,)
stage2_unit1_bn2_moving_var (128,)
stage2_unit2_bn1_moving_var (128,)
stage2_unit2_bn2_moving_var (128,)

2. Understanding the parameter correspondence between MXNet and Caffe models

1. MXNet parameter names carry suffixes that Caffe does not use: _weight, _bias.
2. Confirm the order in which Caffe stores parameters: blob [0] is the weight, blob [1] is the bias.
3. Within each blob, data holds the parameter values and diff holds the gradients.
4. An MXNet checkpoint has two parameter dicts, aux_params and arg_params; do not mix them up. Both hold model parameters, so what is the difference? aux_params holds auxiliary states such as a BN layer's bn_moving_mean and bn_moving_var, which map to the mean/variance blobs of the Caffe BN layer. arg_params holds all learned weights, such as bn_gamma and bn_beta (which map to Caffe's bn_scale layer) and the conv/fc weights, for example:
pre_fc1_bias
pre_fc1_weight
conv0_weight
conv0_bias
The corresponding Caffe layer name is the key with the _bias / _weight suffix stripped, as the sketch below shows.
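
To make the naming rule concrete, here is a minimal sketch of the key mapping. The helper name mxnet_key_to_caffe is hypothetical, and it ignores the bn_scale renaming for BN gamma/beta, which is handled in section 2.1:

def mxnet_key_to_caffe(key):
    # Strip the MXNet suffix to get the Caffe layer name, and report
    # which Caffe blob index the value belongs to.
    for suffix, blob_idx in (('_weight', 0), ('_bias', 1),
                             ('_gamma', 0), ('_beta', 1),
                             ('_moving_mean', 0), ('_moving_var', 1)):
        if key.endswith(suffix):
            return key[:-len(suffix)], blob_idx
    return key, 0

print(mxnet_key_to_caffe('conv0_weight'))    # ('conv0', 0)
print(mxnet_key_to_caffe('bn0_moving_var'))  # ('bn0', 1)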


Caffe BN layer (prototxt):
layer {
    bottom: "conv1"
    top: "conv1"
    name: "bn_conv1"
    type: "BatchNorm"
    batch_norm_param {
        use_global_stats: false
    }
}
/* Parameter notes:
   use_global_stats: if true, use the stored mean and variance; otherwise compute new ones with a moving average. When omitted, it defaults to true at test time and false at training time.
   moving_average_fraction: decay factor of the moving average, default 0.999.
   eps: value added to the variance to avoid division by zero, default 1e-5 (different frameworks use different defaults).
   Caffe's built-in BatchNorm layer only normalizes; it works together with a Scale layer to complete batch norm, and the BatchNorm layer itself also has parameters (blobs). */
 
layer {
    bottom: "conv1"
    top: "conv1"
    name: "scale_conv1"
    type: "Scale"
    scale_param {
        bias_term: true  # whether to learn a bias term
    }
}
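
To see why MXNet's single BatchNorm maps onto Caffe's BatchNorm + Scale pair, here is a minimal numpy sketch of the inference-time math (function and variable names are illustrative):

import numpy as np

def caffe_batchnorm(x, moving_mean, moving_var, eps=1e-5):
    # Caffe BatchNorm layer: normalization only (use_global_stats: true at test time)
    return (x - moving_mean) / np.sqrt(moving_var + eps)

def caffe_scale(x, gamma, beta):
    # Caffe Scale layer with bias_term: true
    return gamma * x + beta

# Together they equal MXNet BatchNorm at inference:
#   y = gamma * (x - moving_mean) / sqrt(moving_var + eps) + beta
# With fix_gamma=True, MXNet pins gamma to 1, so blob [0] of the Caffe
# Scale layer must be filled with ones instead of the stored gamma.
x = np.random.randn(4)
y = caffe_scale(caffe_batchnorm(x, 0.1, 0.9), gamma=1.0, beta=0.2)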

Reference code: converting the ArcFace face model to Caffe, https://github.com/search?q=MxNet2Caffe-mobilefacenet

https://blog.csdn.net/junqing_wu/article/details/88339514
The Mxnet2Caffe project for converting MXNet models to Caffe is also good.

2.1 Converting MXNet BN layers to Caffe

Before converting, print the parameter name of each layer so the two sides can be compared:
bn0_beta (arg_params) maps to Caffe bn_scale blob [1]
bn0_gamma (arg_params) maps to Caffe bn_scale blob [0]
bn0_moving_mean (aux_params) maps to Caffe BN layer blob [0]
bn0_moving_var (aux_params) maps to Caffe BN layer blob [1]
The four parameters above map to Caffe's bn and bn_scale layers.
fc1 = mx.sym.BatchNorm(data=key_i, fix_gamma=True, eps=2e-5, momentum=0.9, name='fc1')

With fix_gamma=False, the MXNet gamma is copied into Caffe blob [0]:
net.params[key_caffe][0].data.flat = arg_params[key_i].asnumpy().flat
With fix_gamma=True, the MXNet gamma is unused (it is fixed at 1), so Caffe blob [0] is set to 1:
net.params[key_caffe][0].data.flat = 1
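
Putting the four assignments together, here is a sketch of converting one BN layer named bn0 (net is a pycaffe Net built from the converted prototxt; the bn0 / bn0_scale layer names are assumptions that must match your prototxt):

# Caffe BatchNorm layer: blob [0] = mean, [1] = variance, [2] = moving-average factor
net.params['bn0'][0].data[...] = aux_params['bn0_moving_mean'].asnumpy()
net.params['bn0'][1].data[...] = aux_params['bn0_moving_var'].asnumpy()
net.params['bn0'][2].data[...] = 1.0  # 1 means use the stored mean/var as-is

# Caffe Scale layer: blob [0] = gamma, [1] = beta
fix_gamma = False  # True only for the final fc1 BN in ArcFace-style models
if fix_gamma:
    net.params['bn0_scale'][0].data[...] = 1.0  # gamma fixed at 1
else:
    net.params['bn0_scale'][0].data[...] = arg_params['bn0_gamma'].asnumpy()
net.params['bn0_scale'][1].data[...] = arg_params['bn0_beta'].asnumpy()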

Code to print the parameter names:

import cv2
import numpy as np
import mxnet as mx

def mxnet_get_model(ctx, image_size, model_str, layer):
  _vec = model_str.split(',')
  assert len(_vec) == 2
  prefix = _vec[0]
  epoch = int(_vec[1])
  print('loading', prefix, epoch)
  sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
  all_layers = sym.get_internals()
  # print(all_layers)
  sym = all_layers[layer + '_output']
  model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
  model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
  model.set_params(arg_params, aux_params)
  model.aux_params = aux_params
  model.arg_params = arg_params
  return model

def mxnet_get_weights(img, mx_model, last_layer, layer):
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  img = np.transpose(img, (2, 0, 1))
  ctx = mx.cpu(0)  # or mx.gpu(0)
  image_size = (112, 112)
  model = mxnet_get_model(ctx, image_size, mx_model, last_layer)
  # extract weights
  all_keys = list(model.arg_params.keys()) + list(model.aux_params.keys())
  all_keys.sort()  # alphabetical, not network order, but it groups the parameters of one layer together

  layer = "fc1_moving_mean"  # use the printed key names, not the names from the network definition
  for k in all_keys:
      print(k)  # these are the real parameter names; MXNet's printed weight names differ from the layer names in the model definition

  w0 = model.aux_params[layer].asnumpy()
  layer = "fc1_gamma"
  w2 = model.arg_params[layer].asnumpy()
  

Excerpt from the mxnet-to-caffe conversion code:


    elif '_gamma' in key_i and 'relu' not in key_i:
      key_caffe = key_i.replace('_gamma', '_scale')
      net.params[key_caffe][0].data.flat = arg_params[key_i].asnumpy().flat
      # In the model definition: fc1 = mx.sym.BatchNorm(data=key_i, fix_gamma=True, eps=2e-5, momentum=0.9, name='fc1')
      # fix_gamma=True holds only for the last BN layer; every other layer uses False, hence:
      if "fc1_gamma" in key_i:
        net.params[key_caffe][0].data.flat = 1
........
    ## last bn fc
    elif '_moving_mean' in key_i:
      key_caffe = key_i.replace('_moving_mean', '')  # the Caffe BN layer's mean/variance come from aux_params; the Caffe bn_scale layer takes gamma/beta from arg_params
      net.params[key_caffe][0].data.flat = aux_params[key_i].asnumpy().flat
      net.params[key_caffe][2].data[...] = 1  # blob [2] is Caffe's moving-average factor; 1 means use the stored mean/var as-is
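
The excerpt shows only the moving-mean branch; a matching branch for the variance follows the same pattern (a sketch, not verbatim from the project):

    elif '_moving_var' in key_i:
      key_caffe = key_i.replace('_moving_var', '')
      net.params[key_caffe][1].data.flat = aux_params[key_i].asnumpy().flat  # variance goes to BN blob [1]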

2.2 Converting MXNet PReLU to Caffe PReLU

A bn_scale layer has both gamma and beta; both are assigned to the same Caffe layer, at blob indices [0] and [1]. PReLU has only gamma and no other parameter, and the corresponding Caffe layer likewise has exactly one blob.

relu0_gamma maps to the Caffe layer parameter named relu0.
bn0_gamma and bn0_beta map to the Caffe layer named bn0, stored in order at blobs [0] and [1].

    elif '_gamma' in key_i:  # for PReLU (the BN case was already caught above by the 'relu' check)
      key_caffe = key_i.replace('_gamma', '')
      assert (len(net.params[key_caffe]) == 1)
      net.params[key_caffe][0].data.flat = arg_params[key_i].asnumpy().flat
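
For reference, PReLU's single learned parameter is the per-channel negative slope gamma, which is why both frameworks store exactly one blob; a numpy sketch of the op:

import numpy as np

def prelu(x, gamma):
    # gamma: one entry per channel (broadcast over H, W); the only parameter
    return np.maximum(x, 0) + gamma * np.minimum(x, 0)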
2.3 Mapping MXNet conv and fc layers to Caffe

MXNet's convolution and fully connected parameters are all named like conv0_weight, pre_fc1_weight, pre_fc1_bias.
The matching Caffe layer name is the key with the suffix stripped: conv0, pre_fc1.
All of these values come from arg_params.
MXNet and Caffe use the same weight layouts (conv: (out, in, kh, kw); fc: (out, in)), so a flat copy is enough.
In the Caffe layer, blob [0] is the weight and blob [1] is the bias.

    elif '_weight' in key_i:
      key_caffe = key_i.replace('_weight', '')
      if 'fc1' in key_i:  # pre_fc1_weight
        key_caffe = 'pre_fc1'
        print(net.params[key_caffe][0].data.shape)
      net.params[key_caffe][0].data.flat = arg_params[key_i].asnumpy().flat
    elif '_bias' in key_i:
      key_caffe = key_i.replace('_bias', '')
      net.params[key_caffe][1].data.flat = arg_params[key_i].asnumpy().flat
2.4 Dropout, Reshape, Concat and similar layers have no parameters, so nothing needs to be copied; upsampling and other layers will be added here as they come up.
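
After all parameters are copied, it is worth verifying the conversion end to end by running the same input through both frameworks. A minimal sketch, assuming the converted files are model.prototxt / model.caffemodel, the MXNet checkpoint prefix is 'model', and the output blob is 'fc1' (all hypothetical names):

import numpy as np
import mxnet as mx
import caffe

img = np.random.randint(0, 255, size=(1, 3, 112, 112)).astype(np.float32)

# MXNet forward pass up to fc1
sym, arg_params, aux_params = mx.model.load_checkpoint('model', 0)
sym = sym.get_internals()['fc1_output']
mod = mx.mod.Module(symbol=sym, context=mx.cpu(), label_names=None)
mod.bind(for_training=False, data_shapes=[('data', (1, 3, 112, 112))])
mod.set_params(arg_params, aux_params)
mod.forward(mx.io.DataBatch(data=(mx.nd.array(img),)), is_train=False)
mx_feat = mod.get_outputs()[0].asnumpy().flatten()

# Caffe forward pass
net = caffe.Net('model.prototxt', 'model.caffemodel', caffe.TEST)
net.blobs['data'].data[...] = img
caffe_feat = net.forward()['fc1'].flatten()

# Cosine similarity should be very close to 1.0 if the copy is correct
cos = np.dot(mx_feat, caffe_feat) / (np.linalg.norm(mx_feat) * np.linalg.norm(caffe_feat))
print('cosine similarity:', cos)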