1. First training approach: "fine-tune all layers"
import os
import argparse
import logging
logging.basicConfig(level=logging.DEBUG)
from common import find_mxnet
from common import data, fit, modelzoo
import mxnet as mx
def get_fine_tune_model(symbol, arg_params, num_classes, layer_name):
"""
symbol: the pre-trained network symbol
arg_params: the argument parameters of the pre-trained model
num_classes: the number of classes for the fine-tune datasets
layer_name: the layer name before the last fully-connected layer
"""
all_layers = symbol.get_internals()
net = all_layers[layer_name+'_output']
net = mx.symbol.FullyConnected(data=net, num_hidden=num_classes, name='fc')
net = mx.symbol.SoftmaxOutput(data=net, name='softmax')
new_args = dict({k:arg_params[k] for k in arg_params if 'fc' not in k})
return (net, new_args)
if __name__ == "__main__":
# parse args
parser = argparse.ArgumentParser(description="fine-tune a dataset",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
train = fit.add_fit_args(parser)
data.add_data_args(parser)
aug = data.add_data_aug_args(parser)
parser.add_argument('--pretrained-model', type=str,
help='the pre-trained model')
parser.add_argument('--layer-before-fullc', type=str, default='flatten0',
help='the name of the layer before the last fullc layer')
# use less augmentations for fine-tune
data.set_data_aug_level(parser, 1)
# use a small learning rate and less regularizations
parser.set_defaults(image_shape='3,224,224', num_epochs=30,
lr=.01, lr_step_epochs='20', wd=0, mom=0)
args = parser.parse_args()
# load pretrained model
dir_path = os.path.dirname(os.path.realpath(__file__))
(prefix, epoch) = modelzoo.download_model(
args.pretrained_model, os.path.join(dir_path, 'model'))
if prefix is None:
(prefix, epoch) = (args.pretrained_model, args.load_epoch)
sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
# remove the last fullc layer
(new_sym, new_args) = get_fine_tune_model(
sym, arg_params, args.num_classes, args.layer_before_fullc)
# train
fit.fit(args = args,
network = new_sym,
data_loader = data.get_rec_iter,
arg_params = new_args,
aux_params = aux_params)
2. Second training approach: "freeze the pre-trained layers, train only the new layers"
2.1
# If you need some layers' parameters to stay fixed during training while only the remaining layers are updated,
# pass something like fixed_param_names = ['layer_name1', 'layer_name2'] when constructing the Module object;
# the parameters named there will then be excluded from the updates, as shown below.
model = mx.mod.Module(
    context = devs,
    symbol = network,
    fixed_param_names = ['layer_name1', 'layer_name2']  # placeholder names, as in the comment above
)
# lr and lr_scheduler are assumed to be defined earlier (e.g., by the learning-rate scheduler setup)
optimizer_params = {
'learning_rate': lr,
'momentum' : args.mom,
'wd' : args.wd,
'lr_scheduler': lr_scheduler,
'multi_precision': True}
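To make the idea concrete, below is a minimal self-contained sketch of this approach: every parameter loaded from the checkpoint is listed in fixed_param_names, so only a newly attached output layer is trained. The checkpoint prefix 'model/resnet-50', the cut point 'flatten0', the class count 10 and the layer name 'newfc' are illustrative assumptions, not values taken from the snippet above.

import mxnet as mx
# load a pre-trained checkpoint (prefix and epoch are assumed for illustration)
sym, arg_params, aux_params = mx.model.load_checkpoint('model/resnet-50', 0)
# cut the network before its original classifier and attach a new output layer
all_layers = sym.get_internals()
net = all_layers['flatten0_output']
net = mx.symbol.FullyConnected(data=net, num_hidden=10, name='newfc')
net = mx.symbol.SoftmaxOutput(data=net, name='softmax')
# freeze every parameter that comes from the checkpoint; only 'newfc' remains trainable
fixed = [name for name in net.list_arguments()
         if name in arg_params and not name.startswith('newfc')]
model = mx.mod.Module(symbol=net, context=mx.gpu(0), fixed_param_names=fixed)
# model.fit(...) can then be called exactly as in the "train all layers" example in section 1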
2.2
def get_fine_tune_model(model_name):
# load model
symbol, arg_params, aux_params = mx.model.load_checkpoint("data/pre_train/"+model_name, 0)
# model tuning
all_layers = symbol.get_internals()
if model_name=="vgg16":
net = all_layers['drop7_output']
else:
net = all_layers['flatten0_output']
net = mx.symbol.FullyConnected(data=net, num_hidden=num_classes, name='newfc1')
net = mx.symbol.SoftmaxOutput(data=net, name='softmax')
# drop the pre-trained weights of the final fc layer so the new output layer is trained from scratch
new_args = dict({k:arg_params[k] for k in arg_params if 'fc1' not in k})
return (net, new_args,aux_params)
# Model training. During training, set the learning rate of the layers that should not be adjusted to 0, so that only the last few layers are effectively fine-tuned.
def fit(symbol, arg_params, aux_params, iter_train, iter_val, class_str, num_epoch, batch_size, gpu_avaliable):
devs = [mx.gpu(i) for i in gpu_avaliable]
model = mx.mod.Module(symbol=symbol, context=devs)
# metric
com_metric = mx.metric.CompositeEvalMetric()
com_metric.add(mx.metric.Accuracy())
com_metric.add(mAP(class_str)) # remove if unnecessary
# optimizer: freeze the pre-trained layers by giving every parameter in arg_params a learning-rate multiplier of 0;
# the newly added fc layer is not in arg_params, so it keeps the default multiplier of 1 and is the only part that gets trained
sgd = mx.optimizer.Optimizer.create_optimizer('sgd')
finetune_lr = dict({k: 0 for k in arg_params})
sgd.set_lr_mult(finetune_lr)
# training
model.fit(iter_train, iter_val,
num_epoch=num_epoch,
arg_params=arg_params,
aux_params=aux_params,
allow_missing=True,
batch_end_callback = mx.callback.Speedometer(batch_size, 10),
kvstore='device',
optimizer=sgd,
optimizer_params={'learning_rate':0.01},
initializer=mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2),
eval_metric='acc')
return model.score(iter_val, com_metric)
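A quick way to check that the zero-multiplier trick really freezes the pre-trained layers is to compare one frozen parameter before and after training. A minimal sketch, assuming trained_model is a Module trained as above and 'conv1_weight' is the name of one frozen parameter (both names are assumptions for illustration):

import numpy as np
# pull the parameters back out of the trained module
trained_args, trained_aux = trained_model.get_params()
# a frozen parameter should be numerically identical to its pre-trained value
frozen_ok = np.allclose(arg_params['conv1_weight'].asnumpy(),
                        trained_args['conv1_weight'].asnumpy())
print('frozen layer unchanged:', frozen_ok)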
set_lr_mult(dict): the input is a dict.
The purpose of this function is to build a mapping from parameters to learning-rate multipliers that the optimizer can consume. When fine-tuning, you often need to freeze certain layers (usually the early layers, which capture generic features, while the later layers are tuned to capture task-specific ones), so the layers of a loaded model should not all use the same learning rate; this function is what sets the per-layer rates.
For example, take a three-layer network consisting of conv_0, bn_0 and fc_1.
If you want to leave the first two layers untouched and only adjust the last one, the corresponding learning rates should be 0, 0 and lr; written as a dict of multipliers this is {'conv_0': 0, 'bn_0': 0, 'fc_1': 1}.
# step 1: create an optimizer
optimizer = mx.optimizer.Optimizer.create_optimizer('sgd', learning_rate=0.01)
# step 2: collect the names of the layers to freeze
finetune_layers = {'name1', 'name2'}  # just an example
# step 3: build the per-layer learning-rate multipliers (0 = frozen)
finetune_lr = dict({k: 0 for k in finetune_layers})
# step 4: apply the multipliers to the optimizer
optimizer.set_lr_mult(finetune_lr)
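Note that the dict passed to set_lr_mult is keyed by parameter name (e.g. conv_0_weight), not by layer name. A minimal sketch for the three-layer example above, with assumed parameter names:

import mxnet as mx
optimizer = mx.optimizer.Optimizer.create_optimizer('sgd', learning_rate=0.01)
# freeze conv_0 and bn_0 by setting their multipliers to 0;
# fc_1 is simply left out of the dict, so it keeps the default multiplier of 1
lr_mult = {
    'conv_0_weight': 0, 'conv_0_bias': 0,
    'bn_0_gamma': 0, 'bn_0_beta': 0,
}
optimizer.set_lr_mult(lr_mult)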
import logging
import mxnet as mx
import numpy as np
import os.path, time,sys
from mAP_metric import mAP
print ("\n******File updated %ds ago%s******" % (time.time()-os.path.getmtime(sys.argv[0])))# file updatation check
# data iterators: generate data iterator from .rec file
def get_iterators(batch_size, rec_train, rec_val, lst_train, data_shape=(3, 224, 224)):
train = mx.io.ImageRecordIter(
path_imgrec=rec_train,
path_imglist=lst_train,
data_name='data',
label_name='softmax_label',
batch_size=batch_size,
data_shape=data_shape,
shuffle=True,
# shuffle=False,
rand_crop=True,
mirror =True,
rand_mirror=True,
max_rotate_angle=0)
val = mx.io.ImageRecordIter(
path_imgrec=rec_val,
data_name='data',
label_name='softmax_label',
batch_size=batch_size,
data_shape=data_shape)
return train,val
# load and tune model
def get_fine_tune_model(model_name):
# load model
symbol, arg_params, aux_params = mx.model.load_checkpoint("data/pre_train/"+model_name, 0)
# model tuning
all_layers = symbol.get_internals()
if model_name=="vgg16":
net = all_layers['drop7_output']
else:
net = all_layers['flatten0_output']
net = mx.symbol.FullyConnected(data=net, num_hidden=num_classes, name='newfc1')
net = mx.symbol.SoftmaxOutput(data=net, name='softmax')
# drop the pre-trained weights of the final fc layer so the new output layer is trained from scratch
new_args = dict({k:arg_params[k] for k in arg_params if 'fc1' not in k})
return (net, new_args,aux_params)
#model training
def fit(symbol, arg_params, aux_params, iter_train, iter_val, class_str, num_epoch, batch_size, gpu_avaliable):
devs = [mx.gpu(i) for i in gpu_avaliable]
model = mx.mod.Module(symbol=symbol, context=devs)
# metric
com_metric = mx.metric.CompositeEvalMetric()
com_metric.add(mx.metric.Accuracy())
com_metric.add(mAP(class_str)) # remove if unnecessary
# optimizer: freeze the pre-trained layers by giving every parameter in arg_params a learning-rate multiplier of 0;
# the newly added fc layer is not in arg_params, so it keeps the default multiplier of 1 and is the only part that gets trained
sgd = mx.optimizer.Optimizer.create_optimizer('sgd')
finetune_lr = dict({k: 0 for k in arg_params})
sgd.set_lr_mult(finetune_lr)
# training
model.fit(iter_train, iter_val,
num_epoch=num_epoch,
arg_params=arg_params,
aux_params=aux_params,
allow_missing=True,
batch_end_callback = mx.callback.Speedometer(batch_size, 10),
kvstore='device',
optimizer=sgd,
optimizer_params={'learning_rate':0.01},
initializer=mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2),
eval_metric='acc')
return model.score(iter_val, com_metric)
#=======================================================================================================================
# set logger, print message on screen and file
logging.basicConfig(level=logging.DEBUG, format='%(asctime)-15s %(message)s',filename='acc_record.log',filemode='w')
console = logging.StreamHandler()
console.setLevel(logging.INFO)
console.setFormatter(logging.Formatter('%(asctime)-15s %(message)s'))
logging.getLogger('').addHandler(console)
# data and pre-train model
rec_train='./data/rec/hico_train_full.rec'
# rec_train='./data/rec/hico_train_200500.rec'
model_name='vgg16'
# model_name='resnet-152'
rec_val='./data/rec/hico_val.rec'
lst_train=rec_train[:-3]+'lst'
# parameter
num_classes = 600
class_str=[]
for i in range(num_classes):
class_str.append("c"+str(i))
batch_per_gpu = 40
num_epoch =10
gpu_avaliable=[0,1,2,3]
num_gpus = len(gpu_avaliable)
batch_size = batch_per_gpu * num_gpus
if rec_train=='./data/rec/hico_train_full.rec':
print('-----------Batches per epoch: %d-----------' % (7000.0 / batch_size))
if rec_train=='./data/rec/hico_train_200500.rec':
print('-----------Batches per epoch: %d-----------' % (137120.0 / batch_size))
#-----------------------------------------------------------------------------------------------------------------------
(new_sym,new_args,aux_params)=get_fine_tune_model(model_name)
(iter_train, iter_val) = get_iterators(batch_size,rec_train,rec_val,lst_train)
mod_score = fit(new_sym, new_args, aux_params, iter_train, iter_val, class_str, num_epoch, batch_size, gpu_avaliable)
print(mod_score)
3. Prediction