1.MobilenetV3的介绍
MobileNetV3结合硬件感知神经网络架构搜索(NAS)和NetAdapt算法,可以移植到手机CPU上运行。
论文:Howard, Andrew, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang et al. "Searching for MobileNetV3." In Proceedings of the IEEE International Conference on Computer Vision, pp. 1314-1324. 2019.
mobilenet系列对比
MobilenetV1:
- 引入了深度可分离卷积作为传统卷积层的有效替代,大大减少计算量。
MobilenetV2:
- 引入线性瓶颈和倒置残差结构,以便通过利用问题的低秩性质来提高层结构的效率。
MobilenetV3:
- 添加了SE注意力机制(squeeze and excitation)
- 使用h-swish函数
- 使用NAS搜索参数(Neural Architecture Search)
- 重新设计耗时层结构
模型架构
MobileNetV3总体网络架构详见论文 链接
2.MobilenetV3的mindspore实现
使用的数据集:imagenet
- 数据集大小: 146G, 共1330k张图片, 1000类彩色图像
- 训练: 140G, 1280k张图片
- 测试: 6G, 50k张图片
- 数据格式:RGB
环境要求
- 硬件(Ascend)
- 由启智社区提供算力,使用Ascend来搭建硬件环境。
- 框架
- 如需查看详情,请参见如下资源:
使用mindspore进行推理
从该github仓库获取torch权重
使用weight_convert.py文件进行torch格式到mindspore格式的转换:
from mindspore.train.serialization import save_checkpoint
from mindspore import Tensor
import torch
def _torch_name_to_mindspore(name):
    """Translate one PyTorch state-dict key into its MindSpore parameter name.

    Keys that end with none of the known suffixes are returned unchanged.
    """
    # (PyTorch suffix, MindSpore suffix); the first matching suffix wins.
    suffix_map = (
        ('normalize.bias', 'normalize.beta'),
        ('normalize.weight', 'normalize.gamma'),
        ('.running_mean', '.moving_mean'),
        ('.running_var', '.moving_variance'),
    )
    for torch_suffix, ms_suffix in suffix_map:
        if name.endswith(torch_suffix):
            return name[:-len(torch_suffix)] + ms_suffix
    return name


def pytorch2mindspore(ckpt_name='mobilenetv3.pth', save_name='mobilenetv3.ckpt'):
    """Convert a PyTorch checkpoint into a MindSpore checkpoint file.

    Args:
        ckpt_name: path of the PyTorch checkpoint; it is loaded onto the CPU
            and iterated as a mapping from parameter name to tensor.
        save_name: path of the MindSpore checkpoint to write. Defaults to the
            previously hard-coded 'mobilenetv3.ckpt'.
    """
    # NOTE: torch.load unpickles the file, so only load checkpoints from a
    # trusted source.
    par_dict = torch.load(ckpt_name, map_location=torch.device('cpu'))
    new_params_list = []
    for name, parameter in par_dict.items():
        print('========================py_name', name)
        ms_name = _torch_name_to_mindspore(name)
        print('========================ms_name', ms_name)
        # detach() keeps the conversion working for tensors that still
        # require grad; it is a no-op for plain state-dict tensors.
        new_params_list.append({
            'name': ms_name,
            'data': Tensor(parameter.detach().numpy()),
        })
    save_checkpoint(new_params_list, save_name)
使用model.predict接口进行推理,推理数据从imagenet-1K中随机挑选一些图片。
使用mindspore从0到1开始训练
网络结构搭建
mindspore提供的API与pytorch较为相似,在pytorch实现代码的基础上进行重构。
class hswish(nn.Cell):
    """Hard-swish activation computed as ``x * ReLU6(x + 3) / 6``."""

    def __init__(self):
        super(hswish, self).__init__()
        # Build the ReLU6 cell once here instead of instantiating a new cell
        # inside construct on every forward call.
        self.relu6 = nn.ReLU6()

    def construct(self, x):
        out = x * self.relu6(x + 3) / 6
        return out
class hsigmoid(nn.Cell):
    """Hard-sigmoid activation computed as ``ReLU6(x + 3) / 6``."""

    def __init__(self):
        super(hsigmoid, self).__init__()
        # Build the ReLU6 cell once here instead of instantiating a new cell
        # inside construct on every forward call.
        self.relu6 = nn.ReLU6()

    def construct(self, x):
        out = self.relu6(x + 3) / 6
        return out
def _make_divisible(x, divisor=8):
    """Round ``x`` up to the next multiple of ``divisor`` and return it as an int."""
    # Number of whole ``divisor`` steps needed to reach or exceed x.
    steps = np.ceil(x * 1. / divisor)
    rounded = steps * divisor
    return int(rounded)
class Activation(nn.Cell):
    """Wrap the activation cell selected by name.

    Args:
        act_func: one of 'relu', 'relu6', 'hsigmoid' / 'hard_sigmoid',
            'hswish' / 'hard_swish'.

    Raises:
        NotImplementedError: if ``act_func`` is none of the names above.
    """

    def __init__(self, act_func):
        super().__init__()
        # (accepted aliases, cell class) pairs, checked in order.
        builders = (
            (('relu',), nn.ReLU),
            (('relu6',), nn.ReLU6),
            (('hsigmoid', 'hard_sigmoid'), hsigmoid),
            (('hswish', 'hard_swish'), hswish),
        )
        chosen = None
        for aliases, builder in builders:
            if act_func in aliases:
                chosen = builder
                break
        if chosen is None:
            raise NotImplementedError
        self.act = chosen()

    def construct(self, x):
        return self.act(x)
class GlobalAvgPooling(nn.Cell):
    """Average the input over axes 2 and 3.

    Args:
        keep_dims: forwarded to ``P.ReduceMean``; controls whether the
            reduced axes are kept in the output.
    """

    def __init__(self, keep_dims=False):
        super().__init__()
        self.mean = P.ReduceMean(keep_dims=keep_dims)

    def construct(self, x):
        # Axes (2, 3) are presumably H and W of an NCHW tensor -- TODO confirm.
        return self.mean(x, (2, 3))
class SE(nn.Cell):
    """Squeeze-and-excitation attention.

    The input is pooled globally, passed through two 1x1 convolutions
    (relu after the first, hsigmoid after the second) and the result is
    multiplied back onto the input.

    Args:
        num_out: channel count of the input and of the produced gate.
        ratio: reduction ratio; the bottleneck width is
            ``_make_divisible(num_out // ratio)``.
    """

    def __init__(self, num_out, ratio=4):
        super().__init__()
        bottleneck = _make_divisible(num_out // ratio)
        self.pool = GlobalAvgPooling(keep_dims=True)
        self.conv1 = nn.Conv2d(in_channels=num_out,
                               out_channels=bottleneck,
                               kernel_size=1,
                               has_bias=True,
                               pad_mode='pad')
        self.act1 = Activation('relu')
        self.conv2 = nn.Conv2d(in_channels=bottleneck,
                               out_channels=num_out,
                               kernel_size=1,
                               has_bias=True,
                               pad_mode='pad')
        self.act2 = Activation('hsigmoid')
        self.mul = P.Mul()

    def construct(self, x):
        gate = self.pool(x)
        gate = self.act1(self.conv1(gate))
        gate = self.act2(self.conv2(gate))
        # Re-weight the input with the computed gate.
        return self.mul(x, gate)
class Unit(nn.Cell):
    """Plain conv -> batch-norm -> activation unit (the activation is optional).

    Args:
        num_in: input channels of the convolution.
        num_out: output channels of the convolution and batch-norm width.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        padding: explicit padding (the conv uses ``pad_mode='pad'``).
        num_groups: value passed as the convolution's ``group`` argument.
        use_act: when False the unit stops after batch-norm.
        act_type: activation name understood by ``Activation``.
    """

    def __init__(self, num_in, num_out, kernel_size=1, stride=1, padding=0, num_groups=1,
                 use_act=True, act_type='relu'):
        super().__init__()
        conv_kwargs = dict(in_channels=num_in,
                           out_channels=num_out,
                           kernel_size=kernel_size,
                           stride=stride,
                           padding=padding,
                           group=num_groups,
                           has_bias=False,
                           pad_mode='pad')
        self.conv = nn.Conv2d(**conv_kwargs)
        self.bn = nn.BatchNorm2d(num_out)
        self.use_act = use_act
        if use_act:
            self.act = Activation(act_type)
        else:
            self.act = None

    def construct(self, x):
        out = self.bn(self.conv(x))
        if not self.use_act:
            return out
        return self.act(out)
class ResUnit(nn.Cell):
    """Bottleneck block: optional 1x1 expand, depthwise conv, optional SE, 1x1 project.

    Args:
        num_in: input channels.
        num_mid: width of the expanded (middle) representation.
        num_out: output channels.
        kernel_size: depthwise kernel size; must be 1, 3, 5 or 7.
        stride: stride of the depthwise convolution.
        act_type: activation name used by the expand and depthwise units.
        use_se: insert an ``SE`` block after the depthwise convolution.

    Raises:
        NotImplementedError: if ``kernel_size`` is not 1, 3, 5 or 7.
    """

    def __init__(self, num_in, num_mid, num_out, kernel_size, stride=1, act_type='relu', use_se=False):
        super().__init__()
        self.use_se = use_se
        # The first conv only adjusts the channel count.
        # NOTE(review): it is enabled by comparing num_out (not num_in) with
        # num_mid -- confirm this is intended before changing it.
        self.first_conv = (num_out != num_mid)
        # The residual add is only used when channels and stride both leave
        # the tensor shape unchanged.
        self.use_short_cut_conv = not (num_in != num_out or stride != 1)

        self.expand = Unit(num_in, num_mid, kernel_size=1,
                           stride=1, padding=0, act_type=act_type) if self.first_conv else None
        # group == channels, i.e. one filter per channel.
        self.conv1 = Unit(num_mid, num_mid,
                          kernel_size=kernel_size,
                          stride=stride,
                          padding=self._get_pad(kernel_size),
                          act_type=act_type,
                          num_groups=num_mid)
        if use_se:
            self.se = SE(num_mid)
        # Projection back to num_out channels, without activation.
        self.conv2 = Unit(num_mid, num_out, kernel_size=1, stride=1,
                          padding=0, act_type=act_type, use_act=False)
        self.add = P.Add() if self.use_short_cut_conv else None

    def construct(self, x):
        """Run the block on ``x``."""
        out = x
        if self.first_conv:
            out = self.expand(x)
        out = self.conv1(out)  # per-channel (depthwise) convolution
        if self.use_se:
            out = self.se(out)
        out = self.conv2(out)  # 1x1 convolution mixing information across channels
        if self.use_short_cut_conv:
            out = self.add(x, out)
        return out

    def _get_pad(self, kernel_size):
        """Return the padding that goes with ``kernel_size`` (1, 3, 5 or 7)."""
        for size, pad in ((1, 0), (3, 1), (5, 2), (7, 3)):
            if kernel_size == size:
                return pad
        raise NotImplementedError
class MobileNetV3(nn.Cell):
    """MobileNetV3 classification network.

    Args:
        model_cfgs: dict with the keys 'cfg' (list of per-block settings
            ``[kernel_size, exp_ch, out_ch, use_se, act_func, stride]``),
            'cls_ch_squeeze' and 'cls_ch_expand'.
        num_classes: output channels of the final 1x1 classifier convolution.
        multiplier: width multiplier applied to the stem, block and squeeze
            channel counts (each rounded with ``_make_divisible``).
        final_drop: when > 0, ``nn.Dropout(final_drop)`` is appended to the
            feature stack.
        round_nearest: accepted for interface compatibility; not used.
    """

    def __init__(self, model_cfgs, num_classes=1000, multiplier=1., final_drop=0., round_nearest=8):
        super(MobileNetV3, self).__init__()
        self.cfgs = model_cfgs['cfg']
        first_conv_in_channel = 3
        first_conv_out_channel = _make_divisible(multiplier * 16)
        # Track the real width produced by the stem. This used to be a
        # hard-coded 16, so with a multiplier that changes the stem width the
        # first block was told the wrong input width and lost its shortcut.
        self.inplanes = first_conv_out_channel
        self.features = []
        self.features.append(nn.Conv2d(in_channels=first_conv_in_channel,
                                       out_channels=first_conv_out_channel,
                                       kernel_size=3, padding=1, stride=2,
                                       has_bias=False, pad_mode='pad'))
        self.features.append(nn.BatchNorm2d(first_conv_out_channel))
        self.features.append(Activation('hswish'))
        for layer_cfg in self.cfgs:
            self.features.append(self._make_layer(kernel_size=layer_cfg[0],
                                                  exp_ch=_make_divisible(multiplier * layer_cfg[1]),
                                                  out_channel=_make_divisible(multiplier * layer_cfg[2]),
                                                  use_se=layer_cfg[3],
                                                  act_func=layer_cfg[4],
                                                  stride=layer_cfg[5]))
        output_channel = _make_divisible(multiplier * model_cfgs["cls_ch_squeeze"])
        self.features.append(nn.Conv2d(in_channels=_make_divisible(multiplier * self.cfgs[-1][2]),
                                       out_channels=output_channel,
                                       kernel_size=1, padding=0, stride=1,
                                       has_bias=False, pad_mode='pad'))
        self.features.append(nn.BatchNorm2d(output_channel))
        self.features.append(Activation('hswish'))
        self.features.append(GlobalAvgPooling(keep_dims=True))
        self.features.append(nn.Conv2d(in_channels=output_channel,
                                       out_channels=model_cfgs['cls_ch_expand'],
                                       kernel_size=1, padding=0, stride=1,
                                       has_bias=False, pad_mode='pad'))
        self.features.append(Activation('hswish'))
        if final_drop > 0:
            self.features.append((nn.Dropout(final_drop)))
        # Wrap the collected layers in a SequentialCell so they run in order.
        self.features = nn.SequentialCell(self.features)
        self.output = nn.Conv2d(in_channels=model_cfgs['cls_ch_expand'],
                                out_channels=num_classes,
                                kernel_size=1, has_bias=True, pad_mode='pad')
        # Drop axes 2 and 3 of the classifier output.
        self.squeeze = P.Squeeze(axis=(2, 3))
        self._initialize_weights()

    def construct(self, x):
        """Run the feature stack, the classifier conv and the final squeeze."""
        x = self.features(x)
        x = self.output(x)
        x = self.squeeze(x)
        return x

    def _make_layer(self, kernel_size, exp_ch, out_channel, use_se, act_func, stride=1):
        """Build one ``ResUnit`` fed by ``self.inplanes`` and advance ``self.inplanes``."""
        mid_planes = exp_ch
        out_planes = out_channel
        layer = ResUnit(self.inplanes, mid_planes, out_planes,
                        kernel_size, stride=stride, act_type=act_func, use_se=use_se)
        self.inplanes = out_planes
        return layer

    def _initialize_weights(self):
        """Re-initialise Conv2d, BatchNorm2d and Dense parameters.

        Conv2d weights are drawn from N(0, sqrt(2 / n)) with
        n = kernel_h * kernel_w * out_channels; Dense weights from N(0, 0.01);
        biases and BatchNorm beta are zeroed and BatchNorm gamma is set to one.
        """
        self.init_parameters_data()
        for _, m in self.cells_and_names():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.set_data(Tensor(np.random.normal(0, np.sqrt(2. / n),
                                                          m.weight.data.shape).astype("float32")))
                if m.bias is not None:
                    m.bias.set_data(
                        Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
            elif isinstance(m, nn.BatchNorm2d):
                m.gamma.set_data(
                    Tensor(np.ones(m.gamma.data.shape, dtype="float32")))
                m.beta.set_data(
                    Tensor(np.zeros(m.beta.data.shape, dtype="float32")))
            elif isinstance(m, nn.Dense):
                m.weight.set_data(Tensor(np.random.normal(
                    0, 0.01, m.weight.data.shape).astype("float32")))
                if m.bias is not None:
                    m.bias.set_data(
                        Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
使用启智平台训练
参数设定
"num_classes": 1000,
"image_height": 224,
"image_width": 224,
"batch_size": 512,
"epoch_size": 360,
"warmup_epochs": 4,
"lr": 0.05,
"momentum": 0.9,
"weight_decay": 4e-5,
"label_smooth": 0.1,
"loss_scale": 1024,
"save_checkpoint": True,
"save_checkpoint_epochs": 4,
"keep_checkpoint_max": 20,
"save_checkpoint_path": "/cache/output/checkpoint",
"export_file": "mobilenetv3_small",
"export_format": "MINDIR",
"device": "Ascend",
})
数据集
启智环境适配
# Command-line interface of the training script.
parser = argparse.ArgumentParser(description='Image classification')
# OpenI platform adaptation
parser.add_argument('--data_url', type=str, default=None, help='Dataset path')
parser.add_argument('--train_url', type=str, default=None, help='Train output path')
parser.add_argument('--pretrain_url', help='pre_train_model path in obs')
parser.add_argument('--multi_data_url', help='path to multi dataset', default='/cache/data/')
# The flag used to be registered with three dashes only, so the documented
# '--device_target' spelling was rejected as unrecognized. The three-dash form
# is kept as an alias so existing launch commands keep working; both store
# into args_opt.device_target.
parser.add_argument('--device_target', '---device_target', default="Ascend", type=str, help='device target')
# Ascend parameter
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
parser.add_argument('--run_distribute', type=ast.literal_eval, default=False, help='Run distribute')
parser.add_argument('--device_id', type=int, default=0, help='Device id')
parser.add_argument('--run_modelarts', type=ast.literal_eval, default=False, help='Run mode')
parser.add_argument('--pre_trained', type=str, default=None, help='Pretrained checkpoint path')
args_opt = parser.parse_args()
#context.set_context(mode=context.GRAPH_MODE, device_target=config.device, save_graphs=False)
### Copy multiple datasets from obs to training image and unzip###
def C2netMultiObsToEnv(multi_data_url, data_dir):
    """Copy every dataset archive listed in ``multi_data_url`` to ``data_dir`` and unzip it.

    Args:
        multi_data_url: JSON text holding a list of objects with the keys
            "dataset_name" and "dataset_url".
        data_dir: local directory that receives each archive and, next to
            it, a directory (archive name without extension) with its
            extracted contents.

    A failure while copying or extracting one dataset is printed and the
    remaining datasets are still processed. The flag file
    /cache/download_dataset_input.txt is created afterwards in every case.
    """
    # Local import so the block does not depend on a file-level import.
    import subprocess

    # --multi_data_url is JSON data and has to be parsed first.
    multi_data_json = json.loads(multi_data_url)
    print("multi_data_json:", multi_data_json)
    for dataset in multi_data_json:
        zipfile_path = data_dir + "/" + dataset["dataset_name"]
        try:
            mox.file.copy(dataset["dataset_url"], zipfile_path)
            print("Successfully Download {} to {}".format(dataset["dataset_url"], zipfile_path))
            # Extract into a directory named after the archive without its extension.
            filename = os.path.splitext(dataset["dataset_name"])[0]
            filePath = data_dir + "/" + filename
            os.makedirs(filePath, exist_ok=True)
            # An argument list (no shell) keeps paths containing spaces or
            # shell metacharacters intact. The exit status is ignored, as it
            # was with os.system.
            subprocess.run(["unzip", zipfile_path, "-d", filePath], check=False)
        except Exception as e:
            print('moxing download {} to {} failed: '.format(
                dataset["dataset_url"], zipfile_path) + str(e))
    # Create a flag file that marks the copy step as finished. During
    # multi-card training the other cards wait for this file instead of
    # copying the dataset again, so it is written even when a download failed.
    with open("/cache/download_dataset_input.txt", 'w'):
        pass
    if os.path.exists("/cache/download_dataset_input.txt"):
        print("download_input succeed")
    return
### Copy ckpt file from obs to training image###
### To operate on folders, use mox.file.copy_parallel. If copying a file.
### Please use mox.file.copy to operate the file, this operation is to operate the file
def ObsUrlToEnv(obs_ckpt_url, ckpt_url):
    """Copy a single file from ``obs_ckpt_url`` to ``ckpt_url`` with ``mox.file.copy``.

    Any exception is caught and reported on stdout; nothing is returned.
    """
    try:
        mox.file.copy(obs_ckpt_url, ckpt_url)
        success = "Successfully Download {} to {}".format(obs_ckpt_url, ckpt_url)
        print(success)
    except Exception as err:
        failure = 'moxing download {} to {} failed: '.format(obs_ckpt_url, ckpt_url)
        print(failure + str(err))
### Copy the output model to obs ###
def EnvToObs(train_dir, obs_train_url):
    """Upload ``train_dir`` to ``obs_train_url`` with ``mox.file.copy_parallel``.

    Any exception is caught and reported on stdout; nothing is returned.
    """
    try:
        mox.file.copy_parallel(train_dir, obs_train_url)
        success = "Successfully Upload {} to {}".format(train_dir, obs_train_url)
        print(success)
    except Exception as err:
        failure = 'moxing upload {} to {} failed: '.format(train_dir, obs_train_url)
        print(failure + str(err))
### Copy multiple pretrain file from obs to training image and unzip###
def C2netModelToEnv(model_url, model_dir):
    """Copy each pretrained model listed in ``model_url`` into ``model_dir``.

    Args:
        model_url: JSON text holding a list of objects with a "model_url" key.
        model_dir: local directory; every entry is copied to
            ``model_dir + "/checkpoint.ckpt"``.

    A failed copy is reported on stdout and the remaining entries are still
    processed.
    """
    # The argument is JSON text and has to be parsed first.
    model_url_json = json.loads(model_url)
    print("model_url_json:", model_url_json)
    # Every entry is written to the same target path.
    modelfile_path = model_dir + "/" + "checkpoint.ckpt"
    for entry in model_url_json:
        try:
            mox.file.copy(entry["model_url"], modelfile_path)
            success = "Successfully Download {} to {}".format(entry["model_url"], modelfile_path)
            print(success)
        except Exception as err:
            failure = 'moxing download {} to {} failed: '.format(
                entry["model_url"], modelfile_path)
            print(failure + str(err))
def DownloadDataFromQizhi(multi_data_url, data_dir):
    """Fetch the datasets and configure the MindSpore context for the job.

    The device count is read from the RANK_SIZE environment variable. With
    one device the data is downloaded directly. With several devices the
    data-parallel context is set up, only ranks whose RANK_ID is a multiple
    of 8 download, and every rank then waits for the download flag file.
    """
    flag_file = "/cache/download_dataset_input.txt"
    card_count = int(os.getenv('RANK_SIZE'))
    if card_count == 1:
        C2netMultiObsToEnv(multi_data_url, data_dir)
        context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target)
    elif card_count > 1:
        # Set device_id and initialise communication for multi-card training.
        context.set_context(mode=context.GRAPH_MODE,
                            device_target=args_opt.device_target,
                            device_id=int(os.getenv('ASCEND_DEVICE_ID')))
        context.reset_auto_parallel_context()
        context.set_auto_parallel_context(device_num=card_count,
                                          parallel_mode=ParallelMode.DATA_PARALLEL,
                                          gradients_mean=True,
                                          parameter_broadcast=True)
        init()
        # The copy does not need to run on every card; one card per group of
        # eight ranks does it.
        rank = int(os.getenv('RANK_ID'))
        if rank % 8 == 0:
            C2netMultiObsToEnv(multi_data_url, data_dir)
        # A missing flag file means the copy has not finished yet, so poll
        # once per second until it appears.
        while not os.path.exists(flag_file):
            time.sleep(1)
def DownloadModelFromQizhi(model_url, model_dir):
    """Download the pretrained model(s) once per node.

    The device count is read from RANK_SIZE. With one device the download
    always runs; with several devices it runs only on ranks whose RANK_ID
    is a multiple of 8.
    """
    world_size = int(os.getenv('RANK_SIZE'))
    if world_size < 1:
        return
    # With several cards, only every eighth rank copies the model.
    if world_size > 1 and int(os.getenv('RANK_ID')) % 8 != 0:
        return
    C2netModelToEnv(model_url, model_dir)
def UploadToQizhi(train_dir, obs_train_url):
    """Upload the training output once per node.

    RANK_SIZE and RANK_ID are both read from the environment. The upload
    runs when there is a single device, or, with several devices, on ranks
    whose RANK_ID is a multiple of 8.
    """
    world_size = int(os.getenv('RANK_SIZE'))
    rank = int(os.getenv('RANK_ID'))
    single_card = world_size == 1
    lead_card = world_size > 1 and rank % 8 == 0
    if single_card or lead_card:
        EnvToObs(train_dir, obs_train_url)
启动训练
启动评估
cd ./mobilenetv3 && python eval.py --checkpoint_path=./best.ckpt --dataset_path=../imagenet/val
评估结果:
result: {'top_5_accuracy': 0.8687065972222222, 'top_1_accuracy': 0.669921875, 'loss': 2.332750029034085} ckpt= ./mobilenetV3-360_2502.ckpt
总结
本次使用mindspore复现的难点不在网络搭建和数据集处理上,而在于imagenet-1K的100+G大小,以及启智平台的环境适配工作。
为了提高训练速度,将batch_size设置为512,epoch设置为360,训练耗时仍旧达到了100h,也许是batch_size设置得过大,反而降低了网络效率。