## Overview
Network migration means porting a network built in another framework over to MindSpore. I happen to have exactly such a migration task on hand, so this is a good hands-on opportunity to learn the process step by step.
## Preparation
Install MindSpore; this walkthrough uses version 1.6.1.
## Network Script Analysis
### Operator Analysis
| PyTorch operator | MindSpore counterpart | Required functionality supported? |
|---|---|---|
| nn.Conv2D | nn.Conv2d | Yes |
| nn.BatchNorm2D | nn.BatchNorm2d | Yes |
| nn.ReLU | nn.ReLU | Yes |
| nn.MaxPool2D | nn.MaxPool2d | Yes |
| nn.AdaptiveAvgPool2D | None | Not supported (see note below) |
| nn.Linear | nn.Dense | Yes |
| torch.flatten | nn.Flatten | Yes |
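The one gap, nn.AdaptiveAvgPool2D, is not a blocker for ResNet: the network only needs global average pooling, and ops.ReduceMean over the spatial axes provides exactly that (the ResNet code later in this post uses it). A minimal sketch of the equivalence:

```python
import numpy as np
import mindspore.ops as ops
from mindspore import Tensor

# Global average pooling over H and W; for this use case it matches
# PyTorch's nn.AdaptiveAvgPool2d(1).
mean = ops.ReduceMean(keep_dims=True)
x = Tensor(np.ones((1, 2048, 7, 7), np.float32))
pooled = mean(x, (2, 3))
print(pooled.shape)  # (1, 2048, 1, 1)
```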
### Feature Analysis
| PyTorch feature | MindSpore counterpart |
|---|---|
| nn.init.kaiming_normal_ | initializer(init='HeNormal') |
| nn.init.constant_ | initializer(init='Constant') |
| nn.Sequential | nn.SequentialCell |
| nn.Module | nn.Cell |
| torch.distributed | context.set_auto_parallel_context |
| torch.optim.SGD | nn.SGD or nn.Momentum |
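For the initialization mappings in the table, MindSpore layers accept the initializer directly through their weight_init argument, or you can build an initialized Tensor with mindspore.common.initializer.initializer. A quick sketch; the shapes and arguments here are illustrative:

```python
import mindspore.nn as nn
from mindspore.common.initializer import initializer, HeNormal

# kaiming_normal_ equivalent: pass HeNormal as the layer's weight_init
conv = nn.Conv2d(3, 64, kernel_size=7, stride=2, pad_mode='same',
                 weight_init=HeNormal(mode='fan_out', nonlinearity='relu'))

# or create an initialized Tensor explicitly
weight = initializer(HeNormal(), shape=(64, 3, 7, 7))
```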
## Network Script Development
### Migrating the Data Preprocessing
import os
from mindspore import dtype as mstype
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2

def create_dataset(dataset_path, batch_size=32, rank_size=1, rank_id=0, do_train=True):
    """
    Create a train or eval ImageNet2012 dataset for ResNet50.

    Args:
        dataset_path(string): the path of the dataset.
        batch_size(int): the batch size of the dataset. Default: 32.
        rank_size(int): total number of devices for training. Default: 1;
            greater than 1 in distributed training.
        rank_id(int): logical sequence number of this device. Default: 0;
            ranges over [0, rank_size) in distributed training.
        do_train(bool): whether the dataset is used for training. Default: True.

    Returns:
        dataset
    """
    data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=do_train,
                                     num_shards=rank_size, shard_id=rank_id)

    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    # define map operations
    trans = [
        C.Decode(),
        C.Resize(256),
        C.CenterCrop(224),
        C.Normalize(mean=mean, std=std),
        C.HWC2CHW()
    ]
    type_cast_op = C2.TypeCast(mstype.int32)

    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=8)
    data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

    # apply batch operations
    data_set = data_set.batch(batch_size, drop_remainder=do_train)
    return data_set
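A quick way to sanity-check the pipeline is to pull a single batch; the dataset path below is a placeholder:

```python
# illustrative usage of create_dataset defined above
dataset = create_dataset("/path/to/imagenet2012/train", batch_size=32)
for batch in dataset.create_dict_iterator(num_epochs=1):
    print(batch["image"].shape, batch["label"].shape)  # (32, 3, 224, 224) (32,)
    break
```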
### Migrating the Subnets and the Full Network
First, re-implement conv3x3 and conv1x1. MindSpore's nn.Conv2d turns out to be quite capable here: with pad_mode='same' it works out the padding automatically, so no manual padding arithmetic is needed.
import mindspore.nn as nn

def _conv3x3(in_channel, out_channel, stride=1):
    return nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride,
                     padding=0, pad_mode='same')

def _conv1x1(in_channel, out_channel, stride=1):
    return nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride,
                     padding=0, pad_mode='same')
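The blocks and the full network below also rely on _bn, _bn_last, _conv7x7, and _fc helpers that this post does not show. Here is a sketch consistent with how they are used; the initialization arguments follow the MindSpore ModelZoo ResNet and should be treated as an assumption:

```python
def _bn(channel):
    return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
                          gamma_init=1, beta_init=0,
                          moving_mean_init=0, moving_var_init=1)

def _bn_last(channel):
    # zero-init gamma on the last BN of a bottleneck so the residual
    # branch starts out close to identity (a common ResNet trick)
    return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
                          gamma_init=0, beta_init=0,
                          moving_mean_init=0, moving_var_init=1)

def _conv7x7(in_channel, out_channel, stride=1):
    return nn.Conv2d(in_channel, out_channel, kernel_size=7, stride=stride,
                     padding=0, pad_mode='same')

def _fc(in_channel, out_channel):
    return nn.Dense(in_channel, out_channel, has_bias=True)
```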
Next, re-implement BasicBlock and Bottleneck and stack them up layer by layer:
class ResidualBlockBase(nn.Cell):
    """Basic residual block (two 3x3 convolutions), used by ResNet18/34."""

    def __init__(self,
                 in_channel,
                 out_channel,
                 stride=1):
        super(ResidualBlockBase, self).__init__()
        self.conv1 = _conv3x3(in_channel, out_channel, stride=stride)
        self.bn1d = _bn(out_channel)
        self.conv2 = _conv3x3(out_channel, out_channel, stride=1)
        self.bn2d = _bn(out_channel)
        self.relu = nn.ReLU()

        # a 1x1 projection shortcut is needed whenever the shape changes
        self.down_sample = False
        if stride != 1 or in_channel != out_channel:
            self.down_sample = True
        self.down_sample_layer = None
        if self.down_sample:
            self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride),
                                                        _bn(out_channel)])

    def construct(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1d(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2d(out)

        if self.down_sample:
            identity = self.down_sample_layer(identity)

        out = out + identity
        out = self.relu(out)
        return out


class ResidualBlock(nn.Cell):
    """Bottleneck residual block (1x1 -> 3x3 -> 1x1), used by ResNet50/101/152."""
    expansion = 4

    def __init__(self,
                 in_channel,
                 out_channel,
                 stride=1):
        super(ResidualBlock, self).__init__()
        self.stride = stride
        channel = out_channel // self.expansion
        self.conv1 = _conv1x1(in_channel, channel, stride=1)
        self.bn1 = _bn(channel)
        if self.stride != 1:
            # downsample with a stride-1 conv followed by max pooling
            self.e2 = nn.SequentialCell([_conv3x3(channel, channel, stride=1), _bn(channel),
                                         nn.ReLU(), nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='same')])
        else:
            self.conv2 = _conv3x3(channel, channel, stride=stride)
            self.bn2 = _bn(channel)
        self.conv3 = _conv1x1(channel, out_channel, stride=1)
        self.bn3 = _bn_last(out_channel)
        self.relu = nn.ReLU()

        self.down_sample = False
        if stride != 1 or in_channel != out_channel:
            self.down_sample = True
        self.down_sample_layer = None
        if self.down_sample:
            self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride),
                                                        _bn(out_channel)])

    def construct(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        if self.stride != 1:
            out = self.e2(out)
        else:
            out = self.conv2(out)
            out = self.bn2(out)
            out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.down_sample:
            identity = self.down_sample_layer(identity)

        out = out + identity
        out = self.relu(out)
        return out
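Before wiring up the full network, a quick shape check on one block helps catch mistakes early; the input size below is just for illustration:

```python
import numpy as np
from mindspore import Tensor

block = ResidualBlock(64, 256, stride=1)  # in != out, so the projection shortcut is used
x = Tensor(np.ones((1, 64, 56, 56), np.float32))
print(block(x).shape)  # (1, 256, 56, 56)
```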
Now re-implement the full ResNet family. There is no build() or call(): the build logic goes into __init__, and call becomes construct:
import mindspore.ops as ops

class ResNet(nn.Cell):
    def __init__(self,
                 block,
                 layer_nums,
                 in_channels,
                 out_channels,
                 strides,
                 num_classes):
        super(ResNet, self).__init__()
        if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
            raise ValueError("the length of layer_nums, in_channels, out_channels list must be 4!")
        self.conv1 = _conv7x7(3, 64, stride=2)
        self.bn1 = _bn(64)
        self.relu = ops.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
        self.layer1 = self._make_layer(block,
                                       layer_nums[0],
                                       in_channel=in_channels[0],
                                       out_channel=out_channels[0],
                                       stride=strides[0])
        self.layer2 = self._make_layer(block,
                                       layer_nums[1],
                                       in_channel=in_channels[1],
                                       out_channel=out_channels[1],
                                       stride=strides[1])
        self.layer3 = self._make_layer(block,
                                       layer_nums[2],
                                       in_channel=in_channels[2],
                                       out_channel=out_channels[2],
                                       stride=strides[2])
        self.layer4 = self._make_layer(block,
                                       layer_nums[3],
                                       in_channel=in_channels[3],
                                       out_channel=out_channels[3],
                                       stride=strides[3])
        self.mean = ops.ReduceMean(keep_dims=True)
        self.flatten = nn.Flatten()
        self.end_point = _fc(out_channels[3], num_classes)

    def _make_layer(self, block, layer_num, in_channel, out_channel, stride):
        # the first block may change stride/channels; the rest keep them fixed
        layers = []
        resnet_block = block(in_channel, out_channel, stride=stride)
        layers.append(resnet_block)
        for _ in range(1, layer_num):
            resnet_block = block(out_channel, out_channel, stride=1)
            layers.append(resnet_block)
        return nn.SequentialCell(layers)

    def construct(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        c1 = self.maxpool(x)

        c2 = self.layer1(c1)
        c3 = self.layer2(c2)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)

        out = self.mean(c5, (2, 3))  # global average pooling
        out = self.flatten(out)
        out = self.end_point(out)
        return out
Pass in the ResNet50 layer configuration to assemble the full ResNet50:
def resnet50(class_num=10):
    return ResNet(ResidualBlock,
                  [3, 4, 6, 3],
                  [64, 256, 512, 1024],
                  [256, 512, 1024, 2048],
                  [1, 2, 2, 2],
                  class_num)
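A minimal smoke test of the assembled network, assuming the standard 224x224 input produced by the preprocessing above:

```python
import numpy as np
from mindspore import Tensor

net = resnet50(class_num=1000)
dummy = Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
print(net(dummy).shape)  # expected: (1, 1000)
```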
### Migrating Backward Construction, Gradient Clipping, the Optimizer, Learning-Rate Generation, etc.
Implement the SGD optimizer with momentum, applying weight decay to every weight except the BatchNorm beta/gamma and bias parameters:
from mindspore.nn import Momentum

# define opt; net, lr, momentum and weight_decay are assumed to be defined earlier
decayed_params = []
no_decayed_params = []
for param in net.trainable_params():
    if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name:
        decayed_params.append(param)
    else:
        no_decayed_params.append(param)

group_params = [{'params': decayed_params, 'weight_decay': weight_decay},
                {'params': no_decayed_params},
                {'order_params': net.trainable_params()}]  # preserve original parameter order
opt = Momentum(group_params, lr, momentum)
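The heading also mentions learning-rate generation, which the snippet above leaves out. A common pattern in MindSpore ResNet scripts is to precompute one learning rate per step and hand the whole schedule to the optimizer in place of a scalar. A sketch with linear warmup plus cosine decay; all hyper-parameter values are illustrative:

```python
import numpy as np
from mindspore import Tensor

def get_lr(lr_max=0.1, warmup_epochs=5, total_epochs=90, steps_per_epoch=625):
    """Linear warmup followed by cosine decay, one value per training step."""
    total_steps = total_epochs * steps_per_epoch
    warmup_steps = warmup_epochs * steps_per_epoch
    lr_each_step = []
    for i in range(total_steps):
        if i < warmup_steps:
            lr = lr_max * (i + 1) / warmup_steps
        else:
            decay_ratio = (i - warmup_steps) / (total_steps - warmup_steps)
            lr = lr_max * 0.5 * (1.0 + np.cos(np.pi * decay_ratio))
        lr_each_step.append(lr)
    return Tensor(np.array(lr_each_step, dtype=np.float32))

# a per-step schedule can replace the scalar lr above:
# opt = Momentum(group_params, get_lr(), momentum)
```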
Define the loss function and implement label smoothing:
import mindspore.nn as nn
from mindspore import Tensor
from mindspore import dtype as mstype
from mindspore.nn import LossBase
import mindspore.ops as ops

# define cross entropy loss
class CrossEntropySmooth(LossBase):
    """CrossEntropy with label smoothing."""
    def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000):
        super(CrossEntropySmooth, self).__init__()
        self.onehot = ops.OneHot()
        self.sparse = sparse
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction)

    def construct(self, logit, label):
        if self.sparse:
            label = self.onehot(label, ops.shape(logit)[1], self.on_value, self.off_value)
        loss = self.ce(logit, label)
        return loss

# define loss with label smoothing
label_smooth_factor = 0.1
loss = CrossEntropySmooth(sparse=True, reduction="mean",
                          smooth_factor=label_smooth_factor, num_classes=1000)
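With the dataset, network, loss, and optimizer in place, training typically goes through mindspore.Model. A sketch tying the pieces above together; the dataset path and epoch count are placeholders:

```python
from mindspore import Model
from mindspore.train.callback import LossMonitor

# assumes create_dataset, net, loss and opt from the sections above
dataset = create_dataset("/path/to/imagenet2012/train", batch_size=32)
model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
model.train(90, dataset, callbacks=[LossMonitor()], dataset_sink_mode=True)
```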
## Summary
Walking through the whole flow, a few things stood out. MindSpore's nn.Conv2d is quite capable, with pad_mode='same' taking care of the padding arithmetic. Network definitions have no build() or call(): the build logic lives in __init__, and call becomes construct. And the initializer from mindspore.common.initializer supports a wide range of parameter initializations. Overall, the framework covered everything this migration needed.