How to use MindSpore? A hands-on guide! -- Network migration

## Overview

Network migration means porting a network implemented in another framework to MindSpore. I happen to have a migration task on hand, so this hands-on walkthrough is a good opportunity to learn the process.

## Preparation

### Install MindSpore

MindSpore 1.6.1 is the version used in this walkthrough.
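After installation, a quick sanity check (nothing specific to this migration, just confirming the package imports) is:

```python
import mindspore

print(mindspore.__version__)  # expected to print 1.6.1
```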

### Network script analysis

  • Operator analysis

| PyTorch operator | MindSpore counterpart | Required functionality supported? |
| ---------------- | --------------------- | --------------------------------- |
| nn.Conv2d | nn.Conv2d | Yes |
| nn.BatchNorm2d | nn.BatchNorm2d | Yes |
| nn.ReLU | nn.ReLU | Yes |
| nn.MaxPool2d | nn.MaxPool2d | Yes |
| nn.AdaptiveAvgPool2d | Not supported | |
| nn.Linear | nn.Dense | Yes |
| torch.flatten | nn.Flatten | Yes |
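The only operator without a direct counterpart is nn.AdaptiveAvgPool2d. ResNet only needs it for global average pooling, so it can be replaced by ops.ReduceMean over the spatial axes, which is exactly what the ResNet construct() later in this post does. A minimal illustration:

```python
import numpy as np
import mindspore.ops as ops
from mindspore import Tensor

# PyTorch: nn.AdaptiveAvgPool2d(1) -> global average pooling
reduce_mean = ops.ReduceMean(keep_dims=True)
x = Tensor(np.ones((1, 2048, 7, 7), np.float32))
y = reduce_mean(x, (2, 3))
print(y.shape)  # (1, 2048, 1, 1)
```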

  • Feature analysis

| PyTorch feature | MindSpore counterpart |
| --------------- | --------------------- |
| nn.init.kaiming_normal_ | initializer(init='HeNormal') |
| nn.init.constant_ | initializer(init='Constant') |
| nn.Sequential | nn.SequentialCell |
| nn.Module | nn.Cell |
| torch.distributed | context.set_auto_parallel_context |
| torch.optim.SGD | nn.optim.SGD or nn.optim.Momentum |
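On the initialization side, PyTorch mutates existing tensors in place (kaiming_normal_, constant_), while MindSpore usually passes an initializer when the parameter or layer is created. A small sketch of both styles (the shapes below are arbitrary examples, not values from this migration):

```python
import mindspore.nn as nn
from mindspore.common.initializer import initializer, HeNormal, Constant

# create an initialized tensor directly
weight = initializer(HeNormal(), shape=(64, 3, 3, 3))

# or hand the initializer to a layer constructor, by name or as an instance
fc = nn.Dense(2048, 1000, weight_init='HeNormal', bias_init=Constant(0))
```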

## Network script development

### Migrating the data preprocessing

```python
import os
from mindspore import dtype as mstype
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2


def create_dataset(dataset_path, batch_size=32, rank_size=1, rank_id=0, do_train=True):
    """
    create a train or eval imagenet2012 dataset for resnet50

    Args:
        dataset_path(string): the path of dataset.
        batch_size(int): the batch size of dataset. Default: 32
        rank_size(int): total number of devices for training. Default: 1,
                        greater than 1 in distributed training
        rank_id(int): logical sequence of the current device among all devices. Default: 0,
                      ranges over [0, rank_size) in distributed training
        do_train(bool): whether the dataset is used for training. Default: True

    Returns:
        dataset
    """
    data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=do_train,
                                     num_shards=rank_size, shard_id=rank_id)

    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    # define map operations
    trans = [
        C.Decode(),
        C.Resize(256),
        C.CenterCrop(224),
        C.Normalize(mean=mean, std=std),
        C.HWC2CHW()
    ]

    type_cast_op = C2.TypeCast(mstype.int32)

    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=8)
    data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

    # apply batch operations
    data_set = data_set.batch(batch_size, drop_remainder=do_train)

    return data_set
```

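A quick, hypothetical usage check (the dataset path below is a placeholder):

```python
train_path = "/path/to/imagenet2012/train"   # placeholder path
dataset = create_dataset(train_path, batch_size=32, do_train=True)
print("batches per epoch:", dataset.get_dataset_size())

for batch in dataset.create_dict_iterator():
    print(batch["image"].shape, batch["label"].shape)  # (32, 3, 224, 224) (32,)
    break
```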
### Migrating the sub-networks and the full network

Re-implement conv3x3 and conv1x1. Here I found MindSpore's nn.Conv2d to be quite capable: pad_mode='same' takes care of the padding arithmetic.

```python
import mindspore.nn as nn
import mindspore.ops as ops   # used later by the ResNet backbone (ops.ReLU, ops.ReduceMean)


def _conv3x3(in_channel, out_channel, stride=1):
    return nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride,
                     padding=0, pad_mode='same')


def _conv1x1(in_channel, out_channel, stride=1):
    return nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride,
                     padding=0, pad_mode='same')
```
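The blocks below also call _conv7x7, _bn, _bn_last and _fc, whose definitions the post does not show. A minimal sketch of what they could look like, in the same pad_mode='same' style (the BN hyper-parameters here are assumptions, not taken from the original):

```python
def _conv7x7(in_channel, out_channel, stride=1):
    return nn.Conv2d(in_channel, out_channel, kernel_size=7, stride=stride,
                     padding=0, pad_mode='same')


def _bn(channel):
    return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
                          gamma_init=1, beta_init=0)


def _bn_last(channel):
    # gamma of the last BN in a block initialized to 0, a common ResNet training trick
    return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
                          gamma_init=0, beta_init=0)


def _fc(in_channel, out_channel):
    return nn.Dense(in_channel, out_channel, has_bias=True, bias_init=0)
```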

Re-implement BasicBlock and BottleNeck, building them up layer by layer:

```python
class ResidualBlockBase(nn.Cell):
    """Basic residual block (two 3x3 convolutions), used by ResNet-18/34."""

    def __init__(self,
                 in_channel,
                 out_channel,
                 stride=1):
        super(ResidualBlockBase, self).__init__()
        self.conv1 = _conv3x3(in_channel, out_channel, stride=stride)
        self.bn1d = _bn(out_channel)
        self.conv2 = _conv3x3(out_channel, out_channel, stride=1)
        self.bn2d = _bn(out_channel)
        self.relu = nn.ReLU()

        self.down_sample = False
        if stride != 1 or in_channel != out_channel:
            self.down_sample = True

        self.down_sample_layer = None
        if self.down_sample:
            self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride),
                                                        _bn(out_channel)])

    def construct(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1d(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2d(out)

        # project the shortcut when the shape changes
        if self.down_sample:
            identity = self.down_sample_layer(identity)

        out = out + identity
        out = self.relu(out)

        return out


class ResidualBlock(nn.Cell):
    """Bottleneck residual block (1x1 -> 3x3 -> 1x1), used by ResNet-50/101/152."""

    expansion = 4

    def __init__(self,
                 in_channel,
                 out_channel,
                 stride=1):
        super(ResidualBlock, self).__init__()
        self.stride = stride
        channel = out_channel // self.expansion
        self.conv1 = _conv1x1(in_channel, channel, stride=1)
        self.bn1 = _bn(channel)
        if self.stride != 1:
            # down-sampling variant: stride-1 conv followed by a 2x2 max-pool
            self.e2 = nn.SequentialCell([_conv3x3(channel, channel, stride=1), _bn(channel),
                                         nn.ReLU(), nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='same')])
        else:
            self.conv2 = _conv3x3(channel, channel, stride=stride)
            self.bn2 = _bn(channel)

        self.conv3 = _conv1x1(channel, out_channel, stride=1)
        self.bn3 = _bn_last(out_channel)
        self.relu = nn.ReLU()
        self.down_sample = False
        if stride != 1 or in_channel != out_channel:
            self.down_sample = True
        self.down_sample_layer = None
        if self.down_sample:
            self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride), _bn(out_channel)])

    def construct(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        if self.stride != 1:
            out = self.e2(out)
        else:
            out = self.conv2(out)
            out = self.bn2(out)
            out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        if self.down_sample:
            identity = self.down_sample_layer(identity)

        out = out + identity
        out = self.relu(out)
        return out
```
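A quick shape check on the bottleneck block (random input, purely illustrative):

```python
import numpy as np
from mindspore import Tensor

block = ResidualBlock(in_channel=64, out_channel=256, stride=1)
x = Tensor(np.random.randn(1, 64, 56, 56).astype(np.float32))
print(block(x).shape)  # (1, 256, 56, 56): channels expanded by 4, spatial size unchanged
```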

Re-implement the full ResNet family network. There is no build() or call(): what build() would do goes into __init__, and call() becomes construct():

```python
class ResNet(nn.Cell):
    """ResNet backbone: 7x7 stem, max-pool, four residual stages, global average pool, classifier."""

    def __init__(self,
                 block,
                 layer_nums,
                 in_channels,
                 out_channels,
                 strides,
                 num_classes):
        super(ResNet, self).__init__()

        if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
            raise ValueError("the length of layer_nums, in_channels, out_channels list must be 4!")
        self.conv1 = _conv7x7(3, 64, stride=2)
        self.bn1 = _bn(64)
        self.relu = ops.ReLU()

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")

        self.layer1 = self._make_layer(block,
                                       layer_nums[0],
                                       in_channel=in_channels[0],
                                       out_channel=out_channels[0],
                                       stride=strides[0])
        self.layer2 = self._make_layer(block,
                                       layer_nums[1],
                                       in_channel=in_channels[1],
                                       out_channel=out_channels[1],
                                       stride=strides[1])
        self.layer3 = self._make_layer(block,
                                       layer_nums[2],
                                       in_channel=in_channels[2],
                                       out_channel=out_channels[2],
                                       stride=strides[2])
        self.layer4 = self._make_layer(block,
                                       layer_nums[3],
                                       in_channel=in_channels[3],
                                       out_channel=out_channels[3],
                                       stride=strides[3])

        self.mean = ops.ReduceMean(keep_dims=True)   # replaces nn.AdaptiveAvgPool2d(1)
        self.flatten = nn.Flatten()
        self.end_point = _fc(out_channels[3], num_classes)

    def _make_layer(self, block, layer_num, in_channel, out_channel, stride):
        """Stack layer_num residual blocks; only the first block may change stride/channels."""
        layers = []

        resnet_block = block(in_channel, out_channel, stride=stride)
        layers.append(resnet_block)
        for _ in range(1, layer_num):
            resnet_block = block(out_channel, out_channel, stride=1)
            layers.append(resnet_block)
        return nn.SequentialCell(layers)

    def construct(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        c1 = self.maxpool(x)

        c2 = self.layer1(c1)
        c3 = self.layer2(c2)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)

        out = self.mean(c5, (2, 3))
        out = self.flatten(out)
        out = self.end_point(out)

        return out
```

Pass in the ResNet-50 layer configuration to build the full ResNet-50 network:

```python
def resnet50(class_num=10):
    return ResNet(ResidualBlock,
                  [3, 4, 6, 3],            # blocks per stage
                  [64, 256, 512, 1024],    # input channels per stage
                  [256, 512, 1024, 2048],  # output channels per stage
                  [1, 2, 2, 2],            # stride of the first block in each stage
                  class_num)
```
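Putting it together, a minimal smoke test of the whole backbone (class_num here matches the 1000-class ImageNet setup used for the loss later; the device follows your context configuration):

```python
import numpy as np
from mindspore import Tensor, context

context.set_context(mode=context.GRAPH_MODE)

net = resnet50(class_num=1000)
x = Tensor(np.ones((1, 3, 224, 224), np.float32))
print(net(x).shape)  # (1, 1000)
```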

### Migrating backward construction, gradient clipping, the optimizer, learning-rate generation, etc.

Implement the SGD optimizer with Momentum, applying weight decay to all weights except the BN gamma/beta and bias parameters (the learning rate it consumes is sketched below):
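The optimizer below expects a per-step learning rate lr, plus momentum and weight_decay from the training configuration. The original post does not show how lr is generated, so here is only a rough sketch of a linear-warmup plus cosine-decay schedule; every hyper-parameter in it is a placeholder assumption:

```python
import numpy as np
from mindspore import Tensor

def get_lr(lr_max=0.1, warmup_epochs=5, total_epochs=90, steps_per_epoch=1000):
    """Return one learning-rate value per training step (warmup, then cosine decay)."""
    lrs = []
    total_steps = total_epochs * steps_per_epoch
    warmup_steps = warmup_epochs * steps_per_epoch
    for step in range(total_steps):
        if step < warmup_steps:
            lr = lr_max * (step + 1) / warmup_steps
        else:
            t = (step - warmup_steps) / (total_steps - warmup_steps)
            lr = 0.5 * lr_max * (1 + np.cos(np.pi * t))
        lrs.append(lr)
    return Tensor(np.array(lrs, np.float32))

lr = get_lr(steps_per_epoch=dataset.get_dataset_size())  # dataset from create_dataset(...) above
momentum = 0.9
weight_decay = 1e-4
```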

```python
from mindspore.nn.optim import Momentum

# define opt: net, lr, momentum and weight_decay are assumed to exist already
# (net = resnet50(...), lr/momentum/weight_decay from the schedule and config above)
decayed_params = []
no_decayed_params = []
for param in net.trainable_params():
    if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name:
        decayed_params.append(param)
    else:
        no_decayed_params.append(param)

group_params = [{'params': decayed_params, 'weight_decay': weight_decay},
                {'params': no_decayed_params},
                {'order_params': net.trainable_params()}]
opt = Momentum(group_params, lr, momentum)
```

Define the loss function and implement label smoothing:

```python
import mindspore.nn as nn
from mindspore import Tensor
from mindspore import dtype as mstype
from mindspore.nn import LossBase
import mindspore.ops as ops


# define cross entropy loss
class CrossEntropySmooth(LossBase):
    """Cross-entropy loss with label smoothing."""
    def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000):
        super(CrossEntropySmooth, self).__init__()
        self.onehot = ops.OneHot()
        self.sparse = sparse
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction)

    def construct(self, logit, label):
        if self.sparse:
            label = self.onehot(label, ops.shape(logit)[1], self.on_value, self.off_value)
        loss = self.ce(logit, label)
        return loss


# define loss with label smooth
label_smooth_factor = 0.1
loss = CrossEntropySmooth(sparse=True, reduction="mean", smooth_factor=label_smooth_factor, num_classes=1000)
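```

With net, loss and opt in hand, training can be wired up through the high-level Model API; a minimal sketch (the epoch count and metrics here are placeholders, not values from the original post):

```python
from mindspore.train.model import Model
from mindspore.train.callback import LossMonitor

model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
model.train(90, dataset, callbacks=[LossMonitor()], dataset_sink_mode=True)
```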

## Summary

Walking through the whole flow, a few things stand out: MindSpore's nn.Conv2d is quite capable (pad_mode='same' removes the manual padding math); a network defines no build() or call(), since what build() would do lives in __init__ and call() becomes construct(); and the initializer utilities in mindspore.common.initializer cover the common parameter-initialization schemes. Overall the framework provided everything this migration needed.
