## Overview
Network migration means porting a network built in another framework over to MindSpore. I happen to have exactly such a migration task on hand, so this is a good hands-on opportunity to learn the process step by step.
## Preparation
Install MindSpore; this walkthrough uses version 1.6.1.
## Network Script Analysis
### Operator Analysis
| PyTorch operator | MindSpore counterpart | Required functionality supported? |
|---|---|---|
| nn.Conv2D | nn.Conv2d | Yes |
| nn.BatchNorm2D | nn.BatchNorm2d | Yes |
| nn.ReLU | nn.ReLU | Yes |
| nn.MaxPool2D | nn.MaxPool2d | Yes |
| nn.AdaptiveAvgPool2D | None | Not supported (see note below) |
| nn.Linear | nn.Dense | Yes |
| torch.flatten | nn.Flatten | Yes |
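The one gap, nn.AdaptiveAvgPool2D, is not a blocker for ResNet: the network only needs global average pooling, and ops.ReduceMean over the spatial axes provides exactly that (the ResNet code later in this post uses it). A minimal sketch of the equivalence:

```python
import numpy as np
import mindspore.ops as ops
from mindspore import Tensor

# Global average pooling over H and W; for this use case it matches
# PyTorch's nn.AdaptiveAvgPool2d(1).
mean = ops.ReduceMean(keep_dims=True)
x = Tensor(np.ones((1, 2048, 7, 7), np.float32))
pooled = mean(x, (2, 3))
print(pooled.shape)  # (1, 2048, 1, 1)
```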
### Feature Analysis
| PyTorch feature | MindSpore counterpart |
|---|---|
| nn.init.kaiming_normal_ | initializer(init='HeNormal') |
| nn.init.constant_ | initializer(init='Constant') |
| nn.Sequential | nn.SequentialCell |
| nn.Module | nn.Cell |
| torch.distributed | context.set_auto_parallel_context |
| torch.optim.SGD | nn.SGD or nn.Momentum |
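For the initialization mappings in the table, MindSpore layers accept the initializer directly through their weight_init argument, or you can build an initialized Tensor with mindspore.common.initializer.initializer. A quick sketch; the shapes and arguments here are illustrative:

```python
import mindspore.nn as nn
from mindspore.common.initializer import initializer, HeNormal

# kaiming_normal_ equivalent: pass HeNormal as the layer's weight_init
conv = nn.Conv2d(3, 64, kernel_size=7, stride=2, pad_mode='same',
                 weight_init=HeNormal(mode='fan_out', nonlinearity='relu'))

# or create an initialized Tensor explicitly
weight = initializer(HeNormal(), shape=(64, 3, 7, 7))
```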
## Network Script Development
### Migrating the Data Preprocessing
import os
from mindspore import dtype as mstype
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2

def create_dataset(dataset_path, batch_size=32, rank_size=1, rank_id=0, do_train=True):
    """
    Create a train or eval ImageNet2012 dataset for ResNet50.

    Args:
        dataset_path(string): the path of the dataset.
        batch_size(int): the batch size of the dataset. Default: 32.
        rank_size(int): total number of devices for training. Default: 1;
            greater than 1 in distributed training.
        rank_id(int): logical sequence number of this device. Default: 0;
            ranges over [0, rank_size) in distributed training.
        do_train(bool): whether the dataset is used for training. Default: True.

    Returns:
        dataset
    """
    data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=do_train,
                                     num_shards=rank_size, shard_id=rank_id)

    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    # define map operations
    trans = [
        C.Decode(),
        C.Resize(256),
        C.CenterCrop(224),
        C.Normalize(mean=mean, std=std),
        C.HWC2CHW()
    ]
    type_cast_op = C2.TypeCast(mstype.int32)

    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=8)
    data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

    # apply batch operations
    data_set = data_set.batch(batch_size, drop_remainder=do_train)
    return data_set
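A quick way to sanity-check the pipeline is to pull a single batch; the dataset path below is a placeholder:

```python
# illustrative usage of create_dataset defined above
dataset = create_dataset("/path/to/imagenet2012/train", batch_size=32)
for batch in dataset.create_dict_iterator(num_epochs=1):
    print(batch["image"].shape, batch["label"].shape)  # (32, 3, 224, 224) (32,)
    break
```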
### Migrating the Subnets and the Full Network
First, re-implement conv3x3 and conv1x1. MindSpore's nn.Conv2d turns out to be quite capable here: with pad_mode='same' it works out the padding automatically, so no manual padding arithmetic is needed.
import mindspore.nn as nn

def _conv3x3(in_channel, out_channel, stride=1):
    return nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride,
                     padding=0, pad_mode='same')

def _conv1x1(in_channel, out_channel, stride=1):
    return nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride,
                     padding=0, pad_mode='same')
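The blocks and the full network below also rely on _bn, _bn_last, _conv7x7, and _fc helpers that this post does not show. Here is a sketch consistent with how they are used; the initialization arguments follow the MindSpore ModelZoo ResNet and should be treated as an assumption:

```python
def _bn(channel):
    return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
                          gamma_init=1, beta_init=0,
                          moving_mean_init=0, moving_var_init=1)

def _bn_last(channel):
    # zero-init gamma on the last BN of a bottleneck so the residual
    # branch starts out close to identity (a common ResNet trick)
    return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9,
                          gamma_init=0, beta_init=0,
                          moving_mean_init=0, moving_var_init=1)

def _conv7x7(in_channel, out_channel, stride=1):
    return nn.Conv2d(in_channel, out_channel, kernel_size=7, stride=stride,
                     padding=0, pad_mode='same')

def _fc(in_channel, out_channel):
    return nn.Dense(in_channel, out_channel, has_bias=True)
```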
Next, re-implement BasicBlock and Bottleneck and stack them up layer by layer:
class ResidualBlockBase(nn.Cell):
    """Basic residual block (two 3x3 convolutions), used by ResNet18/34."""

    def __init__(self,
                 in_channel,
                 out_channel,
                 stride=1):
        super(ResidualBlockBase, self).__init__()
        self.conv1 = _conv3x3(in_channel, out_channel, stride=stride)
        self.bn1d = _bn(out_channel)
        self.conv2 = _conv3x3(out_channel, out_channel, stride=1)
        self.bn2d = _bn(out_channel)
        self.relu = nn.ReLU()

        # a 1x1 projection shortcut is needed whenever the shape changes
        self.down_sample = False
        if stride != 1 or in_channel != out_channel:
            self.down_sample = True
        self.down_sample_layer = None
        if self.down_sample:
            self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride),
                                                        _bn(out_channel)])

    def construct(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1d(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2d(out)

        if self.down_sample:
            identity = self.down_sample_layer(identity)

        out = out + identity
        out = self.relu(out)
        return out


class ResidualBlock(nn.Cell):
    """Bottleneck residual block (1x1 -> 3x3 -> 1x1), used by ResNet50/101/152."""
    expansion = 4

    def __init__(self,
                 in_channel,
                 out_channel,
                 stride=1):
        super(ResidualBlock, self).__init__()
        self.stride = stride
        channel = out_channel // self.expansion
        self.conv1 = _conv1x1(in_channel, channel, stride=1)
        self.bn1 = _bn(channel)
        if self.stride != 1:
            # downsample with a stride-1 conv followed by max pooling
            self.e2 = nn.SequentialCell([_conv3x3(channel, channel, stride=1), _bn(channel),
                                         nn.ReLU(), nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='same')])
        else:
            self.conv2 = _conv3x3(channel, channel, stride=stride)
            self.bn2 = _bn(channel)
        self.conv3 = _conv1x1(channel, out_channel, stride=1)
        self.bn3 = _bn_last(out_channel)
        self.relu = nn.ReLU()

        self.down_sample = False
        if stride != 1 or in_channel != out_channel:
            self.down_sample = True
        self.down_sample_layer = None
        if self.down_sample:
            self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride),
                                                        _bn(out_channel)])

    def construct(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        if self.stride != 1:
            out = self.e2(out)
        else:
            out = self.conv2(out)
            out = self.bn2(out)
            out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.down_sample:
            identity = self.down_sample_layer(identity)

        out = out + identity
        out = self.relu(out)
        return out
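Before wiring up the full network, a quick shape check on one block helps catch mistakes early; the input size below is just for illustration:

```python
import numpy as np
from mindspore import Tensor

block = ResidualBlock(64, 256, stride=1)  # in != out, so the projection shortcut is used
x = Tensor(np.ones((1, 64, 56, 56), np.float32))
print(block(x).shape)  # (1, 256, 56, 56)
```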
Now re-implement the full ResNet family. There is no build() or call(): the build logic goes into __init__, and call becomes construct:
import mindspore.ops as ops

class ResNet(nn.Cell):
    def __init__(self,
                 block,
                 layer_nums,
                 in_channels,
                 out_channels,
                 strides,
                 num_classes):
        super(ResNet, self).__init__()
        if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
            raise ValueError("the length of layer_nums, in_channels, out_channels list must be 4!")
        self.conv1 = _conv7x7(3, 64, stride=2)
        self.bn1 = _bn(64)
        self.relu = ops.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
        self.layer1 = self._make_layer(block,
                                       layer_nums[0],
                                       in_channel=in_channels[0],
                                       out_channel=out_channels[0],
                                       stride=strides[0])
        self.layer2 = self._make_layer(block,
                                       layer_nums[1],
                                       in_channel=in_channels[1],
                                       out_channel=out_channels[1],
                                       stride=strides[1])
        self.layer3 = self._make_layer(block,
                                       layer_nums[2],
                                       in_channel=in_channels[2],
                                       out_channel=out_channels[2],
                                       stride=strides[2])
        self.layer4 = self._make_layer(block,
                                       layer_nums[3],
                                       in_channel=in_channels[3],
                                       out_channel=out_channels[3],
                                       stride=strides[3])
        self.mean = ops.ReduceMean(keep_dims=True)
        self.flatten = nn.Flatten()
        self.end_point = _fc(out_channels[3], num_classes)

    def _make_layer(self, block, layer_num, in_channel, out_channel, stride):
        # the first block may change stride/channels; the rest keep them fixed
        layers = []
        resnet_block = block(in_channel, out_channel, stride=stride)
        layers.append(resnet_block)
        for _ in range(1, layer_num):
            resnet_block = block(out_channel, out_channel, stride=1)
            layers.append(resnet_block)
        return nn.SequentialCell(layers)

    def construct(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        c1 = self.maxpool(x)

        c2 = self.layer1(c1)
        c3 = self.layer2(c2)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)

        out = self.mean(c5, (2, 3))  # global average pooling
        out = self.flatten(out)
        out = self.end_point(out)
        return out
Pass in the ResNet50 layer configuration to assemble the full ResNet50:
def resnet50(class_num=10):
    return ResNet(ResidualBlock,
                  [3, 4, 6, 3],
                  [64, 256, 512, 1024],
                  [256, 512, 1024, 2048],
                  [1, 2, 2, 2],
                  class_num)
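A minimal smoke test of the assembled network, assuming the standard 224x224 input produced by the preprocessing above:

```python
import numpy as np
from mindspore import Tensor

net = resnet50(class_num=1000)
dummy = Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
print(net(dummy).shape)  # expected: (1, 1000)
```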
### Migrating Backward Construction, Gradient Clipping, the Optimizer, Learning-Rate Generation, etc.
Implement the SGD optimizer with momentum, applying weight decay to every weight except the BatchNorm beta/gamma and bias parameters:
from mindspore.nn import Momentum

# define opt; net, lr, momentum and weight_decay are assumed to be defined earlier
decayed_params = []
no_decayed_params = []
for param in net.trainable_params():
    if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name:
        decayed_params.append(param)
    else:
        no_decayed_params.append(param)

group_params = [{'params': decayed_params, 'weight_decay': weight_decay},
                {'params': no_decayed_params},
                {'order_params': net.trainable_params()}]  # preserve original parameter order
opt = Momentum(group_params, lr, momentum)
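The heading also mentions learning-rate generation, which the snippet above leaves out. A common pattern in MindSpore ResNet scripts is to precompute one learning rate per step and hand the whole schedule to the optimizer in place of a scalar. A sketch with linear warmup plus cosine decay; all hyper-parameter values are illustrative:

```python
import numpy as np
from mindspore import Tensor

def get_lr(lr_max=0.1, warmup_epochs=5, total_epochs=90, steps_per_epoch=625):
    """Linear warmup followed by cosine decay, one value per training step."""
    total_steps = total_epochs * steps_per_epoch
    warmup_steps = warmup_epochs * steps_per_epoch
    lr_each_step = []
    for i in range(total_steps):
        if i < warmup_steps:
            lr = lr_max * (i + 1) / warmup_steps
        else:
            decay_ratio = (i - warmup_steps) / (total_steps - warmup_steps)
            lr = lr_max * 0.5 * (1.0 + np.cos(np.pi * decay_ratio))
        lr_each_step.append(lr)
    return Tensor(np.array(lr_each_step, dtype=np.float32))

# a per-step schedule can replace the scalar lr above:
# opt = Momentum(group_params, get_lr(), momentum)
```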
Define the loss function and implement label smoothing:
import mindspore.nn as nn
from mindspore import Tensor
from mindspore import dtype as mstype
from mindspore.nn import LossBase
import mindspore.ops as ops

# define cross entropy loss
class CrossEntropySmooth(LossBase):
    """CrossEntropy with label smoothing."""
    def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000):
        super(CrossEntropySmooth, self).__init__()
        self.onehot = ops.OneHot()
        self.sparse = sparse
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction)

    def construct(self, logit, label):
        if self.sparse:
            label = self.onehot(label, ops.shape(logit)[1], self.on_value, self.off_value)
        loss = self.ce(logit, label)
        return loss

# define loss with label smoothing
label_smooth_factor = 0.1
loss = CrossEntropySmooth(sparse=True, reduction="mean",
                          smooth_factor=label_smooth_factor, num_classes=1000)
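With the dataset, network, loss, and optimizer in place, training typically goes through mindspore.Model. A sketch tying the pieces above together; the dataset path and epoch count are placeholders:

```python
from mindspore import Model
from mindspore.train.callback import LossMonitor

# assumes create_dataset, net, loss and opt from the sections above
dataset = create_dataset("/path/to/imagenet2012/train", batch_size=32)
model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
model.train(90, dataset, callbacks=[LossMonitor()], dataset_sink_mode=True)
```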
## Summary
Walking through the whole flow, a few things stood out. MindSpore's nn.Conv2d is quite capable, with pad_mode='same' taking care of the padding arithmetic. Network definitions have no build() or call(): the build logic lives in __init__, and call becomes construct. And the initializer from mindspore.common.initializer supports a wide range of parameter initializations. Overall, the framework covered everything this migration needed.