MobileNet Model Implementation (PyTorch)

V2

import torch
from torch import nn


# From the official repo: returns the multiple of divisor closest to v
def _make_divisible(v, divisor, min_value=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v: original channel count
    :param divisor: the value the result must be divisible by
    :param min_value: lower bound on the result (defaults to divisor)
    :return: the multiple of divisor closest to v, never more than 10% below v
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that rounding down does not drop the value by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
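
To make the rounding behavior concrete, here is a small illustration (not part of the original code) of what the function returns:

```python
# _make_divisible rounds to the nearest multiple of `divisor`, but never
# drops more than 10% below the original value and never below `min_value`.
print(_make_divisible(37, 8))  # 40: nearest multiple of 8
print(_make_divisible(27, 8))  # 32: rounding down to 24 would lose more than 10%, so it is bumped up
```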


class ConvBNReLU6(nn.Sequential):
    # When groups == in_channel, this is a depthwise (DW) convolution.
    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1):
        padding = (kernel_size - 1) // 2
        super(ConvBNReLU6, self).__init__(
            nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding, groups=groups, bias=False),
            nn.BatchNorm2d(out_channel),
            nn.ReLU6(inplace=True)
        )


class InvertedResidual(nn.Module):
    def __init__(self, in_channel, out_channel, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.use_shortcut = (stride == 1 and in_channel == out_channel)
        hidden_channel = in_channel * expand_ratio
        layers = []
        # If the block does not expand the channels (expand_ratio == 1), skip the first 1x1 expansion conv.
        if expand_ratio != 1:
            layers.append(ConvBNReLU6(in_channel, hidden_channel, kernel_size=1, stride=1, groups=1))
        # The last layer uses a linear activation instead of ReLU6, which in code simply means no activation function.
        layers.extend([ConvBNReLU6(hidden_channel, hidden_channel, kernel_size=3, stride=stride, groups=hidden_channel),
                       nn.Conv2d(hidden_channel, out_channel, kernel_size=1, stride=1, padding=0, groups=1, bias=False),
                       nn.BatchNorm2d(out_channel)])
        self.sequential = nn.Sequential(*layers)

    def forward(self, x):
        y = self.sequential(x)
        if self.use_shortcut:
            return y + x
        return y


class MobileNetV2(nn.Module):
    def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8):
        super(MobileNetV2, self).__init__()
        # Each row: t = expansion factor, c = output channels, n = number of repeats, s = stride of the first block
        bottleneck_weight = [
            # t, c, n, s
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1]
        ]
        block = InvertedResidual
        # input_channel is the previous layer's output channel count (this layer's input); initialized to the output channels of the first conv.
        input_channel = _make_divisible(32 * alpha, round_nearest)
        layers = []
        layers.append(ConvBNReLU6(in_channel=3, out_channel=input_channel, kernel_size=3, stride=2))
        for t, c, n, s in bottleneck_weight:
            # output_channel: output channel count of this stage
            output_channel = _make_divisible(c * alpha, round_nearest)
            for i in range(n):
                # s is the stride of the first of the n bottlenecks in this stage; the remaining bottlenecks use stride 1.
                stride = s if i == 0 else 1
                layers.append(
                    block(in_channel=input_channel, out_channel=output_channel, stride=stride, expand_ratio=t))
                input_channel = output_channel
        last_channel = _make_divisible(1280 * alpha, round_nearest)
        layers.append(ConvBNReLU6(in_channel=output_channel, out_channel=last_channel, kernel_size=1, stride=1))
        layers.append(nn.AdaptiveAvgPool2d((1, 1)))
        layers.append(nn.Flatten())
        self.features = nn.Sequential(*layers)
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(last_channel, num_classes),
            # Softmax needs an explicit dim; omit this layer when training with nn.CrossEntropyLoss,
            # which already applies (log-)softmax internally.
            nn.Softmax(dim=1)
        )
        self._initialize_weights()

    def _initialize_weights(self):
        # Iterate over every module in the network
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x
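
A minimal smoke test for the V2 implementation above (this snippet is an illustration, not part of the original post; the class count of 5 and the 224x224 input are arbitrary choices):

```python
if __name__ == "__main__":
    model = MobileNetV2(num_classes=5, alpha=1.0)
    dummy = torch.randn(1, 3, 224, 224)  # one 224x224 RGB image
    out = model(dummy)
    print(out.shape)  # expected: torch.Size([1, 5])
```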

V3

from typing import Callable, List, Optional

import torch
from torch import nn, Tensor
from torch.nn import functional as F
from functools import partial


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_ch is None:
        min_ch = divisor
    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_ch < 0.9 * ch:
        new_ch += divisor
    return new_ch


class ConvBNActivation(nn.Sequential):
    def __init__(self, input_channels, output_channels, kernel_size=3, stride=1, groups=1,
                 norm_layer: Optional[Callable[..., nn.Module]] = None,
                 activation_layer: Optional[Callable[..., nn.Module]] = None):
        padding = (kernel_size - 1) // 2
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if activation_layer is None:
            activation_layer = nn.ReLU6
        super(ConvBNActivation, self).__init__(
            nn.Conv2d(in_channels=input_channels, out_channels=output_channels, kernel_size=kernel_size, stride=stride,
                      padding=padding, groups=groups, bias=False), norm_layer(num_features=output_channels),
            activation_layer(inplace=True))


class SqueezeExcitation(nn.Module):
    def __init__(self, input_channels):
        super(SqueezeExcitation, self).__init__()
        hidden_channels = _make_divisible(input_channels // 4, 8)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc1 = nn.Conv2d(input_channels, hidden_channels, 1)
        self.fc2 = nn.Conv2d(hidden_channels, input_channels, 1)

    def forward(self, x):
        scale = self.pool(x)
        scale = self.fc1(scale)
        scale = F.relu(scale)
        scale = self.fc2(scale)
        scale = F.hardsigmoid(scale)  # the SE gate uses hard-sigmoid; hardswish would not bound the scale to [0, 1]
        return scale * x


class InvertedResidualConfig:
    # alpha: width multiplier that scales the channel counts
    def __init__(self, input_channels, kernel_size, exp_size, output_channels, use_se, activation_layer, stride,
                 alpha: float = 1.0):
        super(InvertedResidualConfig, self).__init__()
        self.input_channels = _make_divisible(input_channels * alpha, 8)
        self.output_channels = _make_divisible(output_channels * alpha, 8)
        self.kernel_size = kernel_size
        self.exp_size = _make_divisible(exp_size * alpha, 8)
        self.use_se = use_se
        self.activation_layer = activation_layer
        self.stride = stride
        self.alpha = alpha


class bneck(nn.Module):
    def __init__(self, config: InvertedResidualConfig):
        super(bneck, self).__init__()
        self.use_shortcut = config.stride == 1 and config.input_channels == config.output_channels
        self.layers = []
        if config.exp_size != config.input_channels:
            # 1x1 expansion conv with stride 1
            self.layers.append(ConvBNActivation(config.input_channels, config.exp_size, 1, 1, 1,
                                                activation_layer=config.activation_layer))
        self.layers.append(
            ConvBNActivation(config.exp_size, config.exp_size, config.kernel_size, config.stride, config.exp_size,
                             activation_layer=config.activation_layer))
        if config.use_se:
            self.layers.append(SqueezeExcitation(config.exp_size))
        # nn.Identity returns its input unchanged, i.e. no activation function
        self.layers.append(
            ConvBNActivation(config.exp_size, config.output_channels, 1, 1, 1, activation_layer=nn.Identity))
        self.layers = nn.Sequential(*self.layers)

    def forward(self, x):
        y = self.layers(x)
        if self.use_shortcut:
            return y + x
        return y


class MobileNetV3(nn.Module):
    def __init__(self, num_classes=1000):
        super(MobileNetV3, self).__init__()
        # MobileNetV3-Large settings: input_channels, kernel_size, exp_size, output_channels, use_se, activation, stride
        bneck_configs = [
            InvertedResidualConfig(16, 3, 16, 16, False, nn.ReLU6, 1),
            InvertedResidualConfig(16, 3, 64, 24, False, nn.ReLU6, 2),
            InvertedResidualConfig(24, 3, 72, 24, False, nn.ReLU6, 1),
            InvertedResidualConfig(24, 5, 72, 40, True, nn.ReLU6, 2),
            InvertedResidualConfig(40, 5, 120, 40, True, nn.ReLU6, 1),
            InvertedResidualConfig(40, 5, 120, 40, True, nn.ReLU6, 1),
            InvertedResidualConfig(40, 3, 240, 80, False, nn.Hardswish, 2),
            InvertedResidualConfig(80, 3, 200, 80, False, nn.Hardswish, 1),
            InvertedResidualConfig(80, 3, 184, 80, False, nn.Hardswish, 1),
            InvertedResidualConfig(80, 3, 184, 80, False, nn.Hardswish, 1),
            InvertedResidualConfig(80, 3, 480, 112, True, nn.Hardswish, 1),
            InvertedResidualConfig(112, 3, 672, 112, True, nn.Hardswish, 1),
            InvertedResidualConfig(112, 5, 672, 160, True, nn.Hardswish, 2),
            InvertedResidualConfig(160, 5, 960, 160, True, nn.Hardswish, 1),
            InvertedResidualConfig(160, 5, 960, 160, True, nn.Hardswish, 1),
        ]
        self.features = []
        self.features.append(ConvBNActivation(3, 16, 3, 2, 1, activation_layer=nn.Hardswish))
        for config in bneck_configs:
            self.features.append(bneck(config))
        self.features.append(ConvBNActivation(160, 960, 1, 1, 1, activation_layer=nn.Hardswish))
        self.features.append(nn.AdaptiveAvgPool2d((1, 1)))
        self.features = nn.Sequential(*self.features)
        self.classifier = nn.Sequential(
            nn.Linear(960, 1280),
            nn.Hardswish(inplace=True),
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(1280, num_classes)
        )
        self._initialize_weights()

    def _initialize_weights(self):
        # Iterate over every module in the network
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
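
A similar sanity check can be run on the V3 implementation (again illustrative, with an arbitrary class count):

```python
if __name__ == "__main__":
    model = MobileNetV3(num_classes=5)
    dummy = torch.randn(1, 3, 224, 224)  # one 224x224 RGB image
    out = model(dummy)
    print(out.shape)  # expected: torch.Size([1, 5])
```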

The steps for image classification with the MobileNet model provided by PyTorch are as follows:

1. Import the necessary libraries (PIL is used for image loading in step 4, so its import is included here)

```python
import torch
import torchvision
from torchvision import transforms
from PIL import Image
```

2. Load the MobileNet model

```python
model = torchvision.models.mobilenet_v2(pretrained=True)
model.eval()  # switch to inference mode so BatchNorm and Dropout behave correctly
```

This uses the pretrained MobileNet_v2 model shipped with PyTorch; other pretrained models can be selected as needed.

3. Define the preprocessing for the input image

```python
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
```

This chains several transforms: resize the shorter side of the image to 256, center-crop to 224x224, convert the image to a Tensor, and normalize it with the ImageNet mean and standard deviation.

4. Load the input image

```python
img = Image.open('test.jpg')
```

The Image module from the PIL library is used to load the image; any other loading method that yields a PIL image works as well.

5. Preprocess the input image

```python
img_tensor = preprocess(img)
```

This converts the input image to a Tensor and applies the preprocessing defined above.

6. Feed the image into the model for prediction

```python
with torch.no_grad():
    output = model(img_tensor.unsqueeze(0))
    pred = output.argmax(dim=1)
```

`unsqueeze(0)` adds the batch dimension before the tensor is passed to the model. The `torch.no_grad()` context manager disables gradient tracking to reduce memory usage during inference.

7. Print the prediction

```python
print('Predicted class:', pred.item())
```

This prints the predicted class index for the input image.

These are the basic steps for image classification with the MobileNet model provided by PyTorch.
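
As an optional extension of step 7 (not part of the original walkthrough; the variable names `probs`, `top5_prob`, and `top5_idx` are illustrative), the raw logits can be turned into probabilities and the five most likely classes printed:

```python
# Convert logits to probabilities and list the top-5 class indices with their confidence.
probs = torch.nn.functional.softmax(output[0], dim=0)
top5_prob, top5_idx = torch.topk(probs, 5)
for p, idx in zip(top5_prob, top5_idx):
    print(f"class {idx.item()}: {p.item():.4f}")
```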