mobilenetv2

前言

MobileNet 是轻量级卷积神经网络系列,现在已经有v1、v2、 v3.

MobileNet v2 是对Mobile v1 的改进。本章利用MobileNet v2对Cifar10做分类任务

部分网络结构说明

MobileNet v1

  1. 采用了depth-wise seperable convolution(深度可分离卷积)减少运算量
  2. 激活层用了RELU6

MobileNet v2

  1. 与MobileNet v1一样,采用深度可分离卷积,采用Depth-wise和Point-wise提取特征,但是v2版本在深度可分离卷积中DW层前面再加入了1x1的PW层,并且去掉了第二个PW的激活函数,即下图的Linear Bottleneck

    image-20220313103103767

  2. 在v1版本上添加了shortcut,采用1x1->3x3->1x1的模式。

    image-20220313104106974

    • ResNet 使用 标准卷积 提特征,MobileNet 始终使用 DW卷积 提特征。
    • ResNet 先降维 (0.25倍)、卷积、再升维,而 MobileNet V2 则是 先升维 (6倍)、卷积、再降维。直观的形象上来看,ResNet 的微结构是沙漏形,而 MobileNet V2 则是纺锤形,刚好相反。因此论文作者将 MobileNet V2 的结构称为 Inverted Residual Block。这么做也是因为使用DW卷积而作的适配,希望特征提取能够在高维进行。 (https://zhuanlan.zhihu.com/p/33075914)

深度可分离卷积是如何减少计算量

假设我们需要从5x5x3的图片获取3x3x4的feature map,正常的卷积如下

image-20220313104623730

卷积层共4个filter,每个filter有3个卷积核,每个卷积核为3x3,所以卷积层参数数量为
N_std = 4 × 3 × 3 × 3 = 108
深度可分离卷积如下

image-20220313105914328

深度可分离卷积由DW与PW组成,DW为3个filter,每个filter为1个3x3的卷积核,则DW的卷积层参数量为3x3x3=27;PW为4个1x1的卷积核组成的filter,则PW的卷积层参数量为1x1x3x4=12
N_dsc = DW + PW = 27 + 12 = 39
在获得同样输出的情况下,深度可分离卷积所需的参数量远小于正常卷积。

训练采用torchvision自带的数据集,首先下载数据集:

def write_img(path, data, index):
    """Save one (PIL image, label) sample to disk as a .jpg file.

    Files are laid out as <path>/<class_name>/<class_name>_<index>.jpg so the
    class can later be recovered from the file-name prefix.

    Args:
        path: root directory to save under (one sub-directory per class).
        data: a (PIL.Image, label_index) tuple as yielded by the CIFAR10 dataset.
        index: running sample index, used to make the file name unique.
    """
    class_name = classes[int(data[1])]
    class_dir = os.path.join(path, class_name)
    # exist_ok=True: this runs from a ThreadPoolExecutor, so the old
    # "exists() then makedirs()" pattern raced between threads.
    os.makedirs(class_dir, exist_ok=True)
    img_path = os.path.join(class_dir, str(class_name) + "_" + str(index) + ".jpg")
    data[0].save(img_path)


def get_cifar10_data(train_save_path=r"./dataset/train_data", test_save_path=r"./dataset/test_data"):
    """Download torchvision's CIFAR-10 and dump both splits to disk as .jpg files.

    Args:
        train_save_path: output directory for the train split.
        test_save_path: output directory for the test split.
    """
    # Test split (train=False). The original stored this in a variable named
    # `train_data`, which was misleading.
    test_data = torchvision.datasets.cifar.CIFAR10(root="./dataset", train=False, download=True)
    with ThreadPoolExecutor(max_workers=None) as pool:
        for index, image in enumerate(test_data):
            pool.submit(write_img, test_save_path, image, index)
    # Train split.
    train_data = torchvision.datasets.cifar.CIFAR10(root="./dataset", train=True, download=True)
    with ThreadPoolExecutor(max_workers=None) as pool:
        for index, image in enumerate(train_data):
            pool.submit(write_img, train_save_path, image, index)

加载数据集,创建数据集的加载类,并且以Dataloader加载到内存进行训练

class ImageDataset(Dataset):
    """Dataset over a directory tree of <path>/<class_name>/<class>_<idx>.jpg files.

    The label is recovered from the file-name prefix (text before the first
    '_'), looked up in `classes`.
    """

    def __init__(self, path, classes, iftrain=False):
        super(ImageDataset, self).__init__()
        # 224x224: the pretrained MobileNetV2 backbone expects ImageNet-sized input.
        self.transform = Compose([
            Resize((224, 224)),
            ToTensor(),
            Normalize(
                mean=[0.5, 0.5, 0.5],
                std=[0.5, 0.5, 0.5]
            ),
        ])
        self.iftrain = iftrain  # currently unused; kept for interface compatibility
        self.files = glob.glob(os.path.join(path, "*", "*.jpg"))
        self.classes = classes

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        # BUG FIX: the original used random.choice(self.files) and ignored idx,
        # which samples with replacement each epoch and makes validation
        # non-deterministic. Index deterministically instead.
        image_path = self.files[idx]
        image_name = os.path.basename(image_path)
        label_id = self.classes.index(image_name.split("_")[0])
        # convert("RGB") guards against grayscale/palette images.
        image = Image.open(image_path).convert("RGB")
        image = self.transform(image)
        return image, label_id

模型修改,本次采用的是MobileNet v2预训练模型,但是使用的数据集是cifar10,只有10个分类,如果使用官方的模型直接训练,官方的预训练模型为1000分类(ImageNet1k)那么训练cifar10的模型将不能拟合,我们需要对模型进行修改,改变最后进行分类的全连接层,将全连接层的输出由1000分类改为10分类。

class MobileNetV2(nn.Module):
    """Wrap a torchvision mobilenet_v2 and (optionally) swap its classifier head.

    Args:
        model: a torchvision mobilenet_v2 instance
            (``features`` backbone + ``classifier = [Dropout, Linear]``).
        classes_num: if given, replace the original 1000-way head with a fresh
            ``classes_num``-way Linear layer; if None, reuse the model's head.
    """

    def __init__(self, model, classes_num=None):
        super(MobileNetV2, self).__init__()
        self.features = model.features
        self.dropout = model.classifier[0]
        if classes_num is not None:
            # mobilenet_v2's final feature map has 1280 channels.
            self.linear = nn.Linear(in_features=1280, out_features=classes_num, bias=True)
        else:
            # BUG FIX: torchvision's mobilenet_v2 keeps its Linear head at
            # classifier[1]; it has no `.linear` attribute, so the original
            # else-branch crashed with AttributeError.
            self.linear = model.classifier[1]

    def forward(self, x):
        x = self.features(x)
        # Global average pool to 1x1 so any input spatial size works.
        x = nn.functional.adaptive_avg_pool2d(x, (1, 1))
        x = self.dropout(x)
        x = torch.flatten(x, 1)
        x = self.linear(x)
        return x

训练主体代码

import os
import torch.nn
import torchvision
from torch.utils.data import DataLoader
from tqdm import tqdm

from ImageDataset import ImageDataset
from mobile_net.utils import MobileNetV2

# CIFAR-10 class names, ordered so that list index == label id
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# Train on GPU; NOTE(review): assumes a CUDA device is available — no CPU fallback
device = torch.device("cuda")

def fit(model, train_loader, val_loader, criterion, optimizer, lr_scheduler, epoches=10,
        save_dir=r"/data/kile/other/Inception/model"):
    """Train `model` for up to `epoches` epochs, validating after each one.

    Training stops early once ReduceLROnPlateau drives the learning rate
    below 1e-5. The whole model is saved after every epoch as
    <save_dir>/<epoch>_<acc>.pth.

    Args:
        model: the network to train (moved to the module-level `device`).
        train_loader / val_loader: DataLoaders yielding (images, labels).
        criterion: loss function.
        optimizer: optimizer over model.parameters().
        lr_scheduler: ReduceLROnPlateau (stepped with validation loss).
        epoches: maximum number of epochs.
        save_dir: directory for per-epoch checkpoints (new parameter, defaults
            to the previously hard-coded path).
    """
    # Move the model once, up front — the original called model.to(device)
    # inside the batch loop, which is redundant work every iteration.
    model.to(device=device)
    for epoch in range(epoches):
        # Early stop: ReduceLROnPlateau has shrunk the lr to a negligible value.
        if float(optimizer.param_groups[0]["lr"]) < 0.00001:
            break
        model.train()
        sum_loss = 0
        with tqdm(total=len(train_loader), desc=f"train Epoch: {epoch} lr: {optimizer.param_groups[0]['lr']}") as t:
            for images, labels in train_loader:
                optimizer.zero_grad()
                images, labels = images.to(device=device), labels.to(device=device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                sum_loss += loss.item()
                t.update(1)
                t.set_postfix(loss=float(loss.item()))
            avg_loss = sum_loss / len(train_loader)
            t.set_postfix(avg_loss=avg_loss)
        # Validation pass. The original used model.cuda().eval(); the model is
        # already on `device`, so only switch to eval mode here.
        model.eval()
        sum_loss = 0
        correct = 0
        with tqdm(total=len(val_loader), desc=f"val Epoch: {epoch}") as tv:
            for images, labels in val_loader:
                images, labels = images.to(device=device), labels.to(device=device)
                with torch.no_grad():
                    outputs = model(images)
                _, pred = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                correct += torch.sum(pred == labels)
                sum_loss += loss.item()
                tv.set_postfix(loss=float(loss.item()))
                tv.update(1)
            acc = float(correct) / len(val_loader.dataset)
            val_loss = sum_loss / len(val_loader)
            tv.set_postfix(val_loss=val_loss, acc=acc)
        # Scheduler is keyed on validation loss (mode="min").
        lr_scheduler.step(val_loss)
        torch.save(model, os.path.join(save_dir, f"{epoch}_{acc}.pth"))


def train(train_data_path=r"./dataset/train_data", val_data_path=r"./dataset/val_data", batch_size=64, lr=0.01):
    """Fine-tune a pretrained MobileNetV2 on the local CIFAR-10 image folders.

    Args:
        train_data_path / val_data_path: roots of the per-class .jpg folders.
        batch_size: DataLoader batch size.
        lr: initial Adam learning rate.
    """
    # Data loading. BUG FIX: the original passed iftrain=train, i.e. the
    # `train` FUNCTION object (always truthy), not the boolean True.
    train_loader = DataLoader(ImageDataset(train_data_path, classes=classes, iftrain=True), batch_size=batch_size,
                              shuffle=True)
    val_loader = DataLoader(ImageDataset(val_data_path, classes=classes), batch_size=batch_size)
    # Load pretrained mobilenet v2 and swap in a len(classes)-way head.
    model = torchvision.models.mobilenet_v2(pretrained=True)
    model = MobileNetV2(model, len(classes))
    # Adam optimizer.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Cross-entropy: the standard choice for multi-class classification.
    criterion = torch.nn.CrossEntropyLoss()
    # Reduce lr on plateau of val loss; fit() stops once lr gets tiny,
    # giving automatic early stopping.
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", patience=3)
    fit(model, train_loader, val_loader, criterion, optimizer, lr_scheduler, epoches=100)


# Script entry point: run training with the default paths and hyper-parameters.
if __name__ == '__main__':
    train()

预测代码

训练准确率在81%,测试另外的一部分数据集准确率为80.7%

def pytorch_python_predict(testpath):
    """Load the saved full-model checkpoint and evaluate it on `testpath`."""
    checkpoint = r"/data/kile/other/Inception/model/40_0.8137084944659287.pth"
    net = torch.load(checkpoint)
    net = net.eval().cuda()
    predict(net, testpath)
def predict(model, predict_path=r"./dataset/test_data"):
    """Run `model` over every image under `predict_path` and report accuracy.

    Args:
        model: an eval-mode model already on the GPU.
        predict_path: root of the per-class .jpg folders to evaluate.

    Returns:
        Top-1 accuracy as a float in [0, 1].
    """
    test_loader = DataLoader(PredictImageDataset(predict_path, classes=classes, iftrain=False), batch_size=1)
    total = 0
    for images, labels, images_path in tqdm(test_loader):
        images, labels = images.cuda(), labels.cuda()
        with torch.no_grad():
            outputs = model(images)
            _, pred = torch.max(outputs, 1)
            # .item(): accumulate a plain Python int, not a 0-d GPU tensor.
            # (Leftover debug prints of outputs/pred removed.)
            total += torch.sum(labels == pred).item()
    accuracy = float(total) / len(test_loader.dataset)
    print(f"correct percent : {accuracy}")
    return accuracy
  • 1
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

kui9702

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值