pytorch构建Alexnet网络模型

在学习CS231n计算机视觉课程的过程中了解到了AlexNet、GoogLeNet以及ResNet的相关内容,故查阅了相关的文献和博客

在这里插入图片描述

其中在Alexnet模型主要运用了以下五个方法:
  • 使用了ReLU激活函数加速收敛
  • 使用GPU并行,加速训练,也为之后的分组卷积理论奠定基础
  • 提出了局部响应归一化(LRN)增加了泛化特性
  • 使用交叠池化防止过拟合
    一般在池化操作中,kernel-size和stride是相等的,但是交叠池化指的是stride < kernel-size
  • 提出DropOut

Alexnet网络模型总共只有五个卷积层和三个全连接层,在论文中将模型均分为两块,分别部署到两个GPU上加快模型的训练(只在特定的卷积层上进行数据交互),但是在只有单个GPU的情况下,就没必要拆分了

数据增强采用了两种方式

  • 通过对训练集的图片进行随机的水平镜像,随机的剪切来扩充训练集
  • 通过对RGB像素值做PCA(主成分分析),然后在主成分方向上添加高斯扰动

网络Alexnet结构

import torch
import torch.nn as nn
import torch.nn.functional as F


class AlexNet(nn.Module):
    """AlexNet-style CNN: five convolutional layers and three fully connected layers.

    The layer sizes assume input batches shaped (N, 3, 224, 224); the
    network returns raw class scores (logits) shaped (N, num_classes).
    """

    def __init__(self, num_classes):
        """Build the feature extractor and the classifier head.

        Args:
            num_classes: number of categories produced by the last linear layer.
        """
        super(AlexNet, self).__init__()
        self.num_classes = num_classes
        self.features = nn.Sequential(
            # (3, 224, 224) -> (96, 55, 55)
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            # Overlapping pooling (stride < kernel_size): (96, 55, 55) -> (96, 27, 27)
            nn.MaxPool2d(kernel_size=3, stride=2),

            # (96, 27, 27) -> (256, 27, 27)
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            # (256, 27, 27) -> (256, 13, 13)
            nn.MaxPool2d(kernel_size=3, stride=2),

            # (256, 13, 13) -> (384, 13, 13)
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            # (384, 13, 13) -> (384, 13, 13)
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            # (384, 13, 13) -> (256, 13, 13)
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            # (256, 13, 13) -> (256, 6, 6)
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        # The attribute name (including its spelling) is kept as is because it
        # is part of the parameter names exposed through state_dict().
        self.classifiler = nn.Sequential(
            # Dropout() uses p=0.5 by default.
            nn.Dropout(),
            nn.Linear(in_features=6 * 6 * 256, out_features=4096),
            # Without an activation the stacked linear layers would collapse
            # into a single linear map.
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(in_features=4096, out_features=self.num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: image batch; the classifier expects the feature extractor to
                produce 256 x 6 x 6 values per sample.

        Returns:
            Tensor of shape (N, num_classes).
        """
        x = self.features(x)
        # Flatten per sample. Unlike reshape(-1, ...), this keeps the batch
        # dimension fixed, so a wrong input size fails loudly in the first
        # linear layer instead of being silently re-batched.
        x = torch.flatten(x, start_dim=1)
        x = self.classifiler(x)
        return x

try:
    from torch.hub import load_state_dict_from_url
except ImportError:
    # Fallback location of the same helper, used when torch.hub does not provide it.
    from torch.utils.model_zoo import load_url as load_state_dict_from_url

# Download locations of pretrained weights, keyed by model name.
model_url = {
    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
}


def alexnet(pretrain=False, show=True, **kwargs):
    """Create an AlexNet model, optionally initialised from downloaded weights.

    Args:
        pretrain: if True, download the weights listed in ``model_url`` and
            load them into the model. Defaults to False.
        show: whether to display a progress bar while downloading.
        **kwargs: forwarded to the ``AlexNet`` constructor (e.g. ``num_classes``).

    Returns:
        The constructed model.

    Raises:
        RuntimeError: from ``load_state_dict`` when the downloaded parameter
            names or shapes do not match this model.
    """
    model_in = AlexNet(**kwargs)
    if pretrain:
        # NOTE(review): the referenced checkpoint appears to be published for
        # torchvision's AlexNet; confirm that its layer names and shapes match
        # this class before relying on pretrain=True.
        state_dict = load_state_dict_from_url(model_url['alexnet'], progress=show)
        model_in.load_state_dict(state_dict)

    return model_in

#
# device = torch.device("cuda:0" if torch.cuda.is_available else "cpu")
# x = torch.randn((1, 3, 224, 224))
# net = alexnet()
# net.to(device)
# x = net.forward(x.cuda())
#
# torch.save(net.state_dict, r"D:\QQPCmgr\Desktop\a.pth")

训练模型:

import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import numpy as np
from net import alexnet

# Resize the images, then apply a random horizontal flip and a random crop
# down to the 224 x 224 crop size.
mytransforms = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(224),
    transforms.ToTensor()])
trainset = CIFAR10(download=False, root=r'D:\QQPCmgr\Desktop\data', transform=mytransforms)

# Small training / held-out splits. Subset keeps going through
# trainset.__getitem__, so the transforms above are applied, and the result
# is indexable and has a length (a zip over the raw .data/.targets arrays is
# a one-shot iterator of untransformed uint8 images).
Ctrainset = torch.utils.data.Subset(trainset, range(3000))
Ctestset = torch.utils.data.Subset(trainset, range(3000, 3500))


class CTraindata(Dataset):
    """Dataset wrapper that exposes the (sample, label) pairs of an indexable source."""

    def __init__(self, trainset):
        """Store the wrapped source.

        Args:
            trainset: object supporting ``len()`` and integer indexing, where
                each item holds the sample at position 0 and the label at
                position 1.
        """
        super(CTraindata, self).__init__()
        self.trainset = trainset

    def __len__(self):
        """Return the number of items in the wrapped source."""
        return len(self.trainset)

    def __getitem__(self, index):
        """Return ``(sample, label)`` for ``index``."""
        # Index the source exactly once: every lookup may run a (random)
        # transform pipeline, so a second lookup would double the work and
        # discard one of the two transformed samples.
        item = self.trainset[index]
        return item[0], item[1]


# Hyper-parameters
epoch = 50  # number of passes over the training data
num_classes = 10
batch_size = 64
lr = 1e-4
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# Train on the first 3000 samples only: samples 3000-3499 are held out for
# evaluation and must not be seen during training. Shuffle so that every
# epoch visits the samples in a different order.
trainLoader = DataLoader(dataset=CTraindata(torch.utils.data.Subset(trainset, range(3000))),
                         batch_size=batch_size,
                         shuffle=True)
# Directory prefix for the saved checkpoints.
path = 'D://Desktop/data/'
if __name__ == '__main__':
    # Train the network; after every epoch measure accuracy on held-out
    # samples and save a checkpoint named after that accuracy and the epoch.
    device = ("cuda:0" if torch.cuda.is_available() else "cpu")
    net = alexnet(num_classes=num_classes)
    net.to(device)
    # The loader yields integer class indices and each image has exactly one
    # class, so use cross entropy on the logits (BCEWithLogitsLoss needs
    # float targets and rejected the integer one-hot tensor).
    certrion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(params=net.parameters(), lr=lr)

    # Held-out samples 3000-3499, read through the dataset so the transform
    # pipeline yields tensors of the size the network expects.
    # NOTE(review): this reuses the training transforms, so the evaluation
    # images are randomly flipped/cropped as well.
    test_loader = DataLoader(dataset=torch.utils.data.Subset(trainset, range(3000, 3500)),
                             batch_size=batch_size,
                             shuffle=False)

    for i in range(epoch):
        net.train()  # re-enable dropout after the evaluation pass below
        for index, data in enumerate(trainLoader, 0):
            optimizer.zero_grad()
            X, y = data
            X = X.to(device)
            y = y.to(device)
            y_pre = net(X)
            loss = certrion(y_pre, y)
            loss.backward()
            optimizer.step()
            print("[epoch %d, loss %.4f]" % (i, loss.item()))

        # Evaluate without dropout and without building autograd graphs.
        net.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for CtestX, CY in test_loader:
                CY = CY.to(device)
                CY_pre = net(CtestX.to(device))
                correct += (CY_pre.argmax(dim=1) == CY).sum().item()
                total += CY.size(0)
        accuracy = correct / total
        # Save the parameters themselves (state_dict() must be called); the
        # file name holds the first characters of the accuracy and the epoch.
        torch.save(net.state_dict(), path + str(accuracy)[:5] + '_' + str(i) + '.pth')

预测模型:

from net import AlexNet
import torch
import torch.nn as nn
import torch.nn.functional as F
import PIL.Image as Image
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import numpy as np


# Prediction script: load trained weights and classify one image whose path
# is read from standard input.
device = ("cuda:0" if torch.cuda.is_available() else "cpu")
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
net = AlexNet(num_classes=len(classes))
# Path of a saved state_dict (.pth); must be filled in before running.
weight_path = ""
# load_state_dict copies the stored parameters into the model; map_location
# lets weights saved on a GPU be loaded on a CPU-only machine.
net.load_state_dict(torch.load(weight_path, map_location=device))
net.to(device)
net.eval()  # disable dropout for inference
if __name__ == '__main__':
    img_path = str(input())
    # Force three channels so grayscale / RGBA files match the network input.
    img = Image.open(img_path).convert('RGB')
    # Resize the image to exactly the network input size (an int argument
    # would only fix the shorter side).
    mytransforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    # Add the batch dimension expected by the convolution layers.
    imgX = mytransforms(img).unsqueeze(0).to(device)
    with torch.no_grad():
        # Softmax turns the logits into per-class scores that sum to 1.
        y = F.softmax(net(imgX), dim=1).squeeze(0).cpu().numpy()
    print("The result class is %s, and the accuracy is %f" % (classes[np.argmax(y)], y[np.argmax(y)]))
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值