Deep Learning: AlexNet

1. Network Structure
1.1 Overview

AlexNet was designed by Alex Krizhevsky and his advisor Geoffrey Hinton, and won the 2012 ImageNet competition. On the ILSVRC-2010 test set, the network achieved error rates of 37.5% (top-1) and 17.0% (top-5).

Paper: "ImageNet Classification with Deep Convolutional Neural Networks"

The main characteristics of AlexNet are:

  • training on GPUs;
  • the ReLU activation function;
  • LRN (Local Response Normalization); this scheme appears less and less in later CNNs, having been largely replaced by BatchNorm (a usage sketch follows this list);
  • Dropout, to prevent overfitting.
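
The implementation in Section 2 omits LRN. For completeness, here is a minimal sketch (my addition, not part of the original post) of how it could be inserted after the first conv + ReLU, using PyTorch's nn.LocalResponseNorm with the hyperparameters from the paper (n=5, alpha=1e-4, beta=0.75, k=2); the layer shapes are taken from the table below:

import torch.nn as nn

# LRN as described in the AlexNet paper, applied after the ReLU of the
# first convolution (the paper also applies it after the second).
block = nn.Sequential(
    nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
    nn.ReLU(inplace=True),
    nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0),
    nn.MaxPool2d(kernel_size=3, stride=2),
)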
1.2 Architecture

| layer   | input      | kernel size | padding | stride | output     | extra            |
|---------|------------|-------------|---------|--------|------------|------------------|
| conv    | 3x224x224  | 11x11       | 2       | 4      | 96x55x55   | relu             |
| maxpool | 96x55x55   | 3x3         | 0       | 2      | 96x27x27   |                  |
| conv    | 96x27x27   | 5x5         | 2       | 1      | 256x27x27  | relu             |
| maxpool | 256x27x27  | 3x3         | 0       | 2      | 256x13x13  |                  |
| conv    | 256x13x13  | 3x3         | 1       | 1      | 384x13x13  | relu             |
| conv    | 384x13x13  | 3x3         | 1       | 1      | 384x13x13  | relu             |
| conv    | 384x13x13  | 3x3         | 1       | 1      | 256x13x13  | relu             |
| maxpool | 256x13x13  | 3x3         | 0       | 2      | 256x6x6    | flatten, dropout |
| fc      | 9216       |             |         |        | 4096       | relu, dropout    |
| fc      | 4096       |             |         |        | 2048       | relu, dropout    |
| fc      | 2048       |             |         |        | 1000       |                  |

Note: the fully connected layers follow the implementation below (9216 → 4096 → 2048 → 1000); the original AlexNet uses 4096 → 4096 → 1000.
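
Each spatial size in the table follows the standard convolution/pooling formula: output = floor((input + 2*padding - kernel) / stride) + 1. A quick check of the first two rows (my own snippet, using the symmetric padding=2 from the implementation below):

# output = floor((input + 2*padding - kernel) / stride) + 1
def out_size(i, k, p, s):
    return (i + 2 * p - k) // s + 1

print(out_size(224, 11, 2, 4))  # 55 (first conv)
print(out_size(55, 3, 0, 2))    # 27 (first maxpool)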
2. Implementation (PyTorch)

We train the network on the CIFAR10 dataset for image classification.

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torchvision
from tqdm import tqdm
import numpy as np
from PIL import Image

import matplotlib.pyplot as plt
%matplotlib inline

class AlexNet(nn.Module):
    def __init__(self, init_weights=False):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(p=0.5),
            nn.Linear(9216, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 2048),
            nn.ReLU(inplace=True),
            # change the output layer to CIFAR10's 10 classes (1000 in the original AlexNet)
            nn.Linear(2048, 10)
        )
        if init_weights:
            self._initialize_weights()
    
    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x
        
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                # normal distribution with mean 0 and standard deviation 0.01
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
# Instantiate the model
net = AlexNet(init_weights=True)
net
out:
AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Dropout(p=0.5, inplace=False)
    (2): Linear(in_features=9216, out_features=4096, bias=True)
    (3): ReLU(inplace=True)
    (4): Dropout(p=0.5, inplace=False)
    (5): Linear(in_features=4096, out_features=2048, bias=True)
    (6): ReLU(inplace=True)
    (7): Linear(in_features=2048, out_features=10, bias=True)
  )
)
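A quick sanity check (my addition, not part of the original post) that a 224x224 input really flattens to 9216 = 256x6x6 features and ends in 10 logits:

# Dummy forward pass to verify the shapes in the table above
x = torch.randn(1, 3, 224, 224)
print(net.features(x).shape)  # torch.Size([1, 256, 6, 6])
print(net(x).shape)           # torch.Size([1, 10])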
# Load and preprocess the data
resize = (224, 224)
mean = (0.5, 0.5, 0.5)
std = (0.5, 0.5, 0.5)

data_transform = {
    "train": transforms.Compose([
        transforms.Resize(resize),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),
    "test": transforms.Compose([
        transforms.Resize(resize),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
}

train_dataset = torchvision.datasets.CIFAR10("./data", train=True, download=True, transform=data_transform["train"])
test_dataset = torchvision.datasets.CIFAR10("./data", train=False, download=True, transform=data_transform["test"])
classes = ("airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck")

batch_size = 512
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, num_workers=4)
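
Normalizing with mean = std = (0.5, 0.5, 0.5) simply maps pixel values into [-1, 1]. If you prefer the dataset's true per-channel statistics, a sketch for estimating them (my addition; note it loads the whole training set into memory):

# Estimate CIFAR10 per-channel mean/std from the raw training images
raw = torchvision.datasets.CIFAR10("./data", train=True, download=True,
                                   transform=transforms.ToTensor())
pixels = torch.stack([img for img, _ in raw])   # shape (50000, 3, 32, 32)
print(pixels.mean(dim=(0, 2, 3)))               # per-channel mean
print(pixels.std(dim=(0, 2, 3)))                # per-channel std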

optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Training
net.to(device)
loss_fn.to(device)
num_epochs = 10

for epoch in range(num_epochs):
    net.train()
    running_loss = 0.0
    for step, data in enumerate(tqdm(train_dataloader, desc=f"Train Epoch: {epoch}/{num_epochs}"), start=0):
        inputs = data[0]
        labels = data[1]
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    net.eval()
    with torch.no_grad():
        # evaluate performance on the test set
        accuracy_num = 0
        for step, data in enumerate(tqdm(test_dataloader, desc=f"Test  Epoch: {epoch}/{num_epochs}"), start=0):
            inputs = data[0]
            labels = data[1]
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = net(inputs)
            
            predict_y = torch.max(outputs, dim=1)[1]  # predicted class indices
            acc = (predict_y == labels).sum().item()
            accuracy_num += acc
    print(f"Epoch: {epoch}/{num_epochs}; loss: {np.round(running_loss / len(train_dataloader), 3)}; Acc: {np.round(accuracy_num / len(test_dataset) * 100, 2)} %")

print(torch.cuda.memory_summary(device, abbreviated=True))
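
Once training finishes, the weights can be saved and reloaded later; a minimal sketch (the filename is my own choice):

# Save only the state_dict, the recommended way to persist PyTorch models
torch.save(net.state_dict(), "alexnet_cifar10.pth")
# Reload later:
# net = AlexNet()
# net.load_state_dict(torch.load("alexnet_cifar10.pth", map_location=device))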

# Test the model on a single image
img = Image.open("/root/autodl-tmp/DogsVSCats/train/train/cat.100.jpg")
plt.imshow(img)
trans = data_transform["test"]
img = trans(img)
img = torch.unsqueeze(img, dim=0)
net.eval()
with torch.no_grad():
    img = img.to(device)
    output = net(img)
    predict = torch.max(output, dim=1)[1].item()
    print(f"label: {classes[int(predict)]}")
