AlexNet
代码地址:https://github.com/daiyizheng/cv-project/blob/master/_01_cv_base/_AlexNet/AlexNet.ipynb
模型摘要
本论文训练了一个大型深度卷积神经网络来将ImageNet LSVRC-2010竞赛的120万高分辨率的图像分到1000不同的类别中。在测试数据上,我们得到了top-1 37.5%, top-5 17.0%的错误率,这个结果比目前的最好结果好很多。这个神经网络有6000万参数和650000个神经元,包含5个卷积层(某些卷积层后面带有池化层)和3个全连接层,最后是一个1000维的softmax。为了训练的更快,使用了非饱和神经元并对卷积操作进行了非常有效的GPU实现。为了减少全连接层的过拟合,采用了一个最近开发的名为dropout的正则化方法,结果证明是非常有效的。也使用这个模型的一个变种参加了ILSVRC-2012竞赛,赢得了冠军并且与第二名 top-5 26.2%的错误率相比,我们取得了top-5 15.3%的错误率。
模型结构
数据集
猫狗二分类数据集
整体
代码
import argparse
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision
import PIL
from tqdm import tqdm
class AlexNet(nn.Module):
    """AlexNet (Krizhevsky et al., 2012) with a configurable output layer.

    Expects 3x227x227 input images; ``args.num_class`` sets the size of the
    final classification layer (2 for the cat/dog dataset).
    """

    def __init__(self, args):
        super().__init__()
        # Spatial size after a conv/pool layer:
        #   out = (in - kernel + 2*padding) / stride + 1
        self.feature = nn.Sequential(
            # conv1: 3x227x227 -> 96x56x56 (11x11 kernel, stride 4, pad 2)
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=2, bias=False),
            nn.ReLU(inplace=True),
            # pool1: 96x56x56 -> 96x27x27 (overlapping 3x3 max-pool, stride 2)
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
            # conv2: 96x27x27 -> 256x27x27
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2, bias=False),
            nn.ReLU(inplace=True),
            # pool2: 256x27x27 -> 256x13x13
            # BUG FIX: kernel_size was written 3*3 (= 9); AlexNet uses 3x3 pooling
            # like the other two pooling layers.
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
            # conv3: 256x13x13 -> 384x13x13
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
            # conv4: 384x13x13 -> 384x13x13
            # FIX: out_channels was 256; the paper's conv4 has 384 output channels.
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
            # conv5: 384x13x13 -> 256x13x13
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
            # pool3: 256x13x13 -> 256x6x6
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
        )
        # Guarantees a 6x6 feature map even if the input is not exactly 227x227.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=256 * 6 * 6, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=4096, out_features=args.num_class),
        )

    def forward(self, x):
        """Return raw class logits of shape (batch, num_class)."""
        x = self.feature(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # (batch, 256*6*6)
        x = self.classifier(x)
        return x
class MyDataset(Dataset):
    """Cat/dog image dataset reading files from a flat directory.

    In training mode, files named ``dog.*`` get label 1 and everything else
    label 0. In test mode the filenames carry no label, so every sample is
    labelled 0 (a placeholder).
    """

    def __init__(self, data_path, is_train=True, transform=None):
        self.data_path = data_path
        self.is_train = is_train
        if transform is None:
            # Default preprocessing: resize to AlexNet's 227x227 input and
            # scale pixels to [-1, 1].
            self.transform = torchvision.transforms.Compose([
                torchvision.transforms.Resize(size=(227, 227)),
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
            ])
        else:
            self.transform = transform
        # FIX: sort the directory listing — os.listdir order is
        # filesystem-dependent, which made sample/prediction order
        # non-reproducible across runs and machines.
        self.path_list = sorted(os.listdir(self.data_path))

    def __getitem__(self, index):
        filename = self.path_list[index]
        if self.is_train:
            label = 1 if filename.split(".")[0] == "dog" else 0
        else:
            label = 0
        label = torch.tensor(label, dtype=torch.int64)
        img_path = os.path.join(self.data_path, filename)
        # FIX: open via a context manager so the file handle is released, and
        # force RGB — grayscale or RGBA files would otherwise crash the
        # 3-channel Normalize above.
        with PIL.Image.open(img_path) as img:
            img = img.convert("RGB")
            img = self.transform(img)
        return img, label

    def __len__(self):
        return len(self.path_list)
## Command-line configuration ------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument('--num_class', default=2, type=int, help='分类个数')
parser.add_argument('--use_cuda', action='store_true', help='适用GPU进行训练')
# Args are passed explicitly (notebook-style) instead of read from sys.argv.
args = parser.parse_args(args=['--num_class', '2', '--use_cuda'])
print(args)

device = torch.device("cuda" if args.use_cuda else "cpu")
model = AlexNet(args=args).to(device)
# model

# Print a layer-by-layer summary of the network.
from torchsummary import summary
# FIX: the device passed to summary() must match where the model actually
# lives; it was hard-coded to "cuda" and crashed when --use_cuda was not set.
summary(model, input_size=(3, 227, 227), device=device.type)

train_data = MyDataset("../../datasets/cat_dog_dataset/train/", is_train=True)
test_dataset = MyDataset("../../datasets/cat_dog_dataset/test/", is_train=False)

## Split the labelled data 80/20 into train and validation subsets.
train_size = int(0.8 * len(train_data))
dev_size = len(train_data) - train_size
train_dataset, dev_dataset = torch.utils.data.random_split(train_data, [train_size, dev_size])
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, pin_memory=True, num_workers=0)
dev_dataloader = DataLoader(dev_dataset, batch_size=32, shuffle=True, pin_memory=True, num_workers=0)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False, pin_memory=True, num_workers=0)

## Optimisation hyper-parameters
lr = 1e-5
optimizer = torch.optim.Adam(model.parameters(), lr)
# reduction="sum" so the per-epoch loss can be averaged over sample count.
loss_func = nn.CrossEntropyLoss(reduction="sum")

save_model_path = "../../results/AlexNet"
# FIX: os.mkdir raises if the parent directory is missing; makedirs with
# exist_ok=True also avoids a race when the directory already exists.
if not os.path.exists(save_model_path):
    os.makedirs(save_model_path, exist_ok=True)
def compute_metric(pre, label):
    """Count correct predictions.

    `pre` holds per-class scores of shape (N, C); `label` holds the N true
    class indices. Returns a 0-dim int64 tensor with the number of rows
    whose argmax matches the label.
    """
    predicted = torch.argmax(pre, dim=1).flatten()
    hits = predicted.eq(label.flatten())
    return hits.sum()
## Training loop: train for `epochs` epochs, validate after each epoch, and
## checkpoint whenever validation accuracy improves.
epochs = 10
model.to(device)  # no-op if the model is already on `device`
# Per-epoch history, consumed by the plotting code below.
train_loss_epoch_list = []
train_acc_epoch_list = []
dev_loss_epoch_list = []
dev_acc_epoch_list = []
best_score = 0  # best validation accuracy (percent) seen so far
for epoch in tqdm(range(1, epochs+1)):
    print(f"当前epoch:{epoch}")
    model.train()
    ## number of correct predictions accumulated over this epoch
    correct_per_epoch = 0
    ## summed loss over this epoch (loss_func uses reduction="sum")
    loss_per_epoch = 0
    for step, (data, label) in tqdm(enumerate(train_dataloader)):
        data, label = data.to(device), label.to(device)
        logits = model(data)
        # softmax is only needed for the accuracy metric; the loss takes raw logits
        pre = torch.softmax(logits, dim=1)
        loss = loss_func(logits, label)
        correct_per_epoch += compute_metric(pre, label).item()
        loss_per_epoch+= loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # periodic progress report every 100 batches
        if step%100==0:
            print(f'batch_loss:{loss.item()},batch_acc:{compute_metric(pre, label).item()/len(label)*100}%')
    # record per-sample average loss and accuracy percent for this epoch
    train_loss_epoch_list.append(loss_per_epoch/len(train_dataset))
    train_acc_epoch_list.append(correct_per_epoch/len(train_dataset)*100)
    ## Evaluate on the validation split (no gradients needed).
    with torch.no_grad():
        print("验证集开始测试")
        dev_loss = 0.0
        correct_dev = 0
        model.eval()
        for dev_step, (dev_data, dev_label) in tqdm(enumerate(dev_dataloader)):
            dev_data, dev_label = dev_data.to(device), dev_label.to(device)
            logits = model(dev_data)
            dev_pre = torch.softmax(logits, dim=1)
            correct_dev += compute_metric(dev_pre, dev_label).item()
            loss = loss_func(logits, dev_label)
            dev_loss+=loss.item()
        current_score = correct_dev/len(dev_dataset)*100
        print(f'dev_loss:{dev_loss}, dev_Acc:{current_score}')
        dev_loss_epoch_list.append(dev_loss/len(dev_dataset))
        dev_acc_epoch_list.append(current_score)
        # keep only the best-performing checkpoint on disk
        if best_score<current_score:
            best_score=current_score
            print(f"保存模型参数。。。。。。。。。。。best_score{best_score}")
            torch.save(model.state_dict(), os.path.join(save_model_path,'alexnet.pt'))
## Plot the per-epoch training curves: one figure for losses, one for accuracies.
import matplotlib.pyplot as plt

epoch_axis = range(1, epochs + 1)
# (series, line colour, legend label) triples, grouped one list per figure
loss_curves = [(train_loss_epoch_list, "b", "train_loss"),
               (dev_loss_epoch_list, "r", "dev_loss")]
acc_curves = [(train_acc_epoch_list, "g", "train_acc"),
              (dev_acc_epoch_list, "y", "dev_acc")]
for figure_curves in (loss_curves, acc_curves):
    for series, colour, name in figure_curves:
        plt.plot(epoch_axis, series, color=colour, label=name)
    plt.legend()
    plt.show()