AlexNet研读的一些杂记
Alexnet的一些特色总结:
- 个人认为比较大的一个优点是真正考虑了神经元之间的关系,考虑生物大脑运行机制的时候,深度学习或许会变得更加可解释?
- 考虑到了激活函数对于训练速度的影响,使用Relu代替了tanh
- 重叠的池化操作,一般的池化操作没有重叠,但是这篇文章的池化操作是有重叠的(步长小于池化核尺寸)。
- 数据增强:对原始数据进行适当的变换,防止过拟合。
- Dropout:按照一定的概率将其暂时从网络中丢弃。注意是暂时,对于随机梯度下降来说,由于是随机丢弃,故而每一个 mini-batch 都在训练不同的网络。引入dropout正则化相当于让一个模型实现多个模型的效果
- 一些超参数:
dropout 0.5
batch size 128
SGD Momentum 0.9
学习率 1e-2,当验证准确率平稳时,手动将学习率除以 10
L2权重衰减是5e-4
AlexNet的网络结构
5个卷积层、3个池化层、2个归一化层和3个全连接层。
具体表示如下:[CONV1-MAX POOL1-NORM1-CONV2-MAX POOL2-NORM2-CONV3-CONV4-CONV5-MAX POOL3-FC6-FC7-FC8]
- 输入: $227 \times 227 \times 3$ 的图片
- CONV1:使用96个 $11 \times 11$ 的卷积核,步长为4,由于 (227-11)/4+1=55,所以输出的尺寸为 $55 \times 55 \times 96$,共有 $96 \times 11 \times 11 \times 3$ 个参数。
- POOL1:使用 $3 \times 3$ 的池化核,步长为2,由于 (55-3)/2+1=27,所以输出为 $27 \times 27 \times 96$
- NORM1(个人认为这一步很关键,对于神经元运行机制有所考虑):归一化之后仍然是 $27 \times 27 \times 96$
- CONV2:使用256个 $5 \times 5$ 的卷积核,stride为1,padding为2,由于 (27+2*2-5)+1=27,所以输出为 $27 \times 27 \times 256$。
- POOL2:$3 \times 3$ 的池化核,stride为2,由于 (27-3)/2+1=13,所以输出为 $13 \times 13 \times 256$
- NORM2:$13 \times 13 \times 256$
- CONV3:384个 $3 \times 3$ 的卷积核,stride为1,padding为1,输出为 $13 \times 13 \times 384$
- CONV4:384个 $3 \times 3$ 的卷积核,stride为1,padding为1,输出为 $13 \times 13 \times 384$
- CONV5:256个 $3 \times 3$ 的卷积核,stride为1,padding为1,输出为 $13 \times 13 \times 256$
- POOL3:$3 \times 3$ 的池化核,stride为2,由于 (13-3)/2+1=6,输出为 $6 \times 6 \times 256$
- FC6:4096个神经元,输出大小4096
- FC7:4096个神经元,输出大小4096
- FC8:1000个神经元,输出大小1000(class scores)
Alexnet的实现代码(基于pytorch)
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
from tqdm import tqdm
class LRN(nn.Module):
    """Local Response Normalization (AlexNet, Sec. 3.3).

    For each position, divides the activation by
    ``(k + alpha * sum_of_squares) ** beta`` where the sum runs over a
    window of ``n`` adjacent channels centred on the current channel
    (clamped at the channel boundaries).
    """

    def __init__(self, in_channels, k=2, n=5, alpha=1.0e-4, beta=0.75):
        super(LRN, self).__init__()
        self.in_channels = in_channels  # recorded for introspection; not used in forward
        self.k = k          # additive constant inside the denominator
        self.n = n          # cross-channel neighbourhood size
        self.alpha = alpha  # scale of the squared-activation sum
        self.beta = beta    # exponent of the denominator

    def forward(self, x):
        # x: (batch, channels, height, width)
        x_square = x.pow(2)
        # Bug fix: the original used torch.zeros(...).to(device), relying on
        # a module-level global `device` defined much later in the script —
        # the layer crashed when used standalone. zeros_like inherits x's
        # device and dtype instead.
        sum_section = torch.zeros_like(x_square)
        for batch in range(x_square.size(0)):
            for channel in range(x_square.size(1)):
                lower = max(0, channel - self.n // 2)
                # The +1 is required: Python slice upper bounds are exclusive,
                # so without it the window would lose its last channel.
                upper = min(x_square.size(1) - 1, channel + self.n // 2) + 1
                sum_section[batch, channel] = x_square[batch, lower:upper].sum(dim=0)
        out = x / ((self.k + self.alpha * sum_section).pow(self.beta))
        return out
class AlexNet(nn.Module):
    """AlexNet (Krizhevsky et al., 2012) for ``num_class``-way classification.

    Expects 3x227x227 input. The LRN layers are instantiated but not
    applied in ``forward`` (the pure-Python LRN is very slow); this
    mirrors the behaviour of the original script.
    """

    def __init__(self, num_class):
        super(AlexNet, self).__init__()
        self.act_func = nn.ReLU(True)
        self.dropout = nn.Dropout(0.5)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2)
        # Feature extractor: conv biases explicitly zero-initialised.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=2)
        self.conv1.bias.data = torch.zeros(self.conv1.bias.data.size())
        self.N1 = LRN(96)
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2)
        self.conv2.bias.data = torch.zeros(self.conv2.bias.data.size())
        self.N2 = LRN(256)
        self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.conv3.bias.data = torch.zeros(self.conv3.bias.data.size())
        self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.conv4.bias.data = torch.zeros(self.conv4.bias.data.size())
        self.conv5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.conv5.bias.data = torch.zeros(self.conv5.bias.data.size())
        # Classifier head.
        self.FC6 = nn.Linear(in_features=6 * 6 * 256, out_features=4096)
        self.FC7 = nn.Linear(in_features=4096, out_features=4096)
        self.FC8 = nn.Linear(in_features=4096, out_features=num_class)
        # Weight initialisation: weights ~ N(0, 0.01^2); linear biases = 1
        # (conv biases were already zeroed above and stay zero).
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                m.weight.data = torch.normal(torch.zeros(m.weight.data.size()),
                                             torch.ones(m.weight.data.size()) * 0.01)
            if isinstance(m, nn.Linear):
                m.bias.data = torch.ones(m.bias.data.size())

    def forward(self, x):
        x = self.maxpool(self.act_func(self.conv1(x)))
        x = self.maxpool(self.act_func(self.conv2(x)))
        x = self.act_func(self.conv3(x))
        x = self.act_func(self.conv4(x))
        x = self.maxpool(self.act_func(self.conv5(x)))
        # Flatten to (batch_size, 256*6*6) for the fully-connected layers.
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.dropout(self.act_func(self.FC6(x)))
        x = self.dropout(self.act_func(self.FC7(x)))
        # Bug fix: the original applied ReLU to FC8's output, zeroing every
        # negative class score. CrossEntropyLoss expects raw logits, so the
        # final layer must not be followed by an activation.
        x = self.FC8(x)
        return x
# Load the CIFAR-10 dataset, resized to AlexNet's expected input size.
transform = transforms.Compose([
    transforms.Resize(227),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
trainset = torchvision.datasets.CIFAR10(root='/Users/shineber/Documents/PythonProject/DeepLearning/DL_Action/Dataset/', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=256,
                                          shuffle=True, num_workers=0)
testset = torchvision.datasets.CIFAR10(root='/Users/shineber/Documents/PythonProject/DeepLearning/DL_Action/Dataset/', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=256,
                                         shuffle=False, num_workers=0)
net = AlexNet(10)
# Loss and optimizer (SGD with momentum, as in the paper).
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.005, momentum=0.9)
if torch.cuda.device_count() > 1:
    net = nn.DataParallel(net)
# net.load_state_dict(torch.load('net_epoch5.pth'))
# Bug fix: the original used torch.device("cuda:"), an invalid device
# string that raises RuntimeError, and unconditionally fell back to "mps",
# which crashes on machines without Apple-silicon support. Select
# cuda -> mps -> cpu explicitly.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
net.to(device)
print(net)
# Training loop.
net.train()
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    with tqdm(total=len(trainloader), desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch') as pbar:
        for i, (inputs, labels) in enumerate(trainloader, 0):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            pbar.set_postfix({'Loss': running_loss / (i + 1)})
            pbar.update(1)
    # Checkpoint after every epoch.
    torch.save(net.state_dict(), f'net_epoch{epoch+1}.pth')
# Evaluate on the test set.
net.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy on the test set: %.2f %%' % (100 * correct / total))