AlexNet was the winning network of the ILSVRC 2012 (ImageNet Large Scale Visual Recognition Challenge) competition, raising classification accuracy from the roughly 70%+ of traditional methods to 80%+. It was designed by Hinton and his student Alex Krizhevsky. Deep learning began to develop rapidly after that year.
The highlights of this network are:
- First network to use GPUs to accelerate training.
- Uses the ReLU activation function instead of the traditional Sigmoid and Tanh activations (see the short sketch after this list).
- Uses LRN (Local Response Normalization).
- Applies Dropout to the first two fully connected layers to randomly deactivate neurons and reduce overfitting.
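As a quick illustration of the activation and normalization points above (a minimal sketch, not part of the original implementation; the LRN hyperparameters are the ones from the AlexNet paper), both are available as standard PyTorch operations:

import torch
import torch.nn as nn

x = torch.linspace(-5, 5, 5)
print(nn.ReLU()(x))      # negatives clamp to 0, positives pass through with gradient 1
print(torch.sigmoid(x))  # saturates toward 0/1 at the extremes, so gradients vanish
print(torch.tanh(x))     # saturates toward -1/1 at the extremes

# Local Response Normalization with the paper's hyperparameters (n=5, alpha=1e-4, beta=0.75, k=2)
lrn = nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2.0)
print(lrn(torch.rand(1, 48, 55, 55)).shape)  # shape is unchanged: [1, 48, 55, 55]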
Overfitting: the root causes are too many feature dimensions, an overly complex model hypothesis, too many parameters, too little training data, and too much noise. The fitted function then predicts the training set almost perfectly but performs poorly on new test data: the model fits the training data excessively without regard for generalization.
Dropout randomly deactivates a portion of the neurons during forward propagation, as illustrated in the figure below:
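As a complement to the figure, here is a minimal sketch (not part of the original post) of how PyTorch's nn.Dropout behaves in training versus evaluation mode:

import torch
import torch.nn as nn

drop = nn.Dropout(p=0.5)
x = torch.ones(1, 8)

drop.train()       # training mode: roughly half the activations are zeroed,
print(drop(x))     # and the survivors are scaled by 1 / (1 - p) = 2

drop.eval()        # evaluation mode: dropout is a no-op
print(drop(x))     # prints the input unchanged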
The size of the output matrix after a convolution is computed by the following formula:
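$$N = \frac{W - F + 2P}{S} + 1$$

where $W$ is the input size, $F$ is the kernel size, $P$ is the padding, and $S$ is the stride. When $N$ is not an integer, the fractional part is dropped (PyTorch floors it). For example, the first convolution in the model below gives $(224 - 11 + 2 \times 2) / 4 + 1 = 55.25$, which is truncated to $55$.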
The kernel size, number of kernels, padding, and stride of every AlexNet layer are listed here:
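For reference (reconstructed from the implementation below and the commonly cited AlexNet architecture; the "halved" kernel counts are the ones actually used in this post's code, and the padding values follow that code rather than the paper's slightly asymmetric padding):

| Layer    | Kernel size | Kernels (original) | Kernels (halved, this post) | Padding | Stride | Output (this post) |
|----------|-------------|--------------------|-----------------------------|---------|--------|--------------------|
| Conv1    | 11×11       | 96                 | 48                          | 2       | 4      | 48×55×55           |
| MaxPool1 | 3×3         | –                  | –                           | 0       | 2      | 48×27×27           |
| Conv2    | 5×5         | 256                | 128                         | 2       | 1      | 128×27×27          |
| MaxPool2 | 3×3         | –                  | –                           | 0       | 2      | 128×13×13          |
| Conv3    | 3×3         | 384                | 192                         | 1       | 1      | 192×13×13          |
| Conv4    | 3×3         | 384                | 192                         | 1       | 1      | 192×13×13          |
| Conv5    | 3×3         | 256                | 128                         | 1       | 1      | 128×13×13          |
| MaxPool3 | 3×3         | –                  | –                           | 0       | 2      | 128×6×6            |
| FC1      | –           | 4096               | 2048                        | –       | –      | 2048               |
| FC2      | –           | 4096               | 2048                        | –       | –      | 2048               |
| FC3      | –           | 1000               | num_classes                 | –       | –      | num_classes        |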
Download the flower classification dataset from: http://download.tensorflow.org/example_images/flower_photos.tgz
Run the following in PowerShell:
python ./split_data.py
split_data.py splits the flower classification dataset into a train set and a val set:
split_data.py
import os
from shutil import copy
import random


def mkfile(file):
    if not os.path.exists(file):
        os.makedirs(file)


file = 'flower_data/flower_photos'
flower_class = [cla for cla in os.listdir(file) if ".txt" not in cla]

mkfile('flower_data/train')
for cla in flower_class:
    mkfile('flower_data/train/' + cla)

mkfile('flower_data/val')
for cla in flower_class:
    mkfile('flower_data/val/' + cla)

split_rate = 0.1
for cla in flower_class:
    cla_path = file + '/' + cla + '/'
    images = os.listdir(cla_path)
    num = len(images)
    eval_index = random.sample(images, k=int(num * split_rate))
    for index, image in enumerate(images):
        if image in eval_index:
            image_path = cla_path + image
            new_path = 'flower_data/val/' + cla
            copy(image_path, new_path)
        else:
            image_path = cla_path + image
            new_path = 'flower_data/train/' + cla
            copy(image_path, new_path)
        print("\r[{}] processing [{}/{}]".format(cla, index + 1, num), end="")  # progress bar
    print()

print("processing done!")
Create the model (since our dataset is relatively small, and to speed up training, the number of kernels in every layer is simply halved):
model_AlexNet.py
import torch.nn as nn
import torch


class AlexNet(nn.Module):
    def __init__(self, num_classes=1000, init_weights=False):
        super(AlexNet, self).__init__()
        # The dataset is small, so only half of the original number of kernels is used
        # to speed up training.
        self.features = nn.Sequential(
            # When the output size N is not an integer, the fractional part is dropped,
            # so padding=2 below has the same effect.
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),   # output [48, 55, 55]
            # inplace=True modifies the tensor passed down from the Conv2d layer in place,
            # saving memory by not storing an extra copy.
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # output [48, 27, 27]
            nn.Conv2d(48, 128, kernel_size=5, padding=2),            # output [128, 27, 27]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # output [128, 13, 13]
            nn.Conv2d(128, 192, kernel_size=3, padding=1),           # output [192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),           # output [192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),           # output [128, 13, 13]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # output [128, 6, 6]
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        )
        # optionally initialize the weights
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        # flatten from dim 1 onward; keep the batch dimension of [batch, channel, height, width]
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        # self.modules() returns an iterator over every module in the network
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # initialize conv weights with Kaiming (He) normal initialization
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                # for fully connected layers, use a normal distribution for the weights and 0 for the bias
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
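A quick sanity check of the model's output shape (a minimal sketch, assuming the class above is saved as model_AlexNet.py):

import torch
from model_AlexNet import AlexNet

net = AlexNet(num_classes=5, init_weights=True)
x = torch.randn(1, 3, 224, 224)  # a dummy batch containing one 224x224 RGB image
print(net(x).shape)              # expected: torch.Size([1, 5])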
Train the model:
train_AlexNet.py
import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from model_AlexNet import AlexNet
import os
import json
import time


def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    # data preprocessing
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),  # random crop to 224x224
                                      transforms.RandomHorizontalFlip(),  # random horizontal flip
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),  # normalization
        "val": transforms.Compose([transforms.Resize((224, 224)),  # must be (224, 224), not just 224
                                    transforms.ToTensor(),
                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    }

    image_path = "E:/data_set/flower_data/"
    train_dataset = datasets.ImageFolder(root=image_path + "/train",
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # flower_list = {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    # invert the flower_list dict so that indices map back to class names
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=0)
    validate_dataset = datasets.ImageFolder(root=image_path + "/val",
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4, shuffle=False,
                                                  num_workers=0)

    # test_data_iter = iter(validate_loader)
    # test_image, test_label = test_data_iter.next()
    #
    # def imshow(img):
    #     img = img / 2 + 0.5
    #     npimg = img.numpy()
    #     plt.imshow(np.transpose(npimg, (1, 2, 0)))
    #     plt.show()
    #
    # print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
    # imshow(utils.make_grid(test_image))

    net = AlexNet(num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    # pata = list(net.parameters())
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    save_path = './AlexNet.pth'
    best_acc = 0.0
    for epoch in range(10):
        # training mode: enables dropout
        net.train()
        running_loss = 0.0
        # time taken to train one epoch
        t1 = time.perf_counter()
        for step, data in enumerate(train_loader, start=0):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            # print training progress
            rate = (step + 1) / len(train_loader)
            a = "*" * int(rate * 50)
            b = "." * int((1 - rate) * 50)
            print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(int(rate * 100), a, b, loss), end="")
        print()
        print(time.perf_counter() - t1)

        # validate
        net.eval()  # evaluation mode: disables dropout
        acc = 0.0  # accumulate the number of correct predictions per epoch
        with torch.no_grad():
            for val_data in validate_loader:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += (predict_y == val_labels.to(device)).sum().item()
            val_accurate = acc / val_num
            if val_accurate > best_acc:
                best_acc = val_accurate
                torch.save(net.state_dict(), save_path)
            print('[epoch %d] train_loss: %.3f  test_accuracy: %.3f' %
                  (epoch + 1, running_loss / step, val_accurate))

    print('Finished Training')


if __name__ == '__main__':
    main()
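To start training, run the script the same way as before (this assumes the dataset has already been split and that image_path points at your local copy):

python ./train_AlexNet.py

The weights of the epoch with the best validation accuracy are saved to ./AlexNet.pth.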
Use the trained weight file to make predictions:
predict_AlexNet.py
import torch
from model_AlexNet import AlexNet
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
import json

data_transform = transforms.Compose(
    [transforms.Resize((224, 224)),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# load image
img = Image.open("tulip.jpg")
plt.imshow(img)
# [N, C, H, W]
img = data_transform(img)
# expand batch dimension
img = torch.unsqueeze(img, dim=0)

# read class_indices.json, which maps indices to class names
try:
    json_file = open('./class_indices.json', 'r')
    class_indict = json.load(json_file)
except Exception as e:
    print(e)
    exit(-1)

# create model
model = AlexNet(num_classes=5)
# load model weights
model_weight_path = "./AlexNet.pth"
model.load_state_dict(torch.load(model_weight_path))
model.eval()
with torch.no_grad():
    # predict class
    # squeeze the output to remove the batch dimension
    output = torch.squeeze(model(img))
    # softmax gives the probability distribution over classes
    predict = torch.softmax(output, dim=0)
    # index of the largest probability
    predict_cla = torch.argmax(predict).numpy()
print(class_indict[str(predict_cla)], predict[predict_cla].item())
plt.show()
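Running python ./predict_AlexNet.py (assuming a tulip.jpg in the working directory and the AlexNet.pth weights produced by training) displays the image and prints the predicted class name together with its probability, for example something like "tulips 0.98"; the exact value depends on your training run.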
This AlexNet implementation follows a network-reproduction video tutorial by a bilibili uploader; video: https://www.bilibili.com/video/BV1W7411T7qc
Strongly recommended!