# ===== model.py =====
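# AlexNet highlights
# 1. First network to use GPUs to accelerate training.
# 2. Uses the ReLU activation function instead of the traditional sigmoid and tanh activations.
# 3. Uses LRN (local response normalization).
# 4. Applies Dropout (random neuron deactivation) in the first two fully connected layers
#    to reduce overfitting -- this is the key point!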
import torch
import torch.nn as nn
# Network structure analysis
# conv1: RGB input, kernels: 48 x 2 = 96, kernel_size = 11, padding = [1, 2], stride = 4
#        input_size [224, 224, 3], output_size [55, 55, 96]
# maxpool1: kernel_size = 3, padding = 0, stride = 2, output_size [27, 27, 96]
# conv2: kernels: 128 x 2 = 256, kernel_size = 5, padding = [2, 2], stride = 1
#        output_size [27, 27, 256]
# maxpool2: kernel_size = 3, padding = 0, stride = 2, output_size [13, 13, 256]
# conv3: kernels: 192 x 2 = 384, kernel_size = 3, padding = [1, 1], stride = 1, output_size [13, 13, 384]
# conv4: kernels: 384, kernel_size = 3, padding = [1, 1], stride = 1, output_size [13, 13, 384]
#        (the output size does not change here, yet the network still includes this convolution layer)
# conv5: kernels: 256, kernel_size = 3, padding = [1, 1], stride = 1, output_size [13, 13, 256]
# maxpool3: kernel_size = 3, padding = 0, stride = 2, output_size [6, 6, 256]
# Three fully connected layers follow; the last one outputs 1000 classes.
# Note: the AlexNet class below uses half of these kernel counts (48, 128, 192, 192, 128).
# Formula recap: N = (W - F + 2P) / S + 1
# where the input image is W x W, the kernel is F x F, the stride is S and the padding is P.
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier.

    The feature extractor uses 48/128/192/192/128 convolution channels and the
    classifier uses two 2048-unit hidden layers. The classifier's first
    ``Linear`` layer expects ``128 * 6 * 6`` inputs, which is what the feature
    extractor produces for a 3 x 224 x 224 image.

    Args:
        num_classes: Number of output logits (size of the last ``Linear`` layer).
        init_weights: When true, re-initialise Conv2d/Linear parameters with
            ``_initialize_weights`` instead of keeping PyTorch's defaults.
    """

    def __init__(self, num_classes: int = 1000, init_weights: bool = False):
        super().__init__()
        # ``features`` bundles the layers that extract image features.
        # Spatial sizes in the comments assume a 224 x 224 input.
        self.features = nn.Sequential(
            # ``padding`` accepts an int or a tuple; a tuple (1, 2) would pad
            # one row of zeros top/bottom and two columns left/right.
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),   # -> [48, 55, 55]
            # inplace=True overwrites the input tensor to save memory.
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # -> [48, 27, 27]
            nn.Conv2d(48, 128, kernel_size=5, padding=2),            # -> [128, 27, 27]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # -> [128, 13, 13]
            nn.Conv2d(128, 192, kernel_size=3, padding=1),           # -> [192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),           # -> [192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),           # -> [128, 13, 13]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # -> [128, 6, 6]
        )
        # ``classifier`` maps the flattened features to class logits.
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),                 # drop probability
            nn.Linear(128 * 6 * 6, 2048),      # 2048 hidden units
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),      # one logit per class
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits for a batch of images.

        Args:
            x: Batch of images laid out as [N, 3, H, W]; H = W = 224 yields the
                6 x 6 feature map the classifier expects.

        Returns:
            Tensor of shape [N, num_classes] with unnormalised scores.
        """
        x = self.features(x)
        # Flatten everything except the batch dimension (dim 0).
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self) -> None:
        """Initialise Conv2d layers with Kaiming-normal and Linear layers with N(0, 0.01**2).

        All biases that exist are set to zero.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # 'relu' has the same gain as the default ('leaky_relu' with
                # slope 0); it is spelled out because the layers feed ReLUs.
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                # Normal distribution with mean 0 and standard deviation 0.01.
                nn.init.normal_(m.weight, 0, 0.01)
                # Guard like the Conv2d branch so bias-free layers do not crash.
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
# ===== train.py =====
import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from model import AlexNet
import os
import json
import time
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

# Preprocessing pipelines, one per dataset split.
data_transform = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),       # random crop resized to 224 x 224
        transforms.RandomHorizontalFlip(),       # random horizontal flip
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),           # must be the tuple (224, 224), not the int 224
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]),
}

# Training images are read from ./train, one sub-folder per class.
train_dataset = datasets.ImageFolder(
    root='./train',
    transform=data_transform['train'],
)
train_num = len(train_dataset)  # number of training images

# class_to_idx maps class name -> index,
# e.g. {'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflowers': 3, 'tulips': 4}.
flower_list = train_dataset.class_to_idx
# Invert it to index -> class name and persist it as JSON for later lookup.
cla_dict = {index: name for name, index in flower_list.items()}
json_str = json.dumps(cla_dict, indent=4)
with open('class_indices.json', 'w') as json_file:
    json_file.write(json_str)

batch_size = 32
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)

# Validation images are read from ./val and are not shuffled.
validate_dataset = datasets.ImageFolder(
    root='./val',
    transform=data_transform['val'],
)
val_num = len(validate_dataset)
validate_loader = torch.utils.data.DataLoader(
    validate_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)
# Code for previewing the dataset (very handy when debugging the input pipeline)
# test_data_iter = iter(validate_loader)
# test_image, test_label = next(test_data_iter)
#
#
# def imshow(img):
# img = img / 2 + 0.5
# npimg = img.numpy()
# plt.imshow(np.transpose(npimg, (1, 2, 0)))
# plt.show()
#
# print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
# imshow(utils.make_grid(test_image))
# Build a 5-class AlexNet with custom weight initialisation and move it to the
# selected device.
net = AlexNet(num_classes=5, init_weights=True)
net.to(device)
loss_function = nn.CrossEntropyLoss()
# para = list(net.parameters())  # handy for inspecting the model's parameters
optimizer = optim.Adam(net.parameters(), lr=0.0002)  # tune the learning rate as needed
save_path = './AlexNet.pth'
best_acc = 0.0
# Number of batches per epoch; used for the progress bar and the mean loss.
train_steps = len(train_loader)

for epoch in range(10):
    # train() enables dropout; eval() below disables it for validation.
    net.train()
    running_loss = 0.0
    t1 = time.perf_counter()  # time one training epoch
    for step, data in enumerate(train_loader, start=0):
        images, labels = data
        optimizer.zero_grad()
        outputs = net(images.to(device))
        loss = loss_function(outputs, labels.to(device))
        loss.backward()
        optimizer.step()

        # Accumulate the scalar loss for the per-epoch average.
        loss_value = loss.item()
        running_loss += loss_value

        # Progress bar: percentage done, 50-character bar, current batch loss.
        rate = (step + 1) / train_steps
        a = "*" * int(rate * 50)
        b = "." * int((1 - rate) * 50)
        print("\r train loss:{:^3.0f}%[{}->{}]{:.3f}".format(int(rate * 100), a, b, loss_value), end="")
    print()
    print(time.perf_counter() - t1)

    # Validation pass: count correct top-1 predictions.
    net.eval()
    acc = 0.0
    with torch.no_grad():  # no gradient tracking during evaluation
        for data_test in validate_loader:
            test_images, test_labels = data_test
            outputs = net(test_images.to(device))
            predict_y = torch.max(outputs, dim=1)[1]  # index of the largest logit
            acc += (predict_y == test_labels.to(device)).sum().item()
    accurate_test = acc / val_num

    # Keep only the weights with the best validation accuracy so far.
    if accurate_test > best_acc:
        best_acc = accurate_test
        torch.save(net.state_dict(), save_path)

    # Average over the number of batches (not the last zero-based index), and
    # format the loss as a float with three decimals.
    print('[epoch %d] train_loss: %.3f test_accuracy:%.3f' %
          (epoch + 1, running_loss / train_steps, accurate_test))

print('Finished Training')
# ===== predict.py =====
import torch
from model import AlexNet
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
import json
import os
# Inference-time preprocessing: resize to 224 x 224, convert to a tensor, then
# normalise each of the three channels with mean 0.5 and std 0.5.
data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])
def main():
    """Classify one image with the trained AlexNet and show the result.

    Reads the image ``5.jfif``, the index-to-class mapping
    ``./class_indices.json`` and the weights ``./AlexNet.pth`` from the current
    directory, prints the predicted class with its softmax probability, and
    displays the image with that text as the plot title.

    Raises:
        AssertionError: If the image, the class-index file or the weights file
            does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load image
    # NOTE: these path checks use ``assert`` and are skipped under ``python -O``.
    img_path = "5.jfif"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    # Force three channels so grayscale/RGBA/palette images match the
    # 3-channel normalisation and the model's first convolution.
    img = Image.open(img_path).convert("RGB")
    plt.imshow(img)
    # The module-level transform yields a [C, H, W] tensor.
    img = data_transform(img)
    # expand batch dimension -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read class_indict (maps the index, as a string key, to the class name)
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)
    with open(json_path, "r") as json_file:
        class_indict = json.load(json_file)

    # create model
    model = AlexNet(num_classes=5).to(device)

    # load model weights
    weights_path = "./AlexNet.pth"
    assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
    # map_location lets weights saved on a GPU load on a CPU-only machine.
    model.load_state_dict(torch.load(weights_path, map_location=device))

    model.eval()  # disable dropout for inference
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).item()

    print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)],
                                               predict[predict_cla].item())
    plt.title(print_res)
    print(print_res)
    plt.show()


if __name__ == '__main__':
    main()
# Works very well.