数据扩充:Data augmentation
由于神经网络算法是基于数据驱动的,因此,有一种观点认为神经网络是靠数据喂出来的,如果能够增加训练数据,提供海量数据进行训练,则能够有效提升算法的准确率,因为这样可以避免过拟合,从而可以进一步增大、加深网络结构。而当训练数据有限时,可以通过一些变换从已有的训练数据集中生成一些新的数据,以快速地扩充训练数据。
其中,最简单、通用的图像数据变形的方式:水平翻转图像,从原始图像中随机裁剪、平移变换,颜色、光照变换。
训练过程
import os
import sys
import json
import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm
from classic_models.alexnet import AlexNet
from classic_models.googlenet_v1 import GoogLeNet
import os
import json
import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
# mac m1 电脑选择mps GPU
def try_gpu(i=0):
if torch.cuda.device_count() >= i + 1:
return torch.device(f'cuda:{i}')
try:
return torch.device('mps')
except:
return torch.device('cpu')
def main():
# 判断可用设备
device = try_gpu()
print("using {} device.".format(device))
# 注意改成自己的数据集路径
data_path = '/Users/jiangxiyu/根目录/深度学习/flower'
assert os.path.exists(data_path), "{} path does not exist.".format(data_path)
# 数据预处理与增强
"""
ToTensor()能够把灰度范围从0-255变换到0-1之间的张量.
transform.Normalize()则把0-1变换到(-1,1). 具体地说, 对每个通道而言, Normalize执行以下操作: image=(image-mean)/std
其中mean和std分别通过(0.5,0.5,0.5)和(0.5,0.5,0.5)进行指定。原来的0-1最小值0则变成(0-0.5)/0.5=-1; 而最大值1则变成(1-0.5)/0.5=1.
也就是一个均值为0, 方差为1的正态分布. 这样的数据输入格式可以使神经网络更快收敛。
"""
data_transform = {
"train": transforms.Compose([transforms.Resize(224),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
"val": transforms.Compose([transforms.Resize((224, 224)), # val不需要任何数据增强
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}
# 使用ImageFlolder加载数据集中的图像,并使用指定的预处理操作来处理图像, ImageFlolder会同时返回图像和对应的标签。 (image path, class_index) tuples
train_dataset = datasets.ImageFolder(root=os.path.join(data_path, "train"), transform=data_transform["train"])
validate_dataset = datasets.ImageFolder(root=os.path.join(data_path, "val"), transform=data_transform["val"])
train_num = len(train_dataset)
val_num = len(validate_dataset)
# 使用class_to_idx给类别一个index,作为训练时的标签: {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
flower_list = train_dataset.class_to_idx
# 创建一个字典,存储index和类别的对应关系,在模型推理阶段会用到。
cla_dict = dict((val, key) for key, val in flower_list.items())
# 将字典写成一个json文件
json_str = json.dumps(cla_dict, indent=4)
with open( os.path.join(data_path, 'class_indices.json') , 'w') as json_file:
json_file.write(json_str)
batch_size = 64 # batch_size大小,是超参,可调,如果模型跑不起来,尝试调小batch_size
# 使用 DataLoader 将 ImageFloder 加载的数据集处理成批量(batch)加载模式
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True )
validate_loader = torch.utils.data.DataLoader(validate_dataset, batch_size=4, shuffle=False ) # 注意,验证集不需要shuffle
print("using {} images for training, {} images for validation.".format(train_num, val_num))
# 实例化模型,并送进设备
net = AlexNet(num_classes = 5,init_weights=True)
net.to(device)
# 指定损失函数用于计算损失;指定优化器用于更新模型参数;指定训练迭代的轮数,训练权重的存储地址
loss_function = nn.CrossEntropyLoss() # MSE
optimizer = optim.Adam(net.parameters(), lr=0.0002)
epochs = 70
save_path = os.path.abspath(os.path.join(os.getcwd(), './results/weights/alexnet'))
if not os.path.exists(save_path):
os.makedirs(save_path)
best_acc = 0.0 # 初始化验证集上最好的准确率,以便后面用该指标筛选模型最优参数。
for epoch in range(epochs):
############################################################## train ######################################################
net.train()
acc_num = torch.zeros(1).to(device) # 初始化,用于计算训练过程中预测正确的数量
sample_num = 0 # 初始化,用于记录当前迭代中,已经计算了多少个样本
# tqdm是一个进度条显示器,可以在终端打印出现在的训练进度
train_bar = tqdm(train_loader, file=sys.stdout, ncols=100)
for data in train_bar :
images, labels = data
sample_num += images.shape[0] #[64, 3, 224, 224]
optimizer.zero_grad()
outputs = net(images.to(device)) # output_shape: [batch_size, num_classes]
pred_class = torch.max(outputs, dim=1)[1] # torch.max 返回值是一个tuple,第一个元素是max值,第二个元素是max值的索引。
acc_num += torch.eq(pred_class, labels.to(device)).sum()
loss = loss_function(outputs, labels.to(device)) # 求损失
loss.backward() # 自动求导
optimizer.step() # 梯度下降
# print statistics
train_acc = acc_num.item() / sample_num
# .desc是进度条tqdm中的成员变量,作用是描述信息
train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1, epochs, loss)
# validate
net.eval()
acc_num = 0.0 # accumulate accurate number per epoch
with torch.no_grad():
for val_data in validate_loader:
val_images, val_labels = val_data
outputs = net(val_images.to(device))
predict_y = torch.max(outputs, dim=1)[1]
acc_num += torch.eq(predict_y, val_labels.to(device)).sum().item()
val_accurate = acc_num / val_num
print('[epoch %d] train_loss: %.3f train_acc: %.3f val_accuracy: %.3f' % (epoch + 1, loss, train_acc, val_accurate))
# 判断当前验证集的准确率是否是最大的,如果是,则更新之前保存的权重
if val_accurate > best_acc:
best_acc = val_accurate
torch.save(net.state_dict(), os.path.join(save_path, "AlexNet.pth") )
# 每次迭代后清空这些指标,重新计算
train_acc = 0.0
val_accurate = 0.0
print('Finished Training')
# if __name__ == '__main__':
# main()
main()
验证过程
import os
import json
import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
from classic_models.alexnet import AlexNet
def try_gpu(i=0):
"""如果存在,则返回gpu(i),否则返回cpu()
Defined in :numref:`sec_use_gpu`"""
if torch.cuda.device_count() >= i + 1:
return torch.device(f'cuda:{i}')
try:
return torch.device('mps')
except:
return torch.device('cpu')
def main():
device = try_gpu()
print("using {} device.".format(device))
data_transform = transforms.Compose(
[transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# load image
img_path = "/Users/jiangxiyu/根目录/深度学习/flower/train/daisy/5547758_eea9edfd54_n.jpg"
assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
img = Image.open(img_path)
plt.imshow(img)
# [N, C, H, W]
img = data_transform(img)
# expand batch dimension
img = torch.unsqueeze(img, dim=0)
# read class_indict
json_path = '/Users/jiangxiyu/根目录/深度学习/flower/class_indices.json'
assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)
json_file = open(json_path, "r")
class_indict = json.load(json_file)
# create model
model = AlexNet(num_classes=5).to(device)
# load model weights
weights_path = "/Users/jiangxiyu/根目录/深度学习/Deep-Learning-Image-Classification-Models-Based-CNN-or-Attention/results/weights/alexnet/AlexNet.pth"
assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
model.load_state_dict(torch.load(weights_path))
model.eval()
with torch.no_grad():
# predict class
output = torch.squeeze(model(img.to(device))).cpu()
predict = torch.softmax(output, dim=0)
predict_cla = torch.argmax(predict).numpy()
print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)],
predict[predict_cla].numpy())
plt.title(print_res)
for i in range(len(predict)):
print("class: {:10} prob: {:.3}".format(class_indict[str(i)],
predict[i].numpy()))
plt.show()
if __name__ == '__main__':
main()
AlexNet 网络结构
import torch.nn as nn
import torch
from torchsummary import summary
class AlexNet(nn.Module):
def __init__(self, num_classes=1000, init_weights=False):
super(AlexNet, self).__init__()
self.features = nn.Sequential(
nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2), # input[3, 224, 224] output[96, 55, 55]
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2), # output[96, 27, 27]
nn.Conv2d(96, 256, kernel_size=5, padding=2), # output[256, 27, 27]
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2), # output[256, 13, 13]
nn.Conv2d(256, 384, kernel_size=3, padding=1), # output[384, 13, 13]
nn.ReLU(inplace=True),
nn.Conv2d(384, 384, kernel_size=3, padding=1), # output[384, 13, 13]
nn.ReLU(inplace=True),
nn.Conv2d(384, 256, kernel_size=3, padding=1), # output[256, 13, 13]
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2), # output[256, 6, 6]
)
self.classifier = nn.Sequential(
nn.Dropout(p=0.5),
nn.Linear(256 * 6 * 6, 4096),
nn.ReLU(inplace=True),
nn.Dropout(p=0.5),
nn.Linear(4096, 4096),
nn.ReLU(inplace=True),
nn.Linear(4096, num_classes),
)
if init_weights:
self._initialize_weights()
def forward(self, x):
x = self.features(x)
x = torch.flatten(x, start_dim=1)
x = self.classifier(x)
return x
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.constant_(m.bias, 0)
def alexnet(num_classes):
model = AlexNet(num_classes=num_classes)
return model
net = AlexNet(num_classes=1000)
summary(net.to('mps'), (3,224,224))
#########################################################################################################################################
# Total params: 62,378,344
# Trainable params: 62,378,344
# Non-trainable params: 0
# ----------------------------------------------------------------
# Input size (MB): 0.57
# Forward/backward pass size (MB): 11.09
# Params size (MB): 237.95
# Estimated Total Size (MB): 249.62
# ----------------------------------------------------------------
# conv_parameters: 3,747,200
# fnn_parameters: 58,631,144 93% 的参数量