前言
- 🍨 本文為🔗365天深度學習訓練營 中的學習紀錄博客
- 🍖 原作者:K同学啊 | 接輔導、項目定制
一 我的环境
电脑:Dell Inspiron 7000 i7 9750H
操作系统:Windows 11
显卡:NVIDIA GTX 1650 4G
语言环境:Python 3.11.0
开发工具:Jupyter Notebook 6.5.4
深度学习环境:Pytorch 2.2.2
cuda: 12.2.91
-
二 开发过程
-
1. 设置GPU
-
# Core PyTorch / torchvision packages used throughout the notebook.
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision
from torchvision import transforms, datasets

# Standard-library and imaging helpers.
import os
import PIL
import pathlib

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Bare expression: echoes the selected device when this is the last line of a notebook cell.
device
2. 导入数据
-
import os
import PIL
import random
import pathlib

# Root folder of the dataset; each sub-directory holds the images of one class.
data_dir = 'Data'
data_dir = pathlib.Path(data_dir)

data_paths = list(data_dir.glob('*'))
print(data_paths)

# Derive class names from the sub-directory names.
# - Path.name is separator-agnostic; the previous str(path).split("\\")[1] only
#   worked with Windows path separators and raised IndexError elsewhere.
# - Only directories are kept and the names are sorted, mirroring how
#   torchvision's ImageFolder is documented to discover classes, so
#   len(classeNames) agrees with the labels produced below.
classeNames = sorted(path.name for path in data_paths if path.is_dir())
classeNames

total_datadir = './Data/'

# More background on transforms.Compose:
# https://blog.csdn.net/qq_38251616/article/details/124878863
train_transforms = transforms.Compose([
    # Resize every input image to a uniform 224x224.
    transforms.Resize([224, 224]),
    # Convert a PIL Image / numpy.ndarray to a tensor scaled to [0, 1].
    transforms.ToTensor(),
    # Per-channel standardisation, which generally helps the model converge.
    # NOTE(review): the original comment said these statistics were sampled
    # from this dataset; the values match the commonly used ImageNet channel
    # statistics instead - confirm which is intended.
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
])

# Load every image under total_datadir, labelled by its sub-directory.
total_data = datasets.ImageFolder(total_datadir, transform=train_transforms)
print(total_data)
# Bare expression: shows the class-name -> label-index mapping in a notebook cell.
total_data.class_to_idx
3. 划分数据集
-
# Hold out 20% of the images for testing; the remainder is used for training.
train_size = int(0.8 * len(total_data))
test_size = len(total_data) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    total_data, [train_size, test_size]
)
print(train_dataset, test_dataset)
print(train_size,test_size)

batch_size = 24

# Training batches are reshuffled every epoch.
train_dl = torch.utils.data.DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=1)
# Evaluation does not benefit from shuffling; a fixed order keeps the
# evaluation pass (and the shape probe below) deterministic for a given split.
test_dl = torch.utils.data.DataLoader(test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=1)

# Peek at a single batch to confirm tensor shapes and the label dtype.
for X, y in test_dl:
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    break
二、构建简单的CNN网络
-
import torch.nn.functional as F


class Network_bn(nn.Module):
    """Small CNN: four 5x5 conv + batch-norm stages, two max-pools, one linear head.

    ``fc1`` is sized for 3x224x224 inputs. With 5x5 kernels, stride 1 and no
    padding, the spatial size goes 224 -> 220 -> 216 -> (pool) 108 -> 104 ->
    100 -> (pool) 50, giving 24 * 50 * 50 features.

    Args:
        num_classes: Number of output logits. When ``None`` (the default), the
            length of the module-level ``classeNames`` list is used, which is
            what the original zero-argument constructor did.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            num_classes = len(classeNames)

        # nn.Conv2d arguments: in_channels, out_channels, kernel_size,
        # stride (default 1), padding (default 0).
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=5, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(12)
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=5, stride=1, padding=0)
        self.bn2 = nn.BatchNorm2d(12)
        self.pool = nn.MaxPool2d(2,2)
        self.conv4 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=5, stride=1, padding=0)
        self.bn4 = nn.BatchNorm2d(24)
        self.conv5 = nn.Conv2d(in_channels=24, out_channels=24, kernel_size=5, stride=1, padding=0)
        self.bn5 = nn.BatchNorm2d(24)
        self.fc1 = nn.Linear(24*50*50, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.pool(x)
        x = F.relu(self.bn4(self.conv4(x)))
        x = F.relu(self.bn5(self.conv5(x)))
        x = self.pool(x)
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 24*50*50), this can never fold a wrong-sized input into the
        # batch dimension; a size mismatch surfaces as a shape error in fc1.
        x = torch.flatten(x, 1)
        x = self.fc1(x)

        return x


# NOTE: this rebinds ``device`` to a plain string ("cuda" / "cpu").
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

model = Network_bn().to(device)
# Bare expression: prints the layer summary when run as a notebook cell.
model
三、 训练模型
loss_fn = nn.CrossEntropyLoss()  # classification loss
learn_rate = 0.00065             # learning rate
opt = torch.optim.SGD(model.parameters(), lr=learn_rate)


def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Returns:
        ``(accuracy, loss)`` where accuracy is the number of correct
        predictions divided by ``len(dataloader.dataset)`` and loss is the sum
        of the per-batch losses divided by the number of batches.
    """
    sample_count = len(dataloader.dataset)  # number of samples in the dataset
    batch_count = len(dataloader)           # number of batches per pass
    correct_total, loss_total = 0, 0

    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass and loss against the ground-truth labels.
        logits = model(inputs)
        batch_loss = loss_fn(logits, labels)

        # Backward pass: clear old gradients, back-propagate, update weights.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Accumulate the correct-prediction count and the batch loss.
        correct_total += (logits.argmax(1) == labels).type(torch.float).sum().item()
        loss_total += batch_loss.item()

    return correct_total / sample_count, loss_total / batch_count


def test (dataloader, model, loss_fn):
    """Evaluate ``model`` over ``dataloader`` without updating any weights.

    Returns:
        ``(accuracy, loss)`` computed the same way as in ``train``.
    """
    sample_count = len(dataloader.dataset)  # number of samples in the dataset
    batch_count = len(dataloader)           # number of batches per pass
    correct_total, loss_total = 0, 0

    # Gradient tracking is disabled during evaluation to save memory.
    with torch.no_grad():
        for imgs, target in dataloader:
            imgs = imgs.to(device)
            target = target.to(device)

            target_pred = model(imgs)
            batch_loss = loss_fn(target_pred, target)

            loss_total += batch_loss.item()
            correct_total += (target_pred.argmax(1) == target).type(torch.float).sum().item()

    return correct_total / sample_count, loss_total / batch_count
正式训练
-
epochs = 50

# Per-epoch history, consumed later when plotting the curves.
train_loss, train_acc = [], []
test_loss, test_acc = [], []

# One report line per epoch; accuracies are printed as percentages.
template = ('Epoch:{:2d}, Train_acc:{:.1f}%, Train_loss:{:.3f}, Test_acc:{:.1f}%,Test_loss:{:.3f}')

for epoch in range(epochs):
    # Training mode for the optimisation pass.
    model.train()
    epoch_train_acc, epoch_train_loss = train(train_dl, model, loss_fn, opt)

    # Evaluation mode for the held-out pass.
    model.eval()
    epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)

    train_acc.append(epoch_train_acc)
    train_loss.append(epoch_train_loss)
    test_acc.append(epoch_test_acc)
    test_loss.append(epoch_test_loss)

    print(template.format(epoch+1,
                          epoch_train_acc*100, epoch_train_loss,
                          epoch_test_acc*100, epoch_test_loss))

print('Done')
-
四、 结果可视化
-
import matplotlib.pyplot as plt
import warnings

# Silence all warnings for the rest of the session.
warnings.filterwarnings("ignore")

plt.rcParams.update({
    'font.sans-serif': ['SimHei'],  # font intended to render Chinese labels
    'axes.unicode_minus': False,    # render the minus sign correctly
    'figure.dpi': 100,              # figure resolution
})

epochs_range = range(epochs)

plt.figure(figsize=(12, 3))

# One row per panel: (train series, train label, test series, test label,
# legend location, title).
# NOTE(review): the titles say "Validation" while the curves are labelled "Test".
panels = (
    (train_acc, 'Training Accuracy', test_acc, 'Test Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (train_loss, 'Training Loss', test_loss, 'Test Loss',
     'upper right', 'Training and Validation Loss'),
)
for position, (train_curve, train_label, test_curve, test_label,
               legend_loc, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, test_curve, label=test_label)
    plt.legend(loc=legend_loc)
    plt.title(panel_title)

plt.show()
指定图片进行预测
-
from PIL import Image

# Class names in label-index order, taken from the keys of class_to_idx.
classes = list(total_data.class_to_idx)


def predict_one_image(image_path, model, transform, classes):
    """Classify a single image file and print the predicted class name.

    Args:
        image_path: Path of the image to classify.
        model: Network used for inference; it is switched to eval mode.
        transform: Preprocessing applied to the RGB PIL image; it must return
            a tensor without a batch dimension.
        classes: Sequence mapping a label index to its class name.
    """
    # Open through a context manager so the file handle is released promptly;
    # convert() returns an independent image that outlives the handle.
    with Image.open(image_path) as raw_img:
        test_img = raw_img.convert('RGB')
    # To preview the picture, call plt.imshow(test_img) here.

    test_img = transform(test_img)
    img = test_img.to(device).unsqueeze(0)  # add the batch dimension

    model.eval()
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        output = model(img)

    _, pred = torch.max(output, 1)
    # Index with a plain int rather than a one-element tensor.
    pred_class = classes[pred.item()]
    print(f'预测结果是:{pred_class}')


# Predict one picture taken from the dataset folder.
predict_one_image(image_path='./Data/Monkeypox/M01_01_00.jpg',
                  model=model,
                  transform=train_transforms,
                  classes=classes)
五、保存并加载模型
-
# Save the model: only the learned parameters (state_dict) are written.
PATH = './model.pth'  # file name of the saved parameters
torch.save(model.state_dict(), PATH)

# Load the parameters back into the model.
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
model.load_state_dict(torch.load(PATH, map_location=device, weights_only=True))
六、 运行截图
-
七、个人总结
- 这次收获比较多。关键是预测准确率达到90%以上这个目标,为此试了很多方法。
- 上手先调整学习率,达到88%甚至89%不费什么力气,但是最后这1%试了很多方法。
- 继续调整学习率,效果就不大了。测试结果有一些波动,但只有极少数情况能达到90%,属于误差范围内的波动。
- 试了用Transforms翻转图片、旋转角度,没有效果。后来看了数据集,里面很多图片本来就是通过翻转、平移、旋转得来的,所以再做同样的增强就没有效果了。
- 试了VGG模型,结果很差,猜测是不是数据量太小,复杂的模型反倒效果不好。
- 在模型里面增加了一层,似乎有改善,但是非常微小,甚至不如误差影响大。
- 修改batch size似乎有些影响,但是参数组合太多。最后写了个程序:batch size从8到64,每次增加8;learning rate从0.00025到0.0008,每次增加0.00005。每组跑40个epoch,重复3次。跑了一夜,最后找到3次都能稳定达到90%以上的组合:batch size为24,learning rate为0.00065。