I recently revisited convolutional neural networks for image classification. Disclaimer: the code below was generated with AI assistance and is organized here for review purposes.
For an image classification task with a relatively small training set, first check whether a plain convolutional neural network is good enough; only reach for a more complex architecture if it falls short.
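As a concrete instance of "a more complex network", fine-tuning a pretrained torchvision backbone is the usual next step. A minimal sketch (ResNet-18 and the torchvision 0.13+ weights API, chosen here purely for illustration):

import torch.nn as nn
from torchvision import models

# Load an ImageNet-pretrained ResNet-18 and swap in a new classification head
backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
backbone.fc = nn.Linear(backbone.fc.in_features, 4)  # 4 classes, matching this post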
If training loss is very low but test/validation performance is poor, the model has most likely overfit; dropout and regularization terms are common remedies (see the weight-decay sketch below).
If the model does not even converge on the training set, it may be under-parameterized; increasing model capacity is the usual fix.
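For the regularization term mentioned above, PyTorch exposes L2 weight decay directly on the optimizer. A minimal sketch, reusing the `model` defined later in the post (the coefficient 1e-4 is an assumed starting point, not a tuned value):

import torch.optim as optim

# weight_decay adds an L2 penalty on the parameters at each update step
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)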
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from PIL import Image
# Custom dataset: assumes an ImageFolder-style layout, i.e. root_dir/<class_name>/<image>
class UserDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # Collect class names from the sub-directory names
        self.classes = sorted(d for d in os.listdir(root_dir)
                              if os.path.isdir(os.path.join(root_dir, d)))
        # Map each class name to an integer label
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}
        # Store (image path, label) tuples; adjust to your own dataset layout
        self.images = []
        for cls in self.classes:
            cls_dir = os.path.join(root_dir, cls)
            for fname in os.listdir(cls_dir):
                self.images.append((os.path.join(cls_dir, fname), self.class_to_idx[cls]))

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path, label = self.images[idx]
        img = Image.open(img_path).convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, label
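# Side note: for the folder layout assumed above, torchvision's built-in
# ImageFolder is equivalent and can replace this class entirely:
#   traindataset = datasets.ImageFolder(root_dir, transform=train_transform)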
# Dataset root directories (fill in your own paths)
root_dir = ''
val_dir = ''
test_dir = ''
# 定义转换
transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.RandomHorizontalFlip(), # 添加随机水平翻转
transforms.RandomVerticalFlip(), # 添加随机垂直翻转
transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), # 添加颜色抖动
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# Build the datasets and loaders
traindataset = UserDataset(root_dir, transform=train_transform)
valdataset = UserDataset(val_dir, transform=eval_transform)
train_loader = DataLoader(traindataset, batch_size=32, shuffle=True)
val_loader = DataLoader(valdataset, batch_size=32, shuffle=False)
num_classes = 4  # set to your own number of classes
# CNN model definition
class ConvNet(nn.Module):
    def __init__(self, dropout_rate=0.2):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.fc1 = nn.Linear(32 * 56 * 56, 64)   # tunable; 56 = 224 / 2 / 2 after two pools
        self.dropout = nn.Dropout(dropout_rate)  # dropout to mitigate overfitting
        self.fc2 = nn.Linear(64, num_classes)
        # Kaiming initialization for conv layers, constants for batch norm
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.pool(nn.functional.relu(self.bn1(self.conv1(x))))  # 224 -> 112
        x = self.pool(nn.functional.relu(self.bn2(self.conv2(x))))  # 112 -> 56
        x = x.view(x.size(0), -1)            # flatten to 32 * 56 * 56
        x = nn.functional.relu(self.fc1(x))  # 32*56*56 -> 64
        x = self.dropout(x)                  # apply dropout
        x = self.fc2(x)                      # 64 -> num_classes
        return x
model = ConvNet()
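# Quick sanity check for the flatten size: two 2x2 max-pools halve 224 twice
# (224 -> 112 -> 56), so conv features flatten to 32 * 56 * 56 as fc1 expects.
model.eval()
with torch.no_grad():
    out = model(torch.zeros(2, 3, 224, 224))  # dummy batch of two images
print(out.shape)  # torch.Size([2, 4]) with num_classes = 4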
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
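# Optional: over 100 epochs a learning-rate schedule often helps; a sketch with
# assumed (untuned) values -- uncomment and call scheduler.step() once per epoch:
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)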
# Training loop
epochs = []
loss_list = []
acc_list = []
num_epochs = 100
for epoch in range(num_epochs):
    epochs.append(epoch + 1)
    model.train()  # back to training mode (re-enables dropout / batch-norm updates)
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    model.eval()  # evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for images, labels in val_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_accuracy = 100 * correct / total
    print('Epoch [{}/{}], Loss: {:.4f}, Validation Accuracy: {:.2f}%'
          .format(epoch + 1, num_epochs, running_loss / len(train_loader), val_accuracy))
    loss_list.append(running_loss / len(train_loader))
    acc_list.append(val_accuracy)
# Save the trained weights
torch.save(model.state_dict(), 'model.pth')
# Plot the loss curve
plt.plot(epochs, loss_list, label='Loss', color='blue')
# Plot the accuracy curve (note: a different scale from loss, on the same axis)
plt.plot(epochs, acc_list, label='Accuracy', color='red')
# Title, axis labels, and legend
plt.title('Loss and Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Value')
plt.legend()
plt.savefig('loss_accuracy_plot.png')
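The script defines `test_dir` but never uses it. Below is a minimal sketch for scoring the saved weights on the test set, assuming the same folder layout and reusing the `eval_transform` defined earlier:

# Evaluate the saved model on the held-out test set
testdataset = UserDataset(test_dir, transform=eval_transform)
test_loader = DataLoader(testdataset, batch_size=32, shuffle=False)
model = ConvNet()
model.load_state_dict(torch.load('model.pth'))
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Test Accuracy: {:.2f}%'.format(100 * correct / total))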