Course video: 30 第二部分完结竞赛:图片分类【动手学深度学习v2】 (bilibili)
Mu Li's official technical recap: 43 树叶分类竞赛技术总结【动手学深度学习v2】 (bilibili)
[Data Preprocessing and Building the Datasets]
import torch
import numpy as np
import pandas as pd
from torchvision import transforms
from PIL import Image
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l
from torch.utils.data import Dataset, DataLoader
# Read the train and test image lists
path = '/home/NAS/HUIDA/YaqinJiang/my/chapter_convolutional-modern/classify-leaves/'
train_csv = pd.read_csv(path + 'train.csv')
test_csv = pd.read_csv(path + 'test.csv')
# The train list is ordered by class, so shuffle it first so training sees a well-mixed sample
train_csv = train_csv.sample(frac=1).reset_index(drop=True)
# Number the train labels by class, for mapping predictions back later
# First build a label-to-index mapping
labels = sorted(list(set(train_csv.iloc[:, 1])))
label_index = {label: index for index, label in enumerate(labels)}
# Then build the inverse index-to-label mapping
index_label = {v: k for k, v in label_index.items()}
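As a quick sanity check (my addition, not part of the original flow), the two mappings should be exact inverses:
# Sanity check: mapping a label to its index and back must be the identity.
assert all(index_label[label_index[lab]] == lab for lab in labels)
print(len(labels))  # 176 classes in this competition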
# Create a Dataset class for reading the train, valid, and test data
class LeavesDataSet(Dataset):
    # k selects which fold is held out as the validation set
    def __init__(self, data_csv, mode='train', k=0):
        self.mode = mode
        self.k = k
        self.data_csv = data_csv
        self.data_len = len(data_csv.index)
        # The number of folds is fixed at 5 here
        self.fold_size = self.data_len // 5
        if mode == 'all':
            # All train data, used for the final training run
            self.all_images = np.asarray(self.data_csv.iloc[:, 0])
            self.all_labels = np.asarray(self.data_csv.iloc[:, 1])
            self.image_names = self.all_images
            self.label_names = self.all_labels
            self.trans = transforms.Compose([
                transforms.Resize((224, 224)),
                # Augment the training data
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
        elif mode == 'train':
            # Everything before and after the k-th fold is training data
            self.train_images = np.asarray(pd.concat([self.data_csv.iloc[0:self.k*self.fold_size, 0],
                                                      self.data_csv.iloc[(self.k+1)*self.fold_size:, 0]]))
            self.train_labels = np.asarray(pd.concat([self.data_csv.iloc[0:self.k*self.fold_size, 1],
                                                      self.data_csv.iloc[(self.k+1)*self.fold_size:, 1]]))
            self.image_names = self.train_images
            self.label_names = self.train_labels
            self.trans = transforms.Compose([
                transforms.Resize((224, 224)),
                # Augment the training data
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
        elif mode == 'valid':
            # The k-th fold is the validation data
            self.valid_images = np.asarray(self.data_csv.iloc[self.k*self.fold_size:(self.k+1)*self.fold_size, 0])
            self.valid_labels = np.asarray(self.data_csv.iloc[self.k*self.fold_size:(self.k+1)*self.fold_size, 1])
            self.image_names = self.valid_images
            self.label_names = self.valid_labels
            # No augmentation for validation data
            self.trans = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
        elif mode == 'test':
            self.test_images = np.asarray(self.data_csv.iloc[:, 0])
            self.image_names = self.test_images
            # No augmentation for test data
            self.trans = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    def __getitem__(self, index):
        # Look up the image file name for this index
        image_name = self.image_names[index]
        # Read the image and force three RGB channels
        image = self.trans(Image.open(path + image_name).convert('RGB'))
        if self.mode == 'test':
            return image
        # Convert the string label to its int class index; the default
        # collate function turns these ints into a tensor batch
        label = self.label_names[index]
        inx = label_index[label]
        return image, inx

    def __len__(self):
        return len(self.image_names)
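As a usage sketch (my addition), this is how the fold-0 splits would be built and one sample inspected:
# Usage sketch: fold 0 is the validation set, the other four folds train.
train_ds = LeavesDataSet(train_csv, mode='train', k=0)
valid_ds = LeavesDataSet(train_csv, mode='valid', k=0)
print(len(train_ds), len(valid_ds))  # valid holds len(train_csv) // 5 samples
image, label_idx = train_ds[0]
print(image.shape, label_idx)        # torch.Size([3, 224, 224]) and an int class index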
[Training and K-Fold Cross-Validation]
def train(net, K, num_epochs, lr, wd, batch_size, device):
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            # He (Kaiming) initialization works well with ReLU activations
            nn.init.kaiming_normal_(m.weight)
    # Note: weights are initialized once, outside the fold loop, so each
    # later fold continues from the previous fold's trained weights
    net.apply(init_weights)
    net.to(device)
    print('training on', device)
    optimizer = torch.optim.AdamW(net.parameters(), lr=lr, weight_decay=wd)
    loss = nn.CrossEntropyLoss()
    train_l_sum, train_a_sum, valid_a_sum = 0, 0, 0
    # K-fold cross-validation (the fold size in LeavesDataSet assumes 5 folds)
    for i in range(K):
        train_ls, train_acc, valid_acc = [], [], []
        train_dataset = LeavesDataSet(train_csv, mode='train', k=i)
        # Shuffle the training batches
        train_iter = DataLoader(train_dataset, batch_size, shuffle=True)
        valid_dataset = LeavesDataSet(train_csv, mode='valid', k=i)
        # No shuffling for validation, to mimic the test data
        valid_iter = DataLoader(valid_dataset, batch_size, shuffle=False)
        for epoch in range(num_epochs):
            # Train
            net.train()
            metric = d2l.Accumulator(3)
            for X, y in train_iter:
                optimizer.zero_grad()
                X, y = X.to(device), y.to(device)
                y_hat = net(X)
                l = loss(y_hat, y)
                l.backward()
                optimizer.step()
                with torch.no_grad():
                    metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            train_ls.append(metric[0] / metric[2])
            train_acc.append(metric[1] / metric[2])
            # Validate
            valid_acc.append(d2l.evaluate_accuracy_gpu(net, valid_iter))
        train_l_sum += train_ls[-1]
        train_a_sum += train_acc[-1]
        valid_a_sum += valid_acc[-1]
        if i == 0:
            d2l.plot(list(range(1, num_epochs + 1)), [train_ls, train_acc, valid_acc],
                     xlabel='epoch', xlim=[1, num_epochs],
                     legend=['train loss', 'train acc', 'valid acc'])
        print(f'Fold{i + 1}, train loss {float(train_ls[-1]):f}, '
              f'train acc {float(train_acc[-1]):f}, '
              f'valid acc {float(valid_acc[-1]):f}')
    return train_l_sum / K, train_a_sum / K, valid_a_sum / K
[Model Selection]
ResNet-18 is chosen as the net.
class Residual(nn.Module):
    def __init__(self, input_channels, num_channels,
                 use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels,
                               kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels,
                               kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.Conv2d(input_channels, num_channels,
                                   kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        Y += X
        return F.relu(Y)
def resnet_block(input_channels, num_channels, num_residuals,
                 first_block=False):
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(Residual(input_channels, num_channels,
                                use_1x1conv=True, strides=2))
        else:
            blk.append(Residual(num_channels, num_channels))
    return blk
b1 = nn.Sequential(nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
                   nn.BatchNorm2d(64), nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
b2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True))
b3 = nn.Sequential(*resnet_block(64, 128, 2))
b4 = nn.Sequential(*resnet_block(128, 256, 2))
b5 = nn.Sequential(*resnet_block(256, 512, 2))
net = nn.Sequential(b1, b2, b3, b4, b5,
                    nn.AdaptiveAvgPool2d((1, 1)),
                    nn.Flatten(), nn.Linear(512, 176))
X = torch.rand(size=(256, 3, 224, 224))
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)
Sequential output shape: torch.Size([256, 64, 56, 56])
Sequential output shape: torch.Size([256, 64, 56, 56])
Sequential output shape: torch.Size([256, 128, 28, 28])
Sequential output shape: torch.Size([256, 256, 14, 14])
Sequential output shape: torch.Size([256, 512, 7, 7])
AdaptiveAvgPool2d output shape: torch.Size([256, 512, 1, 1])
Flatten output shape: torch.Size([256, 512])
Linear output shape: torch.Size([256, 176])
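ResNet-18 was chosen partly for its modest size; a one-line check (my addition) confirms the parameter count:
# Roughly 11 million trainable parameters for this 176-class ResNet-18.
print(sum(p.numel() for p in net.parameters() if p.requires_grad))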
[Training and Saving the Model]
K, num_epochs, lr, wd, batch_size = 5, 50, 1e-4, 1e-3, 64
train_l, train_a, valid_a = train(net, K, num_epochs, lr, wd, batch_size, d2l.try_gpu())
print(f'{K}-fold validation: average train loss: {float(train_l):f}, '
      f'average train accuracy: {float(train_a):f}, '
      f'average validation accuracy: {float(valid_a):f}')
training on cpu
Fold1, train loss 0.074019, train acc 0.978683, valid acc 0.749046
Fold2, train loss 0.037991, train acc 0.986719, valid acc 0.922616
Fold3, train loss 0.023208, train acc 0.990397, valid acc 0.977112
Fold4, train loss 0.025671, train acc 0.989920, valid acc 0.977112
Fold5, train loss 0.031050, train acc 0.987945, valid acc 0.976022
5-fold validation: average train loss: 0.038388, average train accuracy: 0.986733, average validation accuracy: 0.920381
# After k-fold validation, save the model
torch.save(net.state_dict(), 'classify-leaves-resnet18_k.pt')
# Starting from the saved model, train once more on all the train data
def train_all(net, num_epochs, lr, wd, batch_size, device):
    net.to(device)
    print('training on', device)
    optimizer = torch.optim.AdamW(net.parameters(), lr=lr, weight_decay=wd)
    loss = nn.CrossEntropyLoss()
    train_ls, train_acc = [], []
    train_dataset = LeavesDataSet(train_csv, mode='all')
    train_iter = DataLoader(train_dataset, batch_size, shuffle=True)
    for epoch in range(num_epochs):
        net.train()
        metric = d2l.Accumulator(3)
        for X, y in train_iter:
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
        train_ls.append(metric[0] / metric[2])
        train_acc.append(metric[1] / metric[2])
    d2l.plot(list(range(1, num_epochs + 1)), [train_ls, train_acc],
             xlabel='epoch', xlim=[1, num_epochs],
             legend=['train loss', 'train acc'])
    print(f'train loss: {float(train_ls[-1]):f}, '
          f'train acc: {float(train_acc[-1]):f}')
net = nn.Sequential(b1, b2, b3, b4, b5,
                    nn.AdaptiveAvgPool2d((1, 1)),
                    nn.Flatten(), nn.Linear(512, 176))
net.load_state_dict(torch.load('classify-leaves-resnet18_k.pt'))
<All keys matched successfully>
num_epochs, lr, wd, batch_size = 50, 1e-4, 1e-3, 64
train_all(net, num_epochs, lr, wd, batch_size, d2l.try_gpu())
training on cpu
train loss: 0.025533, train acc: 0.988885
# Save the model
torch.save(net.state_dict(), 'classify-leaves-resnet18_all.pt')
[Prediction and Writing the Submission File]
def pred(net, batch_size, device):
    net.to(device)
    print('predicting on', device)
    test_dataset = LeavesDataSet(test_csv, mode='test')
    test_iter = DataLoader(test_dataset, batch_size, shuffle=False)
    net.eval()
    preds = None
    with torch.no_grad():
        for X in test_iter:
            X = X.to(device)
            p = net(X).detach()
            if preds is None:
                preds = p.argmax(1)
            else:
                preds = torch.cat([preds, p.argmax(1)])
    submission = pd.read_csv(path + 'sample_submission.csv')
    # Move predictions back to the CPU before converting to numpy
    submission.label = preds.cpu().numpy()
    # Map the predicted class indices back to string labels
    submission.label = submission.label.apply(lambda x: index_label[x])
    submission.to_csv(path + 'submission.csv', index=False)
pred(net, batch_size, d2l.try_gpu())
predicting on cpu
After submitting to Kaggle: Private Score 0.85636, Public Score 0.84386.
[Summary]
- Classify-leaves data preprocessing:
  - Split the labels in train.csv into classes (176 in total) and assign each a class index: build a label_index mapping (label → index) for converting labels to indices when constructing the dataset, and the inverse index_label mapping (index → label) for converting predicted indices back to labels when writing the submission file.
  - The labels in train.csv are grouped by position in the file, so shuffle the rows after reading to avoid a label-distribution gap between the training and validation sets.
- Building the datasets:
  - Subclass Dataset to create a new dataset class, LeavesDataSet, that reads the training, validation, and test sets.
  - The training and validation sets both come from train.csv; the validation set is drawn via K-fold cross-validation.
  - All images are resized to (224, 224) and normalized (note: all of them; I previously made the mistake of normalizing only the training set, and validation accuracy would not improve). The training images additionally receive data augmentation (most images have a ruler at the bottom, so only random horizontal flips are applied).
- Training and validation:
  - The train function largely follows d2l.train_ch6, with a few small tweaks:
    - Switched to He initialization, based on the result of exercise 2 in section 6.6 (LeNet);
    - Switched to the AdamW optimizer and added a weight_decay hyperparameter.
  - Limited by training resources, I picked ResNet-18, which performed well in chapter 7; with more resources, ResNet-50 is worth trying.
  - K-fold cross-validation is used with K = 5.
- Finally, the trained net predicts on test.csv and writes the output file.
- This exercise focused on implementing image classification end to end from scratch, mainly for learning. I did not spend much time on repeated tuning, and resource limits ruled out multiple or deeper models, so the final accuracy is modest; treat it as a baseline.
[Mu Li's Technical Summary]
Technical analysis
- Data augmentation; at test time, run several slightly weaker augmentations and average the results (see the TTA sketch after this list)
- Predict with several models and take a weighted average of the results
- Training algorithm and learning rate
- Data cleaning
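A minimal test-time-augmentation sketch, using the net, transforms, and normalization constants from the code above; `tta_transform` and `tta_predict` are hypothetical names of mine, not from the lecture:
# TTA sketch: several weakly-augmented forward passes, averaged softmax.
tta_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),   # deliberately weak augmentation
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

def tta_predict(net, pil_image, device, num_passes=5):
    net.eval()
    with torch.no_grad():
        probs = torch.stack([
            F.softmax(net(tta_transform(pil_image).unsqueeze(0).to(device)), dim=1)
            for _ in range(num_passes)])
    return probs.mean(0).argmax(1)  # average the probabilities, then pick the class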
On the data side
- There are duplicate images, which can be removed by hand
- Images contain a lot of background, and leaves have no inherent orientation, so more augmentation is possible:
  - Random rotation, more aggressive cropping
  - Cross-image augmentation (see the Mixup sketch after this list):
    - Mixup: randomly blend two images
    - CutMix: randomly combine patches from different images
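A minimal Mixup sketch under these assumptions: (X, y) is a batch as produced by the DataLoader above, and alpha parameterizes a Beta distribution for the mixing coefficient; `mixup_loss` is a hypothetical helper of mine. CutMix follows the same loss recipe but pastes a rectangular patch instead of blending whole images.
# Mixup sketch: blend each image with a random partner from the same batch
# and take the same convex combination of the two cross-entropy losses.
def mixup_loss(net, loss_fn, X, y, alpha=0.2):
    lam = float(np.random.beta(alpha, alpha))            # mixing coefficient
    perm = torch.randperm(X.shape[0], device=X.device)   # random partner per image
    y_hat = net(lam * X + (1 - lam) * X[perm])
    return lam * loss_fn(y_hat, y) + (1 - lam) * loss_fn(y_hat, y[perm])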
On the model side
- Models are mostly ResNet variants
  - DenseNet, ResNeXt, ResNeSt, ...
  - EfficientNet
- The optimizer is usually Adam or one of its variants
- The learning rate typically follows a cosine schedule, or is lowered whenever training stalls (see the scheduler sketch below)
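Both schedules exist in PyTorch; a sketch (my addition) assuming the optimizer and num_epochs from the training code above:
# Cosine schedule: decay the learning rate from its initial value toward 0
# over num_epochs; call scheduler.step() once per epoch after training it.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
# Alternative, "lower it when training stalls":
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max',
#                                                        factor=0.5, patience=3)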
AutoGluon
- 15 lines of code; installation plus training took 100 minutes
- 96% accuracy
- Accuracy can be improved further through customization
- The next release will search over more model hyperparameters
- AutoGluon currently focuses mainly on industrial applications, not competitions
Takeaways
- Ideas for improving accuracy: pick augmentations that suit the data, try newer models and optimizers, ensemble multiple models (see the sketch below), and use test-time augmentation
- The data is relatively simple, so rankings carry some randomness
- In industrial applications:
  - Model ensembling and test-time augmentation are rarely used; their engineering cost is too high
  - Model hyperparameters are usually fixed, and effort goes mainly into improving data quality
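To close, a minimal weighted-ensemble sketch (my addition; `nets`, `weights`, and `ensemble_predict` are hypothetical names, and X is a preprocessed batch as produced by LeavesDataSet):
# Ensemble sketch: weighted average of per-model softmax outputs.
def ensemble_predict(nets, weights, X):
    with torch.no_grad():
        probs = sum(w * F.softmax(net(X), dim=1)
                    for net, w in zip(nets, weights))
    return probs.argmax(1)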