- Model definition: use the `timm` library to create a pretrained resnet18 model with a 2-class head.

```python
import timm

model = timm.create_model('resnet18', pretrained=True, num_classes=2)
model = model.cuda()
```
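As a quick sanity check (not part of the original pipeline), a dummy forward pass confirms the classifier head produces two logits per image; `pretrained=False` is used here only to skip the weight download:

```python
import torch
import timm

# Sketch: verify the 2-class head on a dummy batch.
m = timm.create_model('resnet18', pretrained=False, num_classes=2)
m.eval()
with torch.no_grad():
    dummy = torch.randn(1, 3, 256, 256)   # same spatial size the transforms below produce
    print(m(dummy).shape)                 # torch.Size([1, 2])
```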
- Training/validation data loading: use `torch.utils.data.DataLoader` to load the training and validation sets, applying the `transforms.Compose` pipelines for data augmentation (only the first 1000 samples of each split are used here). `FFDIDataset` is a custom dataset class; a sketch of it follows the code.

```python
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms

# Training loader: resizing, random flips, tensor conversion and ImageNet normalization.
train_loader = torch.utils.data.DataLoader(
    FFDIDataset(train_label['path'].head(1000), train_label['target'].head(1000),
                transforms.Compose([
                    transforms.Resize((256, 256)),
                    transforms.RandomHorizontalFlip(),
                    transforms.RandomVerticalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                ])
                ), batch_size=40, shuffle=True, num_workers=4, pin_memory=True
)

# Validation loader: no random augmentation, only resizing and normalization.
val_loader = torch.utils.data.DataLoader(
    FFDIDataset(val_label['path'].head(1000), val_label['target'].head(1000),
                transforms.Compose([
                    transforms.Resize((256, 256)),
                    transforms.ToTensor(),
                    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                ])
                ), batch_size=40, shuffle=False, num_workers=4, pin_memory=True
)

# Loss, optimizer and learning-rate schedule.
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.85)

best_acc = 0.0
for epoch in range(2):
    print('Epoch: ', epoch)

    train(train_loader, model, criterion, optimizer, epoch)
    scheduler.step()  # step the scheduler once per epoch, after the optimizer updates

    val_acc = validate(val_loader, model, criterion)
    if val_acc.avg.item() > best_acc:
        best_acc = round(val_acc.avg.item(), 2)
        torch.save(model.state_dict(), f'./model_{best_acc}.pt')
```
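`FFDIDataset` is not defined in this section; the following is only a minimal sketch consistent with how it is called above, assuming it wraps image paths, integer labels, and an optional transform:

```python
from PIL import Image
import torch
from torch.utils.data import Dataset

class FFDIDataset(Dataset):
    """Sketch: pairs image paths with labels and applies a torchvision transform."""
    def __init__(self, img_paths, img_labels, transform=None):
        self.img_paths = list(img_paths)
        self.img_labels = list(img_labels)
        self.transform = transform

    def __getitem__(self, index):
        img = Image.open(self.img_paths[index]).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, torch.tensor(int(self.img_labels[index]), dtype=torch.long)

    def __len__(self):
        return len(self.img_paths)
```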
- Inference and submission: run the trained model over the full `val_label` table with `predict` (defined below) and write the predicted probability of class 1 to `submit.csv`.

```python
# Inference loader: deterministic transforms only.
test_loader = torch.utils.data.DataLoader(
    FFDIDataset(val_label['path'], val_label['target'],
                transforms.Compose([
                    transforms.Resize((256, 256)),
                    transforms.ToTensor(),
                    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                ])
                ), batch_size=40, shuffle=False, num_workers=4, pin_memory=True
)

# predict() is defined below; column 1 is the softmax probability of the positive class.
val_label['y_pred'] = predict(test_loader, model, 1)[:, 1]
val_label[['img_name', 'y_pred']].to_csv('submit.csv', index=None)
```
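A usage note (an assumption about intended use, not part of the original code): `predict` sums the softmax outputs over its `tta` passes, so with more than one pass the sum can be divided by `tta` to recover an averaged probability; with `tta=1`, as above, the output is already a plain probability. Note that the inference transforms above are deterministic, so multiple passes only differ if random augmentation is added to the test-time pipeline.

```python
# Sketch: averaging test-time-augmentation passes (hypothetical tta value).
tta = 5
probs = predict(test_loader, model, tta) / tta   # averaged softmax probabilities
val_label['y_pred'] = probs[:, 1]                # probability of class 1
```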
- Data augmentation methods used
- Resizing: `transforms.Resize((256, 256))` resizes every image to 256x256 pixels, which helps keep the input data consistent.
- Random horizontal flip: `transforms.RandomHorizontalFlip()` flips images horizontally at random, simulating objects viewed from different directions and improving the model's ability to generalize.
- Random vertical flip: `transforms.RandomVerticalFlip()` flips images vertically at random, again increasing data diversity so the model learns features from different viewpoints.
- Tensor conversion: `transforms.ToTensor()` converts image data to PyTorch tensors, the standard format for image data in deep learning.
- Normalization: `transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])` standardizes pixel values with the per-channel mean and standard deviation computed on ImageNet, which helps training stability and convergence speed (see the sketch after this list).
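A small sketch (not from the original code) of what `transforms.Normalize` computes per channel, which also shows how to undo it, e.g. for visualizing augmented samples:

```python
import torch

# Normalize applies (x - mean) / std per channel on a ToTensor()-style image in [0, 1].
mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
std  = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

x = torch.rand(3, 256, 256)        # stand-in for a transformed image
x_norm = (x - mean) / std          # what transforms.Normalize does
x_back = x_norm * std + mean       # inverse transform
print(torch.allclose(x, x_back))   # True (up to floating-point error)
```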
Dataset augmentation: data augmentation is an important technique for improving model performance in machine learning and deep learning. By applying a series of random transformations it increases the diversity of the training data and thereby the model's ability to generalize. Increasing data diversity is the core purpose of augmentation: operations such as rotation, scaling, and flipping applied to the original images generate new training samples, letting the model learn richer feature representations. `transforms.Compose` chains multiple image preprocessing steps together:
- `transforms.Resize((256, 256))`: resizes every image to 256x256 pixels.
- `transforms.RandomHorizontalFlip()`: randomly flips images horizontally.
- `transforms.RandomVerticalFlip()`: randomly flips images vertically.
- `transforms.ToTensor()`: converts a PIL image or NumPy array to a `torch.FloatTensor` and divides by 255, scaling pixel values from [0, 255] to [0, 1].
- `transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])`: standardizes the image using the ImageNet mean and standard deviation.
A short sketch of the full pipeline follows this list.
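A minimal sketch (not part of the original code) of what the training pipeline produces for a single RGB image; the placeholder image stands in for `Image.open(path)`:

```python
from PIL import Image
import torch
from torchvision import transforms

train_tfms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),                                 # uint8 [0, 255] -> float [0, 1]
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),           # per-channel (x - mean) / std
])

img = Image.new('RGB', (320, 180), color=(128, 128, 128))  # placeholder for Image.open(path)
x = train_tfms(img)
print(x.shape, x.dtype)                                    # torch.Size([3, 256, 256]) torch.float32
```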
- Training and validation loop: the `train` function runs one epoch of training, including the forward pass, loss computation, backpropagation, and parameter updates.
- The `validate` function evaluates the model on the validation set and computes its accuracy.
```python
import time

import numpy as np
import torch
import torch.nn.functional as F
from tqdm import tqdm_notebook

# AverageMeter / ProgressMeter are small metric-tracking helpers (see the sketch below).

def validate(val_loader, model, criterion):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    progress = ProgressMeter(len(val_loader), batch_time, losses, top1)

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in tqdm_notebook(enumerate(val_loader), total=len(val_loader)):
            input = input.cuda()
            target = target.cuda()

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc = (output.argmax(1).view(-1) == target.float().view(-1)).float().mean() * 100
            losses.update(loss.item(), input.size(0))
            top1.update(acc, input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f}'.format(top1=top1))
        return top1


def predict(test_loader, model, tta=10):
    # switch to evaluate mode
    model.eval()

    # accumulate softmax outputs over `tta` passes through the loader
    test_pred_tta = None
    for _ in range(tta):
        test_pred = []
        with torch.no_grad():
            for i, (input, target) in tqdm_notebook(enumerate(test_loader), total=len(test_loader)):
                input = input.cuda()
                target = target.cuda()

                # compute output
                output = model(input)
                output = F.softmax(output, dim=1)
                output = output.data.cpu().numpy()
                test_pred.append(output)

        test_pred = np.vstack(test_pred)
        if test_pred_tta is None:
            test_pred_tta = test_pred
        else:
            test_pred_tta += test_pred

    return test_pred_tta


def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    progress = ProgressMeter(len(train_loader), batch_time, losses, top1)

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        losses.update(loss.item(), input.size(0))
        acc = (output.argmax(1).view(-1) == target.float().view(-1)).float().mean() * 100
        top1.update(acc, input.size(0))

        # compute gradient and do an optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % 100 == 0:
            progress.pr2int(i)
```
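The `AverageMeter` and `ProgressMeter` helpers used above are not defined in this section; below is a minimal sketch consistent with how they are called (including the `pr2int` method invoked in `train` and the `.avg` attribute read in the training loop), not necessarily the author's exact implementation:

```python
class AverageMeter:
    """Sketch: tracks the latest value and the running average of a metric."""
    def __init__(self, name, fmt=':f'):
        self.name, self.fmt = name, fmt
        self.reset()

    def reset(self):
        self.val = self.avg = self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


class ProgressMeter:
    """Sketch: prints all tracked meters for a given batch index."""
    def __init__(self, num_batches, *meters):
        self.num_batches = num_batches
        self.meters = meters

    def pr2int(self, batch):  # name kept to match the call in train()
        entries = ['[{}/{}]'.format(batch, self.num_batches)] + [str(m) for m in self.meters]
        print('\t'.join(entries))
```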
- Performance evaluation: accuracy is the main evaluation metric, and the validation accuracy is printed after every epoch. The relevant lines from `train`/`validate` are:

```python
# measure accuracy and record loss
losses.update(loss.item(), input.size(0))
acc = (output.argmax(1).view(-1) == target.float().view(-1)).float().mean() * 100
top1.update(acc, input.size(0))
```
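For clarity, a toy example (the values are made up) of what this accuracy expression computes on one batch of logits:

```python
import torch

output = torch.tensor([[2.0, 1.0],   # predicted class 0
                       [0.2, 0.9],   # predicted class 1
                       [3.0, 0.1],   # predicted class 0
                       [0.4, 0.6]])  # predicted class 1
target = torch.tensor([0, 1, 1, 1])

acc = (output.argmax(1).view(-1) == target.float().view(-1)).float().mean() * 100
print(acc)  # tensor(75.) -- 3 of 4 predictions are correct
```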
-