2021青海省首届河湟杯数据湖算法大赛—车辆多属性识别赛道
记一个简单的分类任务 baseline
比赛地址:开发者社区
初赛 内容比较简单 就是区分车辆类型
type共4类,分别是:car、suv、van、truck。
没有做具体数据分布分析 简单baseline
K折训练 pytorch框架
训练参数
- 10折训练
- batch size根据GPU情况 16 32这种较为普遍(注:下文代码中 BATCH_SIZE=8)
- 图像大小256*256
- 训练epoch设定了100(注:下文代码中 EPOCH=200,与此处描述不一致)从结果上看有点过了 可能过拟合比较严重
- 网络backbone采用resnet101
- loss函数:下文代码实际使用的是 CrossEntropyLoss + label smoothing(label_smoothing=0.2),并非 focal loss
- 优化器 adam
- lr初始0.0005 lr策略采用余弦退火 cosine
- 数据增强 随机水平翻转 随机旋转 normalize
训练部分代码
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@Author :fangpf
@Date :2022/3/1 9:54
"""
import logging
import os
import time
import numpy as np
import tensorboardX as tb
import torch
from sklearn.model_selection import KFold
from torch import nn
from torch.utils.data import DataLoader
from torchvision.models import resnet101
from dataset.k_train_dataset import KTrainDataset
from utils.utils import build_optimizer, build_scheduler, load_pretrained_weight, seed_it
# ---- training configuration ----
# NOTE(review): the write-up above mentions 100 epochs and batch sizes of
# 16/32; the code as posted uses the values below.
EPOCH = 200       # epochs per fold
BATCH_SIZE = 8    # training mini-batch size
K_FOLD = 10       # number of cross-validation splits
# Timestamped log file name, e.g. logs/20220301095400.log (assumes 'logs/' exists).
t = time.strftime("%Y%m%d%H%M%S", time.localtime())
train_log = 'logs/' + t + '.log'
logging.basicConfig(filename=train_log, format='%(asctime)s - %(name)s - %(levelname)s -%(module)s: %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S ',
                    level=logging.INFO)
IMAGE_PATH = 'data/train'  # directory holding the training images
# Module-level TensorBoard writer, shared by run_train() and closed in train().
writer = tb.SummaryWriter()
def val_model(model, val_dataloader):
    """Evaluate `model` on `val_dataloader` and return top-1 accuracy in [0, 1].

    Fixes over the original:
    - wraps evaluation in torch.no_grad() (no autograd graph / GPU memory growth)
    - runs on whichever device the model lives on instead of hard-coded .cuda()
    - no builtin `sum` shadowing, no dim-less nn.Softmax() (argmax of the raw
      logits gives the same prediction as argmax of the softmax)
    - counts per-sample correctness, so any batch size works; with the
      batch_size=1 loader used by train() the result is identical to before.
    """
    model.eval()
    device = next(model.parameters()).device
    correct = 0
    total = 0
    with torch.no_grad():
        for image, label in val_dataloader:
            image = image.to(device)
            logits = model(image)
            preds = logits.argmax(dim=1).cpu()
            correct += (preds == label).sum().item()
            total += label.size(0)
    # Guard the empty-loader case instead of dividing by zero.
    return correct / total if total else 0.0
def run_train(model, train_dataloader, val_dataloader, loss_func, optimizer, scheduler, fold):
    """Train `model` for EPOCH epochs on one K-fold split.

    Logs loss/lr to the module-level TensorBoard `writer`, validates after every
    epoch via val_model(), and checkpoints the best-scoring state dict to
    weights/best_k_train_<fold+1>_fold.pth.
    """
    # Run on whichever device the model lives on (was hard-coded to .cuda()).
    device = next(model.parameters()).device
    best_acc = 0
    batches_per_epoch = len(train_dataloader)
    for epoch in range(1, EPOCH + 1):
        model.train()
        for i, data in enumerate(train_dataloader):
            image, label = data
            image = image.to(device)
            label = label.to(device)
            optimizer.zero_grad()
            out = model(image)
            loss = loss_func(out, label)
            loss.backward()
            optimizer.step()
            if i % 50 == 0:
                # Bug fix: the "(x/y)" progress originally printed i/EPOCH —
                # a batch index over the epoch count. Use the batch count.
                logging.info('Fold:{} Epoch:{}({}/{}) lr:{:6f} loss:{:6f}:'.format(
                    fold + 1, epoch, i, batches_per_epoch,
                    optimizer.param_groups[-1]['lr'], loss.item()))
            # Global step across folds/epochs; (epoch - 1) fixes an off-by-one
            # (epoch starts at 1) that skipped epoch 1's slots and overlapped
            # the next fold's step range.
            index = fold * EPOCH * batches_per_epoch + (epoch - 1) * batches_per_epoch + i + 1
            if index % 20 == 0:
                writer.add_scalar('loss', loss.item(), index)  # scalar, not tensor
                writer.add_scalar('lr', optimizer.param_groups[-1]['lr'], index)
        scheduler.step()  # cosine annealing advances once per epoch
        val_acc = val_model(model, val_dataloader)
        logging.info('Fold:{} / Epoch:{} val acc: {:6f}'.format(fold + 1, epoch, val_acc))
        writer.add_scalar('val_acc', val_acc, fold * EPOCH + epoch)
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), 'weights/best_k_train_{}_fold.pth'.format(fold + 1))
            logging.info('Best epoch/fold: {}/{} best acc: {:6f}'.format(epoch, fold + 1, best_acc))
    logging.info('Best acc: {:6f}'.format(best_acc))
def train():
    """Entry point: seeded K-fold training of a resnet101 4-class classifier.

    Bug fix: the original built the model, optimizer and scheduler ONCE before
    the fold loop, so folds 2..K resumed from weights already trained — and
    validated — on other folds' data (cross-validation leakage), and the cosine
    schedule was never reset. Everything fold-specific is now created per fold.
    """
    seed = 2022
    seed_it(seed)
    images = np.array(os.listdir(IMAGE_PATH))
    folds = KFold(n_splits=K_FOLD, shuffle=True, random_state=seed).split(range(len(images)))
    loss_func = nn.CrossEntropyLoss(label_smoothing=0.2)
    # Mirror the file log to the console as well.
    logger = logging.getLogger()
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.DEBUG)
    logger.addHandler(console_handler)
    for fold, (train_idx, val_idx) in enumerate(folds):
        # Fresh model + pretrained weights + optimizer + scheduler per fold.
        model = resnet101(num_classes=4)
        load_pretrained_weight(model, 'weights/resnet101-5d3b4d8f.pth')
        model = model.cuda()
        optimizer = build_optimizer(model)
        scheduler = build_scheduler(optimizer, lr_scheduler='cosine', max_epoch=EPOCH)
        train_dataset = KTrainDataset('data/train', images[train_idx], 'data/train_sorted.csv', width=256, height=256, mode='train')
        train_dataloader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, num_workers=0, shuffle=True)
        val_dataset = KTrainDataset('data/train', images[val_idx], 'data/train_sorted.csv', width=256, height=256, mode='val')
        val_dataloader = DataLoader(dataset=val_dataset, batch_size=1, num_workers=0, shuffle=False)
        run_train(model, train_dataloader, val_dataloader, loss_func, optimizer, scheduler, fold)
    writer.close()
if __name__ == '__main__':
    train()
dataset部分
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
@Author :fangpf
@Date :2022/3/3 11:05
"""
import os
import pandas
from torch.utils.data import Dataset
from torchvision.transforms import transforms
from utils.utils import resize_image
# Class names in label order; TYPES.index(name) yields the integer class id.
TYPES = ['car', 'suv', 'van', 'truck']
def get_transform(mode='train'):
    """Build the torchvision preprocessing pipeline for the given mode.

    'train' prepends random horizontal flips and +/-30 degree rotations to the
    shared ToTensor + Normalize steps; any other mode gets only the shared steps.
    """
    normalize = transforms.Normalize(
        mean=[0.40895729, 0.43373401, 0.42956238],
        std=[0.20944411, 0.22798627, 0.21760352],
    )
    steps = []
    if mode == 'train':
        steps.append(transforms.RandomHorizontalFlip(p=0.5))
        steps.append(transforms.RandomRotation(degrees=30))
    steps.append(transforms.ToTensor())
    steps.append(normalize)
    return transforms.Compose(steps)
class KTrainDataset(Dataset):
    """Image-classification dataset for one K-fold split.

    Args:
        data_root: directory containing the image files.
        image_path: iterable of image file names belonging to this split.
        label_path: CSV file with an 'id,type' header mapping file name -> type.
        width, height: target size each image is resized to.
        mode: 'train' enables augmentation; anything else is eval preprocessing.
    """

    def __init__(self, data_root, image_path, label_path, width=448, height=448, mode='train'):
        self.data_root = data_root
        self.image_path = image_path
        self.label_path = label_path
        self.mode = mode
        self.width = width
        self.height = height
        self.label_map = {}   # file name -> integer class id
        self.images = []      # absolute image paths, aligned with self.labels
        self.labels = []
        self.transform = get_transform(mode)
        self._read_csv()
        self._prepare_data()

    def _prepare_data(self):
        # Resolve each requested file name to (full path, label); a file name
        # missing from the CSV raises KeyError, as before.
        for image in self.image_path:
            label = self.label_map[image]
            self.images.append(os.path.join(self.data_root, image))
            self.labels.append(label)

    def _read_csv(self):
        # Bug fix: the original called read_csv(path, 'rb', engine='python'),
        # passing 'rb' as the positional `sep`. That kept the header as a single
        # 'id,type' column which was then split by hand — and positional `sep`
        # is removed in pandas >= 2.0, where the old call fails outright.
        # Parse the CSV properly: the 'id,type' header gives columns id / type.
        dataframe = pandas.read_csv(self.label_path)
        for image, label in zip(dataframe['id'], dataframe['type']):
            # .strip() mirrors the original line.strip(); unknown type names
            # still raise ValueError via TYPES.index().
            self.label_map[str(image).strip()] = TYPES.index(str(label).strip())

    def __getitem__(self, index):
        # Load + resize, then apply the mode-specific transform pipeline.
        image = self.images[index]
        im = resize_image(image, self.width, self.height)
        im = self.transform(im)
        label = self.labels[index]
        return im, label

    def __len__(self):
        return len(self.images)
结果 86.95
简单baseline 分数不高