来源:投稿 作者:LSC
编辑:学姐
最终: 0.76757分
比赛网址:
http://challenge.xfyun.cn/topic/info?type=action-recognition&ch=ds22-dw-zmt05
赛题任务:
带标注的训练数据,即视频中的每一帧都有动作标签;不带标注的测试数据。
作品介绍视频要求:视频数据按照数据来源存放在不同的文件夹中,视频文件采用H.264编码的mp4格式;标签文件对应视频文件放在同一文件夹下,标签文件采用txt格式,每一行标明帧号和本帧的人物动作label。
评价指标
模型预测结果采用准确率(accuracy)进行评价,对于模型预测的结果,严格对比每一帧预测结果与真实标注的要素名和要素内容,若二者完全一致,则记为本帧识别正确。
对于一段测试视频计算准确率的方法为: accuracy=本段视频中完全预测正确的要素个数/本段视频的帧数。
对于一个模型计算准确率的方法为:accuracy=累加每段视频预测的准确率/总的测试视频个数。
赛题需要对视频的图像内容进行识别,因此可以考虑抽象为图像分类任务。完成赛题的步骤为:
- (1)视频抽帧
- (2)构建分类数据集
- (3)训练分类模型
- (4)对测试集进行预测
baseline代码是在恒源云平台上运行的
(1)训练集和测试集抽帧
由于赛题的数据是逐帧标注的,因此抽帧时可以直接选取所有的帧,并将每一帧保存为图像。
import cv2, os, glob, codecs
# Create the frame output folders for both splits.
# makedirs(exist_ok=True) also creates missing parent directories and repairs
# a partially created tree (e.g. '/hy-tmp/frames' exists but 'train' or 'test'
# does not), which a plain os.mkdir guarded by a single exists() check cannot.
for split in ('train', 'test'):
    os.makedirs(os.path.join('/hy-tmp/frames', split), exist_ok=True)
def extract_images(video_path, out_dir):
    """Decode every frame of a video and save each one as a JPEG image.

    Frames are written to ``out_dir`` as ``<video_name>-<NNNNNN>.jpg`` where
    the frame number is 1-based and zero-padded to six digits.

    Args:
        video_path: Path of the video file to decode.
        out_dir: Directory that receives the frame images.
    """
    # splitext drops only the extension, which matches how the annotation and
    # label code elsewhere in this file derive the name (basename[:-4]).
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    cam = cv2.VideoCapture(video_path)
    print(video_path)
    frame_count = 1
    try:
        while True:
            successed, img = cam.read()
            if not successed:
                break
            # Use out_dir as given: prefixing it with './' would make an
            # absolute out_dir resolve relative to the current working
            # directory instead of the requested location.
            outfile = os.path.join(out_dir, f'{video_name}-{frame_count:06}.jpg')
            cv2.imwrite(outfile, img)
            frame_count += 1
    finally:
        # Always free the decoder handle, even if writing a frame fails.
        cam.release()
# Training folder: everything ending in 'txt' is an annotation file, every
# other entry is treated as a video. The listing is sorted before splitting.
paths = sorted(glob.glob('/hy-tmp/act_rec_data/train/*'))
train_video_path = []
train_ann_path = []
for p in paths:
    if p.endswith('txt'):
        train_ann_path.append(p)
    else:
        train_video_path.append(p)

# Test folder: only the non-'txt' entries are kept (listing left unsorted).
paths = glob.glob('/hy-tmp/act_rec_data/test/*')
test_video_path = [x for x in paths if not x.endswith('txt')]

# Dump every frame of every video: first all training videos, then all test
# videos, each into its own sub-folder of /hy-tmp/frames.
for split, video_paths in (('train', train_video_path), ('test', test_video_path)):
    out_dir = os.path.join('/hy-tmp/frames', split)
    for path in video_paths:
        extract_images(path, out_dir)
(2)构建分类数据集
!pip install tqdm
from torch.utils.data.dataset import Dataset
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torchvision.models as models
import os
import sys
import glob
import pandas as pd
import numpy as np
from tqdm import tqdm
%pylab inline
import cv2
from PIL import Image
import torch
# cuDNN settings: benchmark mode lets cuDNN search for the most efficient
# algorithm for the current configuration (faster runs); the deterministic
# flag is left off.
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False
# Fix the PyTorch RNG seed to reduce run-to-run randomness.
torch.manual_seed(0)
class XunFeiDataset(Dataset):
    """Image-classification dataset backed by a list of image file paths.

    Args:
        img_path: Sequence of image file paths.
        img_label: Sequence of labels aligned index-by-index with ``img_path``.
        transform: Optional callable invoked as ``transform(image=img)`` whose
            result is read from the ``'image'`` key. ``None`` leaves the image
            untransformed.
    """

    def __init__(self, img_path, img_label, transform=None):
        self.img_path = img_path
        self.img_label = img_label
        self.transform = transform

    def __getitem__(self, index):
        """Load the image at ``index`` and return ``(image, label)``.

        The image is read with OpenCV, cast to float32, converted from BGR to
        RGB and then passed through ``transform`` when one was supplied. The
        label is returned as a tensor built from the stored label value.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        path = self.img_path[index]
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising;
            # fail here with the offending path instead of an AttributeError.
            raise FileNotFoundError(f'cannot read image: {path}')
        # NOTE(review): pixel values are cast to float32 without rescaling
        # before the transform runs - confirm the augmentations in use expect
        # that value range.
        img = img.astype(np.float32)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if self.transform is not None:
            img = self.transform(image=img)['image']
        return img, torch.from_numpy(np.array(self.img_label[index]))

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.img_path)
# Build one (image path, label) row per annotation line.
train_img_list = []
train_label_list = []
for path in train_ann_path:
    basename = os.path.basename(path)[:-4]
    # Read through a context manager so the annotation file handle is closed
    # as soon as its lines are loaded.
    with codecs.open(path) as ann_file:
        anns = ann_file.readlines()
    for idx, ann in enumerate(anns):
        # Annotation line i (0-based) is paired with frame image i + 1.
        frame_count = idx + 1
        train_img_list.append(f'/hy-tmp/frames/train/{basename}-{frame_count:06}.jpg')
        if ',' not in ann:
            # Lines without a comma carry no label value; they get class 19.
            train_label_list.append(19)
        else:
            train_label_list.append(int(ann.split(',')[1]))
train_df = pd.DataFrame({
    'path': train_img_list,
    'label': train_label_list
})
# Map the raw labels to contiguous integer codes; `lbl` maps a code back to
# the raw label.
train_df['label_int'], lbl = pd.factorize(train_df['label'])
# Keep only the rows whose image can actually be read.
train_df = train_df[train_df['path'].apply(lambda x: cv2.imread(x) is not None)]
print(train_df.shape)
# Shuffle all rows.
train_df = train_df.sample(frac=1.0)
train_df
(3)训练分类模型
我尝试了一下,resnet和efficientnet系列效果比较好,swin_transformer系列效果不太好而且模型太大保存不方便
# EfficientNet-B7 backbone. The positional True is passed as the constructor's
# first argument (presumably requesting pretrained weights - confirm against
# the installed torchvision version).
model = models.efficientnet_b7(True)
# Replace the classification head: in-place dropout followed by a
# 2560 -> 14 linear layer (bias enabled, the nn.Linear default).
model.classifier = nn.Sequential(
    nn.Dropout(0.5, inplace=True),
    nn.Linear(2560, 14),
)
!pip install 'albumentations'
import albumentations as A
from albumentations.pytorch import ToTensorV2
def train(train_loader, model, criterion, optimizer):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        train_loader: Iterable of ``(input, target)`` batches that supports
            ``len()``.
        model: Network to optimise; switched to training mode.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: Optimizer stepped once per batch.

    Returns:
        The sum of the per-batch loss values divided by ``len(train_loader)``.
    """
    model.train()
    batch_losses = []
    for images, labels in train_loader:
        # Move the batch to the GPU.
        images = images.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)

        # Forward pass and loss.
        logits = model(images)
        batch_loss = criterion(logits, labels)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())
    return sum(batch_losses, 0.0) / len(train_loader)
def validate(val_loader, model, criterion):
    """Return the classification accuracy of ``model`` on ``val_loader``.

    Args:
        val_loader: Iterable of ``(input, target)`` batches that exposes a
            ``dataset`` attribute whose length is the number of samples.
        model: Network to evaluate; switched to evaluation mode.
        criterion: Unused. Kept so existing callers keep working.

    Returns:
        Number of correctly classified samples divided by
        ``len(val_loader.dataset)``.
    """
    model.eval()
    correct = 0.0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.cuda()
            labels = labels.cuda()
            logits = model(images)
            # The predicted class is the index of the largest output.
            correct += (logits.argmax(1) == labels).sum().item()
    return correct / len(val_loader.dataset)
def predict(test_loader, model, criterion):
    """Run ``model`` over ``test_loader`` and return the stacked raw outputs.

    Args:
        test_loader: Iterable of ``(input, target)`` batches; the target is
            ignored.
        model: Network to run; switched to evaluation mode.
        criterion: Unused. Kept so existing callers keep working.

    Returns:
        A NumPy array with the model outputs of all batches stacked vertically
        in iteration order.
    """
    model.eval()
    batch_outputs = []
    # Gradients are not needed for inference.
    with torch.no_grad():
        for images, _ in test_loader:
            logits = model(images.cuda())
            batch_outputs.append(logits.detach().cpu().numpy())
    return np.vstack(batch_outputs)
# Random split: train on everything except the last 20000 rows of train_df and
# validate on the last 2000 rows.
# NOTE(review): the two slices use different sizes (20000 vs 2000), so the
# 18000 rows in between are used for neither training nor validation, and the
# training slice is empty when train_df has 20000 rows or fewer - confirm this
# is intentional.

# Training pipeline: resize, random flips and contrast/brightness jitter, then
# normalisation and conversion to a tensor.
train_transform = A.Compose([
    A.Resize(300, 300),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomContrast(p=0.5),
    A.RandomBrightness(p=0.5),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])
train_dataset = XunFeiDataset(
    train_df['path'].values[:-20000],
    train_df['label_int'].values[:-20000],
    train_transform,
)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=10, shuffle=True, num_workers=4, pin_memory=False
)

# Validation pipeline: no random augmentation, only resize + normalisation.
val_transform = A.Compose([
    A.Resize(300, 300),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])
val_dataset = XunFeiDataset(
    train_df['path'].values[-2000:],
    train_df['label_int'].values[-2000:],
    val_transform,
)
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=2, shuffle=False, num_workers=1, pin_memory=False
)
model = model.to('cuda')
# CrossEntropyLoss applies softmax internally, so the model outputs raw scores.
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.005)
scheduler = torch.optim.lr_scheduler.CyclicLR(
    optimizer,
    base_lr=0.001,
    max_lr=0.01,
    step_size_up=5,
    mode="triangular2",
)

# Train for 15 epochs; the weights are written to 'model.pth' every time the
# validation accuracy beats the best value seen so far.
best_acc = 0
for _ in range(15):
    train_loss = train(train_loader, model, criterion, optimizer)
    val_acc = validate(val_loader, model, criterion)
    if val_acc > best_acc:
        torch.save(model.state_dict(), 'model.pth')
        best_acc = val_acc
    # The learning-rate schedule advances once per epoch here.
    scheduler.step()
    print(train_loss, val_acc)
(4)对测试集进行预测
# Collect the extracted test frames in sorted order and keep only the ones
# OpenCV can read.
test_img_list = glob.glob(os.path.join('/hy-tmp/frames', 'test') + '/*')
test_img_list.sort()
test_img_list = pd.DataFrame(test_img_list)
test_img_list = test_img_list[test_img_list[0].apply(lambda x: cv2.imread(x) is not None)]
# Dummy labels (all 0) are supplied because the dataset always returns a label.
test_loader = torch.utils.data.DataLoader(
    XunFeiDataset(test_img_list[0].values, [0] * len(test_img_list),
        A.Compose([
            A.Resize(300, 300),
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ])
    ), batch_size=10, shuffle=False, num_workers=1, pin_memory=False
)
# NOTE(review): inference uses the weights currently held in `model`, not the
# best checkpoint saved to 'model.pth' during training - confirm this is
# intended.
test_pred = []
# Make evaluation mode explicit instead of relying on whatever ran last, and
# skip autograd bookkeeping during inference.
model.eval()
with torch.no_grad():
    for data, _ in test_loader:
        pred = model(data.cuda())
        test_pred += list(pred.argmax(1).cpu().numpy())
test_img_list['label'] = test_pred
# Frame files are named '<video>-<frame>.jpg'; split on the LAST '-' so video
# names that themselves contain '-' are preserved.
test_img_list['video'] = test_img_list[0].apply(lambda x: os.path.basename(x).rsplit('-', 1)[0])
# Map the predicted integer codes back to the raw labels.
test_img_list['label'] = test_img_list['label'].apply(lambda x: lbl[x])
import shutil

# Start from an empty output folder. os.rmdir only removes EMPTY directories,
# so a 'labels' folder left over from a previous run needs rmtree.
if os.path.exists('labels'):
    shutil.rmtree('labels')
os.mkdir('labels')
for path in test_video_path:
    # Ask the container how many frames the video reports, then free the
    # capture handle straight away.
    cam = cv2.VideoCapture(path)
    length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    cam.release()
    video_name = os.path.basename(path)[:-4]
    df = test_img_list[test_img_list['video'] == video_name]
    with open(os.path.join('labels', video_name + '.txt'), 'w') as up:
        # One line per predicted frame: '<idx>' for class 19 (the value used
        # for annotation lines without a label), otherwise '<idx>,<label>'.
        for idx, label in enumerate(df['label']):
            if label == 19:
                up.write('{0}\n'.format(idx))
            else:
                up.write('{0},{1}\n'.format(idx, label))
        # If fewer frames were predicted than the video reports, pad the
        # remaining frame indices with unlabeled lines so the file has one
        # line per reported frame. Iterating from len(df) also covers the
        # case where no frame of this video was available at all.
        for idx in range(len(df), length):
            up.write('{0}\n'.format(idx))
# 压缩结果标签,下载到本地,然后提交
!zip labels.zip labels/ -r
点击下方卡片关注《学姐带你玩AI》🚀🚀🚀
回复“比赛”领取190+场比赛top方案,打包好了直接领
码字不易,欢迎大家点赞评论收藏!