Action Recognition Notes (1): Training a SlowFast Video Action Recognition Model with MXNet

Installing MXNet (omitted)

A brief introduction to the SlowFast model (omitted)

Preparing your own dataset

The training directory is organized as shown below, with the videos for each action in a separate folder. All videos must be in one single format, and it must be a format MXNet supports.

For example, under the eating folder:
(screenshot: video files under the eating folder)
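Since the screenshot is not reproduced here, a minimal sketch of the layout: one subfolder per action class (class and file names other than eating are illustrative):

train_data/
    eating/
        eating_001.avi
        eating_002.avi
    drinking/
        drinking_001.avi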
Following the official documentation, you also need to create a train.txt (it can start out empty; the training script below fills it in automatically) containing the information for every video. The first column is the folder and file name, the second column is the number of frames taken, and the third column is the class index.
(screenshot: contents of train.txt)
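For reference, a train.txt generated by the script below would contain lines like the following (file names are illustrative; the script always writes 100 for the frame count, and on Windows the path separator will be a backslash):

eating/eating_001.avi 100 0
eating/eating_002.avi 100 0
drinking/drinking_001.avi 100 1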

Model training

I wrote a train_tool.py that wraps OpenCV functions in Gluon Block classes, so that MXNet's transform pipeline can call them for data preprocessing during training.

from mxnet.gluon import Block
import cv2 as cv
import numpy as np

class VideoScale(Block):
    """Resize every frame of a clip to a fixed size."""
    def __init__(self, size):
        super(VideoScale, self).__init__()
        self.size = size

    def forward(self, clips):
        new_clips = []
        for cur_img in clips:
            new_img = cv.resize(cur_img, dsize=self.size, interpolation=cv.INTER_AREA)
            new_clips.append(new_img)
        return new_clips

class VideoRearrange(Block):
    """Sample the fast-pathway frames followed by the slow-pathway frames."""
    def __init__(self, fast_frame_num, slow_frame_num):
        super(VideoRearrange, self).__init__()
        self.fast_frame_num = fast_frame_num
        self.slow_frame_num = slow_frame_num

    def forward(self, clips):
        clips = np.array(clips)
        frame_num = clips.shape[0]
        # sampling stride for each pathway
        fast_gap = int(frame_num / self.fast_frame_num)
        slow_gap = int(frame_num / self.slow_frame_num)
        fast_frame_id_list = range(0, int(fast_gap * self.fast_frame_num), fast_gap)
        slow_frame_id_list = range(0, int(slow_gap * self.slow_frame_num), slow_gap)
        frame_id_list = list(fast_frame_id_list) + list(slow_frame_id_list)
        # index with the sampled frame ids for both pathways
        new_clips = [clips[vid, :, :, :] for vid in frame_id_list]
        return new_clips
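As a quick sanity check, here is a minimal sketch of my own (not from the original post) that pushes a dummy 100-frame clip through both blocks and prints the resulting shapes:

import numpy as np
import train_tool

# dummy clip: 100 OpenCV-style frames of height 500, width 800, 3 channels
clips = [np.zeros((500, 800, 3), dtype=np.uint8) for _ in range(100)]

scaled = train_tool.VideoScale(size=(224, 224))(clips)
print(len(scaled), scaled[0].shape)   # 100 (224, 224, 3)

sampled = train_tool.VideoRearrange(fast_frame_num=32, slow_frame_num=4)(scaled)
print(len(sampled))                   # 36 frames: 32 fast + 4 slow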

Training roughly consists of the following steps:
1. Collect training-set information
2. Preprocess the data
3. Train the model
4. Save the model parameters as a .params file and the class labels as an .npy file
5. Load the saved model for testing (see the inference sketch at the end of this post)

Training code:

from __future__ import division

import argparse, time, logging, os, sys, math

import numpy as np
import mxnet as mx
import gluoncv as gcv
from mxnet import gluon, nd, init, context
from mxnet import autograd as ag
from mxnet.gluon import nn
from mxnet.gluon.data.vision import transforms

from gluoncv.data.transforms import video
from gluoncv.data import VideoClsCustom
from gluoncv.model_zoo import get_model
from gluoncv.utils import makedirs, LRSequential, LRScheduler, split_and_load, TrainingHistory, export_block

import decord
import os

import glob
import train_tool

train_data_dir = r'C:\Users\Visungky\source\repos\ActionRecognition\train_data'
train_list_dir = r'C:\Users\Visungky\source\repos\ActionRecognition\train.txt'
train_list = open(train_list_dir, 'w')
label_list = os.listdir(train_data_dir)
class_num = 0
classes = []
#print(label_list)
print('Collecting training data...')
for label in label_list:
    video_label_dir = os.path.join(train_data_dir,label)
    video_names = os.listdir(video_label_dir)    
    
    for video_name in video_names:
        path_to_video = os.path.join(label, video_name)
        # columns: relative path, frame count (fixed at 100 here), class index
        train_list.write('%s %d %s\n' % (path_to_video, 100, class_num))
    class_num = class_num+1
    classes.append(label)
train_list.close()
print('Collect finish.')
print(classes)

# Load and prepare data
num_gpus = 1
ctx = [mx.gpu(i) for i in range(num_gpus)]
transform_fn = transforms.Compose([
    video.VideoCenterCrop(size=(800, 500)),
    train_tool.VideoScale(size=(224, 224)),
    train_tool.VideoRearrange(fast_frame_num=32, slow_frame_num=4),
    video.VideoToTensor(),
    video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
per_device_batch_size = 2
num_workers = 0
batch_size = per_device_batch_size * num_gpus
train_dataset = VideoClsCustom(root=os.path.expanduser(train_data_dir),
                               setting=os.path.expanduser(train_list_dir),
                               train=True,
                               transform=transform_fn,
                               # different models need different clip lengths;
                               # SlowFast 4x16 takes 36 frames (32 fast + 4 slow)
                               new_length=36,
                               video_loader=True,
                               use_decord=True,
                               video_ext='avi')
print('Load %d training samples.' % len(train_dataset))
print('Total %d classes' % class_num)
train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size,
                                   shuffle=True, num_workers=num_workers)
# Build the network with Kinetics-pretrained weights
model_name = 'slowfast_4x16_resnet50_custom'
net = get_model(name=model_name, nclass=class_num, use_kinetics_pretrain=True)
net.collect_params().reset_ctx(ctx)
print('%s model is successfully loaded.' % model_name)

# Learning rate decay factor
lr_decay = 0.1
# Epochs where learning rate decays
lr_decay_epoch = [40, 80, 100]
# Stochastic gradient descent
optimizer = 'sgd'
# Set parameters
optimizer_params = {'learning_rate': 0.001, 'wd': 0.0001, 'momentum': 0.9}
# Define our trainer for net
trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)

loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
train_metric = mx.metric.Accuracy()
train_history = TrainingHistory(['training-acc'])
epochs = 60
lr_decay_count = 0
print("\n***Training model***\n")

for epoch in range(epochs):
    tic = time.time()
    train_metric.reset()
    train_loss = 0

    # Learning rate decay
    if epoch == lr_decay_epoch[lr_decay_count]:
        trainer.set_learning_rate(trainer.learning_rate*lr_decay)
        lr_decay_count += 1

    # Loop through each batch of training data
    for i, batch in enumerate(train_data):
        # Extract data and label
        
        data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        label = split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
        # AutoGrad
        with ag.record():
            output = []
            for X in data:
                # fold the segment dimension into the batch dimension
                X = X.reshape((-1,) + X.shape[2:])
                pred = net(X)
                output.append(pred)
            loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]

        # Backpropagation
        for l in loss:
            l.backward()

        # Optimize
        trainer.step(batch_size)

        # Update metrics
        train_loss += sum([l.mean().asscalar() for l in loss])
        train_metric.update(label, output)

        # cap the number of batches processed per epoch
        if i == 100:
            break

    name, acc = train_metric.get()

    # Update history and print metrics
    train_history.update([acc])
    print('[Epoch %d] train=%f loss=%f time: %f' %
        (epoch, acc, train_loss / (i+1), time.time()-tic))
print("\n**Training finish**\n")
# We can plot the metric scores with:
train_history.plot(save_path='acc.jpg')
print("Exporting...")
#export_block('./myTrainedModel',net)
net.save_parameters('./mypara.params')
classes = np.array(classes)
np.save('classes.npy',classes)
print("Export done:")
print(str(glob.glob('*.params')[0]))
print(str(glob.glob('*.npy')[0]))

# Load my net
print('Loading saved net')
mynet = get_model(name=model_name, nclass=class_num)
mynet.load_parameters('./mypara.params')
print('Load finished')

When training finishes, the accuracy curve is written to acc.jpg:
(figure: training accuracy curve)
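To finish step 5, here is a minimal inference sketch of my own (not from the original post). It reuses transform_fn and the files saved above; the test_data folder and test.txt are illustrative names and follow the same layout as the training set:

import numpy as np
import mxnet as mx
from gluoncv.data import VideoClsCustom
from gluoncv.model_zoo import get_model

# rebuild the network and load the saved parameters and class names
classes = np.load('classes.npy')
mynet = get_model(name='slowfast_4x16_resnet50_custom', nclass=len(classes))
mynet.load_parameters('./mypara.params', ctx=mx.gpu(0))

# reuse the dataset class so preprocessing matches training exactly
test_dataset = VideoClsCustom(root='test_data', setting='test.txt',
                              train=False, transform=transform_fn,
                              new_length=36, video_loader=True,
                              use_decord=True, video_ext='avi')
X, _ = test_dataset[0]
X = mx.nd.array(X, ctx=mx.gpu(0)).expand_dims(axis=0)  # add a batch axis
X = X.reshape((-1,) + X.shape[2:])                     # same reshape as in training
pred = mynet(X)
print('Predicted class:', classes[int(pred.argmax(axis=1).asscalar())])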
