行为识别TSN模型量化部署

深度学习扛把子

已于 2023-03-29 14:03:28 修改

阅读量350

点赞数

文章标签： python 信息可视化 数据分析

于 2023-03-28 17:32:40 首次发布

本文链接：https://blog.csdn.net/qq_16792139/article/details/129817198

版权

该文详细阐述了一个从ONNX模型开始的视频帧处理流程，包括转为RGB图片、图像resize、三裁剪、归一化、维度顺序调整（NHWC→NCHW）以及使用ONNXRuntime进行推理的过程。代码示例展示了如何读取视频帧、进行数据预处理并最终获取识别结果。

摘要由CSDN通过智能技术生成

1.首先是转onnx
在这里插入图片描述

2.拆分数据预处理流程

0: {'type': 'SampleFrames', 'clip_len': 1, 'frame_interval': 1, 'num_clips': 25, 'test_mode': True}
1: {'type': 'RawFrameDecode'}
2: {'type': 'Resize', 'scale': (-1, 256)}
3: {'type': 'ThreeCrop', 'crop_size': 256}
4: {'type': 'Normalize', 'mean': [123.675, 116.28, 103.53], 'std': [58.395, 57.12, 57.375], 'to_bgr': False}
5: {'type': 'FormatShape', 'input_format': 'NCHW'}
6: {'type': 'Collect', 'keys': ['imgs'], 'meta_keys': []}
7: {'type': 'ToTensor', 'keys': ['imgs']}

{'frame_dir': 'dataset/reading', 'total_frames': 33, 'label': -1, 'start_index': 1, 'filename_tmpl': '{:01}.jpg', 'modality': 'RGB'}

0.SampleFrames（抽帧）：下面的推理脚本直接读取目录中的全部图片，没有实现这一步
1.得到rgb图片
2.resize短边256

def _resize_imgs(self, imgs, new_w, new_h):
    """Resize every image in ``imgs`` to ``(new_w, new_h)``.

    Args:
        imgs: Iterable of images accepted by ``mmcv.imresize``.
        new_w: Target width passed to ``mmcv.imresize``.
        new_h: Target height passed to ``mmcv.imresize``.

    Returns:
        list: The resized images, in the same order as ``imgs``.
    """
    target_size = (new_w, new_h)
    resized = []
    for frame in imgs:
        # The interpolation mode comes from the instance configuration.
        resized.append(
            mmcv.imresize(
                frame, target_size, interpolation=self.interpolation))
    return resized

# Call site: overwrite the stored frames with their resized versions.
results['imgs'] = self._resize_imgs(results['imgs'], new_w,
                                                    new_h)

3.对图片3个位置的box进行crop

# Excerpt of the three-crop step: take three crop_size windows from every
# frame, spread along whichever axis is larger than the crop.
# NOTE(review): `imgs` is not defined in this excerpt; presumably it is
# results['imgs'] -- confirm against the full method.
img_h, img_w = results['imgs'][0].shape[:2]  # e.g. (256, 455)
        crop_w, crop_h = self.crop_size  # e.g. (256, 256)
        # One side of the frame must already match the crop size.
        assert crop_h == img_h or crop_w == img_w

        if crop_h == img_h:
            # Height matches: slide the window horizontally.
            w_step = (img_w - crop_w) // 2
            offsets = [
                (0, 0),  # left
                (2 * w_step, 0),  # right
                (w_step, 0),  # middle
            ]
        elif crop_w == img_w:
            # Width matches: slide the window vertically.
            h_step = (img_h - crop_h) // 2
            offsets = [
                (0, 0),  # top
                (0, 2 * h_step),  # down
                (0, h_step),  # middle
            ]

        cropped = []
        crop_bboxes = []
        for x_offset, y_offset in offsets:
            # Box layout is [x1, y1, x2, y2].
            bbox = [x_offset, y_offset, x_offset + crop_w, y_offset + crop_h]
            crop = [
                img[y_offset:y_offset + crop_h, x_offset:x_offset + crop_w]
                for img in imgs
            ]
            # Crops are grouped by offset: all frames for the first offset,
            # then all frames for the second, and so on.
            cropped.extend(crop)
            crop_bboxes.extend([bbox for _ in range(len(imgs))])

        # One bbox row per crop, e.g. 25 frames x 3 crops -> 75 rows.
        crop_bboxes = np.array(crop_bboxes)

4.归一化
mmcv.imnormalize_(img, self.mean, self.std, self.to_bgr)
5.调整维度顺序（NHWC→NCHW）
elif self.input_format == 'NCHW':  # true
    imgs = np.transpose(imgs, (0, 3, 1, 2))
6.Collect只保留'imgs'这个键，推理脚本中无需额外处理
7.转tensor
然后我们得到完整的推理流程

import torch

from mmaction.apis import init_recognizer, inference_recognizer
import onnxruntime as rt
import mmcv
import os
import cv2
import numpy as np
from operator import itemgetter
# End-to-end TSN inference through ONNX Runtime: load the frames of one video,
# replicate the test pipeline by hand (RGB -> resize short edge to 256 ->
# three-crop -> normalize -> NCHW) and print the top-5 classes.
config_file = 'configs/recognition/tsn/tsn_r50_inference_1x1x3_100e_kinetics400_rgb.py'
checkpoint_file = 'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'
device = 'cuda:0'  # or 'cpu'
device = torch.device(device)

# The PyTorch recognizer is intentionally not built here (that would be
# `init_recognizer(config_file, checkpoint_file, device=device)`); only the
# config is loaded and attached to the ONNX session.
config = mmcv.Config.fromfile(config_file)
ONXX_MODEL = "tmp.onnx"  # alternative export: "action_tsn.onnx"

# Since onnxruntime 1.9 a build with several execution providers refuses to
# create a session unless `providers` is given explicitly.
model = rt.InferenceSession(ONXX_MODEL, providers=rt.get_available_providers())
model.cfg = config

# Directory with the raw frames of a single video.
video = 'dataset/stand'
# Kinetics-400 label map; for UCF101 use 'tools/data/ucf101/label_map.txt'.
label_file = 'tools/data/kinetics/label_map_k400.txt'

CROP_SIZE = 256
MEAN = np.array([123.675, 116.28, 103.53])
STD = np.array([58.395, 57.12, 57.375])


def _frame_order(name):
    """Sort key that orders numerically named frames by value.

    ``os.listdir`` returns entries in arbitrary order and a plain string sort
    would put '10.jpg' before '2.jpg'. Names without a purely numeric stem are
    placed after the numeric ones, sorted alphabetically.
    """
    stem = os.path.splitext(name)[0]
    if stem.isdecimal():
        return (0, int(stem), name)
    return (1, 0, name)


# Steps 1-2: decode each frame as RGB and resize its short edge to CROP_SIZE.
frames = []
for file in sorted(os.listdir(video), key=_frame_order):
    img = cv2.imread(os.path.join(video, file))  # OpenCV decodes to BGR
    if img is None:
        # Not a decodable image (stray file, sub-directory, ...): skip it
        # instead of failing inside cvtColor with an opaque error.
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_h, img_w = img.shape[:2]
    new_w, new_h = mmcv.rescale_size((img_w, img_h), (float("inf"), CROP_SIZE))
    frames.append(mmcv.imresize(img, (new_w, new_h), interpolation='bilinear'))

if not frames:
    raise FileNotFoundError(f'no readable frames found in {video!r}')

# Step 3: three-crop. The window slides along whichever axis exceeds the crop.
img_h, img_w = frames[0].shape[:2]  # e.g. (256, 455)
crop_w, crop_h = CROP_SIZE, CROP_SIZE
if crop_h == img_h:
    w_step = (img_w - crop_w) // 2
    offsets = [
        (0, 0),  # left
        (2 * w_step, 0),  # right
        (w_step, 0),  # middle
    ]
elif crop_w == img_w:
    h_step = (img_h - crop_h) // 2
    offsets = [
        (0, 0),  # top
        (0, 2 * h_step),  # down
        (0, h_step),  # middle
    ]
else:
    raise ValueError(
        f'frame size {(img_h, img_w)} matches the crop size '
        f'{(crop_h, crop_w)} on neither side')

# Crops are grouped by offset (all frames for the first offset, then the
# second, ...), giving 3 * len(frames) images in total.
cropped = [
    img[y_offset:y_offset + crop_h, x_offset:x_offset + crop_w]
    for x_offset, y_offset in offsets
    for img in frames
]

# Step 4: stack to (N, H, W, C) float32 and normalize each image in place.
imgs = np.stack(cropped).astype(np.float32)
for img in imgs:
    mmcv.imnormalize_(img, MEAN, STD, False)

# Step 5: NHWC -> NCHW, then add a leading batch axis. The array is made
# contiguous because the transpose only produces a strided view.
imgs = np.ascontiguousarray(np.transpose(imgs, (0, 3, 1, 2))[np.newaxis])

# Ask the session for its input name rather than hard-coding the
# exporter-generated one (it was 'onnx::Reshape_0' for the author's export).
input_name = model.get_inputs()[0].name
scores = model.run(None, {input_name: imgs})[0]

# Rank classes by the scores of the first (only) batch entry.
num_classes = scores.shape[-1]
score_tuples = tuple(zip(range(num_classes), scores[0]))
score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True)
results = score_sorted[:5]

# Map class indices to readable names and show the results.
with open(label_file, encoding='utf-8') as label_fh:
    labels = [line.strip() for line in label_fh]
results = [(labels[k[0]], k[1]) for k in results]

print('The top-5 labels with corresponding scores are:')
for result in results:
    print(f'{result[0]}: ', result[1])