Quantized Deployment of a TSN Action-Recognition Model

1. First, convert the model to ONNX
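A minimal export sketch (an assumption based on the mmaction2 0.x API: init_recognizer plus the recognizer's forward_dummy hook used for tracing); it writes the tmp.onnx file that the inference script below loads:

import torch
from mmaction.apis import init_recognizer

config_file = 'configs/recognition/tsn/tsn_r50_inference_1x1x3_100e_kinetics400_rgb.py'
checkpoint_file = 'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'

model = init_recognizer(config_file, checkpoint_file, device='cpu')
model.forward = model.forward_dummy  # trace the plain forward pass, no loss branch

# (1, 75, 3, 256, 256): batch 1, 25 clips x 3 crops, RGB, 256x256 crops
dummy_input = torch.randn(1, 75, 3, 256, 256)
torch.onnx.export(model, dummy_input, 'tmp.onnx', opset_version=11)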

2. Break down the data preprocessing pipeline

Printing the test pipeline steps and the per-video input dict gives:

0: {'type': 'SampleFrames', 'clip_len': 1, 'frame_interval': 1, 'num_clips': 25, 'test_mode': True}
1: {'type': 'RawFrameDecode'}
2: {'type': 'Resize', 'scale': (-1, 256)}
3: {'type': 'ThreeCrop', 'crop_size': 256}
4: {'type': 'Normalize', 'mean': [123.675, 116.28, 103.53], 'std': [58.395, 57.12, 57.375], 'to_bgr': False}
5: {'type': 'FormatShape', 'input_format': 'NCHW'}
6: {'type': 'Collect', 'keys': ['imgs'], 'meta_keys': []}
7: {'type': 'ToTensor', 'keys': ['imgs']}
{'frame_dir': 'dataset/reading', 'total_frames': 33, 'label': -1, 'start_index': 1, 'filename_tmpl': '{:01}.jpg', 'modality': 'RGB'}
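The dump above can be reproduced with a few lines (a sketch, assuming the same config file used in the inference script below):

import mmcv

cfg = mmcv.Config.fromfile('configs/recognition/tsn/tsn_r50_inference_1x1x3_100e_kinetics400_rgb.py')
for i, step in enumerate(cfg.data.test.pipeline):
    print(f'{i}: {step}')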

Step 0 (SampleFrames) only picks frame indices, so it isn't needed once we load the frames ourselves.
Step 1 (RawFrameDecode) decodes the raw frames into RGB images.
Step 2 (Resize) rescales the short side to 256 while keeping the aspect ratio:

def _resize_imgs(self, imgs, new_w, new_h):
    return [
        mmcv.imresize(
            img, (new_w, new_h), interpolation=self.interpolation)
        for img in imgs
    ]

results['imgs'] = self._resize_imgs(results['imgs'], new_w, new_h)
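A quick numeric check of the (-1, 256) short-side rescale; the 640x360 input size here is just for illustration:

import mmcv

new_w, new_h = mmcv.rescale_size((640, 360), (float('inf'), 256))
print(new_w, new_h)  # 455 256 -- matching the (256, 455) frame shape used below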

3. Crop the image at three positions (ThreeCrop):

imgs = results['imgs']
img_h, img_w = imgs[0].shape[:2]  # (256, 455)
crop_w, crop_h = self.crop_size  # (256, 256)
assert crop_h == img_h or crop_w == img_w

if crop_h == img_h:
    w_step = (img_w - crop_w) // 2
    offsets = [
        (0, 0),  # left
        (2 * w_step, 0),  # right
        (w_step, 0),  # middle
    ]
elif crop_w == img_w:
    h_step = (img_h - crop_h) // 2
    offsets = [
        (0, 0),  # top
        (0, 2 * h_step),  # bottom
        (0, h_step),  # middle
    ]

cropped = []
crop_bboxes = []
for x_offset, y_offset in offsets:
    bbox = [x_offset, y_offset, x_offset + crop_w, y_offset + crop_h]
    crop = [
        img[y_offset:y_offset + crop_h, x_offset:x_offset + crop_w]
        for img in imgs
    ]
    cropped.extend(crop)
    crop_bboxes.extend([bbox for _ in range(len(imgs))])

crop_bboxes = np.array(crop_bboxes)  # 25 frames x 3 crops -> 75 images
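For the (256, 455) frames above, the three offsets work out like this (a small standalone check):

img_w, crop_w = 455, 256
w_step = (img_w - crop_w) // 2  # 99
offsets = [(0, 0), (2 * w_step, 0), (w_step, 0)]
print(offsets)  # [(0, 0), (198, 0), (99, 0)] -> left, right, middle crops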

4. Normalize each image in place:

mmcv.imnormalize_(img, self.mean, self.std, self.to_bgr)
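Under the hood this is just a per-channel (img - mean) / std applied in place on a float32 image (to_bgr=False means no channel swap); a rough standalone equivalent:

import numpy as np

img = np.random.rand(256, 256, 3).astype(np.float32) * 255
mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
std = np.array([58.395, 57.12, 57.375], dtype=np.float32)
normalized = (img - mean) / std  # broadcasts mean/std over H and W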
5. Rearrange the channel layout from NHWC to NCHW:

elif self.input_format == 'NCHW':  # true for this config
    imgs = np.transpose(imgs, (0, 3, 1, 2))
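For the 75 cropped frames this step turns the (75, 256, 256, 3) stack into (75, 3, 256, 256); a quick standalone check:

import numpy as np

imgs = np.zeros((75, 256, 256, 3), dtype=np.float32)
print(np.transpose(imgs, (0, 3, 1, 2)).shape)  # (75, 3, 256, 256)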
6. Collect simply gathers the 'imgs' key (a no-op for our purposes), and 7. ToTensor converts the array into a torch tensor.

Stringing these steps together gives the complete inference script:

import torch

from mmaction.apis import init_recognizer, inference_recognizer
import onnxruntime as rt
import mmcv
import os
import cv2
import numpy as np
from operator import itemgetter
config_file = 'configs/recognition/tsn/tsn_r50_inference_1x1x3_100e_kinetics400_rgb.py'
checkpoint_file = 'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'
device = 'cuda:0' # or 'cpu'
device = torch.device(device)

 # build the model from a config file and a checkpoint file
# model = init_recognizer(config_file, checkpoint_file, device=device)
config = mmcv.Config.fromfile(config_file)
# ONNX_MODEL = "action_tsn.onnx"
ONNX_MODEL = "tmp.onnx"

model = rt.InferenceSession(ONNX_MODEL)
model.cfg = config
# test rawframe directory of a single video and show the result:
# video = 'writing.mp4'
video = 'dataset/stand'  # a directory of raw frames instead of a video file
labels = 'tools/data/kinetics/label_map_k400.txt'
# labels = 'tools/data/ucf101/label_map.txt'
# results = inference_recognizer(model, video)
filelist = sorted(os.listdir(video), key=lambda f: int(os.path.splitext(f)[0]))  # frames are 1.jpg, 2.jpg, ... so sort numerically
imgs = []
for file in filelist:
    img = cv2.imread(os.path.join(video, file))  # OpenCV loads images as BGR by default
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # resize: short side to 256, keeping the aspect ratio
    img_h, img_w, _ = img.shape
    new_w, new_h = mmcv.rescale_size((img_w, img_h), (float("inf"), 256))
    img = mmcv.imresize(img, (new_w, new_h), interpolation='bilinear')
    imgs.append(img)
cropped = []
img_h, img_w = imgs[0].shape[:2]  # (256, 455)
crop_w, crop_h = 256, 256
# ThreeCrop: three 256x256 crops along the longer side
if crop_h == img_h:
    w_step = (img_w - crop_w) // 2
    offsets = [
        (0, 0),  # left
        (2 * w_step, 0),  # right
        (w_step, 0),  # middle
    ]
elif crop_w == img_w:
    h_step = (img_h - crop_h) // 2
    offsets = [
        (0, 0),  # top
        (0, 2 * h_step),  # bottom
        (0, h_step),  # middle
    ]
for x_offset, y_offset in offsets:
    crop = [
        img[y_offset:y_offset + crop_h, x_offset:x_offset + crop_w]
        for img in imgs
    ]
    cropped.extend(crop)

# results['imgs'] = cropped  # 25 frames x 3 crops = 75 images
n = len(cropped)
h, w, c = cropped[0].shape
imgs = np.empty((n, h, w, c), dtype=np.float32)
for i, img in enumerate(cropped):
    imgs[i] = img
for img in imgs:
    mmcv.imnormalize_(img, np.array([123.675, 116.28, 103.53]),
                      np.array([58.395, 57.12, 57.375]), False)  # normalize in place
imgs = np.transpose(imgs, (0, 3, 1, 2))  # NHWC -> NCHW
imgs = torch.from_numpy(imgs).unsqueeze(0)  # add the leading batch dimension
# results['imgs'] = imgs
# 'onnx::Reshape_0' is the input name the tracer assigned; it can also be
# read programmatically via model.get_inputs()[0].name. detach() returns a
# new tensor that does not require gradients.
scores = model.run(None, {'onnx::Reshape_0': imgs.detach().numpy()})[0]
num_classes = scores.shape[-1]
score_tuples = tuple(zip(range(num_classes), scores[0]))
score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True)
results = score_sorted[:5]
# show the results
labels = [x.strip() for x in open(labels).readlines()]  # reuse the label-map path defined above
results = [(labels[k[0]], k[1]) for k in results]

print('The top-5 labels with corresponding scores are:')
for result in results:
    print(f'{result[0]}: ', result[1])
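The script above runs the FP32 model. For the quantization part of the title, one option is ONNX Runtime's dynamic quantizer; a minimal sketch, assuming the onnxruntime.quantization module is installed, with uint8 weights:

from onnxruntime.quantization import quantize_dynamic, QuantType

# Writes a uint8-weight copy of the model; activations are quantized
# dynamically at inference time, so no calibration data is needed.
quantize_dynamic('tmp.onnx', 'tmp_quant.onnx', weight_type=QuantType.QUInt8)

The quantized model then loads exactly like the FP32 one: rt.InferenceSession('tmp_quant.onnx').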