1. 首先把模型转换为 ONNX 格式。
2. 然后拆分数据预处理流程(test pipeline)。各步骤的配置如下,最后一行是输入数据的信息:
0: {'type': 'SampleFrames', 'clip_len': 1, 'frame_interval': 1, 'num_clips': 25, 'test_mode': True}
1: {'type': 'RawFrameDecode'}
2: {'type': 'Resize', 'scale': (-1, 256)}
3: {'type': 'ThreeCrop', 'crop_size': 256}
4: {'type': 'Normalize', 'mean': [123.675, 116.28, 103.53], 'std': [58.395, 57.12, 57.375], 'to_bgr': False}
5: {'type': 'FormatShape', 'input_format': 'NCHW'}
6: {'type': 'Collect', 'keys': ['imgs'], 'meta_keys': []}
7: {'type': 'ToTensor', 'keys': ['imgs']}
{'frame_dir': 'dataset/reading', 'total_frames': 33, 'label': -1, 'start_index': 1, 'filename_tmpl': '{:01}.jpg', 'modality': 'RGB'}
步骤 0(SampleFrames):在这里没有实际作用,可以跳过。
步骤 1(RawFrameDecode):解码得到 RGB 图片。
步骤 2(Resize):保持宽高比缩放,使短边为 256。
def _resize_imgs(self, imgs, new_w, new_h):
    """Resize every frame in ``imgs`` to ``new_w`` x ``new_h``.

    Args:
        imgs: Iterable of frames accepted by ``mmcv.imresize``.
        new_w: Target width handed to ``mmcv.imresize``.
        new_h: Target height handed to ``mmcv.imresize``.

    Returns:
        list: The resized frames, in the same order as ``imgs``.
    """
    target_size = (new_w, new_h)
    resized = []
    for frame in imgs:
        # The interpolation mode comes from the transform's own setting.
        resized.append(
            mmcv.imresize(
                frame, target_size, interpolation=self.interpolation))
    return resized
# Call site: replace the frames stored under 'imgs' with their resized
# versions (new_w / new_h are computed before this excerpt).
results['imgs'] = self._resize_imgs(results['imgs'], new_w,
new_h)
步骤 3(ThreeCrop):在每张图片的三个位置(左/右/中,或上/下/中)各裁剪出一个 crop_size 大小的区域。
# Excerpt of the three-crop transform body; `results`, `imgs` and `self`
# come from the enclosing method, which is not shown here.
img_h, img_w = results['imgs'][0].shape[:2]  # e.g. (256, 455)
crop_w, crop_h = self.crop_size  # e.g. (256, 256)
# The crop must span the full height or the full width of the frame.
assert crop_h == img_h or crop_w == img_w

if crop_h == img_h:
    # Full-height crops: slide the window horizontally.
    w_step = (img_w - crop_w) // 2
    offsets = [(0, 0), (2 * w_step, 0), (w_step, 0)]  # left, right, middle
elif crop_w == img_w:
    # Full-width crops: slide the window vertically.
    h_step = (img_h - crop_h) // 2
    offsets = [(0, 0), (0, 2 * h_step), (0, h_step)]  # top, down, middle

cropped = []
crop_bboxes = []
for left, top in offsets:
    right, bottom = left + crop_w, top + crop_h
    # One crop per frame at this offset, appended offset by offset.
    cropped.extend(frame[top:bottom, left:right] for frame in imgs)
    crop_bboxes.extend([left, top, right, bottom] for _ in imgs)
# One bbox row per crop, e.g. 25 frames x 3 offsets -> 75 rows.
crop_bboxes = np.array(crop_bboxes)
步骤 4(Normalize):用配置中的 mean 和 std 对每张图片做归一化。
# Normalize one frame with the transform's mean/std/to_bgr settings.
# NOTE(review): the return value is discarded, so this presumably modifies
# `img` in place — confirm against the mmcv documentation.
mmcv.imnormalize_(img, self.mean, self.std, self.to_bgr)
步骤 5(FormatShape):调整维度顺序,把通道维移到前面(NHWC → NCHW),通道数本身不变。
elif self.input_format == 'NCHW':#true
imgs = np.transpose(imgs, (0, 3, 1, 2))
步骤 7(ToTensor):把 imgs 转为 tensor(步骤 6 的 Collect 只是选取 'imgs' 字段,这里不需要单独处理)。
把以上步骤组合起来,就得到完整的推理流程:
import torch
from mmaction.apis import init_recognizer, inference_recognizer
import onnxruntime as rt
import mmcv
import os
import cv2
import numpy as np
from operator import itemgetter
# --- Original PyTorch model files -------------------------------------------
# Only the config is loaded; the checkpoint path is kept for reference.
config_file = 'configs/recognition/tsn/tsn_r50_inference_1x1x3_100e_kinetics400_rgb.py'  # 1
checkpoint_file = 'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'
config = mmcv.Config.fromfile(config_file)

# Device handle ('cpu' is the alternative).
device = torch.device('cuda:0')

# --- ONNX Runtime session ---------------------------------------------------
# Stands in for the PyTorch recognizer that
# init_recognizer(config_file, checkpoint_file, device=device) would build.
ONXX_MODEL = "tmp.onnx"  # alternative export: "action_tsn.onnx"
model = rt.InferenceSession(ONXX_MODEL)
model.cfg = config

# --- Input data and label map -----------------------------------------------
# Raw-frame directory of a single video (a video file such as 'writing.mp4'
# was the alternative input for inference_recognizer(model, video)).
video = 'dataset/stand'
# For UCF101 the label map would be 'tools/data/ucf101/label_map.txt'.
labels = 'tools/data/kinetics/label_map_k400.txt'
# Load every frame of the clip as RGB and resize it so the short side is 256.
# os.listdir() returns names in arbitrary order; sorting shortest-first keeps
# un-padded numeric names in order ('2.jpg' before '10.jpg').
filelist = sorted(os.listdir(video), key=lambda name: (len(name), name))
imgs = []
for file in filelist:
    # Build the path from `video` so changing the input directory is enough.
    img = cv2.imread(os.path.join(video, file))  # OpenCV decodes to BGR
    if img is None:
        # cv2.imread returns None for files it cannot decode; skip them
        # instead of crashing in cvtColor.
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Resize: an unbounded long edge means only the short edge (256) limits
    # the scale.
    img_h, img_w = img.shape[:2]
    new_w, new_h = mmcv.rescale_size((img_w, img_h), (float("inf"), 256))
    img = mmcv.imresize(img, (new_w, new_h), interpolation='bilinear')
    imgs.append(img)
if not imgs:
    # Fail here with a clear message rather than with an IndexError later.
    raise RuntimeError(f'no readable image frames found in {video!r}')
# ThreeCrop: cut three 256x256 windows out of every resized frame.
cropped = []
# All frames are assumed to share the size of the first one.
img_h, img_w = imgs[0].shape[:2]  # e.g. (256, 455)
crop_w, crop_h = 256, 256
if crop_h == img_h:
    # Full-height crops: slide the window horizontally.
    w_step = (img_w - crop_w) // 2
    offsets = [
        (0, 0),  # left
        (2 * w_step, 0),  # right
        (w_step, 0),  # middle
    ]
elif crop_w == img_w:
    # Full-width crops: slide the window vertically.
    h_step = (img_h - crop_h) // 2
    offsets = [
        (0, 0),  # top
        (0, 2 * h_step),  # down
        (0, h_step),  # middle
    ]
else:
    # Without this guard `offsets` would be undefined and the loop below
    # would fail with an unrelated NameError.
    raise ValueError(
        f'frame size {(img_h, img_w)} must match the crop size '
        f'{(crop_h, crop_w)} along height or width')
for x_offset, y_offset in offsets:
    # One crop per frame at this offset, so in the end
    # len(cropped) == 3 * len(imgs).
    cropped.extend(
        img[y_offset:y_offset + crop_h, x_offset:x_offset + crop_w]
        for img in imgs)
# Stack the crops into one float32 batch of shape (N, H, W, C).
imgs = np.stack(cropped).astype(np.float32)

# Normalize frame by frame; the constants are built once, outside the loop.
mean = np.array([123.675, 116.28, 103.53])
std = np.array([58.395, 57.12, 57.375])
for img in imgs:
    # `img` is a view into `imgs`, and the return value is not used, so the
    # script relies on imnormalize_ updating the batch in place.
    mmcv.imnormalize_(img, mean, std, False)

# NHWC -> NCHW, then add a leading batch axis: (1, N, C, H, W).
# ONNX Runtime takes numpy arrays directly, so no torch round trip is needed.
imgs = np.ascontiguousarray(np.transpose(imgs, (0, 3, 1, 2)))[np.newaxis]

# Ask the session for its input name: auto-generated names such as
# 'onnx::Reshape_0' change from one export to the next.
input_name = model.get_inputs()[0].name
scores = model.run(None, {input_name: imgs})[0]

# Keep the five highest-scoring (class_index, score) pairs.
score_sorted = sorted(enumerate(scores[0]), key=itemgetter(1), reverse=True)
results = score_sorted[:5]
# Show the results: map class indices to names and print the top-5 scores.
# Read the label map from the path stored in `labels` earlier in the script,
# and close the file as soon as it has been read.
label_map_path = labels
with open(label_map_path, encoding='utf-8') as label_file:
    labels = [line.strip() for line in label_file]
results = [(labels[index], score) for index, score in results]
print('The top-5 labels with corresponding scores are:')
for label, score in results:
    print(f'{label}: ', score)