I have to vent a little: the official tutorials are a real mess, especially the Python ones (though it may just be that my skills aren't up to it and I found them hard to follow). So that I never have to fight through the official docs again when running inference with MNN, I pulled the pre- and post-processing out of the YOLOv5 source code and hooked it up to MNN inference. The full code is below.
------ 2023.08.16: fixed a bug where the code returned empty results when run on ARM
1. Converting YOLOv5 to MNN
What, you haven't installed MNN yet?! And you don't know how to convert a model to MNN format either! Then take a look at my previous post (it also covers the caveats and parameters for each step of the inference API):
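For quick reference, the pip package (pip install MNN) ships an mnnconvert command-line tool; a typical ONNX-to-MNN conversion looks roughly like the line below (the file names are placeholders matching the paths used later):

mnnconvert -f ONNX --modelFile yolov5x.onnx --MNNModel yolov5x.mnn --bizCode MNN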
2. Inference with MNN (Expr API)
It looks long, but you only need to change the model and test-image paths in the main function. The model is the official YOLOv5 one; for how to export the original model to ONNX, see my other post.
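As a refresher, the export step in the ultralytics/yolov5 repo is roughly the following (the weights file and opset here are assumptions; adjust to your setup):

python export.py --weights yolov5x.pt --include onnx --opset 12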
import time
import MNN
import cv2
import numpy as np
ori_img_path = './data/ori_img/03.jpg'
mnn_model_path = './model/yolov5x.mnn'
imgsz = (640, 640)
CLASSES = {
0: 'person',
1: 'bicycle',
2: 'car',
3: 'motorbike',
4: 'aeroplane',
5: 'bus',
6: 'train',
7: 'truck',
8: 'boat',
9: 'traffic light',
10: 'fire hydrant',
11: 'stop sign',
12: 'parking meter',
13: 'bench',
14: 'bird',
15: 'cat',
16: 'dog',
17: 'horse',
18: 'sheep',
19: 'cow',
20: 'elephant',
21: 'bear',
22: 'zebra',
23: 'giraffe',
24: 'backpack',
25: 'umbrella',
26: 'handbag',
27: 'tie',
28: 'suitcase',
29: 'frisbee',
30: 'skis',
31: 'snowboard',
32: 'sports ball',
33: 'kite',
34: 'baseball bat',
35: 'baseball glove',
36: 'skateboard',
37: 'surfboard',
38: 'tennis racket',
39: 'bottle',
40: 'wine glass',
41: 'cup',
42: 'fork',
43: 'knife',
44: 'spoon',
45: 'bowl',
46: 'banana',
47: 'apple',
48: 'sandwich',
49: 'orange',
50: 'broccoli',
51: 'carrot',
52: 'hot dog',
53: 'pizza',
54: 'donut',
55: 'cake',
56: 'chair',
57: 'sofa',
58: 'potted plant',
59: 'bed',
60: 'dining table',
61: 'toilet',
62: 'tvmonitor',
63: 'laptop',
64: 'mouse',
65: 'remote',
66: 'keyboard',
67: 'cell phone',
68: 'microwave',
69: 'oven',
70: 'toaster',
71: 'sink',
72: 'refrigerator',
73: 'book',
74: 'clock',
75: 'vase',
76: 'scissors',
77: 'teddy bear',
78: 'hair drier',
79: 'toothbrush'
}
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
# Resize and pad image while meeting stride-multiple constraints
shape = im.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better val mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return im, ratio, (dw, dh)
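# Worked example (assumed input): a 1080x1920 frame with the defaults above is scaled by
# r = 1/3 to 360x640, then padded only up to the next multiple of stride 32 (auto=True), so
# letterbox returns a (384, 640, 3) image, ratio (1/3, 1/3) and padding (dw, dh) = (0.0, 12.0).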
def xywh2xyxy(x):
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
y = np.copy(x)
y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x
y[..., 1] = x[..., 1] - x[..., 3] / 2 # top left y
y[..., 2] = x[..., 0] + x[..., 2] / 2 # bottom right x
y[..., 3] = x[..., 1] + x[..., 3] / 2 # bottom right y
return y
def nms_boxes(boxes, scores, iou_thres=0.45):
    # Greedy NMS on xyxy boxes: keep the highest-scoring box, drop overlaps above iou_thres, repeat
x = boxes[:, 0]
y = boxes[:, 1]
w = boxes[:, 2] - boxes[:, 0]
h = boxes[:, 3] - boxes[:, 1]
areas = w * h
order = scores.argsort()[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
xx1 = np.maximum(x[i], x[order[1:]])
yy1 = np.maximum(y[i], y[order[1:]])
xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])
w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
inter = w1 * h1
ovr = inter / (areas[i] + areas[order[1:]] - inter)
        inds = np.where(ovr <= iou_thres)[0]
order = order[inds + 1]
keep = np.array(keep)
return keep
def box_iou(box1, box2, eps=1e-7):
    # Pairwise IoU between two sets of xyxy boxes (NumPy port of the torch original)
    (a1, a2), (b1, b2) = np.split(box1[:, None], 2, axis=2), np.split(box2[None], 2, axis=2)
    inter = np.clip(np.minimum(a2, b2) - np.maximum(a1, b1), 0, None).prod(2)
    return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)
def non_max_suppression(
prediction,
conf_thres=0.25,
iou_thres=0.45,
classes=None,
agnostic=False,
multi_label=False,
labels=(),
max_det=300,
nm=0, # number of masks
):
"""Non-Maximum Suppression (NMS) on inference results to reject overlapping detections
Returns:
list of detections, on (n,6) tensor per image [xyxy, conf, cls]
"""
# Checks
assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
    if isinstance(prediction, (list, tuple)):  # YOLOv5 model in validation mode, output = (inference_out, loss_out)
prediction = prediction[0] # select only inference output
bs = prediction.shape[0] # batch size
nc = prediction.shape[2] - nm - 5 # number of classes
xc = prediction[..., 4] > conf_thres # candidates
# Settings
max_wh = 7680 # (pixels) maximum box width and height
max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
redundant = True # require redundant detections
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
merge = False # use merge-NMS
mi = 5 + nc # mask start index
output = [np.zeros((0, 6 + nm))] * bs
for xi, x in enumerate(prediction): # image index, image inference
x = x[xc[xi]] # confidence
        if labels and len(labels[xi]):  # optionally append a-priori labels
            lb = labels[xi]
            v = np.zeros((len(lb), nc + nm + 5))
            v[:, :4] = lb[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(lb)), lb[:, 0].astype(int) + 5] = 1.0  # cls
            x = np.concatenate((x, v), 0)
# If none remain process next image
if not x.shape[0]:
continue
x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
# Box/Mask
        box = xywh2xyxy(x[:, :4])  # (center_x, center_y, width, height) to (x1, y1, x2, y2)
mask = x[:, mi:] # zero columns if no masks
# Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = np.nonzero(x[:, 5:mi] > conf_thres)
            x = np.concatenate((box[i], x[i, 5 + j, None], j[:, None].astype(np.float32), mask[i]), 1)
        else:  # best class only
            conf = np.max(x[:, 5:mi], 1).reshape(box.shape[0], 1)
            j = np.argmax(x[:, 5:mi], 1).reshape(box.shape[0], 1)
            x = np.concatenate((box, conf, j, mask), 1)[conf.reshape(box.shape[0]) > conf_thres]
# Filter by class
if classes is not None:
            x = x[(x[:, 5:6] == np.array(classes)).any(1)]
# Check shape
n = x.shape[0] # number of boxes
if not n: # no boxes
continue
        index = x[:, 4].argsort()[::-1][:max_nms]  # sort by confidence, descending, keep top max_nms
x = x[index]
# Batched NMS
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
        i = nms_boxes(boxes, scores, iou_thres)
i = i[:max_det] # limit detections
        # Merge-NMS: replace each kept box with an IoU-weighted mean of its overlapping boxes
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = (weights @ x[:, :4]) / weights.sum(1, keepdims=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy
output[xi] = x[i]
return output
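# Each element of `output` is an (n, 6 + nm) array with one row per kept detection:
# [x1, y1, x2, y2, conf, cls, *mask] -- e.g. output[0][:, 5] holds the class ids.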
def clip_boxes(boxes, shape):
# Clip boxes (xyxy) to image shape (height, width)
boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2
boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
# Rescale boxes (xyxy) from img1_shape to img0_shape
if ratio_pad is None: # calculate from img0_shape
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
else:
gain = ratio_pad[0][0]
pad = ratio_pad[1]
boxes[..., [0, 2]] -= pad[0] # x padding
boxes[..., [1, 3]] -= pad[1] # y padding
boxes[..., :4] /= gain
clip_boxes(boxes, img0_shape)
return boxes
if __name__ == "__main__":
t1 = time.time()
# (1) load model
net = MNN.nn.load_module_from_file(mnn_model_path, ["images"], ["output0"])
# preprocess
img = cv2.imread(ori_img_path)
    img = cv2.resize(img, (640, 640))  # pre-resize to 640x640 so the letterbox below is effectively a no-op
im = letterbox(img, imgsz, auto=True)[0] # padded resize
im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
im = np.ascontiguousarray(im)
im = im.astype(np.float32)
im = im / 255
if len(im.shape) == 3:
im = im[None] # expand for batch dim
    # (2) build a Var-type placeholder to hold the numpy input: placeholder(shape, format, dtype)
input_var = MNN.expr.placeholder(im.shape, MNN.expr.NCHW)
input_var.write(im)
    # (3) the Module expects NC4HW4 layout; convert the NCHW input
input_var = MNN.expr.convert(input_var, MNN.expr.NC4HW4)
# (4) inference
output_var = net.forward(input_var)
# (5) the output from net may be NC4HW4, turn to linear layout
output_var = MNN.expr.convert(output_var, MNN.expr.NCHW)
output_var = output_var.read()
t2 = time.time()
print("infer time:", t2-t1)
# NMS
    conf_thres = 0.25  # confidence threshold
    iou_thres = 0.45  # NMS IoU threshold
    max_det = 1000  # maximum number of detections per image
    classes = None  # optionally keep only specific class ids
    agnostic_nms = False  # if True, NMS also suppresses overlapping boxes of different classes
pred = non_max_suppression(output_var, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
# Process predictions
seen = 0
for i, det in enumerate(pred): # per image
seen += 1
if len(det):
            # Rescale boxes from the 640x640 inference size back to the original image (img) size
det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], img.shape).round()
outputs = pred[0][:, :6]
    if len(outputs):
        for det in outputs:
            prob = np.around(det[4], decimals=2)
            cls = int(det[5])
            if prob >= 0.5:
                x1, y1, x2, y2 = map(int, det[:4])
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(img, CLASSES[cls] + ' ' + str(prob), (x1, y1), cv2.FONT_HERSHEY_TRIPLEX, 0.8,
                            (0, 0, 255), 1, 4)
cv2.imwrite("./test_result.png", img)
3. Inference Results
4. Inference with MNN (Session API)
import MNN
import cv2
import numpy as np
CLASSES = {
0: 'person',
1: 'bicycle',
2: 'car',
3: 'motorbike',
4: 'aeroplane',
5: 'bus',
6: 'train',
7: 'truck',
8: 'boat',
9: 'traffic light',
10: 'fire hydrant',
11: 'stop sign',
12: 'parking meter',
13: 'bench',
14: 'bird',
15: 'cat',
16: 'dog',
17: 'horse',
18: 'sheep',
19: 'cow',
20: 'elephant',
21: 'bear',
22: 'zebra',
23: 'giraffe',
24: 'backpack',
25: 'umbrella',
26: 'handbag',
27: 'tie',
28: 'suitcase',
29: 'frisbee',
30: 'skis',
31: 'snowboard',
32: 'sports ball',
33: 'kite',
34: 'baseball bat',
35: 'baseball glove',
36: 'skateboard',
37: 'surfboard',
38: 'tennis racket',
39: 'bottle',
40: 'wine glass',
41: 'cup',
42: 'fork',
43: 'knife',
44: 'spoon',
45: 'bowl',
46: 'banana',
47: 'apple',
48: 'sandwich',
49: 'orange',
50: 'broccoli',
51: 'carrot',
52: 'hot dog',
53: 'pizza',
54: 'donut',
55: 'cake',
56: 'chair',
57: 'sofa',
58: 'potted plant',
59: 'bed',
60: 'dining table',
61: 'toilet',
62: 'tvmonitor',
63: 'laptop',
64: 'mouse',
65: 'remote',
66: 'keyboard',
67: 'cell phone',
68: 'microwave',
69: 'oven',
70: 'toaster',
71: 'sink',
72: 'refrigerator',
73: 'book',
74: 'clock',
75: 'vase',
76: 'scissors',
77: 'teddy bear',
78: 'hair drier',
79: 'toothbrush'
}
def box_iou(box1, box2, eps=1e-7):
    # Pairwise IoU between two sets of xyxy boxes (NumPy port of the torch original)
    (a1, a2), (b1, b2) = np.split(box1[:, None], 2, axis=2), np.split(box2[None], 2, axis=2)
    inter = np.clip(np.minimum(a2, b2) - np.maximum(a1, b1), 0, None).prod(2)
    return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
# Resize and pad image while meeting stride-multiple constraints
shape = im.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better val mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return im, ratio, (dw, dh)
def xywh2xyxy(x):
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
y = np.copy(x)
y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x
y[..., 1] = x[..., 1] - x[..., 3] / 2 # top left y
y[..., 2] = x[..., 0] + x[..., 2] / 2 # bottom right x
y[..., 3] = x[..., 1] + x[..., 3] / 2 # bottom right y
return y
def nms_boxes(boxes, scores, iou_thres=0.45):
    # Greedy NMS on xyxy boxes: keep the highest-scoring box, drop overlaps above iou_thres, repeat
x = boxes[:, 0]
y = boxes[:, 1]
w = boxes[:, 2] - boxes[:, 0]
h = boxes[:, 3] - boxes[:, 1]
areas = w * h
order = scores.argsort()[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
xx1 = np.maximum(x[i], x[order[1:]])
yy1 = np.maximum(y[i], y[order[1:]])
xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])
w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
inter = w1 * h1
ovr = inter / (areas[i] + areas[order[1:]] - inter)
        inds = np.where(ovr <= iou_thres)[0]
order = order[inds + 1]
keep = np.array(keep)
return keep
def non_max_suppression(
prediction,
conf_thres=0.25,
iou_thres=0.45,
classes=None,
agnostic=False,
multi_label=False,
labels=(),
max_det=300,
nm=0, # number of masks
):
"""Non-Maximum Suppression (NMS) on inference results to reject overlapping detections
Returns:
list of detections, on (n,6) tensor per image [xyxy, conf, cls]
"""
# Checks
assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
    if isinstance(prediction, (list, tuple)):  # YOLOv5 model in validation mode, output = (inference_out, loss_out)
prediction = prediction[0] # select only inference output
bs = prediction.shape[0] # batch size
nc = prediction.shape[2] - nm - 5 # number of classes
xc = prediction[..., 4] > conf_thres # candidates
# Settings
max_wh = 7680 # (pixels) maximum box width and height
max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
redundant = True # require redundant detections
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
merge = False # use merge-NMS
mi = 5 + nc # mask start index
output = [np.zeros((0, 6 + nm))] * bs
for xi, x in enumerate(prediction): # image index, image inference
x = x[xc[xi]] # confidence
        if labels and len(labels[xi]):  # optionally append a-priori labels
            lb = labels[xi]
            v = np.zeros((len(lb), nc + nm + 5))
            v[:, :4] = lb[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(lb)), lb[:, 0].astype(int) + 5] = 1.0  # cls
            x = np.concatenate((x, v), 0)
# If none remain process next image
if not x.shape[0]:
continue
x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
# Box/Mask
        box = xywh2xyxy(x[:, :4])  # (center_x, center_y, width, height) to (x1, y1, x2, y2)
mask = x[:, mi:] # zero columns if no masks
# Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = np.nonzero(x[:, 5:mi] > conf_thres)
            x = np.concatenate((box[i], x[i, 5 + j, None], j[:, None].astype(np.float32), mask[i]), 1)
        else:  # best class only
            conf = np.max(x[:, 5:mi], 1).reshape(box.shape[0], 1)
            j = np.argmax(x[:, 5:mi], 1).reshape(box.shape[0], 1)
            x = np.concatenate((box, conf, j, mask), 1)[conf.reshape(box.shape[0]) > conf_thres]
# Filter by class
if classes is not None:
            x = x[(x[:, 5:6] == np.array(classes)).any(1)]
# Check shape
n = x.shape[0] # number of boxes
if not n: # no boxes
continue
        index = x[:, 4].argsort()[::-1][:max_nms]  # sort by confidence, descending, keep top max_nms
x = x[index]
# Batched NMS
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
        i = nms_boxes(boxes, scores, iou_thres)
i = i[:max_det] # limit detections
        # Merge-NMS: replace each kept box with an IoU-weighted mean of its overlapping boxes
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = (weights @ x[:, :4]) / weights.sum(1, keepdims=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy
output[xi] = x[i]
return output
def clip_boxes(boxes, shape):
# Clip boxes (xyxy) to image shape (height, width)
boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2
boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
# Rescale boxes (xyxy) from img1_shape to img0_shape
if ratio_pad is None: # calculate from img0_shape
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
else:
gain = ratio_pad[0][0]
pad = ratio_pad[1]
boxes[..., [0, 2]] -= pad[0] # x padding
boxes[..., [1, 3]] -= pad[1] # y padding
boxes[..., :4] /= gain
clip_boxes(boxes, img0_shape)
return boxes
if __name__ == "__main__":
mnnModulePath = ""
IMG_Path = ""
Result_Path = ""
imgsz = (640, 640)
img = cv2.imread(IMG_Path)
img = cv2.resize(img, (640, 640))
# preprocess
im = letterbox(img, imgsz, auto=True)[0] # padded resize
im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
im = np.ascontiguousarray(im)
im = im.astype(np.float32)
im /= 255 # 0 - 255 to 0.0 - 1.0
if len(im.shape) == 3:
im = im[None] # expand for batch dim
interpreter = MNN.Interpreter(mnnModulePath)
config = {}
config['precision'] = 'low'
config['backend'] = 'CPU'
    config['numThread'] = 4  # MNN session config uses the 'numThread' key for CPU thread count
session = interpreter.createSession(config)
input_tensor = interpreter.getSessionInput(session)
interpreter.resizeTensor(input_tensor, (1, 3, 640, 640))
interpreter.resizeSession(session)
mnn_input = MNN.Tensor((1, 3, 640, 640), MNN.Halide_Type_Float, im, MNN.Tensor_DimensionType_Caffe)
input_tensor.copyFrom(mnn_input)
interpreter.runSession(session)
output_tensor = interpreter.getSessionOutput(session)
    # allocate a host tensor matching the session output shape
    # (for a 640x640 YOLOv5 input this is (1, 25200, 85), since 3*(80^2 + 40^2 + 20^2) = 25200 anchors)
    tmp_output = MNN.Tensor(output_tensor.getShape(), MNN.Halide_Type_Float,
                            np.zeros(output_tensor.getShape(), dtype=np.float32),
                            MNN.Tensor_DimensionType_Caffe)
output_tensor.copyToHostTensor(tmp_output)
pred = tmp_output.getNumpyData()
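    # pred now holds the raw prediction as a numpy array, e.g. (1, 25200, 85) for a 640x640 input:
    # 4 box coords + objectness + 80 class scores per anchor, the same layout the Expr API returned above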
# NMS
conf_thres = 0.25 # confidence threshold
iou_thres = 0.45 # NMS IOU threshold
max_det = 1000 # maximum detections per image
classes = None # filter by class: --class 0, or --class 0 2 3
agnostic_nms = False # class-agnostic NMS
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
# Process predictions
seen = 0
for i, det in enumerate(pred): # per image
seen += 1
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], img.shape).round()
    outputs = pred[0][:, :6]
    if len(outputs):
        for det in outputs:
            prob = np.around(det[4], decimals=2)
            cls = int(det[5])
            if prob >= 0.4:
                x1, y1, x2, y2 = map(int, det[:4])
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 1)
                cv2.putText(img, CLASSES[cls] + ' ' + str(prob), (x1, y1), cv2.FONT_HERSHEY_TRIPLEX, 0.8,
                            (0, 255, 0), 1, 4)
cv2.imwrite(Result_Path, img)
5. Inference Results