1、模型预测
import onnxruntime
import numpy as np
import cv2
import time
class ChtDeploy:
    """Run a YOLO-style ONNX detector on a single image and display the result.

    The pipeline is: letterbox the image to the detector size, convert it to a
    normalized NCHW tensor, run the ONNX model, filter candidates by
    confidence, apply class-aware NMS, draw the surviving boxes and show the
    image mapped back to its original size.

    Args:
        img_path: Path of the image to load with ``cv2.imread``.
        onnx_path: Path of the ONNX model file.
        iou_threshold: Boxes overlapping a kept box by more than this IoU are
            suppressed.
        conf_threshold: Candidates scoring at or below this value are dropped.
        detect_w: Width of the detector input.
        detect_h: Height of the detector input.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """

    def __init__(self, img_path, onnx_path, iou_threshold=0.45, conf_threshold=0.3, detect_w=640, detect_h=640):
        img = cv2.imread(img_path)  # h, w, c
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"cannot read image: {img_path}")
        self.img = img
        self.img_h = self.img.shape[0]
        self.img_w = self.img.shape[1]
        self.iou_threshold = iou_threshold
        self.conf_threshold = conf_threshold
        self.detect_w = detect_w
        self.detect_h = detect_h
        self.onnx = onnx_path
        # Per-class coordinate offset used by deploy() to make NMS class-aware.
        self.max_wh = max(self.detect_h, self.detect_w)

    def letterbox(self):
        """Scale the image to fit the detector input, padding with gray.

        Returns:
            A tuple ``(image, A)`` where ``image`` has size
            ``detect_h x detect_w`` and ``A`` is the 2x3 float32 affine matrix
            (uniform scale followed by a translation that centers the image)
            mapping original pixels to letterboxed pixels. When the image
            already has the detector size, a copy of it and the identity
            transform are returned, so callers can always unpack two values.
        """
        if self.img_h == self.detect_h and self.img_w == self.detect_w:
            identity = np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32)
            # Copy so later in-place drawing does not modify self.img.
            return self.img.copy(), identity
        scale = min(self.detect_w / self.img_w, self.detect_h / self.img_h)  # resize ratio
        # Scale first, then translate so the resized image sits in the center.
        h_t = abs(self.detect_h - scale * self.img_h) / 2
        w_t = abs(self.detect_w - scale * self.img_w) / 2
        A = np.array([[scale, 0, w_t], [0, scale, h_t]], dtype=np.float32)
        img_back = cv2.warpAffine(self.img, A, (self.detect_w, self.detect_h), borderValue=(128, 128, 128))
        return img_back, A

    def img2input(self, img):
        """Convert an HWC image into a float32 tensor of shape (1, C, H, W) scaled by 1/255."""
        # NOTE(review): the channel order is left as loaded (cv2.imread yields
        # BGR); confirm whether the exported model expects RGB input.
        img = np.transpose(img, (2, 0, 1))
        img = img / 255
        return np.expand_dims(img, axis=0).astype(np.float32)  # e.g. (1, 3, 640, 640)

    def infer(self, onnx, img):
        """Run the ONNX model at path ``onnx`` on ``img`` and return its first output.

        A new inference session is created on every call.
        """
        session = onnxruntime.InferenceSession(onnx)
        input_name = session.get_inputs()[0].name
        label_name = session.get_outputs()[0].name
        pred = session.run([label_name], {input_name: img})[0]
        return pred  # e.g. yolov8: 1 * 84 * 8400, yolov5: 1 * 25200 * 85

    def xywh_to_x1y1x2y2(self, boxes):
        """Convert rows of (cx, cy, w, h, ...) into an (N, 4) array of (x1, y1, x2, y2).

        Only the first four columns of ``boxes`` are read.
        """
        # Center coordinates and sizes.
        x_center, y_center, width, height = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        # Top-left and bottom-right corners.
        x1 = x_center - width / 2
        y1 = y_center - height / 2
        x2 = x_center + width / 2
        y2 = y_center + height / 2
        return np.stack((x1, y1, x2, y2), axis=1)

    def normalpred(self, pred):
        """Normalize raw model output and drop low-confidence candidates.

        ``pred`` must be 3-D; only the first batch element is used. When
        ``pred.shape[1] < pred.shape[2]`` the output is treated as the
        v8-style layout (attributes x candidates): it is transposed and the
        score is the maximum over columns 4 onward. Otherwise it is treated as
        the v5-style layout (candidates x attributes): the score is column 4
        and the class is the argmax over columns 5 onward.

        Returns:
            ``(boxes, scores, classes)`` for candidates whose score is strictly
            greater than ``conf_threshold``; ``boxes`` is (N, 4) in
            (x1, y1, x2, y2) form.
        """
        if pred.shape[1] < pred.shape[2]:  # v8 layout
            pred = pred[0].T  # e.g. 84 * 8400 -> 8400 * 84
            class_scores = pred[:, 4:]
            scores = np.max(class_scores, axis=1)
            classes = np.argmax(class_scores, axis=1)
        else:  # v5 layout
            pred = pred[0]
            scores = pred[:, 4]
            classes = np.argmax(pred[:, 5:], axis=1)
        mask = scores > self.conf_threshold  # confidence filter
        boxes = self.xywh_to_x1y1x2y2(pred[mask])
        return boxes, scores[mask], classes[mask]

    def box_area(self, boxes):
        """Return the area of each (x1, y1, x2, y2) box in an (N, 4) array."""
        return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    def box_iou(self, box1, box2):
        """Return the (N, M) matrix of pairwise IoU between two sets of boxes.

        Args:
            box1: (N, 4) array of (x1, y1, x2, y2) boxes.
            box2: (M, 4) array of (x1, y1, x2, y2) boxes.

        Pairs whose union area is zero get an IoU of 0 instead of NaN.
        """
        area1 = self.box_area(box1)  # N
        area2 = self.box_area(box2)  # M
        # Broadcasting (N, 1, 2) against (M, 2) yields every pairwise corner.
        lt = np.maximum(box1[:, np.newaxis, :2], box2[:, :2])
        rb = np.minimum(box1[:, np.newaxis, 2:], box2[:, 2:])
        wh = np.maximum(0, rb - lt)  # [N, M, 2]; disjoint boxes clamp to 0
        inter = wh[:, :, 0] * wh[:, :, 1]
        union = area1[:, np.newaxis] + area2 - inter
        iou = np.zeros(union.shape, dtype=np.result_type(union.dtype, np.float32))
        # Divide only where the union is positive; other entries stay 0.
        np.divide(inter, union, out=iou, where=union > 0)
        return iou

    def numpy_nms(self, boxes, scores, iou_threshold):
        """Greedy non-maximum suppression.

        Args:
            boxes: (N, 4) array of (x1, y1, x2, y2) boxes.
            scores: (N,) array of scores.
            iou_threshold: A remaining box is dropped when its IoU with the
                current best box is greater than this value.

        Returns:
            List of indices into ``boxes`` that are kept, best score first.
        """
        idxs = scores.argsort()  # ascending order: the best candidate is last
        keep = []
        while idxs.size > 0:
            max_score_index = idxs[-1]
            max_score_box = boxes[max_score_index][None, :]
            keep.append(max_score_index)
            if idxs.size == 1:
                break
            # Remove the best box, then compare it with every remaining box.
            idxs = idxs[:-1]
            other_boxes = boxes[idxs]  # [M, 4]
            ious = self.box_iou(max_score_box, other_boxes)  # 1 x M
            idxs = idxs[ious[0] <= iou_threshold]
        return keep

    def draw_res(self, boxes, classes, img, A, color=(255, 255, 0), thickness=2):
        """Draw boxes and labels on ``img``, undo the letterbox and show the image.

        Args:
            boxes: Boxes in (x1, y1, x2, y2) form, in letterboxed coordinates.
            classes: Class id for each box.
            img: Letterboxed image; it is drawn on in place.
            A: 2x3 affine matrix that produced ``img``; it is inverted to map
                the annotated image back to the original size.
            color: Rectangle color.
            thickness: Rectangle line thickness.

        Blocks in ``cv2.waitKey(0)`` until a key is pressed.
        """
        namelist = ["red", "green", "yellow", "off"]
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 1
        thickness_text = 2
        text_color = (255, 255, 255)  # white
        for i, box in enumerate(boxes):
            x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
            cv2.rectangle(img, (x1, y1), (x2, y2), color, thickness)
            # NOTE(review): the label lookup uses class id - 1, so class 0 maps
            # to the last name ("off") — confirm this matches the label order
            # the model was trained with.
            label_idx = int(classes[i]) - 1
            if -len(namelist) <= label_idx < len(namelist):
                text = namelist[label_idx]
            else:
                # Model reports a class outside the name list: show its id
                # rather than failing with IndexError.
                text = str(int(classes[i]))
            print("===>",text)
            # The text is anchored at the top-left corner of the box.
            cv2.putText(img, text, (x1, y1), font, font_scale, text_color, thickness_text)
        Q = cv2.invertAffineTransform(A)
        img = cv2.warpAffine(img, Q, (self.img_w, self.img_h), borderValue=(128, 128, 128))
        cv2.imshow('detect', img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    def deploy(self):
        """Run the whole pipeline on the loaded image and display the detections."""
        img, A = self.letterbox()
        img1 = self.img2input(img)
        pred = self.infer(self.onnx, img1)
        boxes, scores, classes = self.normalpred(pred)
        # Shift each class's boxes by a class-dependent offset so that boxes of
        # different classes do not suppress each other during NMS.
        c = classes * self.max_wh
        nb = boxes + c[:, np.newaxis]
        keep = self.numpy_nms(nb, scores, self.iou_threshold)
        self.draw_res(boxes[keep], classes[keep], img, A)
        print(keep)
# start_time = time.time()
#
# # Run your code
# # ...
# Build the detector for pic03.png with the yolov5.onnx model (IoU threshold
# 0.45, confidence threshold 0.7) and run it; deploy() shows the annotated
# image in a window and prints the indices kept by NMS.
d = ChtDeploy(img_path="pic03.png", onnx_path="yolov5.onnx",iou_threshold=0.45,conf_threshold=0.7)
d.deploy()
# # Record the end time
# end_time = time.time()
#
# # Compute the elapsed time
# elapsed_time = end_time - start_time
# print(f"代码执行时间: {elapsed_time} 秒")
2、模型打包
import torch
from models.experimental import attempt_load
# Export script: load the checkpoint 'best.pt' on the CPU, trace it with a
# random 1 x 3 x 640 x 640 input and write the result to "best.onnx".
# attempt_load is imported from the project-local models.experimental module
# (presumably the YOLOv5 repository — confirm before running elsewhere).
model = attempt_load('best.pt', map_location=torch.device('cpu')) # load FP32 model
model.eval() # set in evaluation mode
input_tensor = torch.randn(1, 3, 640, 640) # create a random input tensor
torch.onnx.export(model, input_tensor, "best.onnx") # export the model to ONNX