# 话不多说,直接上代码:
import time
import cv2
import numpy as np
import onnxruntime
import colorsys
class YOLOv8:
    """YOLOv8 pose detector that runs an exported ONNX model with ONNX Runtime.

    Pipeline: convert the BGR frame to RGB, resize it with preserved aspect
    ratio onto a padded canvas of the model input size, run the session,
    drop candidates whose best class score is not above ``conf_thres``,
    scale boxes and keypoints back to source-image pixels and apply
    class-agnostic NMS with ``iou_thres``.
    """

    def __init__(self, path, conf_thres=0.7, iou_thres=0.7):
        """Load the model at ``path`` and store the filtering thresholds.

        Args:
            path: Path of the ``.onnx`` file handed to ONNX Runtime.
            conf_thres: Candidates must score strictly above this to be kept.
            iou_thres: Boxes overlapping a kept box by this IoU or more are
                suppressed.
        """
        self.conf_threshold = conf_thres
        self.iou_threshold = iou_thres
        # Initialize model
        self.initialize_model(path)
        # 1-based keypoint index pairs that draw_detections joins with a line.
        # NOTE(review): the 21-point layout looks like a hand (point 1 as the
        # root, four joints per finger) - confirm against the training data.
        self.skeleton = [
            [1, 2], [2, 3], [3, 4], [4, 5],
            [1, 6], [6, 7], [7, 8], [8, 9],
            [1, 10], [10, 11], [11, 12], [12, 13],
            [1, 14], [14, 15], [15, 16], [16, 17],
            [1, 18], [18, 19], [19, 20], [20, 21],
        ]

    def __call__(self, image, class_num, kpt_steps=2):
        """Shorthand for :meth:`detect_objects`."""
        return self.detect_objects(image, class_num, kpt_steps)

    def initialize_model(self, path):
        """Create the inference session and cache its input/output metadata."""
        # CPU is listed explicitly so the session still has a usable provider
        # on machines where the CUDA provider is not available.
        self.session = onnxruntime.InferenceSession(
            path,
            onnxruntime.SessionOptions(),
            providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
        )
        # Get model info
        self.get_input_details()
        self.get_output_details()

    def detect_objects(self, image, class_num, kpt_steps=2):
        """Run the full pipeline on one BGR image.

        Args:
            image: BGR image array as returned by ``cv2.imread``.
            class_num: Number of class-score channels in the model output.
            kpt_steps: Values stored per keypoint (2 for ``x, y``; 3 for
                ``x, y, confidence``). See :meth:`process_output`.

        Returns:
            ``(boxes, scores, class_ids, pose)``; the same values are also
            stored on the instance under those attribute names.
        """
        input_tensor, ratio = self.prepare_input(image)
        # Perform inference on the image
        outputs = self.inference(input_tensor)
        self.boxes, self.scores, self.class_ids, self.pose = self.process_output(
            outputs, ratio, class_num, kpt_steps
        )
        return self.boxes, self.scores, self.class_ids, self.pose

    def prepare_input(self, image):
        """Turn a BGR image into a ``float32`` NCHW tensor scaled to [0, 1].

        Returns:
            ``(input_tensor, ratio)`` where ``ratio`` maps model-input pixels
            back to source-image pixels.
        """
        self.img_height, self.img_width = image.shape[:2]
        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Resize
        input_img, ratio = self.ratioresize(input_img)
        # Scale input pixel values to 0 to 1
        input_img = input_img / 255.0
        # HWC -> CHW, then add the batch axis
        input_img = input_img.transpose(2, 0, 1)
        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)
        return input_tensor, ratio

    def inference(self, input_tensor):
        """Run the session on ``input_tensor`` and return the stacked outputs.

        All model outputs are stacked into a single array, which only works
        when they share one shape.
        """
        outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor})
        return np.array(outputs)

    def process_output(self, output, ratio, class_num, kpt_steps=2):
        """Decode raw model output into final detections.

        ``output[0]`` must hold the predictions of a single image as
        ``(channels, candidates)``, optionally with leading axes of size 1.
        Channels are read as 4 box values (cx, cy, w, h), then ``class_num``
        class scores, then the flattened keypoint values.

        Args:
            output: Value returned by :meth:`inference`.
            ratio: Factor mapping model-input pixels to source-image pixels.
            class_num: Number of class-score channels.
            kpt_steps: Values stored per keypoint. Only the first two values
                of each keypoint (x, y) are multiplied by ``ratio``; with the
                default of 2 every keypoint value is scaled. Pass 3 for
                models that emit ``x, y, confidence`` so the confidence is
                left untouched.

        Returns:
            ``(boxes_xyxy, scores, class_ids, keypoints)`` as arrays, or four
            empty lists when nothing passes the confidence threshold.
        """
        raw = np.asarray(output[0])
        # Collapse leading size-1 axes explicitly; a bare np.squeeze would
        # also drop the candidate axis when there is exactly one candidate.
        predictions = raw.reshape(raw.shape[-2], raw.shape[-1]).T

        # Filter out object confidence scores below threshold
        scores = np.max(predictions[:, 4:4 + class_num], axis=1)
        keep = scores > self.conf_threshold
        if not keep.any():
            return [], [], [], []
        predictions = predictions[keep, :]  # boolean indexing yields a copy
        scores = scores[keep]

        # Get the class with the highest confidence
        class_ids = np.argmax(predictions[:, 4:4 + class_num], axis=1)
        # Get bounding boxes for each object
        boxes = self.extract_boxes(predictions, ratio)
        # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
        indices = self.nms(boxes, scores, self.iou_threshold)

        # Get pose: rescale coordinates only, never a confidence channel
        keypoints = predictions[:, 4 + class_num:].copy()
        is_coordinate = np.arange(keypoints.shape[1]) % kpt_steps < 2
        keypoints[:, is_coordinate] *= ratio
        return boxes[indices], scores[indices], class_ids[indices], keypoints[indices]

    def extract_boxes(self, predictions, ratio):
        """Return the boxes of ``predictions`` as xyxy in source-image pixels.

        The first four columns are read as (cx, cy, w, h). ``predictions`` is
        not modified.
        """
        # Work on a copy so the caller's array is left untouched
        boxes = predictions[:, :4].copy()
        # Scale boxes to original image dimensions
        boxes *= ratio
        # Convert boxes to xyxy format
        return self.xywh2xyxy(boxes)

    def rescale_boxes(self, boxes):
        """Rescale boxes by plain per-axis stretching to the source image.

        Uses the image size recorded by :meth:`prepare_input`. Not used by
        the aspect-preserving pipeline in this class.
        """
        input_shape = np.array([self.input_width, self.input_height, self.input_width, self.input_height])
        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= np.array([self.img_width, self.img_height, self.img_width, self.img_height])
        return boxes

    def get_input_details(self):
        """Cache input names and the input height/width from the session."""
        model_inputs = self.session.get_inputs()
        self.input_names = [model_input.name for model_input in model_inputs]
        self.input_shape = model_inputs[0].shape
        # Indices 2 and 3 are read as height and width (NCHW layout)
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]

    def get_output_details(self):
        """Cache the output names from the session."""
        model_outputs = self.session.get_outputs()
        self.output_names = [model_output.name for model_output in model_outputs]

    def ratioresize(self, im, color=114):
        """Resize ``im`` with preserved aspect ratio onto a padded canvas.

        The resized image is placed in the top-left corner of a canvas of the
        model input size; the remaining area is filled with ``color``.

        Returns:
            ``(padded_img, 1 / r)`` where ``r`` is the applied scale factor.
        """
        shape = im.shape[:2]  # (height, width)
        new_h, new_w = self.input_height, self.input_width
        padded_img = np.full((new_h, new_w, 3), color, dtype=np.uint8)
        # Scale ratio (new / old)
        r = min(new_h / shape[0], new_w / shape[1])
        # Target size as (width, height), the order cv2.resize expects
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        if shape[::-1] != new_unpad:
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
        padded_img[: new_unpad[1], : new_unpad[0]] = im
        padded_img = np.ascontiguousarray(padded_img)
        return padded_img, 1 / r

    def nms(self, boxes, scores, iou_threshold):
        """Greedy class-agnostic non-maximum suppression.

        Returns:
            List of indices into ``boxes`` to keep, highest score first.
        """
        # Sort by score, best first
        sorted_indices = np.argsort(scores)[::-1]
        keep_boxes = []
        while sorted_indices.size > 0:
            # Pick the highest-scoring remaining box
            box_id = sorted_indices[0]
            keep_boxes.append(box_id)
            # Compute IoU of the picked box with the rest
            ious = self.compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])
            # Remove boxes with IoU over the threshold
            keep_indices = np.where(ious < iou_threshold)[0]
            # +1 because ious was computed against sorted_indices[1:]
            sorted_indices = sorted_indices[keep_indices + 1]
        return keep_boxes

    def compute_iou(self, box, boxes):
        """Return the IoU of one xyxy ``box`` against each row of ``boxes``."""
        # Corners of the intersection rectangle
        xmin = np.maximum(box[0], boxes[:, 0])
        ymin = np.maximum(box[1], boxes[:, 1])
        xmax = np.minimum(box[2], boxes[:, 2])
        ymax = np.minimum(box[3], boxes[:, 3])
        # Compute intersection area (zero when the boxes do not overlap)
        intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)
        # Compute union area
        box_area = (box[2] - box[0]) * (box[3] - box[1])
        boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        union_area = box_area + boxes_area - intersection_area
        # Compute IoU
        return intersection_area / union_area

    def xywh2xyxy(self, x):
        """Convert boxes from (cx, cy, w, h) to (x1, y1, x2, y2) on a copy."""
        y = np.copy(x)
        y[..., 0] = x[..., 0] - x[..., 2] / 2
        y[..., 1] = x[..., 1] - x[..., 3] / 2
        y[..., 2] = x[..., 0] + x[..., 2] / 2
        y[..., 3] = x[..., 1] + x[..., 3] / 2
        return y

    def draw_detections(self, image, boxes, scores, class_ids, class_list, colors, pose_xyxy, pose_shape):
        """Draw boxes, labels, keypoints and skeleton lines onto ``image``.

        Args:
            image: Image drawn on in place.
            boxes: Boxes as (x1, y1, x2, y2) in image pixels.
            scores: Score per box.
            class_ids: Class index per box.
            class_list: Class names indexed by class id.
            colors: Colour per class id.
            pose_xyxy: Flattened keypoint values per box.
            pose_shape: ``(num_keypoints, values_per_keypoint)``.

        Returns:
            The same ``image`` object.
        """
        tl = round(0.001 * (image.shape[0] + image.shape[1]) / 2) + 1  # line/font thickness
        tf = max(tl - 1, 1)  # font thickness
        num_kpts, steps = pose_shape[0], pose_shape[1]
        # A confidence value only exists when each keypoint has a third value
        has_conf = steps >= 3
        for box, score, class_id, all_pose in zip(boxes, scores, class_ids, pose_xyxy):
            # box
            x1, y1, x2, y2 = box
            c1, c2 = (int(x1), int(y1)), (int(x2), int(y2))
            color = colors[class_id]
            cv2.rectangle(image, c1, c2, color, 2)
            # label and score
            label = f"{class_list[class_id]}:{score:.2f}"
            t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
            c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
            cv2.rectangle(image, c1, c2, color, -1, cv2.LINE_AA)  # filled
            cv2.putText(image, label, (c1[0], c1[1] - 2), 0, tl / 3, [0, 0, 255],
                        thickness=tf, lineType=cv2.LINE_AA)
            # pose
            for kid in range(num_kpts):
                x_coord, y_coord = all_pose[steps * kid], all_pose[steps * kid + 1]
                cv2.circle(image, (int(float(x_coord)), int(float(y_coord))), 2, (255, 0, 0), -1)
            for first, second in self.skeleton:
                # Skip limbs that reference keypoints this layout does not have
                if max(first, second) > num_kpts:
                    continue
                start, end = (first - 1) * steps, (second - 1) * steps
                if has_conf:
                    # Confidence is the third value; offset +1 is the y coordinate
                    conf1, conf2 = all_pose[start + 2], all_pose[end + 2]
                    if not (conf1 > 0.5 and conf2 > 0.5):
                        continue
                pos1 = (int(float(all_pose[start])), int(float(all_pose[start + 1])))
                pos2 = (int(float(all_pose[end])), int(float(all_pose[end + 1])))
                cv2.line(image, pos1, pos2, (0, 255, 0), thickness=1)
        return image
if __name__ == "__main__":
    # Demo: run the detector over every image below a directory and show
    # each annotated result in a window, waiting for a key press per image.
    import os
    from tqdm import tqdm

    model_path = r"runs/train_pose/v8/weights/best.onnx"  # onnx path
    yolov8_detector = YOLOv8(model_path, conf_thres=0.3, iou_thres=0.45)
    img_dir_path = r"datasets/images/val"  # img dir
    class_name = ["pose0", "pose1", "pose2", "pose3", "pose4", "pose5"]  # model class
    pose_shape = (21, 2)  # pose shape: (keypoints, values per keypoint)
    class_num = len(class_name)

    # One colour per class, spread evenly around the hue circle
    hsv_tuples = [(x / class_num, 1., 1.) for x in range(class_num)]
    colors = [
        tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(*hsv))
        for hsv in hsv_tuples
    ]

    for root, _dirs, file_names in os.walk(img_dir_path):
        # Compare extensions case-insensitively so ".JPG" / ".PNG" are included
        image_names = [x for x in file_names if x.lower().endswith((".jpg", ".png"))]
        for img_name in tqdm(image_names):
            img_path = os.path.join(root, img_name)
            image = cv2.imread(img_path)
            if image is None:
                # imread returns None for unreadable or corrupt files; skip
                # them instead of crashing on image.shape further down.
                tqdm.write(f"skip unreadable image: {img_path}")
                continue
            boxes, scores, class_ids, pose_xyxy = yolov8_detector(image, class_num)
            # show
            image_with_detections = yolov8_detector.draw_detections(
                image, boxes, scores, class_ids, class_name, colors, pose_xyxy, pose_shape
            )
            result_img = cv2.resize(image_with_detections, (1152, 640))
            cv2.imshow("result", result_img)
            cv2.waitKey(0)
    cv2.destroyAllWindows()