Preface
As artificial intelligence continues to advance, research and applications in computer vision have become increasingly widespread. Keypoint detection, an important branch of the field, is widely used in gesture recognition, human-computer interaction, virtual reality, and related areas, and YOLOv8-pose is an efficient, fast keypoint-detection algorithm that has already seen broad adoption. This article describes how to convert a YOLOv8-pose .pt model to RKNN and deploy it on the RV1126 platform.
I. Training the .pt model file
Train with the official open-source YOLOv8-pose code to obtain the .pt model file; this article uses ultralytics version 8.2.0.
git clone https://github.com/ultralytics/ultralytics.git
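For reference, here is a minimal training sketch using the standard ultralytics API; the dataset YAML and hyperparameters below are placeholders, not the exact settings used for this article:

from ultralytics import YOLO

# start from the pretrained pose checkpoint and fine-tune on a pose dataset
model = YOLO("yolov8n-pose.pt")
model.train(data="coco8-pose.yaml", epochs=100, imgsz=640)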
II. Converting .pt to ONNX
1. Saving the original .pt model as a weights-only .pt model
(1) In ultralytics/engine/model.py, modify the _load function by adding the weight-saving code shown below:
def _load(self, weights: str, task=None) -> None:
    """
    Initializes a new model and infers the task type from the model head.

    Args:
        weights (str): model checkpoint to be loaded
        task (str | None): model task
    """
    if weights.lower().startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://")):
        weights = checks.check_file(weights)  # automatically download and return local filename
    weights = checks.check_model_file_from_stem(weights)  # add suffix, i.e. yolov8n -> yolov8n.pt

    if Path(weights).suffix == ".pt":
        self.model, self.ckpt = attempt_load_one_weight(weights)
        self.task = self.model.args["task"]
        self.overrides = self.model.args = self._reset_ckpt_args(self.model.args)
        self.ckpt_path = self.model.pt_path
    else:
        weights = checks.check_file(weights)  # runs in all cases, not redundant with above call
        self.model, self.ckpt = weights, None
        self.task = task or guess_model_task(weights)
        self.ckpt_path = weights
    self.overrides["model"] = weights
    self.overrides["task"] = self.task
    self.model_name = weights

    # added: save the fused, eval-mode model's weights (torch must be imported in this file)
    self.model.fuse()
    self.model.eval()
    torch.save(self.model.state_dict(), "./yolov8n-pose-data.pt")
    print("======================== save pt weights Finished! .... ")
(2) Create a new savept.py with the following code; running it tests the model and saves the weights at the same time:
from ultralytics import YOLO
import time

# Initialize a model
model = YOLO("./yolov8n-pose.pt")

# Run inference (loading the model above already triggers the weight-saving code added to _load)
results = model(task='pose', mode='predict', source='/home/av/newdisk/code/ultralytics_yolov8_pose_8_2_0/bus.jpg', line_width=3, show=True, save=False, device='cpu')
time.sleep(5)
This produces the weights-only model yolov8n-pose-data.pt.
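As an optional sanity check, the saved file should load back as a plain state dict of tensors; a minimal sketch:

import torch

sd = torch.load("./yolov8n-pose-data.pt", map_location="cpu")
print(len(sd), "weight tensors")  # number of entries in the state dict
print(next(iter(sd)))             # name of the first parameter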
2. Converting the weights-only .pt model to ONNX
(1) In ultralytics/engine/model.py, modify the _new function by adding the ONNX export code; the complete function is as follows:
def _new(self, cfg: str, task=None, model=None, verbose=False) -> None:
    """
    Initializes a new model and infers the task type from the model definitions.

    Args:
        cfg (str): model configuration file
        task (str | None): model task
        model (BaseModel): Customized model.
        verbose (bool): display model info on load
    """
    cfg_dict = yaml_model_load(cfg)
    self.cfg = cfg
    self.task = task or guess_model_task(cfg_dict)
    self.model = (model or self._smart_load("model"))(cfg_dict, verbose=verbose and RANK == -1)  # build model
    self.overrides["model"] = self.cfg
    self.overrides["task"] = self.task

    # Below added to allow export from YAMLs
    self.model.args = {**DEFAULT_CFG_DICT, **self.overrides}  # combine default and model args (prefer model args)
    self.model.task = self.task
    self.model_name = cfg

    # ----- added: load the saved weights and export ONNX -----
    self.model.fuse()
    self.model.eval()
    self.model.load_state_dict(torch.load("./yolov8n-pose-data.pt", map_location='cpu'), strict=False)
    print("=========== onnx =========== ")
    dummy_input = torch.randn(1, 3, 640, 640)
    input_names = ["data"]
    # Nine outputs: one box-regression map and one class map per detection scale, plus three keypoint maps.
    # (Per scale, the modified Detect head below actually emits the decoded box-distance map first and the
    # class logits second, so these names are positional labels; the postprocess script later indexes the
    # outputs by position, so the pipeline is consistent.)
    output_names = ["cls1", "reg1", "cls2", "reg2", "cls3", "reg3", "ps1", "ps2", "ps3"]
    torch.onnx.export(self.model, dummy_input, "./yolov8n-pose-data.onnx", verbose=False,
                      input_names=input_names, output_names=output_names, opset_version=11)
    print("======================== convert onnx Finished! .... ")
(2) Modify the Detect detection head and the Pose keypoint head in ultralytics/nn/modules/head.py. The complete modified code is below and can directly replace the original classes.
# --- modified for ONNX export ---
class Detect(nn.Module):
    """YOLOv8 Detect head for detection models."""

    dynamic = False  # force grid reconstruction
    export = False  # export mode
    shape = None
    anchors = torch.empty(0)  # init
    strides = torch.empty(0)  # init

    def __init__(self, nc=80, ch=()):  # detection layer
        super().__init__()
        self.nc = nc  # number of classes
        self.nl = len(ch)  # number of detection layers
        self.reg_max = 16  # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
        self.no = nc + self.reg_max * 4  # number of outputs per anchor
        self.stride = torch.zeros(self.nl)  # strides computed during build
        c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc)  # channels
        self.cv2 = nn.ModuleList(
            nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
        self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
        self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()

        # added for ONNX export: a fixed 1x1 conv with weights 0..15 that computes the DFL expectation
        self.conv1x1 = nn.Conv2d(16, 1, 1, bias=False).requires_grad_(False)
        x = torch.arange(16, dtype=torch.float)
        self.conv1x1.weight.data[:] = nn.Parameter(x.view(1, 16, 1, 1))

    def forward(self, x):
        """Concatenates and returns predicted bounding boxes and class probabilities."""
        shape = x[0].shape  # BCHW
        y = []
        for i in range(self.nl):
            t1 = self.cv2[i](x[i])  # box-distribution logits, (b, 64, h, w)
            t2 = self.cv3[i](x[i])  # class logits, (b, nc, h, w)
            y.append(self.conv1x1(t1.view(t1.shape[0], 4, 16, -1).transpose(2, 1).softmax(1)))
            # y.append(t2.sigmoid())
            y.append(t2)
        return y
        # The original inference path below is unreachable after the early return above (kept for reference).
        for i in range(self.nl):
            x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
        if self.training:
            return x
        elif self.dynamic or self.shape != shape:
            self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
            self.shape = shape

        x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
        if self.export and self.format in ('saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs'):  # avoid TF FlexSplitV ops
            box = x_cat[:, :self.reg_max * 4]
            cls = x_cat[:, self.reg_max * 4:]
        else:
            box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
        dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
        y = torch.cat((dbox, cls.sigmoid()), 1)  # official code
        # y = torch.cat((self.dfl(box), cls.sigmoid()), 1)  # used when exporting this article's ONNX
        return y if self.export else (y, x)

    def bias_init(self):
        """Initialize Detect() biases, WARNING: requires stride availability."""
        m = self  # self.model[-1]  # Detect() module
        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
        # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # nominal class frequency
        for a, b, s in zip(m.cv2, m.cv3, m.stride):  # from
            a[-1].bias.data[:] = 1.0  # box
            b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
# ----- modified for ONNX export -----
class Pose(Detect):
    """YOLOv8 Pose head for keypoints models."""

    def __init__(self, nc=80, kpt_shape=(17, 3), ch=()):
        """Initialize YOLO network with default parameters and Convolutional Layers."""
        super().__init__(nc, ch)
        self.kpt_shape = kpt_shape  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
        self.nk = kpt_shape[0] * kpt_shape[1]  # number of keypoints total
        self.detect = Detect.forward
        c4 = max(ch[0] // 4, self.nk)
        self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)

    def forward(self, x):
        """Perform forward pass through YOLO model and return predictions."""
        bs = x[0].shape[0]  # batch size
        # kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1)  # (bs, 17*3, h*w)
        ps = []
        for i in range(self.nl):
            ps.append(self.cv4[i](x[i]))  # raw keypoint maps, (b, 17*3, h, w) per scale
        x = self.detect(self, x)
        return x, ps
        # The original inference path below is unreachable after the early return above (kept for reference).
        if self.training:
            return x, kpt
        pred_kpt = self.kpts_decode(bs, kpt)
        return torch.cat([x, pred_kpt], 1) if self.export else (torch.cat([x[0], pred_kpt], 1), (x[1], kpt))

    def kpts_decode(self, bs, kpts):
        """Decodes keypoints."""
        ndim = self.kpt_shape[1]
        if self.export:  # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug
            y = kpts.view(bs, *self.kpt_shape, -1)
            a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
            if ndim == 3:
                a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2)
            return a.view(bs, self.nk, -1)
        else:
            y = kpts.clone()
            if ndim == 3:
                y[:, 2::3].sigmoid_()  # inplace sigmoid
            y[:, 0::ndim] = (y[:, 0::ndim] * 2.0 + (self.anchors[0] - 0.5)) * self.strides
            y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (self.anchors[1] - 0.5)) * self.strides
        return y
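A note on the conv1x1 layer added in Detect.__init__ above: it re-implements the DFL expectation (a softmax over the 16 distribution bins, weighted by the bin indices 0..15) as a fixed 1x1 convolution, a form the RKNN toolchain tends to convert and quantize more cleanly than the original DFL module. A standalone sketch (with hypothetical shapes) verifying the equivalence:

import torch
import torch.nn as nn

# fixed 1x1 conv whose weights are the bin indices 0..15, as in Detect.__init__
conv = nn.Conv2d(16, 1, 1, bias=False)
conv.weight.data[:] = torch.arange(16, dtype=torch.float).view(1, 16, 1, 1)

t1 = torch.randn(2, 64, 20, 20)                       # dummy box-distribution logits (b, 4*16, h, w)
p = t1.view(2, 4, 16, -1).transpose(2, 1).softmax(1)  # (b, 16, 4, h*w), softmax over the 16 bins
via_conv = conv(p)                                    # (b, 1, 4, h*w): expectation via the conv
via_sum = (p * torch.arange(16, dtype=torch.float).view(1, 16, 1, 1)).sum(1, keepdim=True)
print(torch.allclose(via_conv, via_sum, atol=1e-5))   # True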
(3) Create a new export_onnx.py with the following code; running it generates the converted ONNX model. (Building the model from the YAML invokes the modified _new, which loads the saved weights and runs the export.)
from ultralytics import YOLO
model = YOLO('./ultralytics/cfg/models/v8/yolov8n-pose.yaml')
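Optionally, a quick check with onnxruntime (assuming it is installed) confirms the export has the expected input and nine outputs:

import onnxruntime as ort

sess = ort.InferenceSession("./yolov8n-pose-data.onnx")
print([i.name for i in sess.get_inputs()])   # expected: ['data']
print([o.name for o in sess.get_outputs()])  # expected: cls1, reg1, ..., ps1, ps2, ps3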
3. Simplifying the converted ONNX
So that the model converts smoothly to an RV1126 RKNN model, simplify the ONNX file with onnxsim.
Create a new simonnx.py with the following code; running it produces the simplified ONNX model:
import onnxsim
import onnx
print('Simplifying the ONNX model')
model_onnx = onnx.load("./yolov8n-pose-data.onnx")
model_onnx, _ = onnxsim.simplify(model_onnx)
onnx.save(model_onnx, "./yolov8n-pose-data-smi.onnx")
This yields the simplified model yolov8n-pose-data-smi.onnx.
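An optional numerical check (assuming onnxruntime and numpy are installed) can confirm the simplified model still matches the original:

import numpy as np
import onnxruntime as ort

x = np.random.rand(1, 3, 640, 640).astype(np.float32)
ref = ort.InferenceSession("./yolov8n-pose-data.onnx").run(None, {"data": x})
simp = ort.InferenceSession("./yolov8n-pose-data-smi.onnx").run(None, {"data": x})
print(all(np.allclose(a, b, atol=1e-4) for a, b in zip(ref, simp)))  # expect True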
III. Converting ONNX to RKNN
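Quantization during the build step reads calibration images from dataset.txt, a plain text file with one image path per line. A minimal sketch to generate it, assuming the calibration images live in ./images:

import glob

# one calibration image path per line; quantization reads these during rknn.build()
with open("dataset.txt", "w") as f:
    for path in sorted(glob.glob("./images/*.jpg")):
        f.write(path + "\n")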
Use the rknn_toolkit package to build the conversion program: create a new onnx2rknn.py and add the following code:
import os
import urllib
import traceback
import time
import sys
import numpy as np
import cv2
from rknn.api import RKNN
from math import exp
ONNX_MODEL = './yolov8n-pose-data-smi.onnx'
RKNN_MODEL = './yolov8n_pose_pre.rknn'
DATASET = './dataset.txt'
QUANTIZE_ON = True
DEFAULT_PRE_compile = True  # whether to pre-compile (note: pre-compiled models generally run only on the device; set False if testing on the PC simulator via init_runtime())
color_list = [(0, 0, 255), (0, 255, 0), (255, 0, 0)]
skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9], [8, 10], [9, 11],
[2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]
CLASSES = ['person']
meshgrid = []
class_num = len(CLASSES)
headNum = 3
keypoint_num = 17
strides = [8, 16, 32]
mapSize = [[80, 80], [40, 40], [20, 20]]
nmsThresh = 0.55
objectThresh = 0.5
input_imgH = 640
input_imgW = 640
class DetectBox:
    def __init__(self, classId, score, xmin, ymin, xmax, ymax, pose):
        self.classId = classId
        self.score = score
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.pose = pose


def GenerateMeshgrid():
    # flattened (x + 0.5, y + 0.5) grid-cell centers for all three feature-map scales
    for index in range(headNum):
        for i in range(mapSize[index][0]):
            for j in range(mapSize[index][1]):
                meshgrid.append(j + 0.5)
                meshgrid.append(i + 0.5)
def IOU(xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2):
    xmin = max(xmin1, xmin2)
    ymin = max(ymin1, ymin2)
    xmax = min(xmax1, xmax2)
    ymax = min(ymax1, ymax2)

    innerWidth = xmax - xmin
    innerHeight = ymax - ymin
    innerWidth = innerWidth if innerWidth > 0 else 0
    innerHeight = innerHeight if innerHeight > 0 else 0

    innerArea = innerWidth * innerHeight
    area1 = (xmax1 - xmin1) * (ymax1 - ymin1)
    area2 = (xmax2 - xmin2) * (ymax2 - ymin2)
    total = area1 + area2 - innerArea
    return innerArea / total
def NMS(detectResult):
    predBoxs = []
    sort_detectboxs = sorted(detectResult, key=lambda x: x.score, reverse=True)
    for i in range(len(sort_detectboxs)):
        xmin1 = sort_detectboxs[i].xmin
        ymin1 = sort_detectboxs[i].ymin
        xmax1 = sort_detectboxs[i].xmax
        ymax1 = sort_detectboxs[i].ymax
        classId = sort_detectboxs[i].classId
        if sort_detectboxs[i].classId != -1:
            predBoxs.append(sort_detectboxs[i])
            for j in range(i + 1, len(sort_detectboxs), 1):
                if classId == sort_detectboxs[j].classId:
                    xmin2 = sort_detectboxs[j].xmin
                    ymin2 = sort_detectboxs[j].ymin
                    xmax2 = sort_detectboxs[j].xmax
                    ymax2 = sort_detectboxs[j].ymax
                    iou = IOU(xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2)
                    if iou > nmsThresh:
                        sort_detectboxs[j].classId = -1
    return predBoxs


def sigmoid(x):
    return 1 / (1 + exp(-x))
def postprocess(out, img_h, img_w):
    print('postprocess ... ')
    detectResult = []
    output = []
    for i in range(len(out)):
        output.append(out[i].reshape((-1)))
    scale_h = img_h / input_imgH
    scale_w = img_w / input_imgW
    gridIndex = -2
    for index in range(headNum):
        reg = output[index * 2 + 0]          # decoded box distances (l, t, r, b) for this scale
        cls = output[index * 2 + 1]          # raw class logits for this scale
        pose = output[headNum * 2 + index]   # raw keypoint map for this scale
        for h in range(mapSize[index][0]):
            for w in range(mapSize[index][1]):
                gridIndex += 2
                for cl in range(class_num):
                    cls_val = sigmoid(cls[cl * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w])
                    if cls_val > objectThresh:
                        # box corners = grid-cell center -/+ predicted distances, scaled by the stride
                        x1 = (meshgrid[gridIndex + 0] - reg[0 * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]) * strides[index]
                        y1 = (meshgrid[gridIndex + 1] - reg[1 * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]) * strides[index]
                        x2 = (meshgrid[gridIndex + 0] + reg[2 * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]) * strides[index]
                        y2 = (meshgrid[gridIndex + 1] + reg[3 * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]) * strides[index]
                        xmin = x1 * scale_w
                        ymin = y1 * scale_h
                        xmax = x2 * scale_w
                        ymax = y2 * scale_h
                        xmin = xmin if xmin > 0 else 0
                        ymin = ymin if ymin > 0 else 0
                        xmax = xmax if xmax < img_w else img_w
                        ymax = ymax if ymax < img_h else img_h
                        poseResult = []
                        for kc in range(keypoint_num):
                            px = pose[(kc * 3 + 0) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]
                            py = pose[(kc * 3 + 1) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]
                            vs = sigmoid(pose[(kc * 3 + 2) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w])
                            # keypoint decode matches kpts_decode: (raw * 2 + (center - 0.5)) * stride
                            x = (px * 2.0 + (meshgrid[gridIndex + 0] - 0.5)) * strides[index] * scale_w
                            y = (py * 2.0 + (meshgrid[gridIndex + 1] - 0.5)) * strides[index] * scale_h
                            poseResult.append(vs)
                            poseResult.append(x)
                            poseResult.append(y)
                        # print(poseResult)
                        box = DetectBox(cl, cls_val, xmin, ymin, xmax, ymax, poseResult)
                        detectResult.append(box)
    # NMS
    print('detectResult:', len(detectResult))
    predBox = NMS(detectResult)
    return predBox
def export_rknn_inference(img):
    # Create RKNN object
    rknn = RKNN(verbose=True)

    # pre-process config
    print('--> Config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform='rv1126')
    print('done')

    # Load ONNX model
    print('--> Loading model')
    ret = rknn.load_onnx(model=ONNX_MODEL, outputs=["cls1", "reg1", "cls2", "reg2", "cls3", "reg3", "ps1", "ps2", "ps3"])
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    # ret = rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET, rknn_batch_size=1)
    ret = rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET, pre_compile=DEFAULT_PRE_compile)
    if ret != 0:
        print('Build model failed!')
        exit(ret)
    print('done')

    # Export RKNN model
    print('--> Export rknn model')
    ret = rknn.export_rknn(RKNN_MODEL)
    if ret != 0:
        print('Export rknn model failed!')
        exit(ret)
    print('done')

    # Init runtime environment
    print('--> Init runtime environment')
    ret = rknn.init_runtime()
    # ret = rknn.init_runtime(target='rk3566')
    if ret != 0:
        print('Init runtime environment failed!')
        exit(ret)
    print('done')

    # Inference
    print('--> Running model')
    outputs = rknn.inference(inputs=[img])
    rknn.release()
    print('done')

    return outputs
if __name__ == '__main__':
    print('This is main ...')
    GenerateMeshgrid()
    img_path = './bus.jpg'
    orig_img = cv2.imread(img_path)
    img_h, img_w = orig_img.shape[:2]
    origimg = cv2.resize(orig_img, (input_imgW, input_imgH), interpolation=cv2.INTER_LINEAR)
    origimg = cv2.cvtColor(origimg, cv2.COLOR_BGR2RGB)
    img = np.expand_dims(origimg, 0)

    outputs = export_rknn_inference(img)
    out = []
    for i in range(len(outputs)):
        out.append(outputs[i])
    predbox = postprocess(out, img_h, img_w)

    print('obj num is :', len(predbox))
    for i in range(len(predbox)):
        xmin = int(predbox[i].xmin)
        ymin = int(predbox[i].ymin)
        xmax = int(predbox[i].xmax)
        ymax = int(predbox[i].ymax)
        classId = predbox[i].classId
        score = predbox[i].score

        cv2.rectangle(orig_img, (xmin, ymin), (xmax, ymax), (128, 128, 0), 2)
        ptext = (xmin, ymin)
        title = CLASSES[classId] + "%.2f" % score
        cv2.putText(orig_img, title, ptext, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA)

        pose = predbox[i].pose
        # draw keypoints whose visibility score exceeds 0.5 ("kp" avoids shadowing the outer loop variable)
        for kp in range(keypoint_num):
            if pose[kp * 3] > 0.5:
                if kp < 5:
                    color = color_list[0]
                elif 5 <= kp < 12:
                    color = color_list[1]
                else:
                    color = color_list[2]
                cv2.circle(orig_img, (int(pose[kp * 3 + 1]), int(pose[kp * 3 + 2])), 2, color, 5)

        # draw skeleton lines (skeleton uses 1-based keypoint indices)
        for sk in skeleton:
            if pose[(sk[0] - 1) * 3] > 0.5:
                pos1 = (int(pose[(sk[0] - 1) * 3 + 1]), int(pose[(sk[0] - 1) * 3 + 2]))
                pos2 = (int(pose[(sk[1] - 1) * 3 + 1]), int(pose[(sk[1] - 1) * 3 + 2]))
                if (sk[0] - 1) < 5:
                    color = color_list[0]
                elif 5 <= sk[0] - 1 < 12:
                    color = color_list[1]
                else:
                    color = color_list[2]
                cv2.line(orig_img, pos1, pos2, color, thickness=2, lineType=cv2.LINE_AA)

    cv2.imwrite('./test_rknn_result.jpg', orig_img)
    # cv2.imshow("test", orig_img)
    # cv2.waitKey(0)
After running the script, the converted RKNN model is produced; the detection result on the test image is shown below: