Preface
As artificial intelligence continues to advance, research and applications in computer vision have become increasingly widespread. Keypoint detection, an important branch of the field, is widely used in gesture recognition, human-computer interaction, virtual reality, and related areas, and YOLOv8-pose is an efficient, fast keypoint-detection algorithm that has already seen broad adoption. This article describes how to convert a YOLOv8-pose .pt model to RKNN and deploy it on the RV1126 platform.
I. Training the .pt model file
Train with the official open-source YOLOv8-pose code to obtain the .pt model file; this article uses ultralytics version 8.2.0.
git clone https://github.com/ultralytics/ultralytics.git
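For reference, here is a minimal training sketch using the standard ultralytics API; the dataset YAML and hyperparameters below are placeholders, not the exact settings used for this article:

from ultralytics import YOLO

# start from the pretrained pose checkpoint and fine-tune on a pose dataset
model = YOLO("yolov8n-pose.pt")
model.train(data="coco8-pose.yaml", epochs=100, imgsz=640)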
II. Converting .pt to ONNX
1. Saving the original .pt model as a weights-only .pt model
(1) In ultralytics/engine/model.py, modify the _load function by adding the weight-saving code shown below:
def _load(self, weights: str, task=None) -> None:
    """
    Initializes a new model and infers the task type from the model head.

    Args:
        weights (str): model checkpoint to be loaded
        task (str | None): model task
    """
    if weights.lower().startswith(("https://", "http://", "rtsp://", "rtmp://", "tcp://")):
        weights = checks.check_file(weights)  # automatically download and return local filename
    weights = checks.check_model_file_from_stem(weights)  # add suffix, i.e. yolov8n -> yolov8n.pt

    if Path(weights).suffix == ".pt":
        self.model, self.ckpt = attempt_load_one_weight(weights)
        self.task = self.model.args["task"]
        self.overrides = self.model.args = self._reset_ckpt_args(self.model.args)
        self.ckpt_path = self.model.pt_path
    else:
        weights = checks.check_file(weights)  # runs in all cases, not redundant with above call
        self.model, self.ckpt = weights, None
        self.task = task or guess_model_task(weights)
        self.ckpt_path = weights
    self.overrides["model"] = weights
    self.overrides["task"] = self.task
    self.model_name = weights

    # added: save the fused, eval-mode model's weights (torch must be imported in this file)
    self.model.fuse()
    self.model.eval()
    torch.save(self.model.state_dict(), "./yolov8n-pose-data.pt")
    print("======================== save pt weights Finished! .... ")
(2) Create a new savept.py with the following code; running it tests the model and saves the weights at the same time:
from ultralytics import YOLO
import time

# Initialize a model
model = YOLO("./yolov8n-pose.pt")

# Run inference (loading the model above already triggers the weight-saving code added to _load)
results = model(task='pose', mode='predict', source='/home/av/newdisk/code/ultralytics_yolov8_pose_8_2_0/bus.jpg', line_width=3, show=True, save=False, device='cpu')
time.sleep(5)
This produces the weights-only model yolov8n-pose-data.pt.
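As an optional sanity check, the saved file should load back as a plain state dict of tensors; a minimal sketch:

import torch

sd = torch.load("./yolov8n-pose-data.pt", map_location="cpu")
print(len(sd), "weight tensors")  # number of entries in the state dict
print(next(iter(sd)))             # name of the first parameter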
2. Converting the weights-only .pt model to ONNX
(1) In ultralytics/engine/model.py, modify the _new function by adding the ONNX export code; the complete function is as follows:
def _new(self, cfg: str, task=None, model=None, verbose=False) -> None:
    """
    Initializes a new model and infers the task type from the model definitions.

    Args:
        cfg (str): model configuration file
        task (str | None): model task
        model (BaseModel): Customized model.
        verbose (bool): display model info on load
    """
    cfg_dict = yaml_model_load(cfg)
    self.cfg = cfg
    self.task = task or guess_model_task(cfg_dict)
    self.model = (model or self._smart_load("model"))(cfg_dict, verbose=verbose and RANK == -1)  # build model
    self.overrides["model"] = self.cfg
    self.overrides["task"] = self.task

    # Below added to allow export from YAMLs
    self.model.args = {**DEFAULT_CFG_DICT, **self.overrides}  # combine default and model args (prefer model args)
    self.model.task = self.task
    self.model_name = cfg

    # ----- added: load the saved weights and export ONNX -----
    self.model.fuse()
    self.model.eval()
    self.model.load_state_dict(torch.load("./yolov8n-pose-data.pt", map_location='cpu'), strict=False)
    print("=========== onnx =========== ")
    dummy_input = torch.randn(1, 3, 640, 640)
    input_names = ["data"]
    # Nine outputs: one box-regression map and one class map per detection scale, plus three keypoint maps.
    # (Per scale, the modified Detect head below actually emits the decoded box-distance map first and the
    # class logits second, so these names are positional labels; the postprocess script later indexes the
    # outputs by position, so the pipeline is consistent.)
    output_names = ["cls1", "reg1", "cls2", "reg2", "cls3", "reg3", "ps1", "ps2", "ps3"]
    torch.onnx.export(self.model, dummy_input, "./yolov8n-pose-data.onnx", verbose=False,
                      input_names=input_names, output_names=output_names, opset_version=11)
    print("======================== convert onnx Finished! .... ")
(2) Modify the Detect detection head and the Pose keypoint head in ultralytics/nn/modules/head.py. The complete modified code is below and can directly replace the original classes.
# --- modified for ONNX export ---
class Detect(nn.Module):
    """YOLOv8 Detect head for detection models."""

    dynamic = False  # force grid reconstruction
    export = False  # export mode
    shape = None
    anchors = torch.empty(0)  # init
    strides = torch.empty(0)  # init

    def __init__(self, nc=80, ch=()):  # detection layer
        super().__init__()
        self.nc = nc  # number of classes
        self.nl = len(ch)  # number of detection layers
        self.reg_max = 16  # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
        self.no = nc + self.reg_max * 4  # number of outputs per anchor
        self.stride = torch.zeros(self.nl)  # strides computed during build
        c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc)  # channels
        self.cv2 = nn.ModuleList(
            nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
        self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
        self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()

        # added for ONNX export: a fixed 1x1 conv with weights 0..15 that computes the DFL expectation
        self.conv1x1 = nn.Conv2d(16, 1, 1, bias=False).requires_grad_(False)
        x = torch.arange(16, dtype=torch.float)
        self.conv1x1.weight.data[:] = nn.Parameter(x.view(1, 16, 1, 1))

    def forward(self, x):
        """Concatenates and returns predicted bounding boxes and class probabilities."""
        shape = x[0].shape  # BCHW
        y = []
        for i in range(self.nl):
            t1 = self.cv2[i](x[i])  # box-distribution logits, (b, 64, h, w)
            t2 = self.cv3[i](x[i])  # class logits, (b, nc, h, w)
            y.append(self.conv1x1(t1.view(t1.shape[0], 4, 16, -1).transpose(2, 1).softmax(1)))
            # y.append(t2.sigmoid())
            y.append(t2)
        return y
        # The original inference path below is unreachable after the early return above (kept for reference).
        for i in range(self.nl):
            x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
        if self.training:
            return x
        elif self.dynamic or self.shape != shape:
            self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
            self.shape = shape

        x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
        if self.export and self.format in ('saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs'):  # avoid TF FlexSplitV ops
            box = x_cat[:, :self.reg_max * 4]
            cls = x_cat[:, self.reg_max * 4:]
        else:
            box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
        dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
        y = torch.cat((dbox, cls.sigmoid()), 1)  # official code
        # y = torch.cat((self.dfl(box), cls.sigmoid()), 1)  # used when exporting this article's ONNX
        return y if self.export else (y, x)

    def bias_init(self):
        """Initialize Detect() biases, WARNING: requires stride availability."""
        m = self  # self.model[-1]  # Detect() module
        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
        # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # nominal class frequency
        for a, b, s in zip(m.cv2, m.cv3, m.stride):  # from
            a[-1].bias.data[:] = 1.0  # box
            b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
# ----- modified for ONNX export -----
class Pose(Detect):
    """YOLOv8 Pose head for keypoints models."""

    def __init__(self, nc=80, kpt_shape=(17, 3), ch=()):
        """Initialize YOLO network with default parameters and Convolutional Layers."""
        super().__init__(nc, ch)
        self.kpt_shape = kpt_shape  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
        self.nk = kpt_shape[0] * kpt_shape[1]  # number of keypoints total
        self.detect = Detect.forward
        c4 = max(ch[0] // 4, self.nk)
        self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)

    def forward(self, x):
        """Perform forward pass through YOLO model and return predictions."""
        bs = x[0].shape[0]  # batch size
        # kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1)  # (bs, 17*3, h*w)
        ps = []
        for i in range(self.nl):
            ps.append(self.cv4[i](x[i]))  # raw keypoint maps, (b, 17*3, h, w) per scale
        x = self.detect(self, x)
        return x, ps
        # The original inference path below is unreachable after the early return above (kept for reference).
        if self.training:
            return x, kpt
        pred_kpt = self.kpts_decode(bs, kpt)
        return torch.cat([x, pred_kpt], 1) if self.export else (torch.cat([x[0], pred_kpt], 1), (x[1], kpt))

    def kpts_decode(self, bs, kpts):
        """Decodes keypoints."""
        ndim = self.kpt_shape[1]
        if self.export:  # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug
            y = kpts.view(bs, *self.kpt_shape, -1)
            a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
            if ndim == 3:
                a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2)
            return a.view(bs, self.nk, -1)
        else:
            y = kpts.clone()
            if ndim == 3:
                y[:, 2::3].sigmoid_()  # inplace sigmoid
            y[:, 0::ndim] = (y[:, 0::ndim] * 2.0 + (self.anchors[0] - 0.5)) * self.strides
            y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (self.anchors[1] - 0.5)) * self.strides
        return y
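A note on the conv1x1 layer added in Detect.__init__ above: it re-implements the DFL expectation (a softmax over the 16 distribution bins, weighted by the bin indices 0..15) as a fixed 1x1 convolution, a form the RKNN toolchain tends to convert and quantize more cleanly than the original DFL module. A standalone sketch (with hypothetical shapes) verifying the equivalence:

import torch
import torch.nn as nn

# fixed 1x1 conv whose weights are the bin indices 0..15, as in Detect.__init__
conv = nn.Conv2d(16, 1, 1, bias=False)
conv.weight.data[:] = torch.arange(16, dtype=torch.float).view(1, 16, 1, 1)

t1 = torch.randn(2, 64, 20, 20)                       # dummy box-distribution logits (b, 4*16, h, w)
p = t1.view(2, 4, 16, -1).transpose(2, 1).softmax(1)  # (b, 16, 4, h*w), softmax over the 16 bins
via_conv = conv(p)                                    # (b, 1, 4, h*w): expectation via the conv
via_sum = (p * torch.arange(16, dtype=torch.float).view(1, 16, 1, 1)).sum(1, keepdim=True)
print(torch.allclose(via_conv, via_sum, atol=1e-5))   # True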
(3) Create a new export_onnx.py with the following code; running it generates the converted ONNX model. (Building the model from the YAML invokes the modified _new, which loads the saved weights and runs the export.)
from ultralytics import YOLO
model = YOLO('./ultralytics/cfg/models/v8/yolov8n-pose.yaml')
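Optionally, a quick check with onnxruntime (assuming it is installed) confirms the export has the expected input and nine outputs:

import onnxruntime as ort

sess = ort.InferenceSession("./yolov8n-pose-data.onnx")
print([i.name for i in sess.get_inputs()])   # expected: ['data']
print([o.name for o in sess.get_outputs()])  # expected: cls1, reg1, ..., ps1, ps2, ps3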
3. Simplifying the converted ONNX
So that the model converts smoothly to an RV1126 RKNN model, simplify the ONNX file with onnxsim.
Create a new simonnx.py with the following code; running it produces the simplified ONNX model:
import onnxsim
import onnx
print('Simplifying the ONNX model')
model_onnx = onnx.load("./yolov8n-pose-data.onnx")
model_onnx, _ = onnxsim.simplify(model_onnx)
onnx.save(model_onnx, "./yolov8n-pose-data-smi.onnx")
This yields the simplified model yolov8n-pose-data-smi.onnx.
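An optional numerical check (assuming onnxruntime and numpy are installed) can confirm the simplified model still matches the original:

import numpy as np
import onnxruntime as ort

x = np.random.rand(1, 3, 640, 640).astype(np.float32)
ref = ort.InferenceSession("./yolov8n-pose-data.onnx").run(None, {"data": x})
simp = ort.InferenceSession("./yolov8n-pose-data-smi.onnx").run(None, {"data": x})
print(all(np.allclose(a, b, atol=1e-4) for a, b in zip(ref, simp)))  # expect True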
III. Converting ONNX to RKNN
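Quantization during the build step reads calibration images from dataset.txt, a plain text file with one image path per line. A minimal sketch to generate it, assuming the calibration images live in ./images:

import glob

# one calibration image path per line; quantization reads these during rknn.build()
with open("dataset.txt", "w") as f:
    for path in sorted(glob.glob("./images/*.jpg")):
        f.write(path + "\n")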
Use the rknn_toolkit package to build the conversion program: create a new onnx2rknn.py and add the following code:
import os
import urllib
import traceback
import time
import sys
import numpy as np
import cv2
from rknn.api import RKNN
from math import exp
ONNX_MODEL = './yolov8n-pose-data-smi.onnx'
RKNN_MODEL = './yolov8n_pose_pre.rknn'
DATASET = './dataset.txt'
QUANTIZE_ON = True
DEFAULT_PRE_compile = True  # whether to pre-compile (note: pre-compiled models generally run only on the device; set False if testing on the PC simulator via init_runtime())
color_list = [(0, 0, 255), (0, 255, 0), (255, 0, 0)]
skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9], [8, 10], [9, 11],
[2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]
CLASSES = ['person']
meshgrid = []
class_num = len(CLASSES)
headNum = 3
keypoint_num = 17
strides = [8, 16, 32]
mapSize = [[80, 80], [40, 40], [20, 20]]
nmsThresh = 0.55
objectThresh = 0.5
input_imgH = 640
input_imgW = 640
class DetectBox:
    def __init__(self, classId, score, xmin, ymin, xmax, ymax, pose):
        self.classId = classId
        self.score = score
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        self.pose = pose


def GenerateMeshgrid():
    # flattened (x + 0.5, y + 0.5) grid-cell centers for all three feature-map scales
    for index in range(headNum):
        for i in range(mapSize[index][0]):
            for j in range(mapSize[index][1]):
                meshgrid.append(j + 0.5)
                meshgrid.append(i + 0.5)
def IOU(xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2):
    xmin = max(xmin1, xmin2)
    ymin = max(ymin1, ymin2)
    xmax = min(xmax1, xmax2)
    ymax = min(ymax1, ymax2)

    innerWidth = xmax - xmin
    innerHeight = ymax - ymin
    innerWidth = innerWidth if innerWidth > 0 else 0
    innerHeight = innerHeight if innerHeight > 0 else 0

    innerArea = innerWidth * innerHeight
    area1 = (xmax1 - xmin1) * (ymax1 - ymin1)
    area2 = (xmax2 - xmin2) * (ymax2 - ymin2)
    total = area1 + area2 - innerArea
    return innerArea / total
def NMS(detectResult):
    predBoxs = []
    sort_detectboxs = sorted(detectResult, key=lambda x: x.score, reverse=True)
    for i in range(len(sort_detectboxs)):
        xmin1 = sort_detectboxs[i].xmin
        ymin1 = sort_detectboxs[i].ymin
        xmax1 = sort_detectboxs[i].xmax
        ymax1 = sort_detectboxs[i].ymax
        classId = sort_detectboxs[i].classId
        if sort_detectboxs[i].classId != -1:
            predBoxs.append(sort_detectboxs[i])
            for j in range(i + 1, len(sort_detectboxs), 1):
                if classId == sort_detectboxs[j].classId:
                    xmin2 = sort_detectboxs[j].xmin
                    ymin2 = sort_detectboxs[j].ymin
                    xmax2 = sort_detectboxs[j].xmax
                    ymax2 = sort_detectboxs[j].ymax
                    iou = IOU(xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2)
                    if iou > nmsThresh:
                        sort_detectboxs[j].classId = -1
    return predBoxs


def sigmoid(x):
    return 1 / (1 + exp(-x))
def postprocess(out, img_h, img_w):
    print('postprocess ... ')
    detectResult = []
    output = []
    for i in range(len(out)):
        output.append(out[i].reshape((-1)))
    scale_h = img_h / input_imgH
    scale_w = img_w / input_imgW
    gridIndex = -2
    for index in range(headNum):
        reg = output[index * 2 + 0]          # decoded box distances (l, t, r, b) for this scale
        cls = output[index * 2 + 1]          # raw class logits for this scale
        pose = output[headNum * 2 + index]   # raw keypoint map for this scale
        for h in range(mapSize[index][0]):
            for w in range(mapSize[index][1]):
                gridIndex += 2
                for cl in range(class_num):
                    cls_val = sigmoid(cls[cl * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w])
                    if cls_val > objectThresh:
                        # box corners = grid-cell center -/+ predicted distances, scaled by the stride
                        x1 = (meshgrid[gridIndex + 0] - reg[0 * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]) * strides[index]
                        y1 = (meshgrid[gridIndex + 1] - reg[1 * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]) * strides[index]
                        x2 = (meshgrid[gridIndex + 0] + reg[2 * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]) * strides[index]
                        y2 = (meshgrid[gridIndex + 1] + reg[3 * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]) * strides[index]
                        xmin = x1 * scale_w
                        ymin = y1 * scale_h
                        xmax = x2 * scale_w
                        ymax = y2 * scale_h
                        xmin = xmin if xmin > 0 else 0
                        ymin = ymin if ymin > 0 else 0
                        xmax = xmax if xmax < img_w else img_w
                        ymax = ymax if ymax < img_h else img_h
                        poseResult = []
                        for kc in range(keypoint_num):
                            px = pose[(kc * 3 + 0) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]
                            py = pose[(kc * 3 + 1) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]
                            vs = sigmoid(pose[(kc * 3 + 2) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w])
                            # keypoint decode matches kpts_decode: (raw * 2 + (center - 0.5)) * stride
                            x = (px * 2.0 + (meshgrid[gridIndex + 0] - 0.5)) * strides[index] * scale_w
                            y = (py * 2.0 + (meshgrid[gridIndex + 1] - 0.5)) * strides[index] * scale_h
                            poseResult.append(vs)
                            poseResult.append(x)
                            poseResult.append(y)
                        # print(poseResult)
                        box = DetectBox(cl, cls_val, xmin, ymin, xmax, ymax, poseResult)
                        detectResult.append(box)
    # NMS
    print('detectResult:', len(detectResult))
    predBox = NMS(detectResult)
    return predBox
def export_rknn_inference(img):
    # Create RKNN object
    rknn = RKNN(verbose=True)

    # pre-process config
    print('--> Config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform='rv1126')
    print('done')

    # Load ONNX model
    print('--> Loading model')
    ret = rknn.load_onnx(model=ONNX_MODEL, outputs=["cls1", "reg1", "cls2", "reg2", "cls3", "reg3", "ps1", "ps2", "ps3"])
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    # ret = rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET, rknn_batch_size=1)
    ret = rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET, pre_compile=DEFAULT_PRE_compile)
    if ret != 0:
        print('Build model failed!')
        exit(ret)
    print('done')

    # Export RKNN model
    print('--> Export rknn model')
    ret = rknn.export_rknn(RKNN_MODEL)
    if ret != 0:
        print('Export rknn model failed!')
        exit(ret)
    print('done')

    # Init runtime environment
    print('--> Init runtime environment')
    ret = rknn.init_runtime()
    # ret = rknn.init_runtime(target='rk3566')
    if ret != 0:
        print('Init runtime environment failed!')
        exit(ret)
    print('done')

    # Inference
    print('--> Running model')
    outputs = rknn.inference(inputs=[img])
    rknn.release()
    print('done')

    return outputs
if __name__ == '__main__':
    print('This is main ...')
    GenerateMeshgrid()
    img_path = './bus.jpg'
    orig_img = cv2.imread(img_path)
    img_h, img_w = orig_img.shape[:2]
    origimg = cv2.resize(orig_img, (input_imgW, input_imgH), interpolation=cv2.INTER_LINEAR)
    origimg = cv2.cvtColor(origimg, cv2.COLOR_BGR2RGB)
    img = np.expand_dims(origimg, 0)

    outputs = export_rknn_inference(img)
    out = []
    for i in range(len(outputs)):
        out.append(outputs[i])
    predbox = postprocess(out, img_h, img_w)

    print('obj num is :', len(predbox))
    for i in range(len(predbox)):
        xmin = int(predbox[i].xmin)
        ymin = int(predbox[i].ymin)
        xmax = int(predbox[i].xmax)
        ymax = int(predbox[i].ymax)
        classId = predbox[i].classId
        score = predbox[i].score

        cv2.rectangle(orig_img, (xmin, ymin), (xmax, ymax), (128, 128, 0), 2)
        ptext = (xmin, ymin)
        title = CLASSES[classId] + "%.2f" % score
        cv2.putText(orig_img, title, ptext, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA)

        pose = predbox[i].pose
        # draw keypoints whose visibility score exceeds 0.5 ("kp" avoids shadowing the outer loop variable)
        for kp in range(keypoint_num):
            if pose[kp * 3] > 0.5:
                if kp < 5:
                    color = color_list[0]
                elif 5 <= kp < 12:
                    color = color_list[1]
                else:
                    color = color_list[2]
                cv2.circle(orig_img, (int(pose[kp * 3 + 1]), int(pose[kp * 3 + 2])), 2, color, 5)

        # draw skeleton lines (skeleton uses 1-based keypoint indices)
        for sk in skeleton:
            if pose[(sk[0] - 1) * 3] > 0.5:
                pos1 = (int(pose[(sk[0] - 1) * 3 + 1]), int(pose[(sk[0] - 1) * 3 + 2]))
                pos2 = (int(pose[(sk[1] - 1) * 3 + 1]), int(pose[(sk[1] - 1) * 3 + 2]))
                if (sk[0] - 1) < 5:
                    color = color_list[0]
                elif 5 <= sk[0] - 1 < 12:
                    color = color_list[1]
                else:
                    color = color_list[2]
                cv2.line(orig_img, pos1, pos2, color, thickness=2, lineType=cv2.LINE_AA)

    cv2.imwrite('./test_rknn_result.jpg', orig_img)
    # cv2.imshow("test", orig_img)
    # cv2.waitKey(0)
After running the script, the converted RKNN model is produced; the detection result on the test image is shown below: