1. Previous articles covered how to build a custom YOLO dataset, how to train on that custom data, and how to call a YOLO-exported ONNX model with onnxruntime. This article goes one step further: using onnxruntime to call a YOLO-exported ONNX model for image segmentation.
Video walkthrough: https://www.bilibili.com/video/BV1Y1421Z7LS/?spm_id_from=333.999.0.0&vd_source=a858ab6f3e2b18f287232b03ba9022e2
WeChat official account post:
https://mp.weixin.qq.com/s?__biz=MzkzNzYyNTE0Ng==&mid=2247483776&idx=1&sn=ad1b4a1e7715257acd347965dc76ee09&chksm=c28dd2d5f5fa5bc39749fb8770ee9480dcdc18313df3516f640770fcf58493cead77d8998dc6&token=909058031&lang=zh_CN#rd
2. Straight to the core code:
'''
Class name: ImageDealWith
Description: image processing class
Features:
    1. Load images
Note:
    _image_deal_with = ImageDealWith() is created at the bottom of this file, so the instance can be used directly as a module-level singleton
Copyright:
    Author: 照彩云归
    Contact:
Version info:
    Date: 2023-12-25
    Version: v1.0.0
    Description: base class
'''
import math
import cv2
import numpy as np
import onnxruntime as ort
from PySide6.QtCore import QObject
from PySide6.QtGui import QImage, QPixmap, QPainter, QImageReader
from ultralytics import YOLO
class ImageDealWith(QObject):
    '''Current image'''
    CurrentImage = None
    '''YOLO model'''
    Model = None
    '''ONNX inference session'''
    OnnxModel = None
    ### Detection box coordinates
    BoxX = None
    BoxY = None
    ### Detection box width and height
    BoxWidth = None
    BoxHeight = None
    ### Detection box class
    BoxClass = None
    ### Confidence threshold for ONNX inference
    ConfidenceThres = None
    ### Pixel-distance threshold used to filter overlapping boxes
    Iou = None
    ### Polygon x/y points of each mask
    PolygonsXY = None
    ### YOLO class names
    Yolo_classes = None
    ### Size of the current image
    Img_width = 0
    Img_height = 0
    # Initialization
    def __init__(self, parent=None):
        super(ImageDealWith, self).__init__(parent)
        self.CurrentImage = QImage()
        self.Model = YOLO('yolov8m-seg.pt')
        self.OnnxModel = ort.InferenceSession('yolov8m-seg.onnx')
        ### The lists only work if they are initialized here, per instance
        self.BoxY = []
        self.BoxX = []
        self.BoxWidth = []
        self.BoxHeight = []
        self.BoxClass = []
        self.ConfidenceThres = 0.5
        self.Iou = 20
        self.PolygonsXY = []
        self.Yolo_classes = [
            "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
            "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
            "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie",
            "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
            "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon",
            "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut",
            "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
            "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book",
            "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
        ]
        ### Image size
        self.Img_width = 0
        self.Img_height = 0
    '''
    Load an image from disk
    '''
    def LoadImage(self, image_path_):
        read_dir_ = QImageReader(image_path_)
        self.CurrentImage = read_dir_.read()
        self.Img_height = self.CurrentImage.height()
        self.Img_width = self.CurrentImage.width()
    '''
    Analyze the image with the ultralytics YOLO model
    '''
    def ImageAnalyze(self):
        self.BoxY.clear()
        self.BoxX.clear()
        self.BoxWidth.clear()
        self.BoxHeight.clear()
        self.BoxClass.clear()
        self.PolygonsXY.clear()
        if (self.CurrentImage.width() > 0) and (self.CurrentImage.height() > 0):
            image_mat_ = self.CoverQImageToMat()
            results_ = self.Model(image_mat_)
            for result_ in results_:
                boxes_ = result_.boxes
                masks = result_.masks
                # Collect the object bounding boxes
                for box_ in boxes_:
                    x_, y_, w_, h_ = box_.xywh[0]
                    c = box_.cls
                    cls_ = self.Model.names[int(c)]
                    self.BoxX.append(float(x_))
                    self.BoxY.append(float(y_))
                    self.BoxWidth.append(float(w_))
                    self.BoxHeight.append(float(h_))
                    self.BoxClass.append(cls_)
                ### Collect the object masks (masks is None when nothing was detected)
                if masks is not None:
                    for mask in masks:
                        xys_ = mask.xy[0]
                        self.PolygonsXY.append(xys_)
    ### Convert a QImage to an OpenCV Mat (numpy array)
    def CoverQImageToMat(self):
        ptr_ = self.CurrentImage.constBits()
        ### QImage stores each pixel in 4 bytes; even when the alpha channel is unused it is padded with 0xff
        image_mat_ = np.array(ptr_).reshape(self.CurrentImage.height(), self.CurrentImage.width(), 4)
        image_mat_rgb_ = cv2.cvtColor(image_mat_, cv2.COLOR_RGBA2RGB)
        return image_mat_rgb_
    ### Convert an OpenCV Mat back to a QImage
    def CoverMatToQImage(self, image_mat: np.ndarray):
        qimage_ = QImage(image_mat.data,
                         image_mat.shape[1],
                         image_mat.shape[0],
                         image_mat.shape[1] * 3,
                         QImage.Format.Format_RGB888)
        return qimage_
    ### Clear all results
    def ClearResult(self):
        self.PolygonsXY.clear()
        self.BoxY.clear()
        self.BoxX.clear()
        self.BoxWidth.clear()
        self.BoxHeight.clear()
        self.BoxClass.clear()
    ### Export the model to ONNX
    def ExportOnnx(self):
        self.Model.export(format='onnx')
    ### Run inference through ONNX Runtime
    def UseOnnxInference(self):
        if (self.CurrentImage.width() > 0) and (self.CurrentImage.height() > 0):
            self.PolygonsXY.clear()
            self.BoxY.clear()
            self.BoxX.clear()
            self.BoxWidth.clear()
            self.BoxHeight.clear()
            self.BoxClass.clear()
            # Inspect the input node
            inputs = self.OnnxModel.get_inputs()
            input_onnx_ = inputs[0]
            print("Name:", input_onnx_.name)    ## input node name
            print("Type:", input_onnx_.type)    ## data type
            print("Shape:", input_onnx_.shape)  ## tensor shape
            image_mat_ = self.CoverQImageToMat()  ### convert the image
            print("ImageMatShape:", image_mat_.shape)
            ## Remember the original image size
            self.Img_width = image_mat_.shape[1]
            self.Img_height = image_mat_.shape[0]
            target_image_height_ = 640
            target_image_width_ = 640
            scale_percentage_x_ = target_image_width_ / image_mat_.shape[1]
            scale_percentage_y_ = target_image_height_ / image_mat_.shape[0]
            image_mat_ = cv2.resize(image_mat_, None, fx=scale_percentage_x_, fy=scale_percentage_y_)  # scale to the fixed 640x640 input size
            image_np_ = np.array(image_mat_)  ### convert the image to a numpy array
            print("ImageNpShape:", image_np_.shape)
            image_np_ = image_np_.transpose(2, 0, 1)  ## move the channel dimension to the front
            print("ImageNpShape:", image_np_.shape)
            image_np_ = image_np_.reshape(1, 3, 640, 640)  ## add a batch dimension
            print(image_np_[0, 0, 0, 0])  ## sample pixel before normalization
            print("ImageNpShape:", image_np_.shape)
            image_np_ = image_np_.astype(np.float32)
            image_np_ = image_np_ / 255.0  ## normalize to [0, 1]
            print(image_np_[0, 0, 0, 0])  ## the same pixel after normalization
            ### Inspect the output nodes
            outputs = self.OnnxModel.get_outputs()
            print(len(outputs))
            ### First output node: the detected features
            output_onnx1_ = outputs[0]
            print("Name:", output_onnx1_.name)
            print("Type:", output_onnx1_.type)
            print("Shape:", output_onnx1_.shape)
            ### Second output node: the mask prototypes
            output_onnx2_ = outputs[1]
            print("Name:", output_onnx2_.name)
            print("Type:", output_onnx2_.type)
            print("Shape:", output_onnx2_.shape)
            ### Run the inference
            outputs = self.OnnxModel.run(None, {"images": image_np_})
            ### First output
            output0 = outputs[0]
            ### Second output
            output1 = outputs[1]
            print("Output0:", output0.shape, "Output1:", output1.shape)
            ### Transpose the first output: (1, 116, 8400) -> (8400, 116)
            output0 = output0[0].transpose()
            output1 = output1[0]
            print("Output0:", output0.shape, "Output1:", output1.shape)
            ### Split into boxes (coordinates + class probabilities) and mask weights
            boxes = output0[:, 0:84]
            masks = output0[:, 84:]
            print("Boxes:", boxes.shape, "Masks:", masks.shape)
            ### Flatten the 32 prototype masks: (32, 160, 160) -> (32, 25600)
            output1 = output1.reshape(32, 160 * 160)
            print("Masks:", masks.shape, "Output1:", output1.shape)
            ### Multiply the mask weights with the prototype masks
            masks = np.dot(masks, output1)
            print("Masks:", masks.shape)
            """
            Join boxes and masks into one matrix; each row is:
            0-4      - x_center, y_center, width and height of the bounding box
            4-84     - object class probabilities for all 80 classes this YOLOv8 model can detect
            84-25684 - pixels of the segmentation mask as a single row; the mask is really a 160x160 matrix that has been flattened
            """
            boxes = np.hstack((boxes, masks))
            print("Boxes:", boxes.shape)
            objects_ = []
            ### Keep only the candidates above the confidence threshold
            for box_ in boxes:
                prob_ = box_[4:84].max()
                if prob_ > self.ConfidenceThres:
                    xc, yc, w, h = box_[:4]
                    class_id = box_[4:84].argmax()
                    ### Scale the box back to the original image size
                    x1 = (xc - w / 2) / target_image_width_ * self.Img_width
                    y1 = (yc - h / 2) / target_image_height_ * self.Img_height
                    x2 = (xc + w / 2) / target_image_width_ * self.Img_width
                    y2 = (yc + h / 2) / target_image_height_ * self.Img_height
                    label = self.Yolo_classes[class_id]
                    ### Build the mask and its polygon outline
                    mask = self.get_mask(box_[84:25684], (x1, y1, x2, y2))
                    polygons = self.get_polygon(mask, x1, y1)
                    objects_.append([x1, y1, x2, y2, label, prob_, mask, polygons])
            ### Filter overlapping boxes (a corner-distance test, not a true IoU-based NMS)
            length_box_ = len(objects_)
            results_object_ = []
            while length_box_ > 0:
                object_temp_ = objects_[0].copy()
                objects_temp_ = []
                ## Keep the boxes that do not overlap with the current one
                for object_ in objects_:
                    box1_x1_, box1_y1_, box1_x2_, box1_y2_ = object_temp_[:4]
                    box2_x1_, box2_y1_, box2_x2_, box2_y2_ = object_[:4]
                    distance1_ = math.sqrt((box1_x1_ - box2_x1_) ** 2 + (box1_y1_ - box2_y1_) ** 2)
                    distance2_ = math.sqrt((box1_x2_ - box2_x2_) ** 2 + (box1_y2_ - box2_y2_) ** 2)
                    if not ((distance1_ < self.Iou) and (distance2_ < self.Iou)):
                        objects_temp_.append(object_)
                objects_.clear()  ### drop the old list
                for object_ in objects_temp_:
                    objects_.append(object_)
                length_box_ = len(objects_)
                results_object_.append(object_temp_)
            for object_ in results_object_:
                ### The label string was stored at index 4 when the object was appended above
                box1_x1_, box1_y1_, box1_x2_, box1_y2_, label, prob_, mask_, polygons_ = object_
                width = box1_x2_ - box1_x1_
                height = box1_y2_ - box1_y1_
                center_x_ = (box1_x2_ + box1_x1_) / 2
                center_y_ = (box1_y2_ + box1_y1_) / 2
                self.BoxX.append(float(center_x_))
                self.BoxY.append(float(center_y_))
                self.BoxWidth.append(float(width))
                self.BoxHeight.append(float(height))
                self.BoxClass.append(label)
                for polygon_ in polygons_:
                    self.PolygonsXY.append(polygon_)
    ### Build the binary mask for one candidate
    def get_mask(self, row, box):
        mask = row.reshape(160, 160)
        ### Print the raw mask data
        print("mask data:", mask)
        ### Convert the raw mask values into probabilities
        mask = self.sigmoid(mask)
        ### Print the mask probabilities
        print("mask probability:", mask)
        ### Probability > 0.5 is treated as foreground, anything else as background
        mask = (mask > 0.5).astype('uint8') * 255
        ### Unpack the box boundary
        x1, y1, x2, y2 = box
        ### Map the box into 160x160 mask coordinates
        mask_x1 = round(x1 / self.Img_width * 160)
        mask_y1 = round(y1 / self.Img_height * 160)
        mask_x2 = round(x2 / self.Img_width * 160)
        mask_y2 = round(y2 / self.Img_height * 160)
        ### Crop the mask down to the object
        mask = mask[mask_y1:mask_y2, mask_x1:mask_x2]
        ### Resize the crop back to the box size in the original image
        scale_percentage_x_ = round(x2 - x1) / mask.shape[1]
        scale_percentage_y_ = round(y2 - y1) / mask.shape[0]
        mask = cv2.resize(mask, None, fx=scale_percentage_x_, fy=scale_percentage_y_)
        return mask
    ### Extract the polygons of the object in a mask
    def get_polygon(self, mask, x1_, y1_):
        ### cv2.findContours returns (contours, hierarchy) in OpenCV 4.x
        contours = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        polygons_ = []
        for contour1_ in contours[0]:
            polygon_ = []
            for contour2_ in contour1_:
                ### Shift each contour point by the box origin to get image coordinates
                polygon_.append([contour2_[0][0] + x1_, contour2_[0][1] + y1_])
            polygons_.append(polygon_)
        return polygons_
    ### Sigmoid activation
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    ### Release resources
    def ReleaseSource(self):
        self.ClearResult()

_image_deal_with = ImageDealWith()
The code above builds a single image-processing class; the module-level instance _image_deal_with created at the bottom can be used directly by other modules.
The core function for ONNX inference is UseOnnxInference().
ONNX inference for segmentation is almost identical to ONNX inference for detection. The input node has the same format in both cases; the only difference is in the final output. A segmentation model produces two output matrices, output0 and output1.
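For reference, here is a minimal sketch (assuming the yolov8m-seg.onnx file exported above) that inspects the input node and the two output nodes with onnxruntime; the printed shapes are the ones the rest of this article relies on:

import onnxruntime as ort

session = ort.InferenceSession('yolov8m-seg.onnx')  # the model exported by ExportOnnx() above
inp = session.get_inputs()[0]
print(inp.name, inp.shape)   # typically: images [1, 3, 640, 640]
for out in session.get_outputs():
    print(out.name, out.shape)
# typically: output0 [1, 116, 8400] and output1 [1, 32, 160, 160]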
output0 holds the box of each segmented object (x_center, y_center, width, height) together with the probability of each class the object might belong to.
output1 holds the mask information of the segmented objects. Its raw values are not directly readable: they must be passed through the sigmoid activation to become probabilities, and then binarized. Here the binarization treats a probability above 0.5 as foreground (gray value 255) and anything else as background (gray value 0).
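As a minimal sketch of just this step (raw_mask is stand-in data; in the real code the values come from the merged matrix described below):

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

raw_mask = np.random.randn(160, 160)          # stand-in for one 160x160 mask
prob = sigmoid(raw_mask)                      # raw values -> probabilities in (0, 1)
binary = (prob > 0.5).astype(np.uint8) * 255  # >0.5 becomes 255 (foreground), else 0 (background)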
UseOnnxInference() processes these outputs by merging output0 and output1 into one boxes matrix, then filters the candidates in one place: first by object probability, then by overlap.
How are output0 and output1 merged? output0 always has 8400 rows, i.e. this YOLO model proposes at most 8400 candidate objects. In each row, the first 4 elements are the box (x_center, y_center, width, height); since the model recognizes 80 classes, elements 4 to 84 are the class probabilities; the remaining 32 elements are mask weights. That 32 matches the first dimension of output1 (32 prototype masks of 160x160), so multiplying the weights with the flattened prototypes produces one mask per candidate, and the two outputs can be merged into a single boxes matrix.
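In numpy terms, the merge looks like this (zero arrays stand in for the two session outputs):

import numpy as np

output0 = np.zeros((1, 116, 8400), dtype=np.float32)     # stand-in for outputs[0]
output1 = np.zeros((1, 32, 160, 160), dtype=np.float32)  # stand-in for outputs[1]

rows = output0[0].transpose()             # (8400, 116): one row per candidate object
boxes = rows[:, 0:84]                     # 4 box values + 80 class probabilities
weights = rows[:, 84:]                    # (8400, 32) mask weights
prototypes = output1[0].reshape(32, -1)   # (32, 25600) flattened 160x160 prototype masks
masks = weights @ prototypes              # (8400, 25600): one flattened mask per candidate
combined = np.hstack((boxes, masks))      # (8400, 25684), the "boxes" matrix used above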
After that, processing the boxes data is straightforward.
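For example, the overlap filter from UseOnnxInference() can be written on its own like this (a sketch of the same corner-distance test with the author's 20-pixel Iou threshold; note it is not a true IoU-based NMS):

import math

# candidates: list of [x1, y1, x2, y2, label, prob, mask, polygons] rows built above
def filter_overlaps(candidates, dist_thres=20):
    results = []
    while candidates:
        current = candidates[0].copy()
        # Drop every candidate whose two corners are both within dist_thres of current's
        candidates = [
            c for c in candidates
            if not (math.dist(c[0:2], current[0:2]) < dist_thres
                    and math.dist(c[2:4], current[2:4]) < dist_thres)
        ]
        results.append(current)
    return results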
get_mask(self, row, box) turns the mask data of one candidate into a black-and-white mask image.
get_polygon(self, mask, x1_, y1_) extracts the edges of the object in a mask as polygons.
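A minimal standalone sketch of that contour extraction (the circle is stand-in foreground data):

import cv2
import numpy as np

mask = np.zeros((120, 80), dtype=np.uint8)
cv2.circle(mask, (40, 60), 30, 255, -1)   # stand-in foreground object

contours, _ = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)  # OpenCV 4.x API
x1_, y1_ = 200, 100                       # box origin, as passed in by UseOnnxInference()
# Each contour point has shape (1, 2); shift by the box origin to get image coordinates
polygon = [[pt[0][0] + x1_, pt[0][1] + y1_] for pt in contours[0]]
print(len(polygon), "points")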
3. Code repository
https://gitee.com/wenyuanmo/py-qt-load-yolo-model-onnx-segment-thing/tree/master