Python+Yolov8+ONNX实时缺陷目标检测

最新推荐文章于 2024-06-30 10:14:22 发布

放氮气的蜗牛

最新推荐文章于 2024-06-30 10:14:22 发布

阅读量1.5k

点赞数 2

分类专栏：深度学习文章标签： python YOLO 目标检测 ONNX

本文链接：https://blog.csdn.net/qq_53762188/article/details/130935611

版权

深度学习专栏收录该内容

3 篇文章 4 订阅

订阅专栏

该文介绍了使用YOLOv8模型结合ONNX进行图像缺陷检测的过程，包括模型转换、输入图像尺寸调整、图像归一化、推理、结果筛选、像素还原、重叠区域处理以及实时摄像头图像的处理。通过这些步骤，实现了对图片中缺陷的高效定位和标记，并优化了程序逻辑，降低了运行时间，增加了实时检测功能。

摘要由CSDN通过智能技术生成

本文是上一篇《Windows10+Python+Yolov8+ONNX图片缺陷识别，并在原图中标记缺陷》的改进版；如果已有onnx模型，则无需配置环境，也无需训练。

优化了程序逻辑，降低了程序运行时间，增加了实时检测功能

1、模型转换

通过训练得到的模型是pt文件，我们需要转换为onnx文件

# Convert the trained YOLO weights to ONNX.
from ultralytics import YOLO
 
# Load the trained model from its .pt weights file.
model = YOLO("models\\best.pt")
 
# Export the loaded model in ONNX format.
# NOTE(review): the later steps load "models\\best.onnx"; presumably the export
# writes that file next to the .pt file — confirm.
model.export(format="onnx")

2、查看模型结构

通过以下网站来查看onnx模型结构

best.onnx (netron.app)

可以得到，输入图片的尺寸要求为3*640*640，输出结果为float32的1*n*8400数组，去掉批次维后即n*8400二维数组，其中n = 4 + 数据集缺陷种类的数量（前4行是检测框的中心x坐标、y坐标、宽度、高度，其余每行对应一种缺陷的置信系数；本文的模型有5种缺陷，所以n=9）

3、修改输入图片的尺寸

为防止图片畸变，需要将图片按原比例缩放，使较长的一边为640，再居中放置到640*640的单色背景上

import onnxruntime
import numpy as np
import tkinter
from tkinter import filedialog
import random
import cv2

# Demo: pick an image, letterbox it onto a 640x640 canvas and display it.
# Open a file-selection dialog so the user can choose the image to open.
filepath = tkinter.filedialog.askopenfilename()
# Only continue when the dialog returned a non-empty path.
if filepath != '':
    # Read the image from disk.
    # NOTE(review): the result is used without checking that the read succeeded.
    image = cv2.imread(filepath)
    # Height and width of the image.
    h, w = image.shape[:2]
    # Convert the BGR image to RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Resize so that the longer side becomes 640 and the aspect ratio is kept;
    # cv2.resize takes the target size as (width, height).
    if h > w:
        img = cv2.resize(image, (int(w / h * 640) , 640))
    else:
        img = cv2.resize(image, (640 , int(h / w * 640)))

    # Create a 640x640 single-colour background image.
    background = np.zeros((640, 640, 3), np.uint8)
    background[:] = (255, 0, 0)  
    # Paste the resized image in the centre of the background.
    x_offset = (640 - img.shape[1]) // 2
    y_offset = (640 - img.shape[0]) // 2
    background[y_offset:y_offset+img.shape[0], x_offset:x_offset+img.shape[1]] = img
    
    # Show the result until a key is pressed, then close the window.
    cv2.imshow('Result', background)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

4、图像数据归一化

为了方便深度学习模型对图片数据进行推理，需要对读入图片进行归一化处理

# Excerpt of the normalisation step. In the complete listing `image` is the
# parameter of nchw_image() and receives the 640x640 output of resize_image().
# Convert the pixel values to float32 and scale them to the range 0~1.
img = image.astype(np.float32) / 255.0   
    
# Reorder the axes from HWC to CHW.
img = np.transpose(img, (2, 0, 1))
# Add a leading batch axis of size 1: CHW -> NCHW.
img = np.expand_dims(img, axis=0)

5、模型推理

将修改好的图像数据，用onnx模型推理工具进行推理，得到n*8400二维数组的推理结果，n为4加上数据集缺陷种类的数量（本文的模型有5种缺陷，故n=9）

# Excerpt of the inference step. `onnx_model_path` and `image` are not defined
# here; in the complete listing they are the parameters of onnx().
# Create an ONNX Runtime session for the model file.
session = onnxruntime.InferenceSession(onnx_model_path)
# Bind the image to the model's first input and keep the first output.
inputs = {session.get_inputs()[0].name: image}
logits = session.run(None, inputs)[0]

# Turn the output into a 2-D array:
# reshape from (1, 9, 8400) to (9, 8400) ...
output = logits.reshape((9, -1))
# ... then transpose to (8400, 9).
output = output.transpose((1, 0))

6、推理结果筛选

9*8400二维数组转成8400*9方便处理，9列数据分别表示了检测框的中心x坐标、y坐标、宽度、高度、每个缺陷的置信系数

需要筛选出缺陷置信系数大于阈值的检测框

# Excerpt of the filtering step. `num` and `threshold` are not defined here;
# in the complete listing they are the parameters of select().
# Rows that pass the filter (box geometry plus per-class confidences).
selected = np.zeros((0, 9))
# Confidence of the winning class for every kept row.
Thresh = np.zeros((0, 1))
# Index of the winning class for every kept row.
typ = np.zeros((0, 1), dtype=int)

# Index of the row most recently appended to `selected`.
i = 0
# Walk over every row and keep those with a confidence above the threshold.
# NOTE(review): this excerpt compares with `>`, the complete listing uses `>=`.
for n in range(num.shape[0]):
    # True when any of columns 4..8 exceeds the threshold.
    if np.any(num[n, 4:] > threshold):
        # Append this row to `selected`.
        selected = np.vstack((selected, num[n]))

        # The first column equal to the row maximum decides the class.
        if selected[i, 4] == max(selected[i, 4:]):
            # Column 4 wins: record class 0 ...
            typ = np.vstack((typ, 0))
            # ... together with its confidence.
            Thresh = np.vstack((Thresh, selected[i, 4]))
        elif selected[i, 5] == max(selected[i, 4:]):
            typ = np.vstack((typ, 1))
            Thresh = np.vstack((Thresh, selected[i, 5]))
        elif selected[i, 6] == max(selected[i, 4:]):
            typ = np.vstack((typ, 2))
            Thresh = np.vstack((Thresh, selected[i, 6]))
        elif selected[i, 7] == max(selected[i, 4:]):
            typ = np.vstack((typ, 3))
            Thresh = np.vstack((Thresh, selected[i, 7]))
        elif selected[i, 8] == max(selected[i, 4:]):
            typ = np.vstack((typ, 4))
            Thresh = np.vstack((Thresh, selected[i, 8]))
        i = i + 1

7、像素还原

将筛选结果还原成原图像素点坐标

# Excerpt of the coordinate-restoring step. `select`, `typ`, `thresh`, `h` and
# `w` are not defined here; in the complete listing they are the parameters of
# back().
# Columns 0..3 of the selected rows: box centre x, centre y, width, height.
x_center = select[:, 0]
y_center = select[:, 1]
width = select[:, 2]
height = select[:, 3]

# Top-left corner of every box.
x_min = x_center - width / 2
y_min = y_center - height / 2

# Build the bbox array: top-left corner, width and height ...
bbox = np.zeros((select.shape[0], 6))
bbox[:, 0] = x_min
bbox[:, 1] = y_min
bbox[:, 2] = width
bbox[:, 3] = height
# ... then the class index in column 4 and the confidence in column 5.
bbox[:, 4] = typ
bbox[:, 5] = thresh
# Undo the resize: scale by the longer side over 640, then subtract the
# padding that was added along the shorter axis.
if h > w:
    bbox[:, :4] *= (h/640)
    bbox[:, 0] -= (h/2-w/2)
else:
    bbox[:, :4] *= (w/640)
    bbox[:, 1] -= (w/2-h/2)

# Convert the 2-D array into a list of lists.
my_list = [list(row) for row in bbox]
# Columns 0..4 become int, column 5 becomes float.
for i in range(len(my_list)):
    for j in range(len(my_list[i])):
        if j < 5:
            my_list[i][j] = int(my_list[i][j])
        else:
            my_list[i][j] = float(my_list[i][j])

8、筛选重叠面积

根据阈值去除同一缺陷种类的重复检测框

# Excerpt of the overlap-filtering step. `bbox` and `threshold` are not
# defined here; in the complete listing they are the parameters of nms_box().
i = 0
# Sort the boxes by height (column 3); only neighbours in this order are compared.
bbox = sorted(bbox, key=lambda x: x[3])
while i < (len(bbox) - 1):
    # Only boxes of the same class (column 4) are compared.
    if bbox[i][4] == bbox[i + 1][4]:
        # Corners of the overlap rectangle of the two boxes.
        x1 = max(bbox[i][0], bbox[i + 1][0])
        y1 = max(bbox[i][1], bbox[i + 1][1])
        x2 = min(bbox[i][0] + bbox[i][2], bbox[i + 1][0] + bbox[i + 1][2])
        y2 = min(bbox[i][1] + bbox[i][3], bbox[i + 1][1] + bbox[i + 1][3])
        
        # NOTE(review): the two factors are not clamped at 0, so boxes that do
        # not overlap on both axes still give a positive product.
        intersection = (x2 - x1) * (y2 - y1)
        area1 = bbox[i][2] * bbox[i][3]
        area2 = bbox[i + 1][2] * bbox[i + 1][3]
        # 1 - (intersection / union): small values mean heavy overlap.
        nms = 1 - intersection / (area1 + area2 - intersection)
        # print(nms) 
        
        # Drop the box with the lower confidence (column 5) when the pair overlaps heavily.
        # NOTE(review): when nms == threshold no branch runs, so `i` never
        # advances and the loop does not terminate.
        if nms < threshold and bbox[i][5] >= bbox[i + 1][5]:
            del bbox[i + 1]
        elif nms < threshold and bbox[i][5] < bbox[i + 1][5]:
            del bbox[i]
        elif nms > threshold:
            i = i + 1
    else:
        i = i + 1

9、标记缺陷

根据处理完的缺陷位置信息，使用方框将缺陷标记出来

# Excerpt of the drawing step. `img` and `bbox_list` are not defined here; in
# the complete listing they are the parameters of draw_bbox().
global colors
global labels
    
# Walk over every row of the bbox list.
for bbox in bbox_list:
    # Top-left corner, width and height of the box.
    x, y, w, h = bbox[:4]
    # Class index and confidence shown next to the top-left corner.
    defect_type = bbox[4]
    confidence = bbox[5]
    
    # Draw the rectangle and, just above it, the label with the confidence.
    cv2.rectangle(img, (x, y), (x + w, y + h), colors[defect_type], 2)
    str_confidence = "{:.3f}".format(confidence)      
    cv2.putText(img, labels[defect_type] + ' ' + str_confidence, (x, y - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 2, colors[defect_type], 3)

    # NOTE(review): this call is inside the loop, so nothing is shown when
    # bbox_list is empty.
    cv2.imshow("result", img)

10、全局变量设置

# Initialise the module-level variables.
colors = []
# One label per line of the file 'type.names'.
with open('type.names', 'r') as f:
    labels = f.read().splitlines()
# Generate one random colour (three values in 0..255) per label.
for _ in range(len(labels)):
    color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
    colors.append(color)

11、读取摄像头图像

# Open the camera: cv2.VideoCapture(device index).
cap = cv2.VideoCapture(0)

while True: 
    # Grab one frame; cap.read() returns (success flag, image).
    ret_flag, Vshow = cap.read()  
    
    # NOTE(review): the next line is the article's placeholder for the
    # per-frame processing steps; it is not valid Python.
    。。。。

    # For continuous reading the waitKey argument must be 1 or higher;
    # the loop ends when key code 27 is returned.
    if cv2.waitKey(1) == 27:  
        break

12、完整代码

import onnxruntime
import numpy as np
import tkinter
from tkinter import filedialog
import random
import cv2

def resize_image(image, h, w):
    """Letterbox a BGR frame onto a 640x640 RGB canvas.

    The frame is converted from BGR to RGB, resized so that its longer side
    becomes 640 pixels while the aspect ratio is kept, and pasted into the
    centre of a canvas filled with the colour (255, 0, 0).

    Args:
        image: Source frame as a 3-channel BGR array.
        h: Height of ``image`` in pixels.
        w: Width of ``image`` in pixels.

    Returns:
        A 640 x 640 x 3 ``uint8`` array holding the centred, resized frame.
    """
    side = 640
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # cv2.resize takes the target size as (width, height).
    if h > w:
        target = (int(w / h * side), side)
    else:
        target = (side, int(h / w * side))
    scaled = cv2.resize(rgb, target)
    new_h, new_w = scaled.shape[:2]

    # Single-colour canvas with the resized frame in the middle.
    canvas = np.full((side, side, 3), (255, 0, 0), np.uint8)
    left = (side - new_w) // 2
    top = (side - new_h) // 2
    canvas[top:top + new_h, left:left + new_w] = scaled

    return canvas

def nchw_image(image):
    """Turn an HWC image into a normalised NCHW batch of size one.

    Args:
        image: Image array laid out as (height, width, channels).

    Returns:
        A ``float32`` array of shape (1, channels, height, width) whose values
        are the input values divided by 255.
    """
    scaled = image.astype(np.float32) / 255.0
    # HWC -> CHW, then prepend the batch axis.
    channels_first = scaled.transpose(2, 0, 1)
    return channels_first[np.newaxis, ...]

# Inference sessions keyed by model path, so that each model file is loaded
# only once instead of on every call (i.e. on every camera frame).
_ONNX_SESSIONS = {}


def onnx(image, onnx_model_path):
    """Run the ONNX model on one preprocessed image.

    The inference session for ``onnx_model_path`` is created on first use and
    reused by later calls with the same path.

    Args:
        image: Input array that is fed to the model's first input (the
            output of ``nchw_image`` in this file).
        onnx_model_path: Path of the ``.onnx`` model file.

    Returns:
        The model's first output as a 2-D array with one row per candidate
        box. Assumes that output has shape (1, C, N), e.g. (1, 9, 8400) for
        the article's model; the result then has shape (N, C).
    """
    session = _ONNX_SESSIONS.get(onnx_model_path)
    if session is None:
        session = onnxruntime.InferenceSession(onnx_model_path)
        _ONNX_SESSIONS[onnx_model_path] = session

    inputs = {session.get_inputs()[0].name: image}
    logits = session.run(None, inputs)[0]

    # Drop the batch axis using the model's own channel count instead of a
    # hard-coded 9: (1, C, N) -> (C, N).
    output = logits.reshape((logits.shape[-2], -1))
    # (C, N) -> (N, C)
    return output.transpose((1, 0))
   
def select(num, threshold):
    """Keep the candidate boxes whose best class confidence reaches a threshold.

    Every row of ``num`` holds the box centre x, centre y, width and height in
    columns 0..3, followed by one confidence per defect class. Any number of
    class columns is supported.

    Args:
        num: 2-D array with one row per candidate box.
        threshold: Minimum confidence; a row is kept when at least one of its
            class confidences is ``>= threshold``.

    Returns:
        A tuple ``(selected, typ, Thresh)``:
        ``selected`` is a float64 array with the kept rows, in input order;
        ``typ`` is a 1-D integer array with the index of the highest-scoring
        class of each kept row (the first one on ties);
        ``Thresh`` is a 1-D float64 array with that class's confidence.
    """
    num = np.asarray(num)
    scores = num[:, 4:]

    if scores.shape[1] == 0:
        # No class columns at all: nothing can pass the threshold.
        return np.zeros((0, num.shape[1])), np.zeros(0, dtype=int), np.zeros(0)

    # One vectorised pass instead of growing arrays row by row with vstack.
    keep = np.any(scores >= threshold, axis=1)
    kept_scores = scores[keep]

    selected = num[keep].astype(np.float64)
    # argmax returns the first maximum, matching the original if/elif order.
    typ = kept_scores.argmax(axis=1)
    Thresh = kept_scores.max(axis=1).astype(np.float64)
    return selected, typ, Thresh

def back(select, typ, thresh, h, w):
    """Map the selected boxes from the 640x640 input back to the original image.

    Args:
        select: 2-D array whose columns 0..3 hold the box centre x, centre y,
            width and height in 640x640 coordinates.
        typ: Class index of every box.
        thresh: Confidence of every box.
        h: Height of the original image.
        w: Width of the original image.

    Returns:
        A list with one ``[x, y, width, height, class, confidence]`` entry per
        box, where ``x, y`` is the top-left corner in original-image pixels.
        The first five values are ``int``, the confidence is ``float``.
    """
    boxes = np.zeros((select.shape[0], 6))
    sizes = select[:, 2:4]
    # Top-left corner = centre - half of the size.
    boxes[:, 0:2] = select[:, 0:2] - sizes / 2
    boxes[:, 2:4] = sizes
    boxes[:, 4] = typ
    boxes[:, 5] = thresh

    # Undo the resize: scale by the longer side, then remove the padding that
    # was added along the shorter axis.
    if h > w:
        boxes[:, :4] *= (h / 640)
        boxes[:, 0] -= (h / 2 - w / 2)
    else:
        boxes[:, :4] *= (w / 640)
        boxes[:, 1] -= (w / 2 - h / 2)

    return [[int(value) for value in row[:5]] + [float(row[5])] for row in boxes]

def _box_iou(box_a, box_b):
    """Return the intersection-over-union of two ``[x, y, w, h, ...]`` boxes."""
    left = max(box_a[0], box_b[0])
    top = max(box_a[1], box_b[1])
    right = min(box_a[0] + box_a[2], box_b[0] + box_b[2])
    bottom = min(box_a[1] + box_a[3], box_b[1] + box_b[3])

    # Clamp both extents at zero so that disjoint boxes have no intersection
    # (two negative extents would otherwise multiply to a positive area).
    intersection = max(0, right - left) * max(0, bottom - top)
    union = box_a[2] * box_a[3] + box_b[2] * box_b[3] - intersection
    # Degenerate (zero-area) boxes cannot overlap anything.
    return intersection / union if union > 0 else 0.0


def nms_box(bbox, threshold):
    """Remove duplicate detections of the same defect class.

    Boxes are visited from the highest to the lowest confidence. A box is
    dropped when an already kept box of the same class overlaps it so much
    that ``1 - IoU < threshold``; otherwise it is kept. Every box is compared
    with every kept box of its class, not only with a neighbour.

    Args:
        bbox: List of ``[x, y, width, height, class, confidence]`` entries,
            ``x, y`` being the top-left corner. The list is not modified.
        threshold: Overlap threshold on ``1 - IoU``; pairs below it are
            treated as duplicates.

    Returns:
        A new list with the surviving boxes, sorted by height (index 3).
    """
    kept = []
    for candidate in sorted(bbox, key=lambda box: box[5], reverse=True):
        is_duplicate = any(
            candidate[4] == other[4]
            and 1 - _box_iou(candidate, other) < threshold
            for other in kept
        )
        if not is_duplicate:
            kept.append(candidate)

    # Keep the ordering the previous implementation returned.
    return sorted(kept, key=lambda box: box[3])

def draw_bbox(img, bbox_list):
    """Draw the detected defects on an image and show it in the "result" window.

    Uses the module-level ``colors`` and ``labels`` lists, indexed by class.

    Args:
        img: Image to draw on; it is modified in place.
        bbox_list: List of ``[x, y, width, height, class, confidence]``
            entries with integer pixel coordinates, ``x, y`` being the
            top-left corner.
    """
    for bbox in bbox_list:
        x, y, w, h = bbox[:4]
        defect_type = bbox[4]
        confidence = bbox[5]
        color = colors[defect_type]

        # Rectangle around the defect, with "<label> <confidence>" just above
        # its top-left corner.
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        str_confidence = "{:.3f}".format(confidence)
        cv2.putText(img, labels[defect_type] + ' ' + str_confidence, (x, y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, color, 3)

    # Show the frame once per call, after all boxes are drawn. Doing this
    # outside the loop also displays frames that contain no defect.
    cv2.imshow("result", img)

# Module-level state shared by the drawing code:
# `labels` holds one defect name per line of 'type.names',
# `colors` holds one random colour (three values in 0..255) per label.
with open('type.names', 'r') as f:
    labels = f.read().splitlines()
colors = [tuple(random.randint(0, 255) for _ in range(3)) for _ in labels]
if __name__ == "__main__":
    # Settings that do not change between frames.
    onnx_model_path = "models\\best.onnx"
    # Minimum class confidence for a detection to be kept.
    threshold = 0.4
    # Overlap threshold used when removing duplicate boxes.
    nms_threshold = 0.4

    # Open the camera: cv2.VideoCapture(device index).
    cap = cv2.VideoCapture(0)
    try:
        while True:
            # cap.read() returns (success flag, frame).
            ret_flag, Vshow = cap.read()
            if not ret_flag:
                # No frame could be read, so there is nothing to process.
                break

            # Frame height and width.
            y, x = Vshow.shape[:2]
            # Resize with the aspect ratio kept, then normalise to NCHW.
            image0 = resize_image(Vshow, y, x)
            image1 = nchw_image(image0)
            # Model inference.
            result0 = onnx(image1, onnx_model_path)

            # Kept rows, with the matching class index and confidence.
            select1, typ, thresh = select(result0, threshold)

            # Map the boxes back to the original frame.
            result1 = back(select1, typ, thresh, y, x)
            # Remove overlapping boxes of the same class.
            result2 = nms_box(result1, nms_threshold)

            # Draw the boxes on the frame and show it.
            draw_bbox(Vshow, result2)

            # For continuous reading the waitKey argument must be 1 or higher;
            # the loop ends when key code 27 is returned.
            if cv2.waitKey(1) == 27:
                break
    finally:
        # Release the camera and close the windows even if a frame failed.
        cap.release()
        cv2.destroyAllWindows()