python调用OAK相机实现人脸检测的简单demo

最新推荐文章于 2024-05-23 17:56:24 发布

MFT小白

最新推荐文章于 2024-05-23 17:56:24 发布

阅读量166

点赞数

分类专栏：机器视觉　文章标签：数码相机

本文链接：https://blog.csdn.net/zhangbing0116/article/details/133085693

版权

机器视觉专栏收录该内容

13 篇文章 0 订阅

订阅专栏

基本思想：学习 OAK 双目相机的使用，复现官网 demo，并实现简单的人脸检测。

前言：

设备型号：OAK-D-Pro

一、环境配置参考

台式机ubuntu系统调用OAK相机_MFT小白的博客-CSDN博客

二、测试

2.1 test_depthai.py

按下 t 键可实现图像切换，观察视差。

import depthai as dai
import cv2
import numpy as np


# # 实例化管道对象
# pipeline = dai.Pipeline()

# mono = pipeline.createMonoCamera()
# mono.setBoardSocket(dai.CameraBoardSocket.LEFT)

# xout = pipeline.createXLinkOut()
# xout.setStreamName("left")
# mono.out.link(xout.input)

# with dai.Device(pipeline) as device:
#     queue = device.getOutputQueue(name="left")
#     frame = queue.get()
#     imOut = frame.getCvFrame()
#     cv2.imshow("imOut", imOut)
#     cv2.waitKey()

def getFrame(queue):
    """Take the next message from ``queue`` and return it as an OpenCV frame.

    Args:
        queue: Object exposing ``get()``; the message it returns must expose
            ``getCvFrame()``.

    Returns:
        Whatever the message's ``getCvFrame()`` returns.
    """
    return queue.get().getCvFrame()


def getMonoCamera(pipeline, isLeft):
    """Create a mono camera node at 400p bound to the left or right socket.

    Args:
        pipeline: Pipeline object on which ``createMonoCamera()`` is called.
        isLeft: Truthy selects ``CameraBoardSocket.LEFT``; anything else
            selects ``CameraBoardSocket.RIGHT``.

    Returns:
        The configured mono camera node.
    """
    camera = pipeline.createMonoCamera()
    camera.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)

    # Bind the node to the board socket for the requested side.
    sockets = dai.CameraBoardSocket
    camera.setBoardSocket(sockets.LEFT if isLeft else sockets.RIGHT)
    return camera


def getStereoPair(pipeline, monoLeft, monoRight):
    """Create a stereo depth node and feed it from the two mono cameras.

    Args:
        pipeline: Pipeline object on which ``createStereoDepth()`` is called.
        monoLeft: Camera node whose ``out`` is linked to the ``left`` input.
        monoRight: Camera node whose ``out`` is linked to the ``right`` input.

    Returns:
        The stereo depth node, with left-right check enabled and both
        inputs linked.
    """
    depth_node = pipeline.createStereoDepth()

    # Enable the left-right check before wiring the inputs (per the original
    # note, this marks occluded pixels as invalid).
    depth_node.setLeftRightCheck(True)

    # Link each camera output to its matching stereo input, left first.
    wiring = ((monoLeft, depth_node.left), (monoRight, depth_node.right))
    for camera, stereo_input in wiring:
        camera.out.link(stereo_input)

    return depth_node


def mouseCallback(event, x, y, flags, param):
    """Mouse handler that records the position of a left-button press.

    The position is stored in the module-level ``mouseX`` and ``mouseY``.
    Every other event is ignored; ``flags`` and ``param`` are unused.
    """
    global mouseX, mouseY
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    mouseX, mouseY = x, y


if __name__ == '__main__':
    # Stereo demo: shows the rectified left/right pair and a colour-mapped
    # disparity map. Keys: 't' toggles overlay / side-by-side view,
    # 'q' or Esc quits.

    # Anchor of the scan line; mouseCallback overwrites these module-level
    # values on every left click in the "Stereo Pair" window.
    # NOTE(review): the initial row of 640 presumably lies outside the
    # displayed frame, so no line is visible before the first click - confirm.
    mouseX = 0
    mouseY = 640

    # Start defining a pipeline
    pipeline = dai.Pipeline()

    # Set up left and right cameras and combine them into a stereo pair
    monoLeft = getMonoCamera(pipeline, isLeft=True)
    monoRight = getMonoCamera(pipeline, isLeft=False)
    stereo = getStereoPair(pipeline, monoLeft, monoRight)

    # One XLink output stream per stereo output that is read on the host
    xoutDisp = pipeline.createXLinkOut()
    xoutDisp.setStreamName("disparity")

    xoutRectifiedLeft = pipeline.createXLinkOut()
    xoutRectifiedLeft.setStreamName("rectifiedLeft")

    xoutRectifiedRight = pipeline.createXLinkOut()
    xoutRectifiedRight.setStreamName("rectifiedRight")

    stereo.disparity.link(xoutDisp.input)
    stereo.rectifiedLeft.link(xoutRectifiedLeft.input)
    stereo.rectifiedRight.link(xoutRectifiedRight.input)

    with dai.Device(pipeline) as device:
        # Output queues for the disparity map and the two rectified frames
        # defined above
        disparityQueue = device.getOutputQueue(
            name="disparity", maxSize=1, blocking=False)
        rectifiedLeftQueue = device.getOutputQueue(
            name="rectifiedLeft", maxSize=1, blocking=False)
        rectifiedRightQueue = device.getOutputQueue(
            name="rectifiedRight", maxSize=1, blocking=False)

        # Multiplier that scales the maximum disparity value to 255 before
        # colour mapping
        disparityMultiplier = 255 / stereo.getMaxDisparity()

        cv2.namedWindow("Stereo Pair")
        cv2.setMouseCallback("Stereo Pair", mouseCallback)

        # Toggles between side-by-side view and a single blended frame
        sideBySide = False

        try:
            while True:
                # Get the disparity map and colour-map it for display
                disparity = getFrame(disparityQueue)
                disparity = (disparity * disparityMultiplier).astype(np.uint8)
                disparity = cv2.applyColorMap(disparity, cv2.COLORMAP_JET)

                # Get the left and right rectified frames
                leftFrame = getFrame(rectifiedLeftQueue)
                rightFrame = getFrame(rectifiedRightQueue)

                if sideBySide:
                    # Show side by side view
                    imOut = np.hstack((leftFrame, rightFrame))
                else:
                    # Show overlapping frames (average of both images)
                    imOut = np.uint8(leftFrame / 2 + rightFrame / 2)

                # Expand to three channels so coloured markers can be drawn
                imOut = cv2.cvtColor(imOut, cv2.COLOR_GRAY2RGB)
                # Draw scan line from the clicked point towards x = 1280
                imOut = cv2.line(imOut, (mouseX, mouseY),
                                 (1280, mouseY), (0, 0, 255), 2)
                # Draw clicked point
                imOut = cv2.circle(imOut, (mouseX, mouseY), 2,
                                   (255, 255, 128), 2)
                cv2.imshow("Stereo Pair", imOut)
                cv2.imshow("Disparity", disparity)

                # Check for keyboard input
                key = cv2.waitKey(1)
                if key == ord('q') or key == 27:
                    # Quit when q or Esc is pressed
                    break
                elif key == ord('t'):
                    # Toggle display when t is pressed
                    sideBySide = not sideBySide
        finally:
            # Close the display windows on exit, including on an exception
            cv2.destroyAllWindows()

效果图

2.2 test_model_depthai.py

模型下载地址：Index of blobconverter-backup/blobs

import depthai as dai
import cv2
import time
import blobconverter
import numpy as np

# Width and height passed to the colour camera's preview output.
FRAME_SIZE = 640, 360

# Width and height the preview is resized to before face detection.
DET_INPUT_SIZE = 300, 300

# Alternative model source, used only by the disabled blobconverter call:
# model_name = "face-detection-retail-0004"
# zoo_type = "depthai"

# Path of the face-detection blob file handed to the detection network.
blob_path = "face-detection-retail-0004_openvino_2021.4_10shave.blob"

# Build the pipeline: colour preview -> resize -> spatial face detector,
# with the stereo depth node supplying the detector's depth input.
pipeline = dai.Pipeline()

# Define the RGB (colour) camera; its preview output uses FRAME_SIZE
cam = pipeline.createColorCamera()
cam.setPreviewSize(FRAME_SIZE[0], FRAME_SIZE[1])
cam.setInterleaved(False)
cam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
cam.setBoardSocket(dai.CameraBoardSocket.RGB)


# Define the left mono camera
mono_left = pipeline.createMonoCamera()
mono_left.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
mono_left.setBoardSocket(dai.CameraBoardSocket.LEFT)
# Define the right mono camera
mono_right = pipeline.createMonoCamera()
mono_right.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
mono_right.setBoardSocket(dai.CameraBoardSocket.RIGHT)

# Create the stereo node
stereo = pipeline.createStereoDepth()


# Link both mono camera outputs to the stereo node
mono_left.out.link(stereo.left)
mono_right.out.link(stereo.right)

# Disabled alternative: fetch the model blob file via blobconverter
# if model_name is not None:
#     blob_path = blobconverter.from_zoo(
#         name=model_name,
#         shaves=6,
#         zoo_type=zoo_type
#     )


# Define the face-detection neural network node and its parameters
# NOTE(review): the depth thresholds (100 / 5000) are presumably in
# millimetres, matching the "mm" labels drawn by display_info - confirm.
face_spac_det_nn = pipeline.createMobileNetSpatialDetectionNetwork()
face_spac_det_nn.setConfidenceThreshold(0.75)
face_spac_det_nn.setBlobPath(blob_path)
face_spac_det_nn.setDepthLowerThreshold(100)
face_spac_det_nn.setDepthUpperThreshold(5000)

# Define the input configuration for face detection: resize the preview to
# DET_INPUT_SIZE without keeping the aspect ratio
face_det_manip = pipeline.createImageManip()
face_det_manip.initialConfig.setResize(DET_INPUT_SIZE[0], DET_INPUT_SIZE[1])
face_det_manip.initialConfig.setKeepAspectRatio(False)

# Links:   rgb --> ImageManip node --> net
#          stereo --> net
cam.preview.link(face_det_manip.inputImage)
face_det_manip.out.link(face_spac_det_nn.input)
stereo.depth.link(face_spac_det_nn.inputDepth)


# Create the preview output stream
x_preview_out = pipeline.createXLinkOut()
x_preview_out.setStreamName("preview")
cam.preview.link(x_preview_out.input)
# Create the detection output stream
det_out = pipeline.createXLinkOut()
det_out.setStreamName("det_out")
face_spac_det_nn.out.link(det_out.input)

# Output the disparity map
xoutDisp = pipeline.createXLinkOut()
xoutDisp.setStreamName("disparity")
stereo.disparity.link(xoutDisp.input)


# 定义显示功能
def display_info(frame, disparity, bbox, coordinates, status, status_color, fps):
    """Draw the detection box, spatial coordinates, status and FPS overlays.

    ``frame`` and ``disparity`` are drawn on in place; nothing is returned.

    Args:
        frame: Image that receives the box, the X/Y/Z labels, the status
            text and the FPS text.
        disparity: Image that receives the box and the depth label.
        bbox: ``(x, y, w, h)`` rectangle, or ``None`` when there is nothing
            to outline.
        coordinates: ``(x, y, z)`` values printed with an "mm" suffix, or
            ``None`` to omit them.
        status: Key into ``status_color``; also printed on ``frame``.
        status_color: Mapping from status text to drawing colour.
        fps: Number printed with two decimals.

    Raises:
        KeyError: If ``status`` is not a key of ``status_color``.
    """
    color = status_color[status]

    # Outline the detection only when a box was supplied, rather than
    # handing None to cv2.rectangle on frames without a detection.
    if bbox is not None:
        cv2.rectangle(frame, bbox, color, 2)
        cv2.rectangle(disparity, bbox, color, 2)

    # Display coordinates
    if coordinates is not None:
        coord_x, coord_y, coord_z = coordinates
        if bbox is not None:
            # The X/Y/Z labels are positioned relative to the box corner,
            # so they need a box to anchor to.
            cv2.putText(frame, f"X: {int(coord_x)} mm", (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Y: {int(coord_y)} mm", (bbox[0] + 10, bbox[1] + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Z: {int(coord_z)} mm", (bbox[0] + 10, bbox[1] + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
        cv2.putText(disparity, f"Depth: {int(coord_z)} mm", (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255))

    # Create background for showing details
    cv2.rectangle(frame, (5, 5, 175, 100), (50, 0, 0), -1)

    # Display detection status on the frame
    cv2.putText(frame, status, (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color)

    # Display the frame rate on the frame
    cv2.putText(frame, f'FPS: {fps:.2f}', (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255))

    
# Number of frames handled so far; the FPS value is refreshed whenever this
# is a multiple of 10.
frame_count = 0

# Latest FPS estimate; stays at the placeholder 0 until it is first computed.
fps = 0

# Timestamps of the previous and the current FPS measurement.
prev_frame_time = new_frame_time = 0

# Drawing colour for each status string.
status_color = {
    'Face Detected': (0, 255, 0),
    'No Face Detected': (0, 0, 255),
}



# Start pipeline
with dai.Device(pipeline) as device:

    # Output queue for the colour preview frames
    q_cam = device.getOutputQueue(name="preview", maxSize=1, blocking=False)

    # Output queue for the detection results of the neural network
    q_det = device.getOutputQueue(name="det_out", maxSize=1, blocking=False)

    # Output queue for the disparity map, plus the multiplier that scales
    # the maximum disparity value to 255 for colour mapping
    disparityQueue = device.getOutputQueue(name="disparity", maxSize=1, blocking=False)
    disparityMultiplier = 255 / stereo.getMaxDisparity()

    try:
        while True:
            # Get the colour preview frame
            in_cam = q_cam.get()
            frame = in_cam.getCvFrame()

            # Get the disparity map and colour-map it for display
            disparity_ = disparityQueue.get()
            disparity = disparity_.getCvFrame()
            disparity = (disparity * disparityMultiplier).astype(np.uint8)
            disparity = cv2.applyColorMap(disparity, cv2.COLORMAP_JET)

            bbox = None
            coordinates = None

            # Detections are polled without waiting; None means no new result
            inDet = q_det.tryGet()

            if inDet is not None:
                detections = inDet.detections

                # Only the first detection is used. Emptiness is tested by
                # truthiness; the previous `len(...) is not 0` compared
                # identity with an int literal.
                if detections:
                    detection = detections[0]

                    # Clamp the bounding box to the [0, 1] range
                    xmin = max(0, detection.xmin)
                    ymin = max(0, detection.ymin)
                    xmax = min(detection.xmax, 1)
                    ymax = min(detection.ymax, 1)

                    # Scale the box to FRAME_SIZE pixels
                    x = int(xmin*FRAME_SIZE[0])
                    y = int(ymin*FRAME_SIZE[1])
                    w = int(xmax*FRAME_SIZE[0]-xmin*FRAME_SIZE[0])
                    h = int(ymax*FRAME_SIZE[1]-ymin*FRAME_SIZE[1])

                    bbox = (x, y, w, h)

                    # Get spatial coordinates
                    coordinates = (
                        detection.spatialCoordinates.x,
                        detection.spatialCoordinates.y,
                        detection.spatialCoordinates.z,
                    )

            # A box is only set when a face was detected in this iteration
            if bbox is not None:
                status = 'Face Detected'
            else:
                status = 'No Face Detected'

            # Display info on frame
            display_info(frame, disparity, bbox, coordinates, status, status_color, fps)

            # Refresh the average fps once every 10 frames
            if frame_count % 10 == 0:
                new_frame_time = time.time()

                # Frames per second over the last 10 frames; skipped when
                # no time has elapsed, to avoid dividing by zero
                elapsed = new_frame_time - prev_frame_time
                if elapsed > 0:
                    fps = 10 / elapsed
                prev_frame_time = new_frame_time

            # Capture the key pressed
            key_pressed = cv2.waitKey(1) & 0xff

            # Stop the program if Esc or q was pressed
            if key_pressed == 27 or key_pressed == ord('q'):
                break

            # Display the final frame
            cv2.imshow("Face Cam", frame)
            cv2.imshow("Disparity", disparity)

            # Increment frame count
            frame_count += 1
    finally:
        # Close the display windows on exit, including on an exception
        cv2.destroyAllWindows()