Hands-On with the OAK Camera

OAK Introduction

https://www.oakchina.cn/2021/07/29/opencv-ai-kit/

OAK Installation

Official docs link: Manual DepthAI installation

The docs actually offer several options: a Chinese-localized program from OAK China, the installer from the official OAK site, and manual installation. Here I set up the environment directly instead of downloading their packaged software, since custom development mostly means calling the API anyway. In your own Python environment, a pip install is all you need.

pip install depthai
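
After the install, it is worth a quick check that the library imports and that the host can see an OAK device over USB. A minimal sketch, assuming depthai 2.x (where Device.getAllAvailableDevices() and DeviceInfo.getMxId() are available):

import depthai

# Print the installed library version
print("depthai version:", depthai.__version__)

# List all OAK devices currently reachable from this host
devices = depthai.Device.getAllAvailableDevices()
if not devices:
    print("No OAK device found - check the USB connection")
for device_info in devices:
    print("Found device:", device_info.getMxId(), device_info.state)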

Official OAK Hello World

It is worth looking through the structure and parameters. There is no need to download the packaged .exe program; calling the API directly works. The comments have been translated for easier reading. Original link: https://docs.luxonis.com/software/depthai/hello-world/

# First, import all the modules we need
from pathlib import Path
 
import blobconverter
import cv2
import depthai
import time
import numpy as np
 
# The pipeline tells DepthAI what operations to perform when running - all of the resources and flows used are defined here
pipeline = depthai.Pipeline()
 
# First, we want the color camera as the output
cam_rgb = pipeline.createColorCamera()
cam_rgb.setPreviewSize(300, 300)  # 300x300 will be the preview frame size, available as the 'preview' output of the node
cam_rgb.setInterleaved(False)
 
# Next, we want a neural network that will produce the detections
detection_nn = pipeline.createMobileNetDetectionNetwork()
# A blob is a neural network file compiled for the MyriadX. It contains both the definition and the weights of the model
# We're using the blobconverter tool to retrieve the MobileNetSSD blob automatically from the OpenVINO Model Zoo
detection_nn.setBlobPath(blobconverter.from_zoo(name='mobilenet-ssd', shaves=6))
# Next, we filter out detections below the confidence threshold. Confidence can be anywhere between <0..1>
detection_nn.setConfidenceThreshold(0.5)
# Next, we link the camera's 'preview' output to the neural network detection input, so that it can produce detections
cam_rgb.preview.link(detection_nn.input)
 
# XLinkOut is a 'way out' from the device. Any data you want to transfer to the host needs to be sent via XLink
xout_rgb = pipeline.createXLinkOut()
# For the rgb camera output, we want the XLink stream to be named "rgb"
xout_rgb.setStreamName("rgb")
# Link the camera preview to the XLink input, so that the frames will be sent to the host
cam_rgb.preview.link(xout_rgb.input)
 
# The same XLinkOut mechanism will be used to receive the nn results
xout_nn = pipeline.createXLinkOut()
xout_nn.setStreamName("nn")
detection_nn.out.link(xout_nn.input)
 
# The pipeline is now finished, and we need to find an available device to run it on
# We use a context manager here, which will dispose of the device once we stop using it
with depthai.Device(pipeline) as device:
    # From this point the device will be in "running" mode and will start sending data via XLink
    # To consume the device results, we get two output queues from the device, with the stream names we assigned earlier
    q_rgb = device.getOutputQueue("rgb")
    q_nn = device.getOutputQueue("nn")
 
    # Some defaults are defined here. frame will be the image from the "rgb" stream, detections will hold the nn results
    frame = None
    detections = []
    counter = 0
    startTime = time.monotonic()
    color2 = (255, 255, 255)
 
    # Since the detections returned by the nn are in the <0..1> range, they need to be multiplied by the frame width/height
    # to get the actual position of the bounding box on the image
    def frameNorm(frame, bbox):
        normVals = np.full(len(bbox), frame.shape[0])
        normVals[::2] = frame.shape[1]
        return (np.clip(np.array(bbox), 0, 1) * normVals).astype(int)
 
 
    # Main host-side application loop
    while True:
        # We try to fetch data from the nn/rgb queues. tryGet will return either a packet of data or None if there isn't any
        in_rgb = q_rgb.tryGet()
        in_nn = q_nn.tryGet()
 
        if in_rgb is not None:
            # If a packet from the RGB camera is present, retrieve the frame in OpenCV format using getCvFrame
            frame = in_rgb.getCvFrame()
 
        if in_nn is not None:
            # When data from the nn is received, take the detections array that contains the mobilenet-ssd results
            detections = in_nn.detections
 
        if frame is not None:
            for detection in detections:
                # For each bounding box, first normalize it to match the frame size
                bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
                # Then draw a rectangle on the frame to show the actual result
                cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 0, 0), 2)
            # After all the drawing is finished, show the frame on screen
            cv2.imshow("preview", frame)
 
        # At any time, you can press 'q' to exit the main loop, therefore exiting the program itself
        if cv2.waitKey(1) == ord('q'):
            break
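
The hello-world above only draws boxes. If you also want a class name next to each box, the mobilenet-ssd model from the Open Model Zoo is trained on the 20 PASCAL VOC classes (plus a background class at index 0), and detection.label indexes into that list. A small sketch of the drawing loop with labels added - the label order below is the usual VOC ordering and is an assumption about this particular blob:

# Assumed PASCAL VOC label order for the mobilenet-ssd blob (index 0 is background)
labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle",
            "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
            "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
            "train", "tvmonitor"]

for detection in detections:
    # Normalize the bounding box to the frame size, as before
    bbox = frameNorm(frame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax))
    cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 0, 0), 2)
    # detection.label is an index into labelMap, detection.confidence is in <0..1>
    cv2.putText(frame, f"{labelMap[detection.label]} {detection.confidence:.2f}",
                (bbox[0] + 5, bbox[1] + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0))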

Official YOLO Example

https://docs.luxonis.com/software/depthai/examples/spatial_tiny_yolo

The official example uses tiny-yolo-v3 and tiny-yolo-v4, but a YOLOv5 model can be loaded as well. Before loading it, your own yolo .pt file needs to be converted into the blob format that OAK requires; the conversion can be done through the website https://tools.luxonis.com/.
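
For YOLO .pt files the website route is the most convenient, because it handles the ONNX export and the YOLO-specific output configuration for you. As an aside, the blobconverter package already used in the hello-world can also run a conversion from Python if you have a model in ONNX form; a minimal sketch, where the ONNX file name is hypothetical and this plain conversion does not include the YOLO decoding setup that the website performs:

import blobconverter

# Convert an already-exported ONNX model into a .blob for the OAK (hypothetical path)
blob_path = blobconverter.from_onnx(
    model="yolov5s-seg.onnx",  # assumed to have been exported from the .pt beforehand
    data_type="FP16",          # the MyriadX runs FP16
    shaves=6,                  # number of SHAVE cores to compile for
)
print("Blob written to:", blob_path)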

As for the Input image shape parameter on the conversion page, you can see from the following line of the yolo example that it should be 416x416:

camRgb.setPreviewSize(416, 416)
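
If your converted model expects a different input size (the conversion page lets you choose, e.g. 640x640), setPreviewSize has to be changed to match. A sketch for reading the expected input dimensions back from the blob itself, assuming the dai.OpenVINO.Blob helper of recent depthai 2.x releases behaves as below:

import depthai as dai

# Read the network input tensor info straight from the compiled blob (path is the same placeholder used later)
blob = dai.OpenVINO.Blob("path/to/yolov5s-seg_openvino_2022.1_6shave.blob")
first_input = next(iter(blob.networkInputs.values()))
print("Blob expects input dims:", first_input.dims)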

Below is the modified code that loads the yolov5s_segment blob; point the path at your own blob and it runs directly, press q to exit the loop.

#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time

'''
Spatial Tiny-yolo example
  Performs inference on RGB camera and retrieves spatial location coordinates: x,y,z relative to the center of depth map.
  Can be used for tiny-yolo-v3 or tiny-yolo-v4 networks (adapted here to load a YOLOv5 blob)
'''

# Get argument first
nnBlobPath = str((Path(__file__).parent / Path('path/to/yolov5s-seg_openvino_2022.1_6shave.blob')).resolve().absolute())
if 1 < len(sys.argv):
    arg = sys.argv[1]
    if arg == "yolo3":
        nnBlobPath = str((Path(__file__).parent / Path('../models/yolo-v3-tiny-tf_openvino_2021.4_6shave.blob')).resolve().absolute())
    elif arg == "yolo5":
        nnBlobPath = str((Path(__file__).parent / Path('path/to/yolov5s-seg_openvino_2022.1_6shave.blob')).resolve().absolute())
    else:
        nnBlobPath = arg
else:
    print("Using Yolov5s model. If you wish to use Tiny YOLOv3, call 'tiny_yolo.py yolo3'")

if not Path(nnBlobPath).exists():
    raise FileNotFoundError(f'Blob file not found at {nnBlobPath}, please check the path to your converted model')

# Label map (80 COCO classes, used by the tiny-yolo v3/v4 and YOLOv5 models)
labelMap = [
    "person",         "bicycle",    "car",           "motorbike",     "aeroplane",   "bus",           "train",
    "truck",          "boat",       "traffic light", "fire hydrant",  "stop sign",   "parking meter", "bench",
    "bird",           "cat",        "dog",           "horse",         "sheep",       "cow",           "elephant",
    "bear",           "zebra",      "giraffe",       "backpack",      "umbrella",    "handbag",       "tie",
    "suitcase",       "frisbee",    "skis",          "snowboard",     "sports ball", "kite",          "baseball bat",
    "baseball glove", "skateboard", "surfboard",     "tennis racket", "bottle",      "wine glass",    "cup",
    "fork",           "knife",      "spoon",         "bowl",          "banana",      "apple",         "sandwich",
    "orange",         "broccoli",   "carrot",        "hot dog",       "pizza",       "donut",         "cake",
    "chair",          "sofa",       "pottedplant",   "bed",           "diningtable", "toilet",        "tvmonitor",
    "laptop",         "mouse",      "remote",        "keyboard",      "cell phone",  "microwave",     "oven",
    "toaster",        "sink",       "refrigerator",  "book",          "clock",       "vase",          "scissors",
    "teddy bear",     "hair drier", "toothbrush"
]

syncNN = True

# Create pipeline
pipeline = dai.Pipeline()

# Define sources and outputs
camRgb = pipeline.create(dai.node.ColorCamera)
spatialDetectionNetwork = pipeline.create(dai.node.YoloSpatialDetectionNetwork)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
nnNetworkOut = pipeline.create(dai.node.XLinkOut)

xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutNN = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)

xoutRgb.setStreamName("rgb")
xoutNN.setStreamName("detections")
xoutDepth.setStreamName("depth")
nnNetworkOut.setStreamName("nnNetwork")

# Properties
camRgb.setPreviewSize(416, 416)
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)

monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setCamera("right")

# setting node configs
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
# Align depth map to the perspective of RGB camera, on which inference is done
stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)
stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())
stereo.setSubpixel(True)

spatialDetectionNetwork.setBlobPath(nnBlobPath)
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.input.setBlocking(False)
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)

# Yolo specific parameters
spatialDetectionNetwork.setNumClasses(80)
spatialDetectionNetwork.setCoordinateSize(4)
spatialDetectionNetwork.setAnchors([10,14, 23,27, 37,58, 81,82, 135,169, 344,319])
spatialDetectionNetwork.setAnchorMasks({ "side26": [1,2,3], "side13": [3,4,5] })
spatialDetectionNetwork.setIouThreshold(0.5)
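# Note: the anchors and masks above are the tiny-yolo v3/v4 defaults from the official example.
# When a YOLOv5 model is converted on tools.luxonis.com, the download also contains a JSON config;
# if decoding looks wrong, take the anchors, masks and thresholds from that file instead
# (values differ per model, so treat the numbers above as placeholders for YOLOv5).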

# Linking
monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)

camRgb.preview.link(spatialDetectionNetwork.input)
if syncNN:
    spatialDetectionNetwork.passthrough.link(xoutRgb.input)
else:
    camRgb.preview.link(xoutRgb.input)

spatialDetectionNetwork.out.link(xoutNN.input)

stereo.depth.link(spatialDetectionNetwork.inputDepth)
spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)
spatialDetectionNetwork.outNetwork.link(nnNetworkOut.input)

# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Output queues will be used to get the rgb frames and nn data from the outputs defined above
    previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)
    networkQueue = device.getOutputQueue(name="nnNetwork", maxSize=4, blocking=False)

    startTime = time.monotonic()
    counter = 0
    fps = 0
    color = (255, 255, 255)
    printOutputLayersOnce = True

    while True:
        inPreview = previewQueue.get()
        inDet = detectionNNQueue.get()
        depth = depthQueue.get()
        inNN = networkQueue.get()

        if printOutputLayersOnce:
            toPrint = 'Output layer names:'
            for ten in inNN.getAllLayerNames():
                toPrint = f'{toPrint} {ten},'
            print(toPrint)
            printOutputLayersOnce = False

        frame = inPreview.getCvFrame()
        depthFrame = depth.getFrame() # depthFrame values are in millimeters

        depth_downscaled = depthFrame[::4]
        if np.all(depth_downscaled == 0):
            min_depth = 0  # Set a default minimum depth value when all elements are zero
        else:
            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
        max_depth = np.percentile(depth_downscaled, 99)
        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)

        counter+=1
        current_time = time.monotonic()
        if (current_time - startTime) > 1 :
            fps = counter / (current_time - startTime)
            counter = 0
            startTime = current_time

        detections = inDet.detections

        # If the frame is available, draw bounding boxes on it and show the frame
        height = frame.shape[0]
        width  = frame.shape[1]
        for detection in detections:
            roiData = detection.boundingBoxMapping
            roi = roiData.roi
            roi = roi.denormalize(depthFrameColor.shape[1], depthFrameColor.shape[0])
            topLeft = roi.topLeft()
            bottomRight = roi.bottomRight()
            xmin = int(topLeft.x)
            ymin = int(topLeft.y)
            xmax = int(bottomRight.x)
            ymax = int(bottomRight.y)
            cv2.rectangle(depthFrameColor, (xmin, ymin), (xmax, ymax), color, 1)

            # Denormalize bounding box
            x1 = int(detection.xmin * width)
            x2 = int(detection.xmax * width)
            y1 = int(detection.ymin * height)
            y2 = int(detection.ymax * height)
            try:
                label = labelMap[detection.label]
            except IndexError:
                label = detection.label
            cv2.putText(frame, str(label), (x1 + 10, y1 + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, "{:.2f}".format(detection.confidence*100), (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)} mm", (x1 + 10, y1 + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)} mm", (x1 + 10, y1 + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)
            cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)} mm", (x1 + 10, y1 + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, 255)

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)

        cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
        cv2.imshow("depth", depthFrameColor)
        cv2.imshow("rgb", frame)

        if cv2.waitKey(1) == ord('q'):
            break