语音报警+realsense相机目标+ssd识别算法+pyinstaller打包

最新推荐文章于 2022-10-29 22:37:58 发布

qq_45774774

最新推荐文章于 2022-10-29 22:37:58 发布

阅读量270

点赞数

分类专栏：测距　文章标签：opencv ssd python exe tensorflow

本文链接：https://blog.csdn.net/qq_45774774/article/details/114946945

版权

测距专栏收录该内容

2 篇文章 0 订阅

订阅专栏

语音报警+realsense相机目标+ssd识别算法+pyinstaller打包

简介

1.目标：用 realsense 相机实现目标测距，并在目标进入一定范围时报警。本文实现的是检测到人后进行测距，当距离在 10m 以内时发出报警声。

关于目标识别测距详细信息见前一篇文章：link

2.制作语音文件：使用在线文字转语音工具 http://app.xunjiepdf.com/text2voice 生成报警语音，并保存为代码中读取的 warning.wav 文件。

3.载入模型：上一篇文章用的是 faster-rcnn 模型，本文改用 ssd 模型，CPU 占用率更低，画面更加流畅。

代码

# -*- mode: python -*-
import pyrealsense2 as rs
import numpy as np
import cv2
import tensorflow as tf
import os
import _thread
from pydub import AudioSegment
from pydub.playback import play
#os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # 使用cpu

# gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.33)   # 限制gpu占用
# sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))

def audio():
    """Play the warning sound.

    Decodes ``warning.wav`` (a relative path, so it is resolved against the
    current working directory) with pydub and hands the resulting segment to
    ``pydub.playback.play``. Takes no arguments and returns ``None``.
    """
    # from_wav is the loader for .wav input.
    play(AudioSegment.from_wav(r"warning.wav"))

# Frame size requested for both streams (1280 x 720 is the noted alternative).
W, H = 640, 480

# Configure depth and color streams: 16-bit depth and BGR color, both at 30 FPS.
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.depth, W, H, rs.format.z16, 30)
config.enable_stream(rs.stream.color, W, H, rs.format.bgr8, 30)

print("[INFO] start streaming...")
pipeline.start(config)

# Alignment object that maps depth frames onto the color stream.
aligned_stream = rs.align(rs.stream.color)
point_cloud = rs.pointcloud()

print("[INFO] loading model...")
PATH_TO_CKPT = "./frozen_inference_graph.pb"
# download model from: https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API#run-network-in-opencv


def _load_frozen_graph(pb_path):
    """Import the frozen graph stored at *pb_path* and open a session on it.

    Returns a ``(graph, session)`` pair; the session is bound to the returned
    graph.
    """
    graph = tf.Graph()
    with graph.as_default():
        graph_def = tf.compat.v1.GraphDef()
        with tf.compat.v1.gfile.GFile(pb_path, 'rb') as pb_file:
            graph_def.ParseFromString(pb_file.read())
        # name='' imports the nodes without a prefix, so tensors keep the
        # names looked up below.
        tf.compat.v1.import_graph_def(graph_def, name='')
        session = tf.compat.v1.Session(graph=graph)
    return graph, session


# Load the Tensorflow model into memory.
detection_graph, sess = _load_frozen_graph(PATH_TO_CKPT)

# Tensor handles used for inference:
#   image_tensor       - input image batch
#   detection_boxes    - one box per detected object
#   detection_scores   - confidence of each detection
#   detection_classes  - class id of each detection
#   num_detections     - number of objects detected
(image_tensor,
 detection_boxes,
 detection_scores,
 detection_classes,
 num_detections) = (
    detection_graph.get_tensor_by_name(tensor_name)
    for tensor_name in (
        'image_tensor:0',
        'detection_boxes:0',
        'detection_scores:0',
        'detection_classes:0',
        'num_detections:0',
    )
)
# code source of tensorflow model loading: https://www.geeksforgeeks.org/ml-training-image-classifier-using-tensorflow-object-detection-api/

# ---------------------------------------------------------------------------
# Main loop: grab aligned frames, detect people, measure their distance at the
# centre of the bounding box, annotate the preview and raise an audible alarm.
# ---------------------------------------------------------------------------

# A person at most this many metres away triggers the alarm.
ALARM_DISTANCE_M = 10
# The alarm is evaluated on the first frame and then on every 45th frame.
ALARM_FRAME_INTERVAL = 45
# Class id treated as "person"; change it to react to another class.
PERSON_CLASS_ID = 1
# Detections must score strictly above this value to be used.
MIN_SCORE = 0.8

frame_count = 0

# Create the preview window once instead of on every frame.
cv2.namedWindow('RealSense', cv2.WINDOW_AUTOSIZE)

# try/finally guarantees the camera is released and the window closed even if
# the loop exits through an exception or Ctrl-C.
try:
    while True:
        frame_count += 1
        frames = pipeline.wait_for_frames()
        frames = aligned_stream.process(frames)  # depth aligned to color
        depth_frame = frames.get_depth_frame()
        color_frame = frames.get_color_frame()

        # Both frames are required. Validate them BEFORE reading their
        # profiles; the profile lookups below need real frames.
        if not depth_frame or not color_frame:
            continue

        # Color-camera intrinsics: principal point (ppx, ppy) and focal
        # lengths (fx, fy), used to back-project a pixel to camera space.
        color_intrin = rs.video_stream_profile(color_frame.get_profile()).get_intrinsics()
        ppx, ppy = color_intrin.ppx, color_intrin.ppy
        fx, fy = color_intrin.fx, color_intrin.fy

        # Convert the color frame to a numpy array and run the detector.
        # NOTE(review): the stream is requested as bgr8 and fed to the model
        # unchanged; confirm whether the model expects RGB input.
        color_image = np.asanyarray(color_frame.get_data())
        image_expanded = np.expand_dims(color_image, axis=0)
        (boxes, scores, classes, num) = sess.run(
            [detection_boxes, detection_scores, detection_classes, num_detections],
            feed_dict={image_tensor: image_expanded})

        boxes = np.squeeze(boxes)
        classes = np.squeeze(classes).astype(np.int32)
        scores = np.squeeze(scores)
        # Squeeze first: int() on a 1-element (non 0-d) array is deprecated
        # in recent NumPy releases.
        detection_count = int(np.squeeze(num))

        print("[INFO] drawing bounding box on detected objects...")
        print("[INFO] each detected object has a unique color")

        alarm_due = (frame_count == 1) or (frame_count % ALARM_FRAME_INTERVAL == 0)
        person_in_range = False

        for idx in range(detection_count):
            class_ = classes[idx]
            score = scores[idx]
            box = boxes[idx]
            print(" [DEBUG] class : ", class_, "idx : ", idx, "num : ", num)

            if not (score > MIN_SCORE and class_ == PERSON_CLASS_ID):
                continue

            # The code treats box as (top, left, bottom, right) fractions of
            # the frame and scales them to pixels.
            left = box[1] * W
            top = box[0] * H
            right = box[3] * W
            bottom = box[2] * H

            bbox = (int(left), int(top), int(right - left), int(bottom - top))
            p1 = (bbox[0], bbox[1])                      # top-left corner (origin is top-left)
            p2 = (bbox[0] + bbox[2], bbox[1] + bbox[3])  # bottom-right corner
            print('p1和p2分别为:', p1, p2)
            # draw box
            cv2.rectangle(color_image, p1, p2, (255, 0, 0), 2, 1)

            # Centre pixel of the box, clamped into the frame so that a box
            # touching the right/bottom edge cannot index outside the depth
            # image.
            center_x = int(round((p2[0] - p1[0]) / 2) + p1[0])
            center_y = int(round((p2[1] - p1[1]) / 2) + p1[1])
            center_x = min(max(center_x, 0), W - 1)
            center_y = min(max(center_y, 0), H - 1)
            target_xy_pixel = [center_x, center_y]
            target_depth = depth_frame.get_distance(center_x, center_y)

            # Pinhole back-projection of the centre pixel, in metres.
            target_xy_true = [(center_x - ppx) * target_depth / fx,
                              (center_y - ppy) * target_depth / fy]
            # Report the class of THIS detection (previously classes[0]).
            print('识别出目标：{} 中心点像素坐标：({}, {}) 实际坐标(mm)：（{:.3f}，{:.3f}） 深度(m)：{:.3f}'.format(
                class_,
                target_xy_pixel[0],
                target_xy_pixel[1],
                target_xy_true[0] * 1000,
                -target_xy_true[1] * 1000,
                target_depth))

            # Overlay the distance (rounded to millimetres) inside the box.
            target_depth = float("{:.3f}".format(target_depth))
            depth_txt = str(target_depth) + 'm'
            cv2.putText(color_image, depth_txt,
                        (p1[0], p1[1] + 20),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1,                # font scale
                        (255, 255, 255),  # text color
                        2)                # 7th positional argument of putText

            # A reading of exactly 0 is treated as "no valid depth" rather
            # than "person at 0 m", so it must not trigger the alarm.
            if 0 < target_depth <= ALARM_DISTANCE_M:
                person_in_range = True

        # Start at most one alarm per evaluated frame, however many people
        # are in range, so that playbacks do not overlap within a frame.
        if alarm_due and person_in_range:
            _thread.start_new_thread(audio, ())
            print("有人进入危险区域！！！")

        # Show images
        cv2.imshow('RealSense', color_image)
        if cv2.waitKey(1) & 0xFF == ord('q'):  # press q to quit
            break
finally:
    # Stop streaming
    pipeline.stop()
    cv2.destroyAllWindows()