语音报警 + RealSense相机目标测距 + SSD识别算法 + PyInstaller打包
简介
1.目标: 用RealSense相机实现目标测距,并在目标进入设定距离范围时报警。本文实现的是检测到人后对其测距,当人位于10m范围内时播放报警语音。
关于目标识别测距详细信息见前一篇文章:link
2.制作语音文件: 使用在线文字转语音工具 http://app.xunjiepdf.com/text2voice 制作报警语音(代码中读取的文件名为 warning.wav)。
3.载入模型: 上一篇文章使用的是Faster R-CNN模型,本文改用SSD模型,CPU占用率更低,画面更加流畅。
代码
# -*- mode: python -*-
import pyrealsense2 as rs
import numpy as np
import cv2
import tensorflow as tf
import os
import _thread
from pydub import AudioSegment
from pydub.playback import play
#os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # 使用cpu
# gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.33) # 限制gpu占用
# sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
def audio():
    """Load the warning clip from ``warning.wav`` and play it."""
    # ``from_wav`` is the pydub loader to use for .wav files.
    warning_clip = AudioSegment.from_wav(r"warning.wav")
    play(warning_clip)
# Resolution requested for both the depth and the color stream
# (the original author noted 1280 x 720 as the alternative).
W = 640
H = 480

# Configure depth and color streams
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.depth, W, H, rs.format.z16, 30)
config.enable_stream(rs.stream.color, W, H, rs.format.bgr8, 30)

print("[INFO] start streaming...")
pipeline.start(config)

# Alignment object: depth frames are aligned to the color stream.
aligned_stream = rs.align(rs.stream.color)
point_cloud = rs.pointcloud()

print("[INFO] loading model...")
PATH_TO_CKPT = "./frozen_inference_graph.pb"
# download model from: https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API#run-network-in-opencv

# Load the Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.compat.v1.GraphDef()
    with tf.compat.v1.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.compat.v1.import_graph_def(od_graph_def, name='')
sess = tf.compat.v1.Session(graph=detection_graph)

# Input tensor is the image
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

# Output tensors, looked up in this order:
#   detection_boxes   - each box represents a part of the image where a
#                       particular object was detected
#   detection_scores  - level of confidence for each of the objects
#   detection_classes - class id of each detected object
#   num_detections    - number of objects detected
detection_boxes, detection_scores, detection_classes, num_detections = [
    detection_graph.get_tensor_by_name(tensor_name)
    for tensor_name in (
        'detection_boxes:0',
        'detection_scores:0',
        'detection_classes:0',
        'num_detections:0',
    )
]
# code source of tensorflow model loading: https://www.geeksforgeeks.org/ml-training-image-classifier-using-tensorflow-object-detection-api/
# Main loop: grab aligned depth/color frames, run the detector on the color
# image, annotate every confidently detected person with the distance measured
# at the centre of its bounding box, and start the audio warning when a person
# is within 10 m. Press "q" in the preview window to quit.
a = 0  # frame counter; used to rate-limit the alarm
cv2.namedWindow('RealSense', cv2.WINDOW_AUTOSIZE)
try:
    while True:
        a += 1
        frames = pipeline.wait_for_frames()
        frames = aligned_stream.process(frames)  # aligned frame set
        depth_frame = frames.get_depth_frame()   # aligned depth frame
        color_frame = frames.get_color_frame()   # aligned color frame
        # Both frames are required; check BEFORE touching either of them.
        if not depth_frame or not color_frame:
            continue

        # Color-stream intrinsics, used to turn the centre pixel plus its
        # depth into camera-space x/y coordinates.
        color_intrin = rs.video_stream_profile(
            color_frame.get_profile()).get_intrinsics()
        ppx, ppy = color_intrin.ppx, color_intrin.ppy
        fx, fy = color_intrin.fx, color_intrin.fy

        # Convert images to numpy arrays
        color_image = np.asanyarray(color_frame.get_data())
        # The color stream is configured as bgr8, while the TensorFlow
        # detection graph takes RGB input, so swap the channels for inference
        # only; drawing and display keep using the BGR image.
        rgb_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
        image_expanded = np.expand_dims(rgb_image, axis=0)
        (boxes, scores, classes, num) = sess.run(
            [detection_boxes, detection_scores, detection_classes, num_detections],
            feed_dict={image_tensor: image_expanded})
        boxes = np.squeeze(boxes)
        classes = np.squeeze(classes).astype(np.int32)
        scores = np.squeeze(scores)
        # ``num`` comes back as an array; squeeze it before converting so the
        # conversion does not rely on implicit array-to-scalar coercion.
        detection_count = int(np.squeeze(num))

        print("[INFO] drawing bounding box on detected objects...")
        print("[INFO] each detected object has a unique color")

        # The alarm is only considered on the first frame and every 45th one.
        alarm_due = (a == 1) or (a % 45 == 0)
        alarm_raised = False  # at most one playback thread per frame

        for idx in range(detection_count):
            class_ = classes[idx]
            score = scores[idx]
            box = boxes[idx]
            print(" [DEBUG] class : ", class_, "idx : ", idx, "num : ", num)
            # Class 1 is "person"; change the id to detect something else.
            if score <= 0.8 or class_ != 1:
                continue

            # Boxes are normalised [ymin, xmin, ymax, xmax]; scale to pixels.
            left = box[1] * W
            top = box[0] * H
            width = box[3] * W - left
            height = box[2] * H - top
            p1 = (int(left), int(top))  # top-left corner (origin is top-left)
            p2 = (p1[0] + int(width), p1[1] + int(height))  # bottom-right
            print('p1和p2分别为:', p1, p2)
            # draw box
            cv2.rectangle(color_image, p1, p2, (255, 0, 0), 2, 1)

            # Centre of the box, clamped to the frame so the depth lookup can
            # never be asked for a pixel outside the image.
            center_x = int(round((p2[0] - p1[0]) / 2) + p1[0])
            center_y = int(round((p2[1] - p1[1]) / 2) + p1[1])
            center_x = min(max(center_x, 0), W - 1)
            center_y = min(max(center_y, 0), H - 1)
            target_xy_pixel = [center_x, center_y]

            target_depth = depth_frame.get_distance(center_x, center_y)
            # Pinhole back-projection of the centre pixel at that depth.
            target_xy_true = [(center_x - ppx) * target_depth / fx,
                              (center_y - ppy) * target_depth / fy]
            # Report the class of THIS detection (not the first one).
            print('识别出目标:{} 中心点像素坐标:({}, {}) 实际坐标(mm):({:.3f},{:.3f}) 深度(m):{:.3f}'.format(
                class_,
                target_xy_pixel[0],
                target_xy_pixel[1],
                target_xy_true[0] * 1000,
                -target_xy_true[1] * 1000,
                target_depth))

            target_depth = round(target_depth, 3)
            depth_txt = str(target_depth) + 'm'
            text_thickness = 2
            cv2.putText(color_image, depth_txt,
                        (p1[0], p1[1] + 20),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1,
                        (255, 255, 255),
                        text_thickness)

            # NOTE(review): a depth of 0 (presumably "no depth data" for that
            # pixel) also satisfies this check and triggers the alarm —
            # confirm whether that is the desired fail-safe behaviour.
            if alarm_due and not alarm_raised and target_depth <= 10:
                # Play the warning on a separate thread so the video loop
                # keeps running while the audio plays.
                _thread.start_new_thread(audio, ())
                print("有人进入危险区域!!!")
                alarm_raised = True

        # Show images
        cv2.imshow('RealSense', color_image)
        if cv2.waitKey(1) & 0xFF == ord('q'):  # press "q" to quit
            break
finally:
    # Stop streaming and close the preview window even if the loop raised.
    pipeline.stop()
    cv2.destroyAllWindows()
pyinstaller打包成可执行文件
- 在终端输入pyinstaller -F detect.py
- 然后将语音文件(warning.wav)和模型文件(frozen_inference_graph.pb)一起放入生成的dist文件夹里即可(程序按相对路径读取这两个文件)