For more maker projects, please follow the author's website 园丁鸟, which collects the most creative and valuable maker projects from around the world.
Building on the face detection and the feature extraction and tracking of the previous two sections, we now go one step further and track a face with the robot: when the face moves, the robot turns so that the face stays in the centre of the image window. The basic flow is as follows:
Face detection: already covered in lesson 16, so it is reused directly here.
Feature extraction: implemented in lesson 17, but to get good results the feature points need some compensation and the noise points have to be removed.
Face tracking: based on the position of the ROI, work out how the robot has to move to keep following the face (a short sketch of this idea follows the list).
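The heart of the last step is a simple proportional controller: measure how far the ROI centre is from the image centre and turn toward the face. Below is a minimal sketch of that idea; the names roi, image_width, x_threshold, x_scale and the speed limits are placeholders here, and the real implementation is in object_tracker2.py in section 2.

from math import copysign

# Proportional control sketch: turn so the face stays centred in the image.
offset_x = roi.x_offset + roi.width / 2.0 - image_width / 2.0   # pixel offset from centre
percent_offset_x = offset_x / (image_width / 2.0)               # normalised to [-1, 1]
if abs(percent_offset_x) > x_threshold:                         # ignore small errors
    speed = percent_offset_x * x_scale
    twist.angular.z = -copysign(max(min_rotation_speed,
                                    min(max_rotation_speed, abs(speed))), speed)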
1. Face detection and feature extraction
The source file face_tracker2.py is available on GitHub; the code has been tested in an OpenCV 3 environment.
FaceTracker inherits from the FaceDetector and LKTracker classes and makes the following main modifications and extensions:
It overrides the process_image function.
The add_keypoints function finds new feature points: it calls goodFeaturesToTrack and checks the distance of each candidate to the current keypoint cluster, so that only valid new feature points are added.
The drop_keypoints function uses a clustering-based test to remove invalid feature points.
1.1 Source code
#!/usr/bin/env python
import roslib
import rospy
import cv2
import numpy as np
from math import isnan
from face_detector import FaceDetector
from lk_tracker import LKTracker
class FaceTracker(FaceDetector, LKTracker):
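    # FaceTracker combines Haar-cascade face detection (FaceDetector) with
    # Lucas-Kanade optical-flow keypoint tracking (LKTracker).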
def __init__(self, node_name):
super(FaceTracker, self).__init__(node_name)
self.n_faces = rospy.get_param("~n_faces", 1)
self.show_text = rospy.get_param("~show_text", True)
self.show_add_drop = rospy.get_param("~show_add_drop", False)
self.feature_size = rospy.get_param("~feature_size", 1)
self.use_depth_for_tracking = rospy.get_param("~use_depth_for_tracking", False)
self.min_keypoints = rospy.get_param("~min_keypoints", 20)
self.abs_min_keypoints = rospy.get_param("~abs_min_keypoints", 6)
self.std_err_xy = rospy.get_param("~std_err_xy", 2.5)
self.pct_err_z = rospy.get_param("~pct_err_z", 0.42)
self.max_mse = rospy.get_param("~max_mse", 10000)
self.add_keypoint_distance = rospy.get_param("~add_keypoint_distance", 10)
self.add_keypoints_interval = rospy.get_param("~add_keypoints_interval", 1)
self.drop_keypoints_interval = rospy.get_param("~drop_keypoints_interval", 1)
self.expand_roi_init = rospy.get_param("~expand_roi", 1.02)
self.expand_roi = self.expand_roi_init
self.face_tracking = True
self.frame_index = 0
self.add_index = 0
self.drop_index = 0
self.keypoints = list()
self.detect_box = None
self.track_box = None
self.grey = None
self.prev_grey = None
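    # process_image: detect the face once, then track its keypoints with
    # optical flow, periodically dropping outliers and adding new keypoints;
    # pressing 'c' clears the track and forces a re-detection, 'd' toggles
    # the add/drop markers.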
def process_image(self, cv_image):
try:
# Create a greyscale version of the image
self.grey = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
# Equalize the grey histogram to minimize lighting effects
self.grey = cv2.equalizeHist(self.grey)
# Step 1: Detect the face if we haven't already
if self.detect_box is None:
self.keypoints = list()
self.track_box = None
self.detect_box = self.detect_face(self.grey)
else:
# Step 2: If we aren't yet tracking keypoints, get them now
if not self.track_box or not self.is_rect_nonzero(self.track_box):
self.track_box = self.detect_box
self.keypoints = self.get_keypoints(self.grey, self.track_box)
# Store a copy of the current grey image used for LK tracking
if self.prev_grey is None:
self.prev_grey = self.grey
# Step 3: If we have keypoints, track them using optical flow
self.track_box = self.track_keypoints(self.grey, self.prev_grey)
# Step 4: Drop keypoints that are too far from the main cluster
if self.frame_index % self.drop_keypoints_interval == 0 and len(self.keypoints) > 0:
((cog_x, cog_y, cog_z), mse_xy, mse_z, score) = self.drop_keypoints(self.abs_min_keypoints, self.std_err_xy, self.max_mse)
if score == -1:
self.detect_box = None
self.track_box = None
return cv_image
# Step 5: Add keypoints if the number is getting too low
if self.frame_index % self.add_keypoints_interval == 0 and len(self.keypoints) < self.min_keypoints:
self.expand_roi = self.expand_roi_init * self.expand_roi
self.add_keypoints(self.track_box)
else:
self.frame_index += 1
self.expand_roi = self.expand_roi_init
# Store a copy of the current grey image used for LK tracking
self.prev_grey = self.grey
# Process any special keyboard commands for this module
if 32 <= self.keystroke and self.keystroke < 128:
cc = chr(self.keystroke).lower()
if cc == 'c':
self.keypoints = []
self.track_box = None
self.detect_box = None
elif cc == 'd':
self.show_add_drop = not self.show_add_drop
except AttributeError:
pass
return cv_image
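    # add_keypoints: expand the current track box slightly, mask out the
    # existing keypoints, run goodFeaturesToTrack over the expanded ROI and
    # keep only the corners that are at least add_keypoint_distance away
    # from the current cluster.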
def add_keypoints(self, track_box):
# Look for any new keypoints around the current keypoints
# Begin with a mask of all black pixels
mask = np.zeros_like(self.grey)
# Get the coordinates and dimensions of the current track box
try:
((x,y), (w,h), a) = track_box
except:
try:
x,y,w,h = track_box
except:
rospy.loginfo("Track box has shrunk to zero...")
return
x = int(x)
y = int(y)
# Expand the track box to look for new keypoints
w_new = int(self.expand_roi * w)
h_new = int(self.expand_roi * h)
pt1 = (x - int(w_new / 2), y - int(h_new / 2))
pt2 = (x + int(w_new / 2), y + int(h_new / 2))
mask_box = ((x, y), (w_new, h_new), a)
# Display the expanded ROI with a yellow rectangle
if self.show_add_drop:
cv2.rectangle(self.marker_image, pt1, pt2, (255, 255, 0))
# Create a filled white ellipse within the track_box to define the ROI
cv2.ellipse(mask, mask_box, (255,255, 255), cv2.FILLED)
if self.keypoints is not None:
# Mask the current keypoints
for x, y in [np.int32(p) for p in self.keypoints]:
cv2.circle(mask, (x, y), 5, 0, -1)
new_keypoints = cv2.goodFeaturesToTrack(self.grey, mask = mask, **self.gf_params)
# Append new keypoints to the current list if they are not
# too far from the current cluster
if new_keypoints is not None:
for x, y in np.float32(new_keypoints).reshape(-1, 2):
distance = self.distance_to_cluster((x,y), self.keypoints)
if distance > self.add_keypoint_distance:
self.keypoints.append((x,y))
# Briefly display a blue disc where the new point is added
if self.show_add_drop:
                        cv2.circle(self.marker_image, (int(x), int(y)), 3, (255, 255, 0, 0), cv2.FILLED, 2, 0)
# Remove duplicate keypoints
self.keypoints = list(set(self.keypoints))
def distance_to_cluster(self, test_point, cluster):
min_distance = 10000
for point in cluster:
if point == test_point:
continue
# Use L1 distance since it is faster than L2
distance = abs(test_point[0] - point[0]) + abs(test_point[1] - point[1])
if distance < min_distance:
min_distance = distance
return min_distance
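    # drop_keypoints: compute the centre of gravity and mean squared error of
    # the keypoint cluster, remove x-y outliers whose normalised error exceeds
    # outlier_threshold (and, when depth is used, points whose depth differs
    # from the mean by more than pct_err_z), and return score = -1 when the
    # cluster is no longer usable so that process_image can re-detect the face.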
def drop_keypoints(self, min_keypoints, outlier_threshold, mse_threshold):
sum_x = 0
sum_y = 0
sum_z = 0
sse = 0
        # Work on copies so that removing outliers does not disturb the
        # iteration over self.keypoints below
        keypoints_xy = list(self.keypoints)
        keypoints_z = list(self.keypoints)
n_xy = len(self.keypoints)
n_z = n_xy
if self.use_depth_for_tracking:
if self.depth_image is None:
return ((0, 0, 0), 0, 0, -1)
# If there are no keypoints left to track, start over
if n_xy == 0:
return ((0, 0, 0), 0, 0, -1)
# Compute the COG (center of gravity) of the cluster
for point in self.keypoints:
sum_x = sum_x + point[0]
sum_y = sum_y + point[1]
mean_x = sum_x / n_xy
mean_y = sum_y / n_xy
if self.use_depth_for_tracking:
for point in self.keypoints:
try:
                    # With cv_bridge the depth image is a NumPy array, so index it directly
                    z = self.depth_image[min(self.frame_height - 1, int(point[1])), min(self.frame_width - 1, int(point[0]))]
                except:
                    continue
# Depth values can be NaN which should be ignored
if isnan(z):
continue
else:
sum_z = sum_z + z
mean_z = sum_z / n_z
else:
mean_z = -1
# Compute the x-y MSE (mean squared error) of the cluster in the camera plane
for point in self.keypoints:
sse = sse + (point[0] - mean_x) * (point[0] - mean_x) + (point[1] - mean_y) * (point[1] - mean_y)
#sse = sse + abs((point[0] - mean_x)) + abs((point[1] - mean_y))
# Get the average over the number of feature points
mse_xy = sse / n_xy
# The MSE must be > 0 for any sensible feature cluster
if mse_xy == 0 or mse_xy > mse_threshold:
return ((0, 0, 0), 0, 0, -1)
# Throw away the outliers based on the x-y variance
max_err = 0
for point in self.keypoints:
std_err = ((point[0] - mean_x) * (point[0] - mean_x) + (point[1] - mean_y) * (point[1] - mean_y)) / mse_xy
if std_err > max_err:
max_err = std_err
if std_err > outlier_threshold:
keypoints_xy.remove(point)
if self.show_add_drop:
# Briefly mark the removed points in red
                    cv2.circle(self.marker_image, (int(point[0]), int(point[1])), 3, (0, 0, 255), cv2.FILLED, 2, 0)
try:
keypoints_z.remove(point)
n_z = n_z - 1
except:
pass
n_xy = n_xy - 1
# Now do the same for depth
if self.use_depth_for_tracking:
sse = 0
for point in keypoints_z:
try:
                    z = self.depth_image[min(self.frame_height - 1, int(point[1])), min(self.frame_width - 1, int(point[0]))]
sse = sse + (z - mean_z) * (z - mean_z)
except:
n_z = n_z - 1
if n_z != 0:
mse_z = sse / n_z
else:
mse_z = 0
# Throw away the outliers based on depth using percent error
# rather than standard error since depth values can jump
# dramatically at object boundaries
for point in keypoints_z:
try:
                    z = self.depth_image[min(self.frame_height - 1, int(point[1])), min(self.frame_width - 1, int(point[0]))]
except:
continue
try:
pct_err = abs(z - mean_z) / mean_z
if pct_err > self.pct_err_z:
keypoints_xy.remove(point)
if self.show_add_drop:
# Briefly mark the removed points in red
                            cv2.circle(self.marker_image, (int(point[0]), int(point[1])), 2, (0, 0, 255), cv2.FILLED)
except:
pass
else:
mse_z = -1
self.keypoints = keypoints_xy
# Consider a cluster bad if we have fewer than min_keypoints left
if len(self.keypoints) < min_keypoints:
score = -1
else:
score = 1
return ((mean_x, mean_y, mean_z), mse_xy, mse_z, score)
if __name__ == '__main__':
try:
node_name = "face_tracker"
FaceTracker(node_name)
rospy.spin()
except KeyboardInterrupt:
print "Shutting down face tracker node."
cv2.destroyAllWindows()
1.2 Launch file
<launch>
<node pkg="diego_vision" name="face_tracker2" type="face_tracker2.py" output="screen">
<remap from="input_rgb_image" to="/camera/rgb/image_color" />
<remap from="input_depth_image" to="/camera/depth/image" />
<rosparam>
use_depth_for_tracking: True
min_keypoints: 20
abs_min_keypoints: 6
add_keypoint_distance: 10
std_err_xy: 2.5
pct_err_z: 0.42
max_mse: 10000
add_keypoints_interval: 1
drop_keypoints_interval: 1
show_text: True
show_features: True
show_add_drop: False
feature_size: 1
expand_roi: 1.02
gf_maxCorners: 200
gf_qualityLevel: 0.02
gf_minDistance: 7
gf_blockSize: 10
gf_useHarrisDetector: False
gf_k: 0.04
haar_scaleFactor: 1.3
haar_minNeighbors: 3
haar_minSize: 30
haar_maxSize: 150
</rosparam>
<param name="cascade_1" value="$(find diego_vision)/data/haar_detectors/haarcascade_frontalface_alt2.xml" />
<param name="cascade_2" value="$(find diego_vision)/data/haar_detectors/haarcascade_frontalface_alt.xml" />
<param name="cascade_3" value="$(find diego_vision)/data/haar_detectors/haarcascade_profileface.xml" />
</node>
</launch>
Diego1 plus uses a depth camera, so the use_depth_for_tracking parameter is set to True; in practice the depth camera gives clearly better results. Now start the openni node and the face_tracker2.py node in two separate terminals. A video window appears, and any face in front of the camera is captured and marked, with quite good results. Somewhat unexpectedly, though, even with the Diego1 plus hardware the face detection still feels slightly laggy.
roslaunch diego_vision openni_node.launch
roslaunch diego_vision face_tracker2.launch
2. Face tracking
The source file object_tracker2.py is available on GitHub; the code has been tested in an OpenCV 3 environment.
The main functions of the ObjectTracker class are:
It subscribes to the ROI message published by the FaceTracker class, which describes the position of the detected face; ObjectTracker uses it to work out where the face is in the frame.
It subscribes to the /camera/depth/image message to work out how far the face is from the camera, so that the robot can follow it.
It publishes Twist messages to control the robot's movement.
2.1 Source code
#!/usr/bin/env python
import roslib
import rospy
from sensor_msgs.msg import Image, RegionOfInterest, CameraInfo
from geometry_msgs.msg import Twist
from math import copysign, isnan
from cv_bridge import CvBridge, CvBridgeError
import numpy as np
class ObjectTracker():
def __init__(self):
rospy.init_node("object_tracker")
# Set the shutdown function (stop the robot)
rospy.on_shutdown(self.shutdown)
# How often should we update the robot's motion?
self.rate = rospy.get_param("~rate", 10)
r = rospy.Rate(self.rate)
# The maximum rotation speed in radians per second
self.max_rotation_speed = rospy.get_param("~max_rotation_speed", 2.0)
# The minimum rotation speed in radians per second
self.min_rotation_speed = rospy.get_param("~min_rotation_speed", 0.5)
# The x threshold (% of image width) indicates how far off-center
# the ROI needs to be in the x-direction before we react
self.x_threshold = rospy.get_param("~x_threshold", 0.1)
# The maximum distance a target can be from the robot for us to track
self.max_z = rospy.get_param("~max_z", 2.0)
# Initialize the global ROI
self.roi = RegionOfInterest()
# The goal distance (in meters) to keep between the robot and the person
self.goal_z = rospy.get_param("~goal_z", 0.6)
# How far away from the goal distance (in meters) before the robot reacts
self.z_threshold = rospy.get_param("~z_threshold", 0.05)
# How much do we weight the goal distance (z) when making a movement
self.z_scale = rospy.get_param("~z_scale", 1.0)
# How much do we weight x-displacement of the person when making a movement
self.x_scale = rospy.get_param("~x_scale", 2.0)
# The max linear speed in meters per second
self.max_linear_speed = rospy.get_param("~max_linear_speed", 0.3)
# The minimum linear speed in meters per second
self.min_linear_speed = rospy.get_param("~min_linear_speed", 0.1)
# Publisher to control the robot's movement
        self.cmd_vel_pub = rospy.Publisher('cmd_vel', Twist, queue_size=5)
# Intialize the movement command
self.move_cmd = Twist()
# We will get the image width and height from the camera_info topic
self.image_width = 0
self.image_height = 0
# We need cv_bridge to convert the ROS depth image to an OpenCV array
self.cv_bridge = CvBridge()
self.depth_array = None
# Set flag to indicate when the ROI stops updating
self.target_visible = False
# Wait for the camera_info topic to become available
rospy.loginfo("Waiting for camera_info topic...")
rospy.wait_for_message('camera_info', CameraInfo)
# Subscribe to the camera_info topic to get the image width and height
rospy.Subscriber('camera_info', CameraInfo, self.get_camera_info)
# Wait until we actually have the camera data
while self.image_width == 0 or self.image_height == 0:
rospy.sleep(1)
# Subscribe to the registered depth image
rospy.Subscriber("depth_image", Image, self.convert_depth_image)
# Wait for the depth image to become available
rospy.wait_for_message('depth_image', Image)
# Subscribe to the ROI topic and set the callback to update the robot's motion
rospy.Subscriber('roi', RegionOfInterest, self.set_cmd_vel)
# Wait until we have an ROI to follow
rospy.loginfo("Waiting for an ROI to track...")
rospy.wait_for_message('roi', RegionOfInterest)
rospy.loginfo("ROI messages detected. Starting tracker...")
# Begin the tracking loop
while not rospy.is_shutdown():
# If the target is not visible, stop the robot
if not self.target_visible:
self.move_cmd = Twist()
else:
# Reset the flag to False by default
self.target_visible = False
# Send the Twist command to the robot
self.cmd_vel_pub.publish(self.move_cmd)
# Sleep for 1/self.rate seconds
r.sleep()
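    # set_cmd_vel: callback for every ROI message; computes how far the ROI
    # centre is from the image centre (to set the rotation speed) and the
    # average depth over the ROI (to set the forward/backward speed), and
    # stores the result in self.move_cmd, which the main loop publishes on cmd_vel.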
def set_cmd_vel(self, msg):
# If the ROI has a width or height of 0, we have lost the target
if msg.width == 0 or msg.height == 0:
return
# If the ROI stops updating this next statement will not happen
self.target_visible = True
self.roi = msg
# Compute the displacement of the ROI from the center of the image
target_offset_x = msg.x_offset + msg.width / 2 - self.image_width / 2
try:
percent_offset_x = float(target_offset_x) / (float(self.image_width) / 2.0)
except:
percent_offset_x = 0
# Intialize the movement command
self.move_cmd = Twist()
# Rotate the robot only if the displacement of the target exceeds the threshold
if abs(percent_offset_x) > self.x_threshold:
# Set the rotation speed proportional to the displacement of the target
try:
speed = percent_offset_x * self.x_scale
self.move_cmd.angular.z = -copysign(max(self.min_rotation_speed,
min(self.max_rotation_speed, abs(speed))), speed)
except:
pass
# Now compute the depth component
n_z = sum_z = mean_z = 0
# Get the min/max x and y values from the ROI
min_x = self.roi.x_offset
max_x = min_x + self.roi.width
min_y = self.roi.y_offset
max_y = min_y + self.roi.height
# Get the average depth value over the ROI
for x in range(min_x, max_x):
for y in range(min_y, max_y):
try:
z = self.depth_array[y, x]
except:
continue
# Depth values can be NaN which should be ignored
if isnan(z) or z > self.max_z:
continue
else:
sum_z = sum_z + z
n_z += 1
try:
mean_z = sum_z / n_z
if mean_z < self.max_z and (abs(mean_z - self.goal_z) > self.z_threshold):
speed = (mean_z - self.goal_z) * self.z_scale
self.move_cmd.linear.x = copysign(min(self.max_linear_speed, max(self.min_linear_speed, abs(speed))), speed)
except:
pass
def convert_depth_image(self, ros_image):
# Use cv_bridge() to convert the ROS image to OpenCV format
try:
# The depth image is a single-channel float32 image
depth_image = self.cv_bridge.imgmsg_to_cv2(ros_image, "32FC1")
except CvBridgeError, e:
print e
# Convert the depth image to a Numpy array
self.depth_array = np.array(depth_image, dtype=np.float32)
def get_camera_info(self, msg):
self.image_width = msg.width
self.image_height = msg.height
def shutdown(self):
rospy.loginfo("Stopping the robot...")
self.cmd_vel_pub.publish(Twist())
rospy.sleep(1)
if __name__ == '__main__':
try:
ObjectTracker()
rospy.spin()
except rospy.ROSInterruptException:
rospy.loginfo("Object tracking node terminated.")
2.2 Launch file
<launch>
<param name="/camera/driver/depth_registration" value="True" />
<node pkg="diego_vision" name="object_tracker" type="object_tracker2.py" output="screen">
<remap from="camera_info" to="/camera/depth/camera_info" />
<remap from="depth_image" to="/camera/depth/image" />
<rosparam>
rate: 10
max_z: 2.0
goal_z: 0.6
z_threshold: 0.05
x_threshold: 0.1
z_scale: 1.0
x_scale: 0.1
max_rotation_speed: 0.2
min_rotation_speed: 0.02
max_linear_speed: 0.2
min_linear_speed: 0.05
</rosparam>
</node>
</launch>
With the face_tracker2.py node already running, run the following command to start the object_tracker2.py node; the robot will then follow the detected face as it moves.
roslaunch diego_vision object_tracker2.launch
One point deserves special mention: the speed-related parameters in the launch file must not be set too high. If they are, the robot keeps swinging back and forth as it tracks the face. The cause is the lag in image processing: the speed multiplied by the lag time makes the robot move past the intended position, so it ends up correcting itself over and over.
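To put rough numbers on this (the 0.5 s latency is only an assumed figure for illustration): with max_rotation_speed set to 0.2 rad/s as in the launch file above, an end-to-end image-processing lag of about 0.5 s means the robot keeps turning roughly 0.2 × 0.5 = 0.1 rad (about 6°) after the face is already centred, which can be enough to push the face past x_threshold on the other side and trigger the next correction.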