KeypointExtractUsingDlib
Extract the key points from a face point cloud.
使用Intel Realsense相机提取出包含人脸的点云中人脸的关键点,使用了dlib库对点云中的人脸进行识别。
识别点数为68点,也可使用5点,但需更改部分代码。
有些代码直接使用了librealsense的dlib示例。
github链接:https://github.com/binfenseca2969/KeypointExtractUsingDlib
参考代码
librealsense的dlib示例:https://github.com/IntelRealSense/librealsense/tree/master/wrappers/dlib
利用IntelRealSense D435i 提取一帧pcl::PointXYZRGB图像(C++):https://blog.csdn.net/m0_56838271/article/details/121157332
代码环境
Windows 10 22H2,PCL-1.12.0,librealsense-2.53.1,dlib-19.24,使用Visual Studio 2019,RelWithDebInfo,CMake-3.20,使用的Realsense相机为D405。
附件下载
shape_predictor_68_face_landmarks.dat下载网址:http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
结果
平板找的Lena图,红色为识别出的关键点,真人也可以(但是不想放自己的照片……)。
代码
CMakeLists.txt
# CMakeLists.txt: CMake project for KeypointExtractUsingDlibNew; add sources
# and define project-specific logic here.
#
cmake_minimum_required (VERSION 3.12)
project ("KeypointExtractUsingDlib")
# REQUIRED already makes find_package() stop with a fatal error when the
# package cannot be located, so explicit NOT-found checks are redundant.
find_package(realsense2 REQUIRED)
find_package(dlib REQUIRED)
find_package(PCL REQUIRED)
# Add the sources to this project's executable.
add_executable (KeypointExtractUsingDlib "KeypointExtractUsingDlibNew.cpp" "KeypointExtractUsingDlibNew.h" "rs_frame_image.h" "markup_68.h")
target_link_libraries(KeypointExtractUsingDlib PRIVATE
dlib::dlib ${realsense2_LIBRARY} ${PCL_LIBRARIES})
target_include_directories(KeypointExtractUsingDlib PRIVATE
${dlib_INCLUDE_DIRS} ${realsense2_INCLUDE_DIR} ${PCL_INCLUDE_DIRS})
# TODO: add tests and install targets if needed.
markup_68.h
// License: Apache 2.0. See LICENSE file in root directory.
// Copyright(c) 2019 Intel Corporation. All Rights Reserved.
#pragma once
/*
The 68-point annotations for the iBUG 300-W face landmark dataset.
See this picture:
https://ibug.doc.ic.ac.uk/media/uploads/images/annotpics/figure_68_markup.jpg
NOTE: the indexes in the picture are 1-based, so the actual C++ indexes are less 1.
NOTE: "Right" and "left" refer to the face being described, so are the mirror of the
side that an onlooker (from the front) would see.
*/
// 68 landmark indices for the iBUG 300-W annotation scheme (N_POINTS == 68).
// The *_FROM / *_TO enumerators are aliases that bracket each facial-feature
// group; their values depend on the exact declaration order below, so do not
// reorder entries.
enum markup_68
{
// Starting with right ear, the jaw [1-17]
RIGHT_EAR, JAW_FROM = RIGHT_EAR, RIGHT_JAW_FROM = RIGHT_EAR,
RIGHT_1, RIGHT_2, RIGHT_3, RIGHT_4, RIGHT_5, RIGHT_6, RIGHT_7, RIGHT_JAW_TO = RIGHT_7,
// CHIN_FROM/CHIN_TO deliberately reach one index either side of the chin point
CHIN, CHIN_FROM = CHIN - 1, CHIN_TO = CHIN + 1,
LEFT_7 = CHIN + 1, LEFT_JAW_FROM = LEFT_7, LEFT_6, LEFT_5, LEFT_4, LEFT_3, LEFT_2, LEFT_1,
LEFT_EAR, LEFT_JAW_TO = LEFT_EAR, JAW_TO = LEFT_EAR,
// Eyebrows [18-22] and [23-27]
RIGHT_EYEBROW_R, RIGHT_EYEBROW_FROM = RIGHT_EYEBROW_R, RIGHT_EYEBROW_1, RIGHT_EYEBROW_2, RIGHT_EYEBROW_3, RIGHT_EYEBROW_L, RIGHT_EYEBROW_TO = RIGHT_EYEBROW_L,
LEFT_EYEBROW_R, LEFT_EYEBROW_FROM = LEFT_EYEBROW_R, LEFT_EYEBROW_1, LEFT_EYEBROW_2, LEFT_EYEBROW_3, LEFT_EYEBROW_L, LEFT_EYEBROW_TO = LEFT_EYEBROW_L,
// Nose [28-36]
NOSE_RIDGE_TOP, NOSE_RIDGE_FROM = NOSE_RIDGE_TOP, NOSE_RIDGE_1, NOSE_RIDGE_2, NOSE_TIP, NOSE_RIDGE_TO = NOSE_TIP,
NOSE_BOTTOM_R, NOSE_BOTTOM_FROM = NOSE_BOTTOM_R, NOSE_BOTTOM_1, NOSE_BOTTOM_2, NOSE_BOTTOM_3, NOSE_BOTTOM_L, NOSE_BOTTOM_TO = NOSE_BOTTOM_L,
// Eyes [37-42] and [43-48]
RIGHT_EYE_R, RIGHT_EYE_FROM = RIGHT_EYE_R, RIGHT_EYE_1, RIGHT_EYE_2, RIGHT_EYE_L, RIGHT_EYE_4, RIGHT_EYE_5, RIGHT_EYE_TO = RIGHT_EYE_5,
LEFT_EYE_R, LEFT_EYE_FROM = LEFT_EYE_R, LEFT_EYE_1, LEFT_EYE_2, LEFT_EYE_L, LEFT_EYE_4, LEFT_EYE_5, LEFT_EYE_TO = LEFT_EYE_5,
// Mouth [49-68]
MOUTH_R, MOUTH_OUTER_R = MOUTH_R, MOUTH_OUTER_FROM = MOUTH_OUTER_R, MOUTH_OUTER_1, MOUTH_OUTER_2, MOUTH_OUTER_TOP, MOUTH_OUTER_4, MOUTH_OUTER_5,
MOUTH_L, MOUTH_OUTER_L = MOUTH_L, MOUTH_OUTER_7, MOUTH_OUTER_8, MOUTH_OUTER_BOTTOM, MOUTH_OUTER_10, MOUTH_OUTER_11, MOUTH_OUTER_TO = MOUTH_OUTER_11,
MOUTH_INNER_R, MOUTH_INNER_FROM = MOUTH_INNER_R, MOUTH_INNER_1, MOUTH_INNER_TOP, MOUTH_INNER_3,
MOUTH_INNER_L, MOUTH_INNER_5, MOUTH_INNER_BOTTOM, MOUTH_INNER_7, MOUTH_INNER_TO = MOUTH_INNER_7,
// One past the last landmark index — valid indices are [0, N_POINTS)
N_POINTS
};
rs_frame_image.h
// License: Apache 2.0. See LICENSE file in root directory.
// Copyright(c) 2019 Intel Corporation. All Rights Reserved.
#pragma once
/*
This encapsulates data needed for a dlib-compatible image that originates
in a librealsense color frame.
The data from the color frame is not copied, and so this object should not
live beyond the lifetime of the frame.
*/
template <
    typename pixel_type,  // The dlib pixel type, e.g. rgb_pixel
    int RS_FORMAT         // The corresponding RS2_FORMAT, e.g. RS2_FORMAT_RGB8
>
class rs_frame_image
{
    void * _data;             // points into the frame's buffer; not owned
    long _nr;                 // number of rows (frame height)
    long _nc;                 // number of columns (frame width)
    rs2::video_frame _frame;  // keeps the frame (and so _data) alive
public:
    /// Wrap a librealsense color frame as a dlib-compatible image.
    /// The pixel data is NOT copied; the held frame keeps it alive.
    /// @throws std::runtime_error if the frame format is not RS_FORMAT.
    rs_frame_image( rs2::video_frame const & f )
        : _data( const_cast< void * >( f.get_data() ))
        , _nc( f.get_width() )
        , _nr( f.get_height() )
        , _frame( f )
    {
        if( f.get_profile().format() != RS_FORMAT )
        {
            throw std::runtime_error( "unsupported Frame format" );
        }
    }
    rs_frame_image() : _data( nullptr ), _nr( 0 ), _nc( 0 ) {}

    size_t size() const { return static_cast<size_t>(_nr *_nc); }
    void * data() { return _data; }
    void const * data() const { return _data; }
    long nr() const { return _nr; }
    long nc() const { return _nc; }
    // Bytes per row; rows are assumed densely packed (no stride padding).
    long width_step() const { return _nc * sizeof( pixel_type ); }

    /// Return a pointer to the first pixel of the given row.
    pixel_type const * operator[]( unsigned const row ) const
    {
        // Arithmetic on void* is ill-formed in standard C++ (MSVC rejects it;
        // it only works as a GCC extension), so step through a char pointer.
        return reinterpret_cast< const pixel_type * >(
            static_cast< const char * >( _data ) + width_step() * row );
    }

    rs_frame_image & operator=( const rs_frame_image & ) = default;
};
/*!
In dlib, an "image" is any object that implements the generic image interface. In
particular, this simply means that an image type (let's refer to it as image_type
from here on) has the following seven global functions defined for it:
- long num_rows (const image_type& img)
- long num_columns (const image_type& img)
- void set_image_size( image_type& img, long rows, long cols)
- void* image_data ( image_type& img)
- const void* image_data (const image_type& img)
- long width_step (const image_type& img)
- void swap ( image_type& a, image_type& b)
And also provides a specialization of the image_traits template that looks like:
namespace dlib
{
template <>
struct image_traits<image_type>
{
typedef the_type_of_pixel_used_in_image_type pixel_type;
};
}
Additionally, an image object must be default constructable. This means that
expressions of the form:
image_type img;
Must be legal.
Finally, the type of pixel in image_type must have a pixel_traits specialization.
That is, pixel_traits<typename image_traits<image_type>::pixel_type> must be one of
the specializations of pixel_traits.
(see http://dlib.net/dlib/image_processing/generic_image.h.html)
*/
namespace dlib
{
// Define the global functions that make rs_frame_image a proper "generic image" according to
// $(DLIB_DIR)/image_processing/generic_image.h
// These are all template specializations for existing classes/structs/functions to adapt to the
// new class rs_frame_image that we defined above.
// NOTE(review): only the read-side functions (num_rows/num_columns/image_data/
// width_step) are provided here — set_image_size and swap are omitted, so
// rs_frame_image can only be used where dlib reads an image (e.g. detection),
// never as an output image.
template <typename T, int RS>
struct image_traits< rs_frame_image< T, RS > >
{
typedef T pixel_type;
};
// Row/column count, forwarded to the wrapped frame's dimensions.
template <typename T, int RS>
inline long num_rows( const rs_frame_image<T,RS>& img ) { return img.nr(); }
template <typename T, int RS>
inline long num_columns( const rs_frame_image<T,RS>& img ) { return img.nc(); }
// Raw pixel-buffer access (mutable and const overloads).
template <typename T, int RS>
inline void* image_data(
rs_frame_image<T,RS>& img
)
{
return img.data();
}
template <typename T, int RS>
inline const void* image_data(
const rs_frame_image<T,RS>& img
)
{
return img.data();
}
// Bytes from the start of one row to the start of the next.
template <typename T, int RS>
inline long width_step(
rs_frame_image< T, RS > const & img
)
{
return img.width_step();
}
}
KeypointExtractUsingDlibNew.h
// KeypointExtractUsingDlibNew.h: 标准系统包含文件的包含文件
// 或项目特定的包含文件。
#pragma once
#include <iostream>
// TODO: 在此处引用程序需要的其他标头。
//librealsense2
#include <librealsense2/rs.hpp>
//pcl
#include <pcl/point_types.h>
#include <pcl/filters/passthrough.h>
#include <pcl/visualization/pcl_visualizer.h>
#include <pcl/visualization/cloud_viewer.h>
#include <pcl/io/ply_io.h>
//dlib
#include <dlib/image_processing/frontal_face_detector.h>
// #include <dlib/image_processing/render_face_detections.h>
#include <dlib/image_processing.h>
//others
#include <algorithm>
#include <cstdint>
#include <tuple>
#include <vector>
#include "rs_frame_image.h"
#include "markup_68.h"
KeypointExtractUsingDlibNew.cpp
// KeypointExtractUsingDlibNew.cpp: 定义应用程序的入口点。
//
#include "KeypointExtractUsingDlibNew.h"
/// Sample the RGB color of the texture frame at the given normalized
/// texture coordinates (u, v in [0, 1]). Coordinates are clamped to the
/// frame bounds, so out-of-range texcoords return an edge pixel.
std::tuple<uint8_t, uint8_t, uint8_t> get_texcolor(rs2::video_frame texture, rs2::texture_coordinate texcoords)
{
    const int width = texture.get_width();
    const int height = texture.get_height();
    // Round to the nearest pixel, then clamp into [0, dim-1].
    const int col = std::min(std::max(int(texcoords.u * width + .5f), 0), width - 1);
    const int row = std::min(std::max(int(texcoords.v * height + .5f), 0), height - 1);
    const int offset = col * texture.get_bytes_per_pixel() + row * texture.get_stride_in_bytes();
    const auto bytes = reinterpret_cast<const uint8_t*>(texture.get_data());
    // Assumes an RGB8-style layout: three consecutive bytes per pixel.
    return std::make_tuple(bytes[offset], bytes[offset + 1], bytes[offset + 2]);
}
/// Convert a librealsense point cloud plus its mapped color frame into an
/// organized PCL XYZRGB cloud. The cloud keeps the depth frame's
/// width/height layout; invalid depth pixels yield zero vertices, hence
/// is_dense = false.
/// @param points  vertices + texture coordinates from rs2::pointcloud::calculate()
/// @param color   the color frame previously passed to rs2::pointcloud::map_to()
pcl::PointCloud<pcl::PointXYZRGB>::Ptr points_to_pcl(const rs2::points& points, const rs2::video_frame& color)
{
    pcl::PointCloud<pcl::PointXYZRGB>::Ptr cloud(new pcl::PointCloud<pcl::PointXYZRGB>);
    auto sp = points.get_profile().as<rs2::video_stream_profile>();
    cloud->width = sp.width();
    cloud->height = sp.height();
    cloud->is_dense = false;
    cloud->points.resize(points.size());
    auto tex_coords = points.get_texture_coordinates();
    auto vertices = points.get_vertices();
    // size_t index: points.size() is unsigned, so avoid a signed/unsigned
    // comparison (the original used int).
    for (size_t i = 0; i < points.size(); ++i)
    {
        cloud->points[i].x = vertices[i].x;
        cloud->points[i].y = vertices[i].y;
        cloud->points[i].z = vertices[i].z;
        // Look up the pixel color for this vertex from the texture frame.
        uint8_t r, g, b;
        std::tie(r, g, b) = get_texcolor(color, tex_coords[i]);
        cloud->points[i].r = r;
        cloud->points[i].g = g;
        cloud->points[i].b = b;
        // (Removed leftover debug printing of the first 500 points' RGB.)
    }
    return cloud;
}
int main(int argc, char* argv[])
{
boost::shared_ptr<pcl::visualization::PCLVisualizer> viewer(new pcl::visualization::PCLVisualizer("3D Viewer"));
rs2::pointcloud pc;
rs2::points points;
rs2::pipeline pipe;
pipe.start();
pcl::PLYWriter writer;
for (int i = 1; i < 20; i++)
pipe.wait_for_frames();
auto frames = pipe.wait_for_frames();
auto depth = frames.get_depth_frame();
auto color = frames.get_color_frame();
dlib::frontal_face_detector face_bbox_detector = dlib::get_frontal_face_detector();//创建dlib检测器用于检测人脸
dlib::shape_predictor face_landmark_annotator;
dlib::deserialize("shape_predictor_68_face_landmarks.dat") >> face_landmark_annotator;
/*
shape_predictor_68_face_landmarks.dat下载网址:
http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
*/
rs_frame_image<dlib::rgb_pixel, RS2_FORMAT_RGB8>image(color);//Create a dlib image for face detection
std::vector<dlib::rectangle> face_bboxex = face_bbox_detector(image);
std::vector<dlib::full_object_detection> faces;
for (auto const& bbox : face_bboxex)
faces.push_back(face_landmark_annotator(image, bbox));
pc.map_to(color);
points = pc.calculate(depth);//这里顺序不能反,否则rgb信息不对
auto pcl_points = points_to_pcl(points, color);
for (auto const& face : faces)
{
for (int i = RIGHT_EAR; i <= N_POINTS; ++i)
{
auto pt = face.part(i);
std::cout << "number: " << i << " x: " << pt.x() << " y: " << pt.y() << std::endl;
pcl_points->points[pt.y() * depth.get_width() + pt.x()].r = 255;//将识别到的人脸关键点标红
pcl_points->points[pt.y() * depth.get_width() + pt.x()].g = 0;
pcl_points->points[pt.y() * depth.get_width() + pt.x()].b = 0;
std::cout << "x: " << pcl_points->points[pt.y() * depth.get_width() + pt.x()].x <<
" y: " << pcl_points->points[pt.y() * depth.get_width() + pt.x()].y << " z: " <<
pcl_points->points[pt.y() * depth.get_width() + pt.x()].z << std::endl;
}
}
writer.write("colorful.ply", *pcl_points);
viewer->removeAllPointClouds();
viewer->addPointCloud<pcl::PointXYZRGB>(pcl_points);
while (!viewer->wasStopped()) {
viewer->spinOnce(100);
// boost::this_thread::sleep(boost::posix_time::microseconds(1));
}
return 0;
}