windows 基于 MediaPipe 实现 Holistic

在这里插入图片描述
主页: https://google.github.io/mediapipe/solutions/holistic.html

MediaPipe Holistic pipelines 集成了姿势、面部和手部组件的独立模型,每个组件都针对其特定领域进行了优化,每个组件的推断输入图不同。

MediaPipe Holistic 首先通过 BlazePose 的姿势检测器和后续的关键点模型来估计人的姿势。然后,利用推断出的姿势关键点,为每只手和脸部推导出三个感兴趣区域(ROI)裁剪,并采用 re-crop 模型来改进 ROI

然后,pipelines 在全分辨率输入帧上裁剪出这些 ROI,并应用特定任务的模型来估计它们对应的关键点。最后,将所有关键点与姿势模型的关键点合并,得出全部 540 多个关键点。


实现过程 🚀​

编译demo

  1. bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 --action_env PYTHON_BIN_PATH="C://xxx//python.exe" mediapipe/examples/desktop/holistic_tracking:holistic_tracking_cpu --verbose_failures
    
    bazel-bin\mediapipe\examples\desktop\holistic_tracking\holistic_tracking_cpu --calculator_graph_config_file=mediapipe/graphs/holistic_tracking/holistic_tracking_cpu.pbtxt
    
  2. 如果运行成功,则默认会调用摄像头

编写测试demo

  1. 环境配置参考:

  2. 修改 mediapipe\examples\desktop\holistic_tracking\BUILD

  3. 增加一个测试程序节点

    # Test-sample binary: links the custom demo sources against the
    # holistic-tracking CPU graph dependencies.
    cc_binary(
        name = "holistic_tracking_sample",
        srcs = ["main.cpp","holistic_detect.h","holistic_detect.cpp"],
        # linkshared=True,
        deps = [
            "//mediapipe/graphs/holistic_tracking:holistic_tracking_cpu_graph_deps",
        ],
    )
    
  4. holistic_detect.h

    #ifndef _HOLISTIC_DETECT_H_
    #define _HOLISTIC_DETECT_H_
    
    #include <cstdlib>
    #include "absl/flags/flag.h"
    #include "absl/flags/parse.h"
    #include "mediapipe/framework/calculator_framework.h"
    #include "mediapipe/framework/formats/image_frame.h"
    #include "mediapipe/framework/formats/image_frame_opencv.h"
    #include "mediapipe/framework/port/file_helpers.h"
    #include "mediapipe/framework/port/opencv_highgui_inc.h"
    #include "mediapipe/framework/port/opencv_imgproc_inc.h"
    #include "mediapipe/framework/port/opencv_video_inc.h"
    #include "mediapipe/framework/port/parse_text_proto.h"
    #include "mediapipe/framework/port/status.h"
    #include "mediapipe/framework/formats/detection.pb.h"
    #include "mediapipe/framework/formats/landmark.pb.h"
    #include "mediapipe/framework/formats/rect.pb.h"
    #include "mediapipe/framework/formats/classification.pb.h"
    #include "mediapipe/framework/port/ret_check.h"
    
    namespace mp {
    
    	// Per-frame callback handed to the detect* entry points.
    	// Currently takes no arguments; extend the signature if landmark
    	// results need to be passed back to the caller.
    	using holistic_callback_t = std::function<void()>;
    
    	// Thin wrapper around the MediaPipe Holistic CPU graph.
    	// Typical usage: initModel() -> detectVideo()/detectVideoCamera() -> release().
    	// All int-returning methods use 1 for success and -1 for failure.
    	class HolisticDetect
    	{
    	public:
    		// Loads the graph config (.pbtxt) at model_path and starts the graph.
    		int initModel(const char* model_path) noexcept;
    		// Runs detection on the camera input (empty video path).
    		int detectVideoCamera(holistic_callback_t call);
    		// Runs detection on a video file; show_image != 0 opens a preview window.
    		int detectVideo(const char* video_path, int show_image, holistic_callback_t call);
    		// Convenience overload: camera input, preview on, no callback.
    		int detectVideo();
    		// Closes the graph's input stream and waits for the graph to finish.
    		int release();
    	private:
    		absl::Status initGraph(const char* model_path);
    		absl::Status runMPPGraphVideo(const char* video_path, int show_image, holistic_callback_t call);
    		absl::Status releaseGraph();
    
    		// Draw helpers: each drains one packet from its landmark poller.
    		// Drawing is currently stubbed out (see the commented cv::circle calls).
    		void showFaceLandmarks(cv::Mat& image);
    		void showPoseLandmarks(cv::Mat& image);
    		void showLHandLandmarks(cv::Mat& image);
    		void showRHandLandmarks(cv::Mat& image);
    
    		// Stream names as declared in holistic_tracking_cpu.pbtxt.
    		const char* kInputStream = "input_video";
    		const char* kOutputStream = "output_video";
    		const char* kWindowName = "MediaPipe";
    		const char* kOutputLandmarks = "face_landmarks";
    		const char* kOutputPoseLandmarks = "pose_landmarks";
    		const char* kOutputLHandLandmarks = "left_hand_landmarks";
    		const char* kOutputRHandLandmarks = "right_hand_landmarks";
    
    		// Set by initModel(); guards the detect* entry points.
    		bool init_ = false;
    		mediapipe::CalculatorGraph graph_;
    
    		const float kMinFloat = std::numeric_limits<float>::lowest();
    		const float kMaxFloat = std::numeric_limits<float>::max();
    
    		// Pollers for the annotated frame and each landmark output stream.
    		std::unique_ptr<mediapipe::OutputStreamPoller> pPoller_;
    		std::unique_ptr<mediapipe::OutputStreamPoller> pPollerFaceLandmarks_;
    		std::unique_ptr<mediapipe::OutputStreamPoller> pPollerPoseLandmarks_;
    		std::unique_ptr<mediapipe::OutputStreamPoller> pPollerLHandLandmarks_;
    		std::unique_ptr<mediapipe::OutputStreamPoller> pPollerRHandLandmarks_;
    	};
    }
    
    #endif
    
    
  5. holistic_detect.cpp

    #include "holistic_detect.h"
    using namespace mp;
    
    int HolisticDetect::initModel(const char* model_path) noexcept {
    	absl::Status run_status = initGraph(model_path);
    	if (!run_status.ok())
    		return -1;
    	init_ = true;
    	return  1;
    }
    
    int HolisticDetect::detectVideoCamera(holistic_callback_t call) {
    	// Guard: initModel() must have succeeded first.
    	if (!init_)
    		return -1;
    	// An empty path selects the camera input; preview window enabled.
    	return runMPPGraphVideo("", true, call).ok() ? 1 : -1;
    }
    
    int HolisticDetect::detectVideo(const char* video_path, int show_image, holistic_callback_t call) {
    	if (!init_)
    		return -1;
    
    	absl::Status run_status = runMPPGraphVideo(video_path, show_image, call);
    	return run_status.ok() ? 1 : -1;
    }
    
    int HolisticDetect::detectVideo() {
    	if (!init_)
    		return -1;
    	absl::Status run_status = runMPPGraphVideo("", 1, nullptr);
    	return run_status.ok() ? 1 : -1;
    }
    
    int HolisticDetect::release() {
    	absl::Status run_status = releaseGraph();
    	return run_status.ok() ? 1 : -1;
    }
    
    absl::Status HolisticDetect::initGraph(const char* model_path) {
    	// Loads the graph config (.pbtxt) from model_path, attaches a poller to
    	// every output stream we consume, and starts the graph.
    	//
    	// Fix: the original validated each AddOutputStreamPoller() result with
    	// assert(), which compiles away in opt builds (bazel -c opt defines
    	// NDEBUG); a failed StatusOr would then be dereferenced anyway. Errors
    	// are now propagated explicitly.
    	std::string calculator_graph_config_contents;
    	MP_RETURN_IF_ERROR(mediapipe::file::GetContents(model_path, &calculator_graph_config_contents));
    	mediapipe::CalculatorGraphConfig config =
    		mediapipe::ParseTextProtoOrDie<mediapipe::CalculatorGraphConfig>(
    			calculator_graph_config_contents);
    	MP_RETURN_IF_ERROR(graph_.Initialize(config));
    
    	// Annotated output image.
    	mediapipe::StatusOrPoller sop = graph_.AddOutputStreamPoller(kOutputStream);
    	if (!sop.ok()) return sop.status();
    	pPoller_ = std::make_unique<mediapipe::OutputStreamPoller>(std::move(sop.value()));
    
    	// Face landmarks (468 points).
    	mediapipe::StatusOrPoller faceLandmark = graph_.AddOutputStreamPoller(kOutputLandmarks);
    	if (!faceLandmark.ok()) return faceLandmark.status();
    	pPollerFaceLandmarks_ = std::make_unique<mediapipe::OutputStreamPoller>(std::move(faceLandmark.value()));
    
    	// Pose landmarks (33 points).
    	mediapipe::StatusOrPoller poseLandmark = graph_.AddOutputStreamPoller(kOutputPoseLandmarks);
    	if (!poseLandmark.ok()) return poseLandmark.status();
    	pPollerPoseLandmarks_ = std::make_unique<mediapipe::OutputStreamPoller>(std::move(poseLandmark.value()));
    
    	// Left-hand landmarks (21 points).
    	mediapipe::StatusOrPoller leftHandLandmark = graph_.AddOutputStreamPoller(kOutputLHandLandmarks);
    	if (!leftHandLandmark.ok()) return leftHandLandmark.status();
    	pPollerLHandLandmarks_ = std::make_unique<mediapipe::OutputStreamPoller>(std::move(leftHandLandmark.value()));
    
    	// Right-hand landmarks (21 points).
    	mediapipe::StatusOrPoller rightHandLandmark = graph_.AddOutputStreamPoller(kOutputRHandLandmarks);
    	if (!rightHandLandmark.ok()) return rightHandLandmark.status();
    	pPollerRHandLandmarks_ = std::make_unique<mediapipe::OutputStreamPoller>(std::move(rightHandLandmark.value()));
    
    	MP_RETURN_IF_ERROR(graph_.StartRun({}));
    	std::cout << "======= graph_ StartRun success ============" << std::endl;
    	return absl::OkStatus();
    }
    
    void HolisticDetect::showFaceLandmarks(cv::Mat& image) {
    	// Nothing queued means nothing to draw for this frame.
    	if (pPollerFaceLandmarks_->QueueSize() == 0)
    		return;
    	mediapipe::Packet packet_landmarks;
    	if (!pPollerFaceLandmarks_->Next(&packet_landmarks))
    		return;
    
    	// 468 normalized face landmarks; scale to pixel coordinates.
    	const auto& landmark_list = packet_landmarks.Get<mediapipe::NormalizedLandmarkList>();
    	const int count = landmark_list.landmark_size();
    	for (int idx = 0; idx < count; ++idx) {
    		const auto& lm = landmark_list.landmark(idx);
    		float x = lm.x() * image.cols;
    		float y = lm.y() * image.rows;
    		//cv::circle(image, cv::Point(x, y), 2, cv::Scalar(0, 255, 0));
    		// todo
    		// ...
    	}
    }
    
    void HolisticDetect::showPoseLandmarks(cv::Mat& image) {
    	// Nothing queued means nothing to draw for this frame.
    	if (pPollerPoseLandmarks_->QueueSize() == 0)
    		return;
    	mediapipe::Packet packet_landmarks;
    	if (!pPollerPoseLandmarks_->Next(&packet_landmarks))
    		return;
    
    	// 33 normalized pose landmarks; scale to pixel coordinates.
    	const auto& landmark_list = packet_landmarks.Get<mediapipe::NormalizedLandmarkList>();
    	const int count = landmark_list.landmark_size();
    	for (int idx = 0; idx < count; ++idx) {
    		const auto& lm = landmark_list.landmark(idx);
    		float x = lm.x() * image.cols;
    		float y = lm.y() * image.rows;
    		//cv::circle(image, cv::Point(x, y), 2, cv::Scalar(0, 255, 0));
    		// todo
    		// ...
    	}
    }
    
    void HolisticDetect::showLHandLandmarks(cv::Mat& image) {
    	// Nothing queued means nothing to draw for this frame.
    	if (pPollerLHandLandmarks_->QueueSize() == 0)
    		return;
    	mediapipe::Packet packet_landmarks;
    	if (!pPollerLHandLandmarks_->Next(&packet_landmarks))
    		return;
    
    	// 21 normalized left-hand landmarks; scale to pixel coordinates.
    	const auto& landmark_list = packet_landmarks.Get<mediapipe::NormalizedLandmarkList>();
    	const int count = landmark_list.landmark_size();
    	for (int idx = 0; idx < count; ++idx) {
    		const auto& lm = landmark_list.landmark(idx);
    		float x = lm.x() * image.cols;
    		float y = lm.y() * image.rows;
    		//cv::circle(image, cv::Point(x, y), 2, cv::Scalar(0, 255, 0));
    		// todo
    		// ...
    	}
    }
    
    void HolisticDetect::showRHandLandmarks(cv::Mat& image) {
    	// Nothing queued means nothing to draw for this frame.
    	if (pPollerRHandLandmarks_->QueueSize() == 0)
    		return;
    	mediapipe::Packet packet_landmarks;
    	if (!pPollerRHandLandmarks_->Next(&packet_landmarks))
    		return;
    
    	// 21 normalized right-hand landmarks; scale to pixel coordinates.
    	const auto& landmark_list = packet_landmarks.Get<mediapipe::NormalizedLandmarkList>();
    	const int count = landmark_list.landmark_size();
    	for (int idx = 0; idx < count; ++idx) {
    		const auto& lm = landmark_list.landmark(idx);
    		float x = lm.x() * image.cols;
    		float y = lm.y() * image.rows;
    		//cv::circle(image, cv::Point(x, y), 2, cv::Scalar(0, 255, 0));
    		// todo
    		// ...
    	}
    }
    
    absl::Status HolisticDetect::runMPPGraphVideo(const char* video_path, int show_image, holistic_callback_t call) {
    	// Feeds frames from a video file (or the default camera when video_path
    	// is null/empty) into the graph, pulls the annotated output frame plus
    	// the landmark streams, and optionally shows the result.
    	//
    	// Fixes vs. the original:
    	//  * an empty path was passed straight to cv::VideoCapture(string),
    	//    which never opens a camera, so detectVideoCamera() could not work;
    	//    the camera is now opened by device index 0.
    	//  * the OpenCV version check required MINOR >= 2 regardless of MAJOR,
    	//    wrongly excluding OpenCV 4.0/4.1.
    	//  * the per-frame callback was accepted but never invoked.
    	//  * the horizontal mirror flip is applied only to the live camera feed;
    	//    video files keep their original orientation.
    	const bool use_camera = (video_path == nullptr || video_path[0] == '\0');
    	cv::VideoCapture capture;
    	if (use_camera)
    		capture.open(0);
    	else
    		capture.open(video_path);
    	RET_CHECK(capture.isOpened());
    #if (CV_MAJOR_VERSION > 3) || ((CV_MAJOR_VERSION == 3) && (CV_MINOR_VERSION >= 2))
    	if (use_camera) {
    		// Capture properties only make sense for a live camera.
    		capture.set(cv::CAP_PROP_FRAME_WIDTH, 640);
    		capture.set(cv::CAP_PROP_FRAME_HEIGHT, 480);
    		capture.set(cv::CAP_PROP_FPS, 30);
    	}
    #endif
    	bool grab_frames = true;
    	while (grab_frames) {
    		// Capture opencv camera or video frame; empty frame = end of stream.
    		cv::Mat camera_frame_raw;
    		capture >> camera_frame_raw;
    		if (camera_frame_raw.empty())
    			break;
    
    		cv::Mat camera_frame;
    		cv::cvtColor(camera_frame_raw, camera_frame, cv::COLOR_BGR2RGB);
    		if (use_camera)
    			cv::flip(camera_frame, camera_frame, /*flipcode=HORIZONTAL*/ 1);
    
    		// Wrap Mat into an ImageFrame.
    		auto input_frame = absl::make_unique<mediapipe::ImageFrame>(
    			mediapipe::ImageFormat::SRGB, camera_frame.cols, camera_frame.rows,
    			mediapipe::ImageFrame::kDefaultAlignmentBoundary);
    		cv::Mat input_frame_mat = mediapipe::formats::MatView(input_frame.get());
    		camera_frame.copyTo(input_frame_mat);
    
    		// Send image packet into the graph, timestamped in microseconds.
    		size_t frame_timestamp_us =
    			(double)cv::getTickCount() / (double)cv::getTickFrequency() * 1e6;
    		MP_RETURN_IF_ERROR(graph_.AddPacketToInputStream(
    			kInputStream, mediapipe::Adopt(input_frame.release())
    			.At(mediapipe::Timestamp(frame_timestamp_us))));
    
    		// Get the graph result packet, or stop if that fails.
    		mediapipe::Packet packet;
    		if (!pPoller_->Next(&packet)) break;
    
    		showFaceLandmarks(camera_frame);
    		showPoseLandmarks(camera_frame);
    		showLHandLandmarks(camera_frame);
    		showRHandLandmarks(camera_frame);
    
    		// Notify the caller that one frame has been processed.
    		if (call)
    			call();
    
    		if (show_image) {
    			auto& output_frame = packet.Get<mediapipe::ImageFrame>();
    			// Convert back to opencv (BGR) for display.
    			cv::Mat output_frame_mat = mediapipe::formats::MatView(&output_frame);
    			cv::cvtColor(output_frame_mat, output_frame_mat, cv::COLOR_RGB2BGR);
    			cv::imshow(kWindowName, output_frame_mat);
    			cv::waitKey(1);
    		}
    	}
    	if (show_image)
    		cv::destroyWindow(kWindowName);
    	return absl::OkStatus();
    }
    
    absl::Status HolisticDetect::releaseGraph() {
    	// Close the graph's input stream and wait for the graph to drain.
    	//
    	// Fix: the original also called CloseInputStream(kOutputLandmarks).
    	// "face_landmarks" is an *output* stream, so that call fails, the
    	// error short-circuits past WaitUntilDone(), and release() always
    	// reported failure. Only the real input stream is closed now.
    	MP_RETURN_IF_ERROR(graph_.CloseInputStream(kInputStream));
    	return graph_.WaitUntilDone();
    }
    
  6. main.cpp

    #include "holistic_detect.h"
    using namespace mp;
    
    HolisticDetect holisticDetect_;
    
    // Per-frame callback; hook custom landmark processing in here.
    void call() {
    }
    
    // Usage: holistic_tracking_sample <graph.pbtxt> <video_path>
    int main(int argc, char* argv[]) {
    	std::cout << "======= holistic ============" << std::endl;
    	// Fix: the original dereferenced argv[1]/argv[2] without checking argc,
    	// crashing when launched with missing arguments.
    	if (argc < 3) {
    		std::cout << "usage: " << argv[0] << " <graph.pbtxt> <video_path>" << std::endl;
    		return -1;
    	}
    	const char* model = argv[1];
    	const char* video_path = argv[2];
    	const int isShow = 1;
    	if (holisticDetect_.initModel(model) < 0) {
    		std::cout << "======= initModel error ============" << std::endl;
    		return -1;
    	}
    	if (holisticDetect_.detectVideo(video_path, isShow, call) < 0)
    		return -1;
    	holisticDetect_.release();
    	return 0;
    }
    
  7. 编译

    bazel build -c opt --define MEDIAPIPE_DISABLE_GPU=1 --action_env PYTHON_BIN_PATH="C://xx//python.exe" mediapipe/examples/desktop/holistic_tracking:holistic_tracking_sample --verbose_failures
    
  8. 运行

    bazel-bin\mediapipe\examples\desktop\holistic_tracking\holistic_tracking_sample "mediapipe/graphs/holistic_tracking/holistic_tracking_cpu.pbtxt" "video/test.mp4"
    
  9. 运行成功, 则可获取人脸面部 468 个特征点 ,左手 21 个特征点 , 右手 21 个特征点 ,姿态 33 个特征点。

在这里插入图片描述 在这里插入图片描述 在这里插入图片描述 在这里插入图片描述


其他模块 ☀️

  1. IRIS: https://blog.csdn.net/haiyangyunbao813/article/details/122225445?spm=1001.2014.3001.5502
  2. Pose: https://blog.csdn.net/haiyangyunbao813/article/details/119192951?spm=1001.2014.3001.5502
  3. Hand: https://blog.csdn.net/haiyangyunbao813/article/details/122464972?spm=1001.2014.3001.5502
  4. 其他待续…

青春不过几届世界杯

输赢并不是足球的全部 , 真正的热爱或许是笑着庆祝, 哭着鼓掌 。加油马儿,愿 2026 再战一届。 🌟​🌟​🌟​🌟​🌟​

在这里插入图片描述

评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值