计算机视觉入门(基础篇:利用mediapipe进行人体姿态识别)

本代码基于 Advance Computer Vision with Python 进行修改,更加适合中国宝宝体质

我的相关代码及数据集已经上传GitHub仓库,欢迎使用 Advance-Computer-Vision-with-Python

代码1:Basics.py

import cv2
import mediapipe as mp
import time

# Set up MediaPipe's drawing helpers and its pose-estimation solution.
mpDraw = mp.solutions.drawing_utils  # draws detected landmarks and their connections onto an image
mpPose = mp.solutions.pose  # pose-estimation solution module
pose = mpPose.Pose()  # pose detector created with its default settings

# Open the input video file.
cap = cv2.VideoCapture("E:\\Advance Computer Vision with Python\\Chapter 2 Pose Estimation\\PoseVideos\\3.mp4")

if not cap.isOpened():
    print("Error: Could not open video.")
    # Exit with a non-zero status; SystemExit works even when the interactive
    # `exit()` helper from the `site` module is unavailable.
    raise SystemExit(1)

pTime = 0  # timestamp of the previous frame

# Create a resizable window once, before the frame loop.
cv2.namedWindow("Image", cv2.WINDOW_NORMAL)

try:
    while True:
        success, img = cap.read()  # grab the next video frame

        if not success:
            print("Failed to read frame")
            break

        # Convert the frame from BGR to RGB before running the detector on it.
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = pose.process(imgRGB)

        if results.pose_landmarks:
            # Draw the skeleton (landmarks plus connections).
            mpDraw.draw_landmarks(img, results.pose_landmarks, mpPose.POSE_CONNECTIONS)

            # The frame size is the same for every landmark, so read it once.
            h, w = img.shape[:2]
            for lm in results.pose_landmarks.landmark:
                # Scale the landmark's x/y by the frame width/height to get pixels.
                cx, cy = int(lm.x * w), int(lm.y * h)
                cv2.circle(img, (cx, cy), 5, (255, 0, 0), cv2.FILLED)

        cTime = time.time()
        elapsed = cTime - pTime
        # Guard against a zero interval (coarse clock resolution) to avoid
        # ZeroDivisionError.
        fps = 1 / elapsed if elapsed > 0 else 0
        pTime = cTime

        # Overlay the frame rate on the image.
        cv2.putText(img, str(int(fps)), (70, 50), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 0), 3)

        cv2.imshow("Image", img)

        # Press "q" to stop playback.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always release the capture, the detector and the windows, even if the
    # loop above raised.
    cap.release()
    pose.close()
    cv2.destroyAllWindows()

代码2:PoseModule.py

import cv2
import mediapipe as mp
import time
import math


# Pose-detection helper class
class poseDetector:
    """Convenience wrapper around the MediaPipe pose solution.

    Intended call order per frame: ``findPose`` (runs detection), then
    ``findPosition`` (converts landmarks to pixel coordinates), then
    optionally ``findAngle`` (angle between three of those landmarks).

    Note: an older version of this class took positional arguments
    ``(mode, upBody, smooth, detectionCon, trackCon)``; some of those
    parameters are deprecated, so the detector is now configured through
    the keyword arguments below.
    """

    def __init__(
        self,
        static_image_mode=False,
        model_complexity=1,
        enable_segmentation=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
    ):
        """Store the configuration and create the underlying pose detector.

        Args:
            static_image_mode: Whether to treat input as static images;
                ``False`` means a video stream is being processed.
            model_complexity: Model complexity, 0, 1 or 2; higher is more
                accurate but slower.
            enable_segmentation: Whether to enable segmentation.
            min_detection_confidence: Minimum confidence for pose detection.
            min_tracking_confidence: Minimum confidence for pose tracking.
        """
        self.static_image_mode = static_image_mode
        self.model_complexity = model_complexity
        self.enable_segmentation = enable_segmentation
        self.min_detection_confidence = min_detection_confidence
        self.min_tracking_confidence = min_tracking_confidence

        # MediaPipe helpers: drawing utilities and the pose solution module.
        self.mpDraw = mp.solutions.drawing_utils
        self.mpPose = mp.solutions.pose

        # The pose estimator itself, configured by keyword to stay robust
        # against changes in the positional parameter order.
        self.pose = self.mpPose.Pose(
            static_image_mode=self.static_image_mode,
            model_complexity=self.model_complexity,
            enable_segmentation=self.enable_segmentation,
            min_detection_confidence=self.min_detection_confidence,
            min_tracking_confidence=self.min_tracking_confidence,
        )

        # Initialise per-frame state so findPosition/findAngle do not fail
        # with AttributeError when called before findPose/findPosition.
        self.results = None
        self.lmList = []

    def findPose(self, img, draw=True):
        """Run pose detection on a BGR frame and optionally draw the skeleton.

        Args:
            img: Frame in BGR channel order (it is converted to RGB here).
            draw: When true, draw landmarks and connections onto ``img``.

        Returns:
            The same ``img`` object, drawn on in place when ``draw`` is true
            and a pose was found.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.pose.process(imgRGB)
        if draw and self.results.pose_landmarks:
            self.mpDraw.draw_landmarks(
                img, self.results.pose_landmarks, self.mpPose.POSE_CONNECTIONS
            )
        return img

    def findPosition(self, img, draw=True):
        """Convert the most recent detection into pixel coordinates.

        Args:
            img: The frame the detection was run on; its shape supplies the
                width and height used for scaling.
            draw: When true, draw a filled circle on each landmark.

        Returns:
            A list of ``[index, x_px, y_px]`` entries, one per landmark. The
            list is empty when no pose was detected or ``findPose`` has not
            been called yet. The same list is stored in ``self.lmList``.
        """
        self.lmList = []
        landmarks = self.results.pose_landmarks if self.results is not None else None
        if not landmarks:
            return self.lmList

        # The frame size is constant for all landmarks; read it once.
        h, w = img.shape[:2]
        for idx, lm in enumerate(landmarks.landmark):
            # Scale the landmark's x/y by the frame width/height to get pixels.
            cx, cy = int(lm.x * w), int(lm.y * h)
            self.lmList.append([idx, cx, cy])
            if draw:
                cv2.circle(img, (cx, cy), 5, (255, 0, 0), cv2.FILLED)
        return self.lmList

    def findAngle(self, img, p1, p2, p3, draw=True):
        """Compute the angle at landmark ``p2`` formed with ``p1`` and ``p3``.

        The angle is measured from the vector ``p2 -> p1`` to the vector
        ``p2 -> p3`` using ``atan2`` and normalised to be non-negative by
        adding 360 to negative values.

        Args:
            img: Frame to draw on (only used when ``draw`` is true).
            p1, p2, p3: Indices into ``self.lmList``; ``p2`` is the vertex.
            draw: When true, draw both segments, the three points and the
                angle value onto ``img``.

        Returns:
            The angle in degrees as a float.

        Raises:
            IndexError: If ``self.lmList`` does not contain the requested
                landmarks (for example, no pose was detected).
        """
        x1, y1 = self.lmList[p1][1:3]
        x2, y2 = self.lmList[p2][1:3]
        x3, y3 = self.lmList[p3][1:3]

        # Difference of the two vectors' angles relative to the x axis.
        angle = math.degrees(
            math.atan2(y3 - y2, x3 - x2) - math.atan2(y1 - y2, x1 - x2)
        )
        if angle < 0:
            angle += 360

        if draw:
            cv2.line(img, (x1, y1), (x2, y2), (255, 255, 255), 3)
            cv2.line(img, (x3, y3), (x2, y2), (255, 255, 255), 3)
            # Each point gets a filled dot with a thin ring around it.
            for point in ((x1, y1), (x2, y2), (x3, y3)):
                cv2.circle(img, point, 10, (0, 0, 255), cv2.FILLED)
                cv2.circle(img, point, 15, (0, 0, 255), 2)
            cv2.putText(
                img,
                str(int(angle)),
                (x2 - 50, y2 + 50),
                cv2.FONT_HERSHEY_PLAIN,
                2,
                (0, 0, 255),
                2,
            )
        return angle


# Demo entry point
def main():
    """Play a video, run pose detection on each frame and show the result.

    For every frame the detected skeleton is drawn, landmark index 14 is
    printed and highlighted when available, and the frame rate is overlaid.
    Playback stops at the end of the video or when "q" is pressed.
    """
    cap = cv2.VideoCapture(
        "E:\\Advance Computer Vision with Python\\Chapter 2 Pose Estimation\\PoseVideos\\5.mp4"
    )  # open the video file
    # Check the capture once, up front, instead of after every read.
    if not cap.isOpened():
        print("Error: Could not open video.")
        return

    pTime = 0  # timestamp of the previous frame
    detector = poseDetector()
    # Create the resizable window once rather than on every frame.
    cv2.namedWindow("Image", cv2.WINDOW_NORMAL)

    try:
        while True:
            success, img = cap.read()
            # Validate the frame BEFORE using it: at end of video `img` is
            # None and passing it to the detector would crash in cvtColor.
            if not success:
                print("Failed to read frame")
                break

            img = detector.findPose(img)
            lmList = detector.findPosition(img, draw=False)
            # Only index landmark 14 when the list actually contains it.
            if len(lmList) > 14:
                print(lmList[14])
                cv2.circle(img, (lmList[14][1], lmList[14][2]), 15, (0, 0, 255), cv2.FILLED)

            cTime = time.time()
            elapsed = cTime - pTime
            # Guard against a zero interval to avoid ZeroDivisionError.
            fps = 1 / elapsed if elapsed > 0 else 0
            pTime = cTime
            cv2.putText(
                img, str(int(fps)), (70, 50), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 0), 3
            )
            cv2.imshow("Image", img)

            # waitKey lets the window refresh; "q" ends playback early.
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Release the capture and close the window even if the loop raised.
        cap.release()
        cv2.destroyAllWindows()

# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

mp.solutions.pose

`mpPose = mp.solutions.pose`:导入 MediaPipe 的姿势估计模块。

`pose = mpPose.Pose()`:创建一个姿势检测对象,用于处理图像并检测人体姿势。

`results = pose.process(imgRGB)`:处理 RGB 图像并检测姿势,检测结果保存在 `results` 中。

mpPose.Pose() 对象的参数

static_image_mode:静态图像模式设置;False 表示处理视频流
model_complexity:模型复杂度设置,取 0、1 或 2,越高越准确但更慢
enable_segmentation:分割功能设置,是否启用分割功能
min_detection_confidence:检测置信度阈值,即姿势检测的最小置信度
min_tracking_confidence:跟踪置信度阈值,即姿势跟踪的最小置信度

注意事项

使用 VS Code 运行代码时,请让终端的工作目录位于 Advance Computer Vision with Python 文件夹下(即用 VS Code 打开整个项目文件夹);同时视频路径请使用绝对路径,否则可能出现一些莫名其妙的报错。

例如,如果让终端在 Chapter 2 Pose Estimation 文件夹下运行,OpenCV 的 GUI 就会报错,现象非常奇怪。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值