mediapipe的安装使用需要python >= 3.7的版本
# MediaPipe是一个用于构建机器学习管道的框架,用于处理视频、音频等时间序列数据。这个跨平台框架适用于桌面/服务器、Android、iOS和嵌入式设备,如Raspberry Pi和Jetson Nano。
# mediapipe.solutions.hands # 手部关键点检测
# mediapipe.solutions.pose # 人体姿态检测
# mediapipe.solutions.face_mesh# 人脸网状检测
# mediapipe.solutions.face_detection # 人脸检测
。。。。。。
人体姿态检测关键点绘制基础代码
import cv2 as cv
import mediapipe as mp
import time

# Basic demo: detect body pose landmarks in a video and draw them with FPS.

# drawing_utils draws landmark points and the connections between them.
mpDraw = mp.solutions.drawing_utils
# mediapipe.solutions.pose provides the pose landmark detector.
mpPose = mp.solutions.pose
# Defaults: video (tracking) mode, landmark smoothing on, 0.5 detection and
# tracking confidence. With detection confidence > 0.5 the model keeps
# tracking instead of re-running the (slower) detection every frame.
pose = mpPose.Pose()

pTime = 0  # timestamp of the previously processed frame, used for FPS
cap = cv.VideoCapture('video/JUMP.mp4')
cv.namedWindow("frame", cv.WINDOW_NORMAL)  # resizable display window

while True:
    ret, frame = cap.read()
    # FIX: check the read result BEFORE using the frame — the original called
    # cvtColor/process on a None frame at end-of-video and crashed.
    if not ret:
        break
    # MediaPipe expects RGB input; OpenCV frames are BGR.
    frame_RGB = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
    # Run the pose model; result carries normalized landmark coordinates
    # (x, y, z, visibility per landmark).
    res = pose.process(frame_RGB)
    if res.pose_landmarks:
        # Draw the skeleton: landmark points plus the standard pose connections.
        mpDraw.draw_landmarks(frame, res.pose_landmarks, mpPose.POSE_CONNECTIONS)
        for id, lm in enumerate(res.pose_landmarks.landmark):
            h, w, c = frame.shape
            # Landmarks are normalized to [0, 1]; scale to pixel coordinates.
            cx, cy = int(lm.x * w), int(lm.y * h)
            cv.circle(frame, (cx, cy), 10, (255, 0, 0), cv.FILLED)
    # FPS = 1 / elapsed time since the previous frame.
    cTime = time.time()
    fps = 1 / (cTime - pTime)
    pTime = cTime
    cv.putText(frame, str(int(fps)), (70, 50), cv.FONT_HERSHEY_PLAIN, 3, (0, 0, 255), 3)
    cv.imshow('frame', frame)
    if cv.waitKey(50) & 0xFF == ord('q'):
        break

cap.release()
cv.destroyAllWindows()
人体姿态关键点识别
模块化开发
import cv2 as cv
import mediapipe as mp
import time
import math


class poseDetector():
    """Modular wrapper around MediaPipe Pose.

    Detects body landmarks in a BGR frame, optionally draws them, and can
    compute the angle formed by any three detected landmarks.
    """

    def __init__(self, static_image_mode=False,
                 upper_body_only=False,
                 model_complexity=1,
                 smooth_landmarks=True,
                 min_detection_confidence=0.5,
                 min_tracking_confidence=0.5):
        # static_image_mode: False = keep tracking after a confident detection,
        #                    True  = run detection on every frame.
        # upper_body_only:   only meaningful on older mediapipe releases.
        # smooth_landmarks:  temporal smoothing of landmarks, normally on.
        # With detection confidence > 0.5 the model keeps tracking (cheap)
        # instead of re-running detection (expensive) each frame.
        self.static_image_mode = static_image_mode
        self.upper_body_only = upper_body_only
        self.model_complexity = model_complexity
        self.smooth_landmarks = smooth_landmarks
        self.min_detection_confidence = min_detection_confidence
        self.min_tracking_confidence = min_tracking_confidence
        # drawing_utils draws landmark points and their connections.
        self.mpDraw = mp.solutions.drawing_utils
        self.mpPose = mp.solutions.pose
        # NOTE(review): arguments are passed positionally; the Pose() signature
        # changed across mediapipe versions (upper_body_only was removed) —
        # confirm against the installed release.
        self.pose = self.mpPose.Pose(self.static_image_mode, self.upper_body_only,
                                     self.model_complexity, self.smooth_landmarks,
                                     self.min_detection_confidence,
                                     self.min_tracking_confidence)

    def findPose(self, frame, draw=True):
        """Run pose detection on a BGR frame; optionally draw the skeleton."""
        # MediaPipe expects RGB input; OpenCV frames are BGR.
        self.frame_RGB = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
        self.res = self.pose.process(self.frame_RGB)
        if self.res.pose_landmarks:
            if draw:
                self.mpDraw.draw_landmarks(frame, self.res.pose_landmarks,
                                           self.mpPose.POSE_CONNECTIONS)
        return frame

    def findPosition(self, frame, draw=True):
        """Return [id, x_px, y_px] per landmark of the last findPose() call."""
        self.lmList = []
        if self.res.pose_landmarks:
            for id, lm in enumerate(self.res.pose_landmarks.landmark):
                h, w, c = frame.shape
                # Landmarks are normalized [0, 1]; convert to pixels.
                cx, cy = int(lm.x * w), int(lm.y * h)
                self.lmList.append([id, cx, cy])
                if draw:
                    cv.circle(frame, (cx, cy), 10, (255, 0, 0), cv.FILLED)
        return self.lmList

    def findAngle(self, frame, p1, p2, p3, draw=True):
        """Return the angle (degrees) at landmark p2 formed by p1-p2-p3.

        FIX: the original computed the angle but never returned it, so
        callers always received None.
        """
        # Pixel coordinates of the three joints from the last findPosition().
        x1, y1 = self.lmList[p1][1:]
        x2, y2 = self.lmList[p2][1:]
        x3, y3 = self.lmList[p3][1:]
        # atan2 yields each segment's counter-clockwise angle (in [-pi, pi])
        # from the vertex p2; their difference is the joint angle.
        angle = math.degrees(math.atan2(y3 - y2, x3 - x2) -
                             math.atan2(y1 - y2, x1 - x2))
        if draw:
            cv.line(frame, (x1, y1), (x2, y2), (255, 0, 255), 3)
            cv.line(frame, (x3, y3), (x2, y2), (255, 0, 255), 3)
            cv.circle(frame, (x1, y1), 10, (0, 0, 255), cv.FILLED)
            cv.circle(frame, (x1, y1), 15, (0, 0, 255), 2)
            cv.circle(frame, (x2, y2), 10, (0, 0, 255), cv.FILLED)
            cv.circle(frame, (x2, y2), 15, (0, 0, 255), 2)
            cv.circle(frame, (x3, y3), 10, (0, 0, 255), cv.FILLED)
            cv.circle(frame, (x3, y3), 15, (0, 0, 255), 2)
        return angle


def main():
    """Demo: run the detector over a video file with an FPS overlay."""
    pTime = 0  # timestamp of the previous frame, used for FPS
    cap = cv.VideoCapture('../video/JUMP.mp4')
    cv.namedWindow("frame", cv.WINDOW_NORMAL)  # resizable display window
    detector = poseDetector()
    while True:
        success, frame = cap.read()
        # FIX: bail out BEFORE processing — the original passed a None frame
        # to findPose at end-of-video and crashed.
        if not success:
            break
        frame = detector.findPose(frame)
        lmList = detector.findPosition(frame)
        print(lmList)
        # FPS = 1 / elapsed time since the previous frame.
        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime
        cv.putText(frame, str(int(fps)), (70, 50), cv.FONT_HERSHEY_PLAIN, 3, (0, 0, 255), 3)
        cv.imshow('frame', frame)
        if cv.waitKey(50) & 0xFF == ord('q'):
            break
    cap.release()
    cv.destroyAllWindows()


if __name__ == '__main__':
    main()
手部关键点检测
import math
import cv2 as cv
import mediapipe as mp
import time


class handDetector():
    """Wrapper around MediaPipe Hands: detect, locate and measure hand landmarks."""

    def __init__(self, static_image_mode=False, max_num_hands=2, model_complexity=1,
                 min_detection_confidence=0.5, min_tracking_confidence=0.5):
        # static_image_mode False = keep tracking after a confident detection.
        self.static_image_mode = static_image_mode
        self.max_num_hands = max_num_hands
        self.model_complexity = model_complexity
        self.min_detection_confidence = min_detection_confidence
        self.min_tracking_confidence = min_tracking_confidence
        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(self.static_image_mode, self.max_num_hands,
                                        self.model_complexity,
                                        self.min_detection_confidence,
                                        self.min_tracking_confidence)
        self.mpDraw = mp.solutions.drawing_utils
        # Landmark ids of the five fingertips (thumb..pinky).
        self.fgList = [4, 8, 12, 16, 20]

    def findHands(self, frame, draw=True):
        """Detect hands in a BGR frame; optionally draw landmark connections."""
        # MediaPipe expects RGB input; OpenCV frames are BGR.
        frameRGB = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
        self.res = self.hands.process(frameRGB)
        if self.res.multi_hand_landmarks:
            for handLm in self.res.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(frame, handLm,
                                               self.mpHands.HAND_CONNECTIONS)
        return frame

    def findPosition(self, frame, handNo=0, draw=True):
        """Return [id, x_px, y_px] per landmark of hand #handNo."""
        self.handList = []
        if self.res.multi_hand_landmarks:
            myHand = self.res.multi_hand_landmarks[handNo]
            for id, lm in enumerate(myHand.landmark):
                h, w, c = frame.shape
                # Landmarks are normalized [0, 1]; convert to pixels.
                x, y = int(lm.x * w), int(lm.y * h)
                self.handList.append([id, x, y])
                if draw:
                    cv.circle(frame, (x, y), 3, (255, 0, 255), cv.FILLED)
        return self.handList

    def fingersUp(self):
        """Return one flag per finger: 1 = extended, 0 = bent ([] if no hand)."""
        fingers = []
        if len(self.handList) != 0:
            # Thumb: compare x of the tip and the joint below (moves sideways).
            if self.handList[self.fgList[0]][1] > self.handList[self.fgList[0] - 1][1]:
                fingers.append(1)
            else:
                fingers.append(0)
            # Other fingers: tip above (smaller y) the middle joint = extended.
            for i in range(1, 5):
                if self.handList[self.fgList[i]][2] < self.handList[self.fgList[i] - 2][2]:
                    fingers.append(1)
                else:
                    fingers.append(0)
        return fingers

    def findDistance(self, p1, p2, frame, draw=True, r=15, t=3):
        """Pixel distance between landmarks p1 and p2, plus annotated frame.

        Returns (length, frame, [x1, y1, x2, y2, cx, cy]).
        """
        x1, y1 = self.handList[p1][1:]
        x2, y2 = self.handList[p2][1:]
        # FIX: the midpoint must be integer — cv.circle rejects float centers,
        # so the original (x1 + x2) / 2 crashed whenever draw=True.
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
        if draw:
            cv.line(frame, (x1, y1), (x2, y2), (255, 0, 255), t)
            cv.circle(frame, (x1, y1), t, (255, 0, 255), cv.FILLED)
            cv.circle(frame, (x2, y2), t, (255, 0, 255), cv.FILLED)
            cv.circle(frame, (cx, cy), t, (0, 0, 255), cv.FILLED)
        # hypot returns sqrt(dx**2 + dy**2) without intermediate overflow.
        length = math.hypot(x2 - x1, y2 - y1)
        return length, frame, [x1, y1, x2, y2, cx, cy]


def main():
    """Demo: webcam hand tracking with an FPS overlay."""
    cap = cv.VideoCapture(0)
    cv.namedWindow('frame', cv.WINDOW_NORMAL)
    pTime = 0  # timestamp of the previous frame, used for FPS
    detector = handDetector()
    while cap.isOpened():
        ret, frame = cap.read()
        # FIX: check the read result BEFORE processing — the original passed a
        # None frame to findHands when the capture failed.
        if not ret:
            break
        frame = detector.findHands(frame)
        # FIX: the original passed False positionally, which set handNo=False
        # (i.e. hand 0) rather than disabling drawing as intended.
        handList = detector.findPosition(frame, draw=False)
        if handList:
            print(handList)
        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime
        cv.putText(frame, f'fps:{int(fps)}', (30, 70), cv.FONT_HERSHEY_PLAIN, 2, (0, 255, 0), 2)
        cv.imshow('frame', frame)
        if cv.waitKey(25) & 0xFF == ord('q'):
            break
    cap.release()
    cv.destroyAllWindows()


if __name__ == '__main__':
    main()
手指数字检测 调用handDetector类
import cv2 as cv
import time
import os
import HandTrackingModule as htm

# Finger-count demo: count the raised fingers of one hand via the
# handDetector class and overlay a matching picture on the frame.

ww, wh = 640, 400  # requested capture width / height
cap = cv.VideoCapture(0)
cap.set(3, ww)  # property id 3 = frame width
cap.set(4, wh)  # property id 4 = frame height
pTime = 0       # timestamp of the previous frame, used for FPS

# Load the overlay pictures (one per finger count) once, up front.
folderPath = 'fingerData'
dataList = os.listdir(folderPath)  # e.g. ['01.jpg', '02.jpg', '03.jpg']
imgPathList = []
for data in dataList:
    imgPath = cv.imread(f'{folderPath}/{data}')
    imgPathList.append(imgPath)

# Landmark ids of the five fingertips (thumb..pinky).
fgList = [4, 8, 12, 16, 20]
detector = htm.handDetector(min_detection_confidence=0.75)

while cap.isOpened():
    ret, frame = cap.read()
    # FIX: check the read result BEFORE processing — the original passed a
    # None frame to findHands when the capture failed.
    if not ret:
        break
    frame = detector.findHands(frame)
    handList = detector.findPosition(frame, draw=False)
    fingers = []
    if len(handList) != 0:
        # Thumb: compare x of the tip and the joint below (moves sideways).
        if handList[fgList[0]][1] > handList[fgList[0] - 1][1]:
            fingers.append(1)
        else:
            fingers.append(0)
        # Other fingers: tip above (smaller y) the middle joint = extended.
        for i in range(1, 5):
            if handList[fgList[i]][2] < handList[fgList[i] - 2][2]:
                fingers.append(1)
            else:
                fingers.append(0)
        # Displayed number = how many finger flags are 1.
        fingerNO = fingers.count(1)
        cv.putText(frame, f'{fingerNO}', (40, 250), cv.FONT_HERSHEY_PLAIN, 3, (255, 0, 0), 2)
        # FIX: guard the index — with fewer pictures than possible counts the
        # original raised IndexError (e.g. 4 fingers, only 3 images), and
        # cv.imread returns None for unreadable files.
        if fingerNO < len(imgPathList) and imgPathList[fingerNO] is not None:
            h, w, c = imgPathList[fingerNO].shape
            # Paste the picture into the top-left corner of the frame.
            frame[0:h, 0:w] = imgPathList[fingerNO]
    cTime = time.time()
    fps = 1 / (cTime - pTime)
    pTime = cTime
    cv.putText(frame, f'fps:{int(fps)}', (500, 70), cv.FONT_HERSHEY_PLAIN, 2, (255, 0, 0), 3)
    cv.imshow('frame', frame)
    if cv.waitKey(25) & 0xFF == ord('q'):
        break

cap.release()
cv.destroyAllWindows()
Video_20240121171416
f49ae23e