Preface

This post documents a PyQt5 GUI that displays human pose / action recognition. Environment: Windows 11 + PyCharm 2023 + torch 2.2.1 + CUDA 11.8, PyQt version 5.15.10.
OpenPose (with its Python API) must be configured first; see [https://blog.csdn.net/weixin_44003104/article/details/132685437?spm=1001.2014.3001.5506].
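Before wiring up the GUI, it is worth confirming that the interpreter used by PyCharm really sees the GPU build of torch and the expected PyQt version. A minimal, project-independent check (the expected values in the comments simply mirror the environment listed above):

```python
import torch
from PyQt5.QtCore import PYQT_VERSION_STR

print("torch:", torch.__version__)           # expect 2.2.1
print("CUDA available:", torch.cuda.is_available())
print("CUDA build:", torch.version.cuda)     # expect 11.8
print("PyQt:", PYQT_VERSION_STR)             # expect 5.15.10
```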
I. Main steps

1. Draw the UI in Qt Designer.
2. Convert the .ui file to a .py file: on the command line run `pyuic5 -o firstMainWin.py firstMainwin.ui`.
3. Connect the play and recognition buttons to their slot functions (a minimal sketch follows this list).
4. Adapt the demo_offline part of st-gcn as the recognition algorithm.
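As a reference for steps 2 and 3, a stripped-down sketch of how the pyuic5-generated class is used and how a button is bound to a slot could look like this. The module name `firstMainWin`, the class `Ui_MainWindow`, the button name `pushButton` and the slot are placeholders for whatever your own .ui file defines:

```python
import sys
from PyQt5.QtWidgets import QApplication, QMainWindow
from firstMainWin import Ui_MainWindow   # module produced by pyuic5 (placeholder name)


class MainWindow(Ui_MainWindow, QMainWindow):
    def __init__(self):
        super().__init__()
        self.setupUi(self)                              # build the widgets defined in the .ui layout
        self.pushButton.clicked.connect(self.on_play)   # step 3: bind a button to a slot

    def on_play(self):
        print("play clicked")


if __name__ == '__main__':
    app = QApplication(sys.argv)
    win = MainWindow()
    win.show()
    sys.exit(app.exec_())
```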
II. Code

1. Complete code (imports, main window class, and pose tracker)

The code is as follows (example):
```python
from PyQt5.QtWidgets import *
from PyQt5.QtMultimedia import *
from PyQt5.QtGui import *
from PyQt5.QtCore import *
from PyQt5.QtMultimediaWidgets import QVideoWidget
from test01 import Ui_MainWindow     # UI class generated by pyuic5
import sys
import time
import cv2
import numpy as np
import torch
from net.st_gcn import Model         # ST-GCN model definition


class myMainWindow(Ui_MainWindow, QMainWindow):
    def __init__(self):
        super().__init__()
        self.setupUi(self)
        # self.sld_video_pressed = False        # whether the progress slider is currently being pressed
        # self.videoFullScreen = False          # whether the video widget is in full screen
        # self.videoFullScreenWidget = myVideoWidget()   # widget used for full-screen playback

        # player for the source video
        self.player = QMediaPlayer()
        self.player.setVideoOutput(self.widget)                 # widget that displays the source video
        self.pushButton_2.clicked.connect(self.openVideoFile)   # open a video file
        self.pushButton_4.clicked.connect(self.playVideo)       # play
        self.pushButton.clicked.connect(self.pauseVideo)        # pause

        # player for the skeleton visualization
        self.player2 = QMediaPlayer()
        self.player2.setVideoOutput(self.widget_2)
        # player for the recognition-result visualization
        self.player3 = QMediaPlayer()
        self.player3.setVideoOutput(self.widget_3)

        self.pushButton_3.clicked.connect(self.Bone_recognition)
        self.pushButton_5.clicked.connect(self.rgb_result_recognition)

        # build the ST-GCN model (3 input channels, 400 Kinetics classes, OpenPose graph)
        graph_args = {'layout': 'openpose', 'strategy': 'spatial'}
        self.model = Model(3, 400, graph_args, True)
        self.dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.dev)

        # load the Kinetics label names
        label_name_path = './resource/kinetics_skeleton/label_name.txt'
        with open(label_name_path) as f:
            label_name = f.readlines()
            label_name = [line.rstrip() for line in label_name]
            self.label_name = label_name
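        # NOTE: self.model is randomly initialized at this point. For meaningful predictions,
        # the pretrained Kinetics checkpoint from the st-gcn repo should be loaded; the path
        # below is only an assumption -- point it at your own weight file:
        # self.model.load_state_dict(torch.load('path/to/st_gcn_kinetics_weights.pt', map_location=self.dev))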
        # print(self.model)
        # Optional hooks kept from the PyQt_Video_Demo template; the matching slots are defined below.
        # self.btn_cast.clicked.connect(self.castVideo)                # video screenshot
        # self.player.positionChanged.connect(self.changeSlide)        # update the progress slider
        # self.videoFullScreenWidget.doubleClickedItem.connect(self.videoDoubleClicked)  # double-click to toggle full screen
        # self.wgt_video.doubleClickedItem.connect(self.videoDoubleClicked)              # double-click to toggle full screen
        # self.sld_video.setTracking(False)
        # self.sld_video.sliderReleased.connect(self.releaseSlider)
        # self.sld_video.sliderPressed.connect(self.pressSlider)
        # self.sld_video.sliderMoved.connect(self.moveSlider)          # drag the slider to seek
        # self.sld_video.ClickedValue.connect(self.clickedSlider)      # click the slider to seek
        # self.sld_audio.valueChanged.connect(self.volumeChange)       # volume control
        # self.btn_cast.hide()

    def Bone_recognition(self):
        """Run pose estimation and play the rendered skeleton video in widget_2."""
        video, data_numpy = self.pose_estimation()
        # self-links plus the limb pairs of the 18-joint OpenPose (COCO) layout used by ST-GCN
        edge = [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9),
                (10, 10), (11, 11), (12, 12), (13, 13), (14, 14), (15, 15), (16, 16), (17, 17),
                (4, 3), (3, 2), (7, 6), (6, 5), (13, 12), (12, 11), (10, 9), (9, 8), (11, 5),
                (8, 2), (5, 1), (2, 1), (0, 1), (15, 0), (14, 0), (17, 15), (16, 14)]
        images = list(self.stgcn_visualize(data_numpy, edge, video))

        # write the rendered frames to a temporary video file
        height, width, _ = images[0].shape
        output_file_path = 'test.mp4'
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')            # codec; change if needed
        out = cv2.VideoWriter(output_file_path, fourcc, 5.0, (width, height))  # last argument is the frame rate
        for image in images:
            out.write(image)
        out.release()

        # play the generated file with QMediaPlayer
        media_content = QMediaContent(QUrl.fromLocalFile(output_file_path))
        self.player2.setMedia(media_content)
        self.player2.play()

    def rgb_result_recognition(self):
        """Run pose estimation, classify the sequence with ST-GCN and play the result in widget_3."""
        video, data_numpy = self.pose_estimation()
        edge = [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9),
                (10, 10), (11, 11), (12, 12), (13, 13), (14, 14), (15, 15), (16, 16), (17, 17),
                (4, 3), (3, 2), (7, 6), (6, 5), (13, 12), (12, 11), (10, 9), (9, 8), (11, 5),
                (8, 2), (5, 1), (2, 1), (0, 1), (15, 0), (14, 0), (17, 15), (16, 14)]
        print(self.model.graph.edge)

        data = torch.from_numpy(data_numpy)        # convert the NumPy skeleton array to a torch tensor
        data = data.unsqueeze(0)                   # add the batch dimension
        data = data.float().to(self.dev).detach()  # (1, channel, frame, joint, person)

        # model predict
        voting_label_name, video_label_name, output, intensity = self.predict(data)
        images = list(self.stgcn_rgb_visualize(data_numpy, edge, intensity, video, voting_label_name))

        # write the rendered frames to a temporary video file
        height, width, _ = images[0].shape
        output_file_path = 'test1.mp4'
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')            # codec; change if needed
        out = cv2.VideoWriter(output_file_path, fourcc, 5.0, (width, height))  # last argument is the frame rate
        for image in images:
            image = image.astype(np.uint8)
            out.write(image)
        out.release()

        # play the generated file with QMediaPlayer
        media_content = QMediaContent(QUrl.fromLocalFile(output_file_path))
        self.player3.setMedia(media_content)
        self.player3.play()
        self.player3.setPlaybackRate(1.0)          # 1.0 means normal speed

    def predict(self, data):
        # forward
        output, feature = self.model.extract_feature(data)
        output = output[0]
        feature = feature[0]
        intensity = (feature * feature).sum(dim=0) ** 0.5
        intensity = intensity.cpu().detach().numpy()

        # get result
        # classification result of the full sequence
        voting_label = output.sum(dim=3).sum(dim=2).sum(dim=1).argmax(dim=0)
        voting_label_name = self.label_name[voting_label]

        # classification result for each person of the latest frame
        num_person = data.size(4)
        latest_frame_label = [output[:, :, :, m].sum(dim=2)[:, -1].argmax(dim=0)
                              for m in range(num_person)]
        latest_frame_label_name = [self.label_name[l] for l in latest_frame_label]

        # classification result for each person of each frame
        num_person = output.size(3)
        num_frame = output.size(1)
        video_label_name = list()
        for t in range(num_frame):
            frame_label_name = list()
            for m in range(num_person):
                person_label = output[:, t, :, m].sum(dim=1).argmax(dim=0)
                person_label_name = self.label_name[person_label]
                frame_label_name.append(person_label_name)
            video_label_name.append(frame_label_name)
        return voting_label_name, video_label_name, output, intensity

    def put_text(self, img, text, position, scale_factor=1):
        t_w, t_h = cv2.getTextSize(
            text, cv2.FONT_HERSHEY_TRIPLEX, scale_factor, thickness=1)[0]
        H, W, _ = img.shape
        position = (int(W * position[1] - t_w * 0.5),
                    int(H * position[0] - t_h * 0.5))
        params = (position, cv2.FONT_HERSHEY_TRIPLEX, scale_factor,
                  (255, 255, 255))
        cv2.putText(img, text, *params)

    def castVideo(self):
        screen = QGuiApplication.primaryScreen()
        cast_jpg = './' + QDateTime.currentDateTime().toString("yyyy-MM-dd hh-mm-ss-zzz") + '.jpg'
        screen.grabWindow(self.wgt_video.winId()).save(cast_jpg)

    def volumeChange(self, position):
        volume = round(position / self.sld_audio.maximum() * 100)
        print("volume %f" % volume)
        self.player.setVolume(volume)
        self.lab_audio.setText("volume:" + str(volume) + "%")

    def clickedSlider(self, position):
        if self.player.duration() > 0:  # seeking is only allowed once playback has started
            video_position = int((position / 100) * self.player.duration())
            self.player.setPosition(video_position)
            self.lab_video.setText("%.2f%%" % position)
        else:
            self.sld_video.setValue(0)

    def moveSlider(self, position):
        self.sld_video_pressed = True
        if self.player.duration() > 0:  # seeking is only allowed once playback has started
            video_position = int((position / 100) * self.player.duration())
            self.player.setPosition(video_position)
            self.lab_video.setText("%.2f%%" % position)

    def pressSlider(self):
        self.sld_video_pressed = True
        print("pressed")

    def releaseSlider(self):
        self.sld_video_pressed = False

    def changeSlide(self, position):
        if not self.sld_video_pressed:  # do not update while the slider is being dragged
            self.videoLength = self.player.duration() + 0.1
            self.sld_video.setValue(round((position / self.videoLength) * 100))
            self.lab_video.setText("%.2f%%" % ((position / self.videoLength) * 100))

    def openVideoFile(self):
        self.player.setMedia(QMediaContent(QFileDialog.getOpenFileUrl()[0]))  # choose a video file
        self.player.play()
        print(self.player.availableMetaData())

    def playVideo(self):
        self.player.play()

    def pauseVideo(self):
        self.player.pause()

    def videoDoubleClicked(self, text):
        if self.player.duration() > 0:  # full screen is only allowed once playback has started
            if self.videoFullScreen:
                self.player.setVideoOutput(self.wgt_video)
                self.videoFullScreenWidget.hide()
                self.videoFullScreen = False
            else:
                self.videoFullScreenWidget.show()
                self.player.setVideoOutput(self.videoFullScreenWidget)
                self.videoFullScreenWidget.setFullScreen(1)
                self.videoFullScreen = True

    def pose_estimation(self):
        # load the OpenPose Python API
        try:
            sys.path.append('E:/work/openpose-master/build/python/openpose/Release')
            import pyopenpose as op
        except ImportError:
            print('Can not find the OpenPose Python API.')
            return

        # initialize the OpenPose wrapper
        opWrapper = op.WrapperPython()
        params = dict(model_folder='E:/work/qt/PyQt_Video_Demo-master/PyQt_Video_Demo-master/models',
                      model_pose='COCO')
        opWrapper.configure(params)
        opWrapper.start()

        # put the ST-GCN model into evaluation mode
        self.model.eval()

        # open the input video (hard-coded path) and get its length
        video_capture = cv2.VideoCapture('E:/work/qt/rgb/play1.mp4')
        video_length = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
        pose_tracker = naive_pose_tracker(data_frame=video_length)

        # pose estimation
        start_time = time.time()
        frame_index = 0
        video = list()
        while True:
            # get image
            ret, orig_image = video_capture.read()
            if orig_image is None:
                break
            source_H, source_W, _ = orig_image.shape
            orig_image = cv2.resize(
                orig_image, (256 * source_W // source_H, 256))
            H, W, _ = orig_image.shape
            video.append(orig_image)

            # pose estimation
            datum = op.Datum()
            datum.cvInputData = orig_image
            opWrapper.emplaceAndPop(op.VectorDatum([datum]))
            multi_pose = datum.poseKeypoints  # (num_person, num_joint, 3)
            if multi_pose is None or len(multi_pose.shape) != 3:
                continue

            # normalization
            multi_pose[:, :, 0] = multi_pose[:, :, 0] / W
            multi_pose[:, :, 1] = multi_pose[:, :, 1] / H
            multi_pose[:, :, 0:2] = multi_pose[:, :, 0:2] - 0.5
            multi_pose[:, :, 0][multi_pose[:, :, 2] == 0] = 0
            multi_pose[:, :, 1][multi_pose[:, :, 2] == 0] = 0

            # pose tracking
            pose_tracker.update(multi_pose, frame_index)
            frame_index += 1
            print('Pose estimation ({}/{}).'.format(frame_index, video_length))

        data_numpy = pose_tracker.get_skeleton_sequence()
        return video, data_numpy
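
    # 'data_numpy' has shape (3, T, 18, num_person): the channels are (x, y, confidence),
    # T is the number of tracked frames and 18 are the OpenPose COCO joints. The two
    # generators below render it frame by frame: stgcn_visualize draws only the white
    # skeleton, while stgcn_rgb_visualize additionally overlays the per-joint feature
    # intensity and the voted action label.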

    def stgcn_visualize(self, pose, edge, video, height=1080, fps=None):
        _, T, V, M = pose.shape
        T = len(video)
        pos_track = [None] * M
        for t in range(T):
            frame = video[t]

            # image resize
            H, W, c = frame.shape
            frame = cv2.resize(frame, (height * W // H // 2, height // 2))
            H, W, c = frame.shape
            scale_factor = 2 * height / 1080

            # draw skeleton
            skeleton = frame * 0
            text = frame * 0
            for m in range(M):
                score = pose[2, t, :, m].max()
                if score < 0.3:
                    continue
                for i, j in edge:
                    xi = pose[0, t, i, m]
                    yi = pose[1, t, i, m]
                    xj = pose[0, t, j, m]
                    yj = pose[1, t, j, m]
                    if xi + yi == 0 or xj + yj == 0:
                        continue
                    else:
                        xi = int((xi + 0.5) * W)
                        yi = int((yi + 0.5) * H)
                        xj = int((xj + 0.5) * W)
                        yj = int((yj + 0.5) * H)
                    cv2.line(skeleton, (xi, yi), (xj, yj), (255, 255, 255),
                             int(np.ceil(2 * scale_factor)))  # np.ceil rounds the line width up

                x_nose = int((pose[0, t, 0, m] + 0.5) * W)
                y_nose = int((pose[1, t, 0, m] + 0.5) * H)
                x_neck = int((pose[0, t, 1, m] + 0.5) * W)
                y_neck = int((pose[1, t, 1, m] + 0.5) * H)
                half_head = int(((x_neck - x_nose) ** 2 + (y_neck - y_nose) ** 2) ** 0.5)
                pos = (x_nose + half_head, y_nose - half_head)
                if pos_track[m] is None:
                    pos_track[m] = pos
                else:
                    new_x = int(pos_track[m][0] + (pos[0] - pos_track[m][0]) * 0.2)
                    new_y = int(pos_track[m][1] + (pos[1] - pos_track[m][1]) * 0.2)
                    pos_track[m] = (new_x, new_y)

            yield skeleton

    def stgcn_rgb_visualize(self, pose, edge, feature, video, label=None, height=1080, fps=None):
        _, T, V, M = pose.shape
        T = len(video)
        pos_track = [None] * M
        for t in range(T):
            frame = video[t]

            # image resize
            H, W, c = frame.shape
            frame = cv2.resize(frame, (height * W // H // 2, height // 2))
            H, W, c = frame.shape
            scale_factor = 2 * height / 1080

            # draw skeleton
            skeleton = frame * 0
            text = frame * 0
            for m in range(M):
                score = pose[2, t, :, m].max()
                if score < 0.3:
                    continue
                for i, j in edge:
                    xi = pose[0, t, i, m]
                    yi = pose[1, t, i, m]
                    xj = pose[0, t, j, m]
                    yj = pose[1, t, j, m]
                    if xi + yi == 0 or xj + yj == 0:
                        continue
                    else:
                        xi = int((xi + 0.5) * W)
                        yi = int((yi + 0.5) * H)
                        xj = int((xj + 0.5) * W)
                        yj = int((yj + 0.5) * H)
                    cv2.line(skeleton, (xi, yi), (xj, yj), (255, 255, 255),
                             int(np.ceil(2 * scale_factor)))  # np.ceil rounds the line width up

                x_nose = int((pose[0, t, 0, m] + 0.5) * W)
                y_nose = int((pose[1, t, 0, m] + 0.5) * H)
                x_neck = int((pose[0, t, 1, m] + 0.5) * W)
                y_neck = int((pose[1, t, 1, m] + 0.5) * H)
                half_head = int(((x_neck - x_nose) ** 2 + (y_neck - y_nose) ** 2) ** 0.5)
                pos = (x_nose + half_head, y_nose - half_head)
                if pos_track[m] is None:
                    pos_track[m] = pos
                else:
                    new_x = int(pos_track[m][0] + (pos[0] - pos_track[m][0]) * 0.2)
                    new_y = int(pos_track[m][1] + (pos[1] - pos_track[m][1]) * 0.2)
                    pos_track[m] = (new_x, new_y)

            # generate an attention mask from the feature intensity
            mask = frame * 0
            feature = np.abs(feature)
            feature = feature / feature.mean()
            for m in range(M):
                score = pose[2, t, :, m].max()
                if score < 0.3:
                    continue
                f = feature[t // 4, :, m] ** 5
                if f.mean() != 0:
                    f = f / f.mean()
                for v in range(V):
                    x = pose[0, t, v, m]
                    y = pose[1, t, v, m]
                    if x + y == 0:
                        continue
                    else:
                        x = int((x + 0.5) * W)
                        y = int((y + 0.5) * H)
                    cv2.circle(mask, (x, y), 0, (255, 255, 255),
                               int(np.ceil(f[v] ** 0.5 * 8 * scale_factor)))
            blurred_mask = cv2.blur(mask, (12, 12))

            # blend mask, skeleton and text, then draw the voted label
            skeleton_result = blurred_mask.astype(float) * 0.75
            skeleton_result += skeleton.astype(float) * 0.25
            skeleton_result += text.astype(float)
            skeleton_result[skeleton_result > 255] = 255
            skeleton_result = skeleton_result.astype(np.uint8)
            if label is not None:
                label_0_name = 'voting result: ' + label
                self.put_text(skeleton_result, label_0_name, (0.1, 0.5))

            yield skeleton_result


class naive_pose_tracker():
    """A simple tracker for recording person poses and generating skeleton sequences.

    For real applications, a more robust tracker is recommended.
    """

    def __init__(self, data_frame=128, num_joint=18, max_frame_dis=np.inf):
        self.data_frame = data_frame
        self.num_joint = num_joint
        self.max_frame_dis = max_frame_dis
        self.latest_frame = 0
        self.trace_info = list()

    def update(self, multi_pose, current_frame):
        # multi_pose.shape: (num_person, num_joint, 3)
        if current_frame <= self.latest_frame:
            return
        if len(multi_pose.shape) != 3:
            return
        score_order = (-multi_pose[:, :, 2].sum(axis=1)).argsort(axis=0)
        for p in multi_pose[score_order]:

            # match existing traces
            matching_trace = None
            matching_dis = None
            for trace_index, (trace, latest_frame) in enumerate(self.trace_info):
                # trace.shape: (num_frame, num_joint, 3)
                if current_frame <= latest_frame:
                    continue
                mean_dis, is_close = self.get_dis(trace, p)
                if is_close:
                    if matching_trace is None:
                        matching_trace = trace_index
                        matching_dis = mean_dis
                    elif matching_dis > mean_dis:
                        matching_trace = trace_index
                        matching_dis = mean_dis

            # update trace information
            if matching_trace is not None:
                trace, latest_frame = self.trace_info[matching_trace]

                # padding zero if the trace is fractured
                pad_mode = 'interp' if latest_frame == self.latest_frame else 'zero'
                pad = current_frame - latest_frame - 1
                new_trace = self.cat_pose(trace, p, pad, pad_mode)
                self.trace_info[matching_trace] = (new_trace, current_frame)
            else:
                new_trace = np.array([p])
                self.trace_info.append((new_trace, current_frame))

        self.latest_frame = current_frame

    def get_skeleton_sequence(self):

        # remove old traces
        valid_trace_index = []
        for trace_index, (trace, latest_frame) in enumerate(self.trace_info):
            if self.latest_frame - latest_frame < self.data_frame:
                valid_trace_index.append(trace_index)
        self.trace_info = [self.trace_info[v] for v in valid_trace_index]

        num_trace = len(self.trace_info)
        if num_trace == 0:
            return None

        data = np.zeros((3, self.data_frame, self.num_joint, num_trace))
        for trace_index, (trace, latest_frame) in enumerate(self.trace_info):
            end = self.data_frame - (self.latest_frame - latest_frame)
            d = trace[-end:]
            beg = end - len(d)
            data[:, beg:end, :, trace_index] = d.transpose((2, 0, 1))

        return data

    # concatenate pose to a trace
    def cat_pose(self, trace, pose, pad, pad_mode):
        # trace.shape: (num_frame, num_joint, 3)
        num_joint = pose.shape[0]
        num_channel = pose.shape[1]
        if pad != 0:
            if pad_mode == 'zero':
                trace = np.concatenate(
                    (trace, np.zeros((pad, num_joint, 3))), 0)
            elif pad_mode == 'interp':
                last_pose = trace[-1]
                coeff = [(p + 1) / (pad + 1) for p in range(pad)]
                interp_pose = [(1 - c) * last_pose + c * pose for c in coeff]
                trace = np.concatenate((trace, interp_pose), 0)
        new_trace = np.concatenate((trace, [pose]), 0)
        return new_trace

    # calculate the distance between an existing trace and the input pose
    def get_dis(self, trace, pose):
        last_pose_xy = trace[-1, :, 0:2]
        curr_pose_xy = pose[:, 0:2]
        mean_dis = ((((last_pose_xy - curr_pose_xy) ** 2).sum(1)) ** 0.5).mean()
        wh = last_pose_xy.max(0) - last_pose_xy.min(0)
        scale = (wh[0] * wh[1]) ** 0.5 + 0.0001
        is_close = mean_dis < scale * self.max_frame_dis
        return mean_dis, is_close


if __name__ == '__main__':
    app = QApplication(sys.argv)
    video_gui = myMainWindow()
    video_gui.show()
    sys.exit(app.exec_())
```
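For reference, the ST-GCN classification step can also be exercised without the GUI. The sketch below is a minimal example under the same assumptions as the code above (the `net.st_gcn.Model` class, the Kinetics label file, and a skeleton array of shape (3, T, 18, M) such as the one returned by `pose_estimation`); the checkpoint and .npy paths are placeholders you must replace:

```python
import numpy as np
import torch
from net.st_gcn import Model

dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Model(3, 400, {'layout': 'openpose', 'strategy': 'spatial'}, True).to(dev)
model.load_state_dict(torch.load('path/to/st_gcn_kinetics_weights.pt', map_location=dev))  # placeholder path
model.eval()

with open('./resource/kinetics_skeleton/label_name.txt') as f:
    label_name = [line.rstrip() for line in f]

data_numpy = np.load('path/to/skeleton.npy')                       # (3, T, 18, M), e.g. from pose_estimation()
data = torch.from_numpy(data_numpy).float().unsqueeze(0).to(dev)   # (1, C, T, V, M)

with torch.no_grad():
    output, feature = model.extract_feature(data)                  # same call the GUI's predict() uses
voting_label = output[0].sum(dim=3).sum(dim=2).sum(dim=1).argmax(dim=0).item()
print('predicted action:', label_name[voting_label])
```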