Hand pose detection (per video segment) _v5 (simulated production, model integration _stgcn)

Current best results:
1. 2s-agcn is dropped: with or without data cleaning, it cannot match the other two;
2. With cleaned data, hcn is the better choice (2 s clips, 15 frames);
3. Without cleaning, stgcn is the better choice (2 s clips, 15 frames, taking all 64 frames).

Final choice: stgcn
Video sampling rule: advance through the video frame by frame; after predicting one segment, slide the window forward by n frames (5 or 10?) to form a new segment, then predict it.

Prediction rules (see the sketch below):
1. Predict n segments to get n labels, and take the most frequent label as the final result;
2. Predict n segments, sum the n softmax probability vectors, and take the class with the largest summed probability as the final result.
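
A minimal sketch of both rules (names are illustrative; `seg_labels` holds the per-segment argmax labels, `seg_probs` the per-segment softmax vectors):

import numpy as np
from collections import Counter

def fuse_majority(seg_labels):
    # rule 1: the most frequent label across the n segments wins
    return Counter(seg_labels).most_common(1)[0][0]

def fuse_prob_sum(seg_probs):
    # rule 2: sum the n softmax vectors, take the argmax
    return int(np.argmax(np.sum(np.stack(seg_probs), axis=0)))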


Project code:

data.py

from torch.utils import data
import torch
import os
import random
import numpy as np


list1=[16,21,24,32]   # test subject IDs
list2=[22,33]   # validation subject IDs

forder='5'
forder2='5'
split_label=1   # whether to include the video name in the label
file_name = 'dataset/test8/src/txt/'+str(forder)+'.txt'

save='dataset/test8/src/'+str(forder2)+"/"
if not os.path.exists(save):
	os.makedirs(save)


frame_nums=64
f = open(file_name)
lines = f.readlines()

prev_video = int(lines[0].strip().split(' ')[1])   # video ID
prev_categ = int(lines[0].strip().split(' ')[2])   # class label

datas=[]
datas_label=[]
frames = []
train = []
valid = []
test  = []
train_label = []
valid_label = []
test_label  = []
m=0

for line in lines:
	line = line.strip().split(' ')
	vid = int(line[1])   # video ID
	aid = int(line[0])   # subject ID
	cid = int(line[2])   # class label
	label=list(map(int, line[:3]))
	features = list(map(float, line[3:]))   # keypoint coordinates, (x, y, confidence) per point
	
	# Frames that share a video ID are collected into one training sample
	if prev_video == vid:
		frames.append(np.reshape(np.asarray(features), (-1,3)))   # reshape the flat vector to [15,3]
	else:
		# If the video has too many frames, keep the first frame_nums frames,
		# stack them and convert to torch format
		if len(frames) >= frame_nums:
			# frames = random.sample(frames, frame_nums)   # sample frames at random
			frames = frames[0:frame_nums]    # take frames in order
			frames = torch.from_numpy(np.stack(frames, 0))  # stack the frames along dim 0, convert to torch
			
		# If the video has too few frames, pad it to frame_nums frames by linear interpolation
		else:
			frames = np.stack(frames, 0) # stack the frames: n frames, n*[1,15,3]=[n,15,3], one sample
			xloc = np.arange(frames.shape[0])   # np.arange: the n original frame indices
			new_xloc = np.linspace(0, frames.shape[0], frame_nums)  # frame_nums evenly spaced sample positions between start and end
			frames = np.reshape(frames, (frames.shape[0], -1)).transpose()  # transpose: matrix transpose, now [45, n]
			# print(frames.shape,xloc.shape,new_xloc.shape)
			
			new_datas = []
			for data in frames:
				new_datas.append(np.interp(new_xloc, xloc, data))   # np.interp: linear interpolation up to frame_nums frames
			frames = torch.from_numpy(np.stack(new_datas, 0)).t()  # re-stack the frames, convert to torch format
			
		frames = frames.view(frame_nums, -1, 3)  # reshape to [frame_nums, 15, 3]
		datas.append(frames)   # data
		if split_label==1:
			datas_label.append(label)   # label (includes the video name)
		else:
			datas_label.append(prev_categ)   # label

		# m+=1
		# # 2. split by subject ID
		# if aid in list1:
		# 	test.append(frames)
		# 	test_label.append(prev_categ)
		# elif aid in list2:
		# 	valid.append(frames)
		# 	valid_label.append(prev_categ)
		# else:
		# 	train.append(frames)
		# 	train_label.append(prev_categ)

		frames = [np.reshape(np.asarray(features), (-1,3))]  # reset frames to the first frame of the new video

	prev_actor = aid   # reset subject ID
	prev_video = vid   # reset video ID
	prev_categ = cid   # reset label


# 3. random split: ~70% train / 20% valid / 10% test
lens=len(datas)
num=random.sample(range(lens),lens)   # shuffled indices

for i in range(lens):
	index=num[i]
	if i <=int(lens*0.7):
		train.append(datas[index])
		train_label.append(datas_label[index])
	elif i <=int(lens*0.9):
		valid.append(datas[index])
		valid_label.append(datas_label[index])
	else:
		test.append(datas[index])
		test_label.append(datas_label[index])



train_label = torch.from_numpy(np.asarray(train_label))
valid_label = torch.from_numpy(np.asarray(valid_label))
test_label  = torch.from_numpy(np.asarray(test_label))
print(len(train_label),len(valid_label),len(test_label))
print(test_label.shape)


torch.save((torch.stack(train, 0), train_label), save+'train.pkl')
torch.save((torch.stack(valid, 0), valid_label), save+'valid.pkl')
torch.save((torch.stack(test, 0),  test_label),  save+'test.pkl')
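
A quick sanity check of the saved tensors (same paths as above; expected shapes follow the pipeline):

train, train_label = torch.load(save+'train.pkl')
print(train.shape)         # expected [N, 64, 15, 3]: N clips, 64 frames, 15 points, (x, y, confidence)
print(train_label.shape)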


# Data preprocessing:
# 1. Cut a 2 min video into 1 s clips, numbered 1-n, giving n clips (n samples);
#    add subject IDs 1-m, and give each clip an action-type label: xx
# 2. Extract the keypoints of every frame; pick a stable center point as the coordinate
#    origin and subtract it from all other points,
#    so the origin becomes (0,0) and all coordinates are relative to it;
# 3. Flatten the coordinates to 1-D for storage; reshape back to (n,-1,2) in code when needed.
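
A minimal sketch of steps 2-3 for a single frame (the keypoint values and center index are illustrative):

import numpy as np

points = np.array([[310, 220], [295, 240], [330, 255]])  # hypothetical keypoints
center = points[0]                  # a stable center point, e.g. the nose tip
relative = points - center          # the center becomes (0, 0)
flat = relative.reshape(-1)         # flatten to 1-D for storage
restored = flat.reshape(-1, 2)      # reshape back when needed, as in step 3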

main.py

import os
import numpy as np
import torch
import torch.optim as optim
import torch.utils.data as data
import time
from model import *
from metric import accuracy
from config import get_args
args = get_args()

# use the GPU if one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# load the pre-built tensors with torch
train_tensor, train_label = torch.load(args.train_path)
valid_tensor, valid_label = torch.load(args.valid_path)
test_tensor , test_label  = torch.load(args.test_path)

# DataLoader: all data is preloaded; each iteration yields batch_size samples (batch_size=1 here, so labels can be indexed by batch index)
train_loader = data.DataLoader(data.TensorDataset(train_tensor.to(device)),
							   batch_size = args.batch_size, shuffle=False)
valid_loader = data.DataLoader(data.TensorDataset(valid_tensor.to(device)),
							   batch_size = args.batch_size, shuffle=False)
test_loader  = data.DataLoader(data.TensorDataset(test_tensor.to(device)),
							   batch_size = args.batch_size, shuffle=False)
train_label = train_label.to(device)
valid_label = valid_label.to(device)
test_label  = test_label.to(device)

# # split the video name from the label
# def get_label_vid(data):
# 	vid=[]
# 	label=[]
# 	for i in  data:
# 		label.append(int(i[2]))
# 		name=str(int(i[0]))+'_'+str(int(i[1]))+'.mp4'
# 		vid.append(name)
		
# 	labels=torch.from_numpy(np.array(label))
# 	return labels,np.array(vid)

# train_label, train_vid = get_label_vid(train_label)
# valid_label, vaild_vid = get_label_vid(valid_label)
# test_label, test_vid = get_label_vid(test_label)
# print(test_label.shape,test_vid.shape)

# adjacency matrix of the 15 keypoints
A = [[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0],
	 [1,0,1,0,0,0,0,0,0,0,0,0,0,0,0],
	 [0,1,0,1,0,0,1,0,0,1,0,0,0,0,0],
	 [0,0,1,0,1,0,0,0,0,0,0,0,0,0,0],
	 [0,0,0,1,0,1,0,0,0,0,0,0,0,0,0],
	 [0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],
	 [0,0,1,0,0,0,0,1,0,0,0,0,0,0,0],
	 [0,0,0,0,0,0,1,0,1,0,0,0,0,0,0],
	 [0,0,0,0,0,0,0,1,0,0,0,0,0,0,0],
	 [0,0,1,0,0,0,0,0,0,0,1,0,1,0,0],
	 [0,0,0,0,0,0,0,0,0,1,0,1,0,0,0],
	 [0,0,0,0,0,0,0,0,0,0,1,0,0,0,0],
	 [0,0,0,0,0,0,0,0,0,1,0,0,1,0,0],
	 [0,0,0,0,0,0,0,0,0,0,0,1,0,1,0],
	 [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0]]
A = torch.from_numpy(np.asarray(A)).to(device)

# build the GCN model
model = GGCN(A, train_tensor.size(3), args.num_classes,
			 [train_tensor.size(3), train_tensor.size(3)*3], [train_tensor.size(3)*3, 16, 32, 64],
			 args.feat_dims, args.frame_nums, args.dropout_rate)
# print([train_tensor.size(3), train_tensor.size(3)*3], [train_tensor.size(3)*3, 16, 32, 64])

if device.type == 'cuda':   # torch.device never equals the plain string 'cuda'
	model.cuda()

# count the model parameters
num_params = 0
for p in model.parameters():
	num_params += p.numel()
# print(model)

# loss, optimizer, learning-rate decay
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = args.learning_rate,
					   betas=[args.beta1, args.beta2], weight_decay = args.weight_decay)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma = 0.1)

best_epoch = 0
best_acc = 0
def train():
	global best_epoch, best_acc

	# resume from a checkpoint (checkpoints store the full model, so extract its state_dict)
	if args.start_epoch:
		model.load_state_dict(torch.load(os.path.join(args.model_path, 'model-%d.pkl'%(args.start_epoch))).state_dict())

	# Training
	for epoch in range(args.start_epoch, args.num_epochs):
		train_loss = 0
		train_acc  = 0
		model.train()  # model.train(): enable BatchNorm/Dropout;  model.eval(): disable them


		for i, x in enumerate(train_loader):
			logit = model(x[0].float())   # model prediction
			target = train_label[i]  # ground-truth label (valid because batch_size=1 and shuffle=False)
			loss = criterion(logit, target.view(1).long())   # compute the loss
			
			model.zero_grad()  # clear the gradients once per batch
			loss.backward()    # backpropagate the loss, compute gradients
			optimizer.step()   # the optimizer updates the parameters from the gradients

			train_loss += loss.item()   # accumulate the loss over the epoch
			train_acc  += accuracy(logit, target.view(1).long())   # accumulate the acc over the epoch
		scheduler.step()   # step the LR scheduler once per epoch, after the optimizer updates
		print('[epoch',epoch+1,'] Train loss:',train_loss/(i+1), 'Train Acc:',train_acc/(i+1))

		# save the model
		if not os.path.exists(args.model_path):
			os.makedirs(args.model_path)
		if (epoch+1) % 20 ==0:
			torch.save(model, os.path.join(args.model_path, 'model-%d.pkl'%(epoch+1)))

		# validate during training
		if (epoch+1) % args.val_step == 0:
			model.eval()
			val_loss = 0
			val_acc  = 0
			with torch.no_grad():
				for i, x in enumerate(valid_loader):
					logit = model(x[0].float())
					target = valid_label[i]

					val_loss += criterion(logit, target.view(1).long()).item()
					val_acc += accuracy(logit, target.view(1).long())

				if (val_acc/(i+1)) >= best_acc:   # keep the checkpoint with the best validation accuracy
					best_epoch = epoch+1
					best_acc = val_acc/(i+1)
					torch.save(model.state_dict(), os.path.join(args.model_path, 'best_model-%d.pkl'%(best_epoch)))

			print('Val loss:',val_loss/(i+1), 'Val Acc:',val_acc/(i+1))

def test():
	global best_epoch
	
	# checkpoints store the full model, so extract its state_dict
	model.load_state_dict(torch.load(os.path.join(args.model_path, 
												  'model-%d.pkl'%(best_epoch))).state_dict())
	print("load model from 'model-%d.pkl'"%(best_epoch))

	model.eval()
	test_loss = 0
	test_acc  = 0
	with torch.no_grad():
		for i, x in enumerate(test_loader):
			star=time.time()
			logit = model(x[0].float())
			target = test_label[i]

			test_loss += criterion(logit, target.view(1).long()).item()
			test_acc  += accuracy(logit, target.view(1).long())

			end=int((time.time()-star)*1000)   # per-sample inference time in ms
			# print('pred:',torch.max(logit, 1)[1].float()
			# .cpu().numpy(),  'true:',target.cpu().numpy(),'time:',end, 'index:',i)
	print('Test loss:',test_loss/(i+1), 'Test Acc:',test_acc/(i+1))



train()

test.py

import os
import numpy as np
import torch
import torch.optim as optim
import torch.utils.data as data
import time
from model import *
from metric import accuracy
from test_config import get_args
args = get_args()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_tensor, train_label = torch.load(args.train_path)
valid_tensor, valid_label = torch.load(args.valid_path)
test_tensor , test_label  = torch.load(args.test_path)

train_loader = data.DataLoader(data.TensorDataset(train_tensor.to(device)),
							   batch_size = args.batch_size, shuffle=False)
valid_loader = data.DataLoader(data.TensorDataset(valid_tensor.to(device)),
							   batch_size = args.batch_size, shuffle=False)
test_loader  = data.DataLoader(data.TensorDataset(test_tensor.to(device)),
							   batch_size = args.batch_size, shuffle=False)
train_label = train_label.to(device)
valid_label = valid_label.to(device)
test_label  = test_label.to(device)

# split the video name from the label
def get_label_vid(data):
	vid=[]
	label=[]
	for i in  data:
		label.append(int(i[2]))
		name=str(int(i[0]))+'_'+str(int(i[1]))+'.mp4'
		vid.append(name)
		
	labels=torch.from_numpy(np.array(label))
	return labels,np.array(vid)

# train_label, train_vid = get_label_vid(train_label)
# valid_label, vaild_vid = get_label_vid(valid_label)
# test_label, test_vid = get_label_vid(test_label)
# print(test_label.shape,test_vid.shape)

A = [[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0],
	 [1,0,1,0,0,0,0,0,0,0,0,0,0,0,0],
	 [0,1,0,1,0,0,1,0,0,1,0,0,0,0,0],
	 [0,0,1,0,1,0,0,0,0,0,0,0,0,0,0],
	 [0,0,0,1,0,1,0,0,0,0,0,0,0,0,0],
	 [0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],
	 [0,0,1,0,0,0,0,1,0,0,0,0,0,0,0],
	 [0,0,0,0,0,0,1,0,1,0,0,0,0,0,0],
	 [0,0,0,0,0,0,0,1,0,0,0,0,0,0,0],
	 [0,0,1,0,0,0,0,0,0,0,1,0,1,0,0],
	 [0,0,0,0,0,0,0,0,0,1,0,1,0,0,0],
	 [0,0,0,0,0,0,0,0,0,0,1,0,0,0,0],
	 [0,0,0,0,0,0,0,0,0,1,0,0,1,0,0],
	 [0,0,0,0,0,0,0,0,0,0,0,1,0,1,0],
	 [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0]]
A = torch.from_numpy(np.asarray(A)).to(device)

model = GGCN(A, train_tensor.size(3), args.num_classes, 
			 [train_tensor.size(3), train_tensor.size(3)*3], [train_tensor.size(3)*3, 16, 32, 64], 
			 args.feat_dims, args.frame_nums, args.dropout_rate)
if device.type == 'cuda':   # torch.device never equals the plain string 'cuda'
	model.cuda()

num_params = 0
for p in model.parameters():
	num_params += p.numel()
# print(model)
# print('The number of parameters: {}'.format(num_params))

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = args.learning_rate,
					   betas=[args.beta1, args.beta2], weight_decay = args.weight_decay)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma = 0.1)

best_epoch = 0
best_acc = 0

def test():
	global best_epoch
	#
	# model.load_state_dict(torch.load(os.path.join(args.model_path,
	# 											  'model-%d.pkl'%(best_epoch))))
	# print("load model from 'model-%d.pkl'"%(best_epoch))
	#
	model = torch.load(os.path.join(args.model_path, 'model-%d.pkl'%(best_epoch)))   # load the full saved model
	model.eval()   # disable Dropout/BatchNorm updates for inference
	test_loss = 0
	test_acc  = 0
	with torch.no_grad():
		for i, x in enumerate(test_loader):
			star=time.time()
			logit = model(x[0].float())
			target = test_label[i]

			test_loss += criterion(logit, target.view(1).long()).item()
			test_acc  += accuracy(logit, target.view(1).long())

			end=int((time.time()-star)*1000)   # per-sample inference time in ms
			# print('pred:',torch.max(logit, 1)[1].float().cpu().numpy(),  'true:',target.cpu().numpy(),'time:',end, 'index:',i)
	print('Test loss:',test_loss/(i+1), 'Test Acc:',test_acc/(i+1))

if __name__ == '__main__':
	# if args.mode == 'train':
	# 	train()
	# elif args.mode == 'test':
	# 	best_epoch = args.test_epoch
	# 	test()
	
	# train()
	best_epoch = 100
	test()

test--模拟现网模型集成_v3.py   (simulated production model integration, v3)

(predicts n segments, fuses the weighted scores, advances frame by frame)

import sys
import os
import dlib
import cv2
from PIL import Image
import numpy as np
import time
import imutils
import torch


def test(model_path, test_data):
    model=torch.load(model_path)
    print("load model...")

    model.eval()
    with torch.no_grad():
        output = model(test_data.float())
        logit=output.data.cpu().numpy()
        pred = torch.max(output.data, 1)[1].cpu().numpy()   # .cpu() before .numpy(), in case the model runs on GPU
        print("prediction:",logit,pred)

    return logit,pred
        

def crop(img, point_face):
    # crop from 200 px above face landmark 24 (an eyebrow point) down to the bottom of the image
    top=point_face[24][1]
    mins=top-200
    if mins<=0:
        mins=0

    h=img.shape[0]
    w=img.shape[1]
    img2=img[mins:h,0:w]
    
    return img2

def face_detect(pic):
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor('test/class/shape_predictor_68_face_landmarks.dat')

    img = np.copy(pic)
    # img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    
    # detected face rectangles; require exactly one face
    rects = detector(img, 1)
    if len(rects)!=1:
        # print(len(rects),"face detection fail!")
        return -1,-1

    landmarks = np.matrix([[p.x, p.y] for p in predictor(img,rects[0]).parts()])

    point_face=[]
    for idx, point in enumerate(landmarks):
        x=point[0, 0]
        y=point[0, 1]
        point_face.append((x,y))
    
        # # draw the landmarks
        # cv2.circle(img, (x,y), 2, (0, 255, 0), thickness=-1, lineType=cv2.FILLED)
        # cv2.putText(img, str(idx), (x,y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 1,cv2.LINE_AA)
        
    return img, point_face

def hand_detect(pic, img_face):
    protoFile = "test/class/hand/pose_deploy.prototxt"
    weightsFile = "test/class/hand/pose_iter_102000.caffemodel"
    nPoints = 22
    POSE_PAIRS = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], [10, 11], [11, 12],
                  [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
    net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
    
    img_hand=np.copy(img_face)   # used to draw the hand keypoints
    frame = np.copy(pic)   # used for hand detection
    frameWidth = frame.shape[1]
    frameHeight = frame.shape[0]
    aspect_ratio = frameWidth / frameHeight

    threshold = 0.1

    t = time.time()
    # input image dimensions for the network
    inHeight = 368
    inWidth = int(((aspect_ratio * inHeight) * 8) // 8)
    inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)

    net.setInput(inpBlob)

    output = net.forward()
    # print("time taken by network : {:.3f}".format(time.time() - t))

    # Empty list to store the detected keypoints
    point_hand = []

    for i in range(nPoints):
        # confidence map for the corresponding keypoint
        probMap = output[0, i, :, :]
        probMap = cv2.resize(probMap, (frameWidth, frameHeight))
        # cv2.minMaxLoc: returns the min and max of a matrix together with their locations
        minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)   # location of the highest-confidence point

        # keypoint coordinates
        x,y=(int(point[0]), int(point[1]))
        point_hand.append((x,y,prob))
    
        # # draw the keypoints
        # cv2.circle(img_hand, (x,y), 2, (255, 255, 0), thickness=-1, lineType=cv2.FILLED)
        # cv2.putText(img_hand, str(i), (x,y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), 1,cv2.LINE_AA)

    return img_hand,point_hand
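
Note that face_detect and hand_detect reload their models on every call; a sketch of the usual fix, hoisting the loads to module scope (same file paths as above):

# load once at import time, reuse across frames
DETECTOR = dlib.get_frontal_face_detector()
PREDICTOR = dlib.shape_predictor('test/class/shape_predictor_68_face_landmarks.dat')
HAND_NET = cv2.dnn.readNetFromCaffe("test/class/hand/pose_deploy.prototxt",
                                    "test/class/hand/pose_iter_102000.caffemodel")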

def get_feature(point_face, point_hand,img):
    circle_center=point_face[30]   # nose tip as the coordinate origin

    # 1. keep only 15 keypoints: 8 from the face, 7 from the hand
    face_key_point=[36,45,2,14,39,42,30,57]
    hand_key_point=[8,12,16,6,10,14,9]
    
    line=''
    for i in face_key_point:
        x=point_face[i][0]-circle_center[0]
        y=point_face[i][1]-circle_center[1]
        confidence=1
        line+=str(x)+' '+str(y)+' '+str(confidence)+' '

        # draw the face points
        cv2.circle(img, (point_face[i][0],point_face[i][1]), 2, (255, 255, 0), thickness=-1, lineType=cv2.FILLED)
        cv2.putText(img, str(i), (point_face[i][0],point_face[i][1]), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), 1,cv2.LINE_AA)
        
    for i in hand_key_point:
        x=point_hand[i][0]-circle_center[0]
        y=point_hand[i][1]-circle_center[1]
        confidence=point_hand[i][2]
        line+=str(x)+' '+str(y)+' '+str(confidence)+' '

        # draw the hand points
        cv2.circle(img, (point_hand[i][0],point_hand[i][1]), 2, (0, 255, 0), thickness=-1, lineType=cv2.FILLED)
        cv2.putText(img, str(i), (point_hand[i][0],point_hand[i][1]), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), 1,cv2.LINE_AA)
    
    return img,line

def count_pass(point):
    n=0
    points=point[8:]   # hand points only (the first 8 are face points)
    for data in points:
        confidence=float(data[2])
        # print(confidence)
        if confidence>=0.1:
            n+=1

    # # class-3 data skips the check and passes directly
    # if label == 3:
    #     n=7

    # face sanity check: reject the frame if the detected face is distorted
    w=int(point[3][0])-int(point[2][0])   # face width between landmarks 14 and 2
    if w<=50:
        n=-1

    return n



model_path='model/test9/clear/2+3+4+5+6_v1/model-100.pkl'
save_path = "test/vedio/save/"
file_root = 'test/vedio/1/11_10.mp4'
cap = cv2.VideoCapture(file_root)
hasFrame, frame = cap.read()
w = frame.shape[1]
h = frame.shape[0]


interval_mask=0
nums=64
frames=[0]*nums
feature=[0]*nums
pass_mask=[0]*nums
pred_list=[]
pred_list2=[]
# main loop: read frames
n=0
while (hasFrame):
    hasFrame, frame = cap.read()   # read one video frame
    old_frame=np.copy(frame)

    if hasFrame==True:
        times=int(n/nums)
        index=(n%nums)
        n += 1
        print("group {}, frame {}".format(times+1,index+1))

        # after predicting one sample, wait a few groups before predicting again
        if interval_mask==1:
            interval_time=times-old_time   # groups elapsed since the last prediction
            if interval_time<=1:
                print("in cool-down: group {}/5, frame {}.".format(interval_time, index+1))
                continue
            else:
                interval_mask=0

        h,w,_=frame.shape
        frame=cv2.resize(frame,(int(w*0.5),int(h*0.5)))

        img_face, point_face=face_detect(frame)   # face detection
        if point_face ==-1:
            n=n-1
            continue
        else:
            img=crop(frame, point_face)
            img=cv2.resize(img,(500,700))
            img_face2, point_face2=face_detect(img)

            if point_face2 ==-1:
                n=n-1
                continue
            else:
                img_hand, point_hand=hand_detect(img, img_face2)   # hand detection
                draw_img,line=get_feature(point_face2, point_hand,img)

                # keypoint data
                line=line.strip().split(" ")
                point = list(map(float, line))
                point = np.reshape(np.asarray(point), (-1,3))
                
                # check whether this frame passes the confidence threshold
                pass_nums=count_pass(point)
                if pass_nums>=0:
                    flag=1
                    print("frame passed:",pass_nums,point.shape,np.sum(pass_mask)+1)
                elif pass_nums==-1:
                    n=n-1
                    print("frame rejected: distorted face!")
                    continue
                else:
                    # note: count_pass only returns values >= 0 or -1, so this branch is unreachable as written
                    flag=0
                    print("frame failed:",pass_nums,point.shape)
                
                # record the flag, keypoint data and video frame
                pass_mask[index]=flag
                feature[index]=point
                frames[index]=old_frame

                # once a full group is collected, decide whether it passes
                if index==nums-1:  # or len(pass_nums)==nums-1
                    pass_nums=np.sum(pass_mask)   # number of passed frames
                    print("frames passed: {}".format(pass_nums))

                    if pass_nums>=32:
                        print("[group accepted!]")

                        # run the model on this group
                        test_data = torch.from_numpy(np.stack(feature, 0))
                        test_data=test_data.unsqueeze(0)
                        logit, pred=test(model_path, test_data)
                        pred_list.append(logit)
                        pred_list2.append(pred[0])
                        print(test_data.size())
                        
                        # save the predicted video segment for inspection
                        vid=file_root.split("/")[-1].split(".")[0]
                        videoWriter =cv2.VideoWriter(
                            save_path+'{}-({})-{}.mp4'.format(vid,len(pred_list2),pred[0]),
                            cv2.VideoWriter_fourcc(*"mp4v"),30,(w, h))
                        for frame in frames:
                            videoWriter.write(frame)
                        
                        # slide the window back by `back` frames
                        back=5
                        n=n-back
                        pass_mask[0:nums-back]=pass_mask[back:nums]
                        feature[0:nums-back]=feature[back:nums]
                        frames[0:nums-back]=frames[back:nums]
                        # interval_mask=1  # start the cool-down interval
                        # old_time=times

                        # reset the last `back` slots
                        for i in range(nums-back,nums):
                            pass_mask[i]=0
                            feature[i]=0
                            frames[i]=0

                        # # clear the buffers
                        # frames=[0]*nums
                        # feature=[0]*nums
                        # pass_mask=[0]*nums
                        # interval_mask=1  # start the cool-down interval
                        # old_time=times

                    else:
                        # slide back by one frame
                        back=1
                        pass_mask[0:nums-back]=pass_mask[back:nums]
                        feature[0:nums-back]=feature[back:nums]
                        frames[0:nums-back]=frames[back:nums]
                        # reset the last slot
                        for i in range(nums-back,nums):
                            pass_mask[i]=0
                            feature[i]=0
                            frames[i]=0

                        n=n-back
                        print("[group rejected, keep going!]")

                    if len(pred_list)==5:
                        pred_list=np.array(pred_list)
                        pred_list=np.sum(pred_list,axis=0)
                        result=np.argmax(pred_list,1)   # class with the largest summed score (np.max would return the score itself, not the label)
                        result2=np.argmax(np.bincount(pred_list2))   # majority vote over the 5 segment labels
                        print("done, result:{}-{}, result2:{}-{}".format(pred_list, result, pred_list2, result2))
                        
                        break
                        
                        # # clear the buffers
                        # pred_list=[]
                        # pred_list2=[]
                        # frames=[0]*nums
                        # feature=[0]*nums
                        # pass_mask=[0]*nums
                        # interval_mask=1  # start the cool-down interval
                        # old_time=times
                        
cap.release()
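
The roll-back bookkeeping above (shift every buffer by `back`, zero the tail) implements a fixed-size sliding window; a compact equivalent with collections.deque, assuming the same 64-frame window and 5-frame stride (`frame_feature_stream` and `predict` are hypothetical placeholders):

from collections import deque

window = deque(maxlen=64)            # the oldest entry is dropped automatically
for feat in frame_feature_stream:    # hypothetical per-frame feature source
    window.append(feat)
    if len(window) == 64:
        predict(list(window))        # hypothetical model call on one segment
        for _ in range(5):           # slide forward by 5 frames
            window.popleft()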

config.py

import argparse

def get_args():
	parser = argparse.ArgumentParser()

	parser.add_argument('--mode',  type=str, default='train')
	parser.add_argument('--test_epoch',type=int, default=80)
	
	parser.add_argument('--start_epoch',type=int, default=0)
	parser.add_argument('--num_epochs',type=int, default=100)
	parser.add_argument('--val_step',type=int, default=20)
	
	model_forder = "2+3+4+5+6_v3"
	data_forder = "2+3+4+5+6_v3"
	parser.add_argument('--train_path', type=str, default='dataset/test9/clear/'+str(data_forder)+'/train.pkl')
	parser.add_argument('--valid_path', type=str, default='dataset/test9/clear/'+str(data_forder)+'/valid.pkl')
	parser.add_argument('--test_path',  type=str, default='dataset/test9/clear/'+str(data_forder)+'/test.pkl')

	parser.add_argument('--model_path',  type=str, default='model/test9/clear/'+str(model_forder)+'/')

	parser.add_argument('--batch_size',  type=int, default=1)
	parser.add_argument('--learning_rate',type=float, default=0.01)   # float, not int, so CLI overrides parse correctly
	parser.add_argument('--beta1',type=float, default=0.5)
	parser.add_argument('--beta2',type=float, default=0.99)
	parser.add_argument('--dropout_rate',type=float, default=0.5)
	parser.add_argument('--weight_decay',type=float, default=0.0)

	parser.add_argument('--frame_nums',type=int, default=64)   # frame_nums -> fc input size: 32->192, 64->960, 128->2496
	parser.add_argument('--num_classes',type=int, default=4)
	parser.add_argument('--feat_dims',type=int, default=13)
	
	
	args = parser.parse_args()

	return args

test_config.py

import argparse

def get_args():
	parser = argparse.ArgumentParser()

	parser.add_argument('--mode',  type=str, default='train')
	parser.add_argument('--test_epoch',type=int, default=80)

	parser.add_argument('--start_epoch',type=int, default=0)
	parser.add_argument('--num_epochs',type=int, default=100)
	parser.add_argument('--val_step',type=int, default=20)
	
	model_forder = "2+3+4+5+6_v3"
	data_forder = "2+3+4+5+6_v3"
	parser.add_argument('--train_path', type=str, default='dataset/test9/clear/'+str(data_forder)+'/train.pkl')
	parser.add_argument('--valid_path', type=str, default='dataset/test9/clear/'+str(data_forder)+'/valid.pkl')
	parser.add_argument('--test_path',  type=str, default='dataset/test9/clear/'+str(data_forder)+'/test.pkl')

	parser.add_argument('--model_path',  type=str, default='model/test9/clear/'+str(model_forder)+'/')

	parser.add_argument('--batch_size',  type=int, default=1)
	parser.add_argument('--learning_rate',type=float, default=0.01)   # float, not int, so CLI overrides parse correctly
	parser.add_argument('--beta1',type=float, default=0.5)
	parser.add_argument('--beta2',type=float, default=0.99)
	parser.add_argument('--dropout_rate',type=float, default=0.5)
	parser.add_argument('--weight_decay',type=float, default=0.0)

	parser.add_argument('--frame_nums',type=int, default=64)   # frame_nums -> fc input size: 32->192, 64->960, 128->2496
	parser.add_argument('--num_classes',type=int, default=4)
	parser.add_argument('--feat_dims',type=int, default=13)
	

	args = parser.parse_args()

	return args

model.py

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

from layer import GraphConvolution, StandConvolution, StandRecurrent

class GGCN(nn.Module):
	def __init__(self, adj, num_v, num_classes, gc_dims, sc_dims, feat_dims, frame_nums, dropout=0.5):
		super(GGCN, self).__init__()
		terminal_cnt = 5
		actor_cnt = 1
		self.frame_nums=frame_nums
		
		# torch.eye: 2-D tensor with ones on the diagonal, zeros elsewhere; shape adj.size(0) x adj.size(0)
		# detach(): a new tensor detached from the computation graph; shares data but carries no gradient (avoids graph-growth OOM)
		adj = adj + torch.eye(adj.size(0)).to(adj).detach()   # add self-loops, [15,15]
		ident = torch.eye(adj.size(0)).to(adj)   # [15,15]
		zeros = torch.zeros(adj.size(0), adj.size(1)).to(adj)   # [15,15]
		a=torch.cat([adj, ident, zeros], 1)   # [15,15]*3=[15,45]
		b=torch.cat([ident, adj, ident], 1)   # [15,15]*3=[15,45]
		c=torch.cat([zeros, ident, adj], 1)   # [15,15]*3=[15,45]
		self.adj = torch.cat([a,b,c], 0).float()   # block matrix over 3 consecutive frames, [15,45]*3=[45,45]


		# nn.Parameter: registers the tensor as a learnable model parameter
		# randn: tensor of normally distributed random values, mean 0, variance 1
		self.terminal = nn.Parameter(torch.randn(terminal_cnt, actor_cnt, feat_dims))   # [5,1,13]
		self.gcl = GraphConvolution(gc_dims[0]+feat_dims, gc_dims[1], num_v, dropout=dropout) # input 3+13=16, output 9, num_v=3
		self.conv= StandConvolution(sc_dims, num_classes, dropout=dropout)   # standard-convolution head
		# self.conv= StandRecurrent(sc_dims, num_classes, dropout=dropout)   # LSTM head
		
		nn.init.xavier_normal_(self.terminal)   # Xavier normal initialization
		
	def forward(self, x):
		# F.interpolate(input, size, mode='nearest'):
		# resamples the last dimension to the given size (6 here); mode defaults to nearest
		head_la = F.interpolate(torch.stack([self.terminal[0],self.terminal[1]],2), 6)  # [1,13,6]
		head_ra = F.interpolate(torch.stack([self.terminal[0],self.terminal[2]],2), 6)  # [1,13,6]
		lw_ra = F.interpolate(torch.stack([self.terminal[3],self.terminal[4]],2), 6)  # [1,13,6]
		# print(lw_ra.shape)
		
		# concatenate the 5 blocks below along dim 2: node_features=[1,13,3]*5 -> [1,13,15]   x=[1, 64, 15, 3]
		node_features = torch.cat([
								   (head_la[:,:,:3] + head_ra[:,:,:3])/2,   # [1,13,3]   mean of the first halves of head_la and head_ra
								   torch.stack((lw_ra[:,:,2], lw_ra[:,:,1], lw_ra[:,:,0]), 2),  # [1,13,3]  columns 0-2 of lw_ra, reversed
								   lw_ra[:,:,3:], head_la[:,:,3:], head_ra[:,:,3:]], 2).to(x)   # [1,13,3] each, columns 3-5
		
		
		# permute: reorder dims; unsqueeze: insert a size-1 dim at the given position; repeat: tile along dim 1 frame_nums times
		# shapes: [1,13,15] -> [1,15,13] -> [1,1,15,13] -> [1,64,15,13]
		node_features=node_features.permute(0,2,1).unsqueeze(1).repeat(1,self.frame_nums,1,1)
		# [1,64,15,3]+[1,64,15,13]=[1,64,15,16]
		x = torch.cat((x, node_features), 3)
		# stack 3 consecutive frames: [1,62,15,16]*3 -> [1,62,45,16]
		concat_seq = torch.cat([x[:,:-2], x[:,1:-1], x[:,2:]], 2)
		
		# print(self.adj.shape, concat_seq.shape)
		multi_conv = self.gcl(self.adj, concat_seq)
		logit = self.conv(multi_conv)
		
		return logit
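
A quick smoke test with the constructor arguments main.py uses (A is the [15,15] adjacency tensor from main.py; dims follow train_tensor of shape [N,64,15,3]):

model = GGCN(A, 3, num_classes=4, gc_dims=[3, 9], sc_dims=[9, 16, 32, 64],
             feat_dims=13, frame_nums=64, dropout=0.5)
x = torch.randn(1, 64, 15, 3)   # [batch, frames, joints, (x, y, confidence)]
logit = model(x)                # -> [1, 4] class scores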
		

layer.py

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# graph convolution layer
class GraphConvolution(nn.Module):
	def __init__(self, input_dim, output_dim, num_vetex, act=F.relu, dropout=0.5, bias=True):
		super(GraphConvolution, self).__init__()

		self.alpha = 1.

		self.act = act
		self.dropout = nn.Dropout(dropout)
		self.weight = nn.Parameter(torch.randn(input_dim, output_dim).to(device))  # [16,9]; move the tensor before wrapping so it stays a registered Parameter
		if bias:
			self.bias = nn.Parameter(torch.randn(output_dim).to(device))
		else:
			self.bias = None

		for w in [self.weight]:
			nn.init.xavier_normal_(w)

	# GCN propagation rule: symmetric normalization D^-1/2 * M * D^-1/2
	def normalize(self, m):
		rowsum = torch.sum(m, 0)
		r_inv = torch.pow(rowsum, -0.5)
		r_mat_inv = torch.diag(r_inv).float()

		m_norm = torch.mm(r_mat_inv, m)
		m_norm = torch.mm(m_norm, r_mat_inv)

		return m_norm

	def forward(self, adj, x):
		x = self.dropout(x)  # [1,62,45,16]
		adj_norm = self.normalize(adj)  # [45,45]
		# torch.mm: multiplying an n x m matrix by an m x p matrix yields an n x p matrix
		sqr_norm = self.normalize(torch.mm(adj,adj))  # [45,45]
		m_norm = self.alpha*adj_norm + (1.-self.alpha)*sqr_norm  # [45,45]
		
		# torch.einsum keeps the computation on-device and differentiable
		# (np.einsum plus torch.from_numpy breaks autograd and fails on GPU tensors)
		x_tmp = torch.einsum('abcd,de->abce', x, self.weight)  # [1,62,45,16] x [16,9] = [1,62,45,9]
		x_out = torch.einsum('ij,abid->abjd', m_norm, x_tmp)  # [1,62,45,9]
		
		if self.bias is not None:
			x_out = x_out + self.bias

		x_out = self.act(x_out)

		return x_out
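
For reference, a tiny check (toy symmetric 3-node adjacency, assumed values) that normalize computes D^-1/2 * M * D^-1/2:

M = torch.tensor([[1., 1., 0.],
                  [1., 1., 1.],
                  [0., 1., 1.]])
d_inv_sqrt = torch.diag(M.sum(0).pow(-0.5))   # D^-1/2 from the column sums
m_norm = d_inv_sqrt @ M @ d_inv_sqrt          # matches GraphConvolution.normalize(M)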
		

# classification head built from standard convolutions
class StandConvolution(nn.Module):
	def __init__(self, dims, num_classes, dropout):
		super(StandConvolution, self).__init__()
		# output size formula: h/w_out = (h/w_in - kernel_size + 2*padding) / stride + 1
		# e.g. x = [10,16,30,32] with h=30, w=32, kernel (h:3, w:2), stride (h:2, w:1), padding 0:
		# h = (30 - 3 + 2*0) / 2 + 1 = 13 + 1 = 14
		# w = (32 - 2 + 2*0) / 1 + 1 = 30 + 1 = 31
		# with batch = 10 and out_channel = 33: y = [10, 33, 14, 31]

		self.num_classes=num_classes
		self.dropout = nn.Dropout(dropout)
		self.conv = nn.Sequential(
								   nn.Conv2d(dims[0], dims[1], kernel_size=5, stride=2),
								   nn.InstanceNorm2d(dims[1]),
								   nn.ReLU(inplace=True),
								   # nn.AvgPool2d(5, stride=1),
								   nn.Conv2d(dims[1], dims[2], kernel_size=5, stride=2),
								   nn.InstanceNorm2d(dims[2]),
								   nn.ReLU(inplace=True),
								   #nn.AvgPool2d(3, stride=1),
								   nn.Conv2d(dims[2], dims[3], kernel_size=5, stride=2),
								   nn.InstanceNorm2d(dims[3]),
								   nn.ReLU(inplace=True),
								   #nn.AvgPool2d(3, stride=2)
								   ).to(device)

		# self.fc = nn.Linear(dims[3]*3, num_classes).to(device)
		self.fc = nn.Linear(960, num_classes).to(device)   # fc input size by frame_nums: 32-192, 64-960, 128-2496
		

	def forward(self, x):
		x = self.dropout(x.permute(0,3,1,2))  # [1,62,45,9] -> [1,9,62,45]
		x_tmp = self.conv(x)   # [1,64,5,3] for frame_nums=64
		# view: flatten in row-major order, then reshape  [1,64,5,3] -> [1,960]
		x_tmp = x_tmp.view(x.size(0), -1)
		# print(x_tmp.shape)
		
		x_out = self.fc(x_tmp)   # [1,num_classes] class scores
		
		return x_out
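
A shape check for the fc input size, assuming frame_nums=64 (GGCN then feeds this head a [1,9,62,45] map after the permute; the norm/activation layers are omitted here since they keep the shape):

conv = nn.Sequential(nn.Conv2d(9, 16, 5, stride=2),
                     nn.Conv2d(16, 32, 5, stride=2),
                     nn.Conv2d(32, 64, 5, stride=2))
x = torch.randn(1, 9, 62, 45)
print(conv(x).shape)   # torch.Size([1, 64, 5, 3]) -> 64*5*3 = 960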


# LSTM-based classification head
class StandRecurrent(nn.Module):
	def __init__(self, dims, num_classes, dropout):
		super(StandRecurrent, self).__init__()
		self.lstm = nn.LSTM(dims[0]*45, dims[1], batch_first=True,dropout=0.5).to(device)
		self.fc = nn.Linear(dims[1], num_classes).to(device)

	def forward(self, x):
		x_tmp,_ = self.lstm(x.contiguous().view(x.size(0), x.size(1), -1))
		x_out = self.fc(x_tmp[:,-1])

		return x_out

metric.py

import torch
from sklearn.metrics import accuracy_score

def accuracy(preds, target):
	# torch.max(a, dim): returns (max values, max indices); dim=1 reduces over columns, giving the per-row argmax
	preds = torch.max(preds, 1)[1].float()
	acc = accuracy_score(target.cpu().numpy(), preds.cpu().numpy())   # fraction of correct predictions (accuracy_score expects y_true first; the metric is symmetric, so the value is unchanged)

	# print(preds.cpu().numpy(), target.cpu().numpy())
	return acc
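
A quick usage check with dummy logits (4 classes, 2 samples):

logits = torch.tensor([[0.1, 2.0, 0.3, 0.1],
                       [1.5, 0.2, 0.1, 0.1]])
target = torch.tensor([1, 0])
print(accuracy(logits, target))   # 1.0: the argmax picks classes 1 and 0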
