Current best results:
1. 2s-AGCN is dropped: cleaned or not, its data never beats the other two models.
2. With cleaned data, HCN is the better choice (2 s clips, 15 frames).
3. With uncleaned data, ST-GCN is the better choice (2 s clips, 15 frames; taking all 64 frames).
Final choice: ST-GCN.
Clip acquisition rule: advance through the video frame by frame; after predicting one clip, slide the window forward by n frames (5? 10?) to get the next clip and predict it.
Prediction rules (a fusion sketch follows this list):
1. Predict n clips to get n labels, and take the most frequent label as the final result;
2. Predict n clips, sum the n softmax probability vectors, and take the label with the highest total probability as the final result.
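A minimal sketch of the two fusion rules (illustrative helpers, not part of the project code; assumes each clip prediction yields an integer label and a softmax vector of length num_classes):
import numpy as np
def fuse_majority(labels):
    # Rule 1: majority vote over the n per-clip labels
    return int(np.argmax(np.bincount(np.asarray(labels))))
def fuse_softmax(probs):
    # Rule 2: sum the n softmax vectors, take the label with the highest total
    return int(np.argmax(np.sum(np.asarray(probs), axis=0)))
# e.g. fuse_majority([2,2,1,2,0]) -> 2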
Code project:
data.py
from torch.utils import data
import torch
import os
import random
import numpy as np
list1=[16,21,24,32] # actor IDs held out for testing
list2=[22,33] # actor IDs held out for validation
forder='5'
forder2='5'
split_label=1 # whether to keep the video name (actor/video IDs) in the label
file_name = 'dataset/test8/src/txt/'+str(forder)+'.txt'
save='dataset/test8/src/'+str(forder2)+"/"
if not os.path.exists(save):
    os.makedirs(save)
frame_nums=64
f = open(file_name)
lines = f.readlines()
prev_video = int(lines[0].strip().split(' ')[1]) # video ID
prev_categ = int(lines[0].strip().split(' ')[2]) # class label
datas=[]
datas_label=[]
frames = []
train = []
valid = []
test = []
train_label = []
valid_label = []
test_label = []
m=0
for line in lines:
    line = line.strip().split(' ')
    vid = int(line[1]) # video ID
    aid = int(line[0]) # actor ID
    cid = int(line[2]) # class label
    label=list(map(int, line[:3])) # [actor, video, class] triple
    features = list(map(float, line[3:])) # flattened keypoint features (x, y, confidence triples)
    # While the video ID stays the same, keep accumulating frames into one training sample
    if prev_video == vid:
        frames.append(np.reshape(np.asarray(features), (-1,3))) # reshape the flat vector into [num_points, 3]
    else:
        # If the clip has at least frame_nums frames, take the first frame_nums frames, stack them, and convert to torch
        if len(frames) >= frame_nums:
            # frames = random.sample(frames, frame_nums) # sample frames at random
            frames = frames[0:frame_nums] # take frames in order
            frames = torch.from_numpy(np.stack(frames, 0)) # stack the frames along dim 0, convert to torch
        # Otherwise pad the sequence up to frame_nums frames by linear interpolation
        else:
            frames = np.stack(frames, 0) # stack n frames of [15,3] into one sample [n,15,3]
            xloc = np.arange(frames.shape[0]) # indices 0..n-1 of the existing frames
            new_xloc = np.linspace(0, frames.shape[0], frame_nums) # frame_nums evenly spaced sample positions over the clip
            frames = np.reshape(frames, (frames.shape[0], -1)).transpose() # transpose so each row is one coordinate channel over time
            # print(frames.shape,xloc.shape,new_xloc.shape)
            new_datas = []
            for data in frames:
                new_datas.append(np.interp(new_xloc, xloc, data)) # np.interp: linear interpolation up to frame_nums values per channel
            frames = torch.from_numpy(np.stack(new_datas, 0)).t() # stack the channels back together, convert to torch
        frames = frames.view(frame_nums, -1, 3) # reshape into [frame_nums, num_points, 3]
        datas.append(frames) # sample
        if split_label==1:
            datas_label.append(label) # label (with video name)
        else:
            datas_label.append(prev_categ) # label (class only)
        # m+=1
        # # 2. split by actor ID
        # if aid in list1:
        #     test.append(frames)
        #     test_label.append(prev_categ)
        # elif aid in list2:
        #     valid.append(frames)
        #     valid_label.append(prev_categ)
        # else:
        #     train.append(frames)
        #     train_label.append(prev_categ)
        frames = [np.reshape(np.asarray(features), (-1,3))] # reset frames with the first frame of the new video
        prev_actor = aid # reset actor ID
        prev_video = vid # reset video ID
        prev_categ = cid # reset class label
# 3. random split (NOTE: as written, the last video in the file is never flushed into datas)
lens=len(datas)
num=random.sample(range(lens),lens) # random permutation of indices
for i in range(lens):
    index=num[i]
    if i <=int(lens*0.7): # ~70% train
        train.append(datas[index])
        train_label.append(datas_label[index])
    elif i <=int(lens*0.9): # ~20% valid
        valid.append(datas[index])
        valid_label.append(datas_label[index])
    else: # ~10% test
        test.append(datas[index])
        test_label.append(datas_label[index])
train_label = torch.from_numpy(np.asarray(train_label))
valid_label = torch.from_numpy(np.asarray(valid_label))
test_label = torch.from_numpy(np.asarray(test_label))
print(len(train_label),len(valid_label),len(test_label))
print(test_label.shape)
torch.save((torch.stack(train, 0), train_label), save+'train.pkl')
torch.save((torch.stack(valid, 0), valid_label), save+'valid.pkl')
torch.save((torch.stack(test, 0), test_label), save+'test.pkl')
# Data preprocessing:
# 1. Cut a 2 min video into 1 s clips, numbered 1-n, giving n clips (n samples); add actor IDs 1-m; tag every clip with its action class: xx
# 2. For every frame, extract the keypoint coordinates; take a stable center point as the origin and subtract it from every other point,
#    so the origin becomes (0,0) and all coordinates are relative to it;
# 3. Store the coordinates as a flat 1-D vector; reshape back to (n,-1,2) in code when needed
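A minimal sketch of steps 2-3 (illustrative only; to_relative is a hypothetical helper, assuming points is an (n,2) array of one frame's keypoints and center_idx indexes the stable center point, like point_face[30] in the test script):
import numpy as np
def to_relative(points, center_idx):
    # step 2: subtract the stable center point so it becomes the origin (0,0)
    rel = points - points[center_idx]
    # step 3: flatten for storage; reshape back with rel.reshape(-1, 2) when needed
    return rel.reshape(-1)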
main.py
import os
import numpy as np
import torch
import torch.optim as optim
import torch.utils.data as data
import time
from model import *
from metric import accuracy
from config import get_args
args = get_args()
# use the GPU if one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# load the preprocessed tensors
train_tensor, train_label = torch.load(args.train_path)
valid_tensor, valid_label = torch.load(args.valid_path)
test_tensor , test_label = torch.load(args.test_path)
# data loaders: everything is loaded up front; each iteration yields one batch
train_loader = data.DataLoader(data.TensorDataset(train_tensor.to(device)),
                               batch_size = args.batch_size, shuffle=False)
valid_loader = data.DataLoader(data.TensorDataset(valid_tensor.to(device)),
                               batch_size = args.batch_size, shuffle=False)
test_loader = data.DataLoader(data.TensorDataset(test_tensor.to(device)),
                              batch_size = args.batch_size, shuffle=False)
train_label = train_label.to(device)
valid_label = valid_label.to(device)
test_label = test_label.to(device)
# # Split the video name from the label
# def get_label_vid(data):
#     vid=[]
#     label=[]
#     for i in data:
#         label.append(int(i[2]))
#         name=str(int(i[0]))+'_'+str(int(i[1]))+'.mp4'
#         vid.append(name)
#     labels=torch.from_numpy(np.array(label))
#     return labels,np.array(vid)
# train_label, train_vid = get_label_vid(train_label)
# valid_label, vaild_vid = get_label_vid(valid_label)
# test_label, test_vid = get_label_vid(test_label)
# print(test_label.shape,test_vid.shape)
# Adjacency matrix of the 15 keypoints
# NOTE: as written the matrix is not symmetric (e.g. A[13][11]=1 but A[11][13]=0) and rows 12/13 carry diagonal entries; verify it against the skeleton
A = [[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0],
[1,0,1,0,0,0,0,0,0,0,0,0,0,0,0],
[0,1,0,1,0,0,1,0,0,1,0,0,0,0,0],
[0,0,1,0,1,0,0,0,0,0,0,0,0,0,0],
[0,0,0,1,0,1,0,0,0,0,0,0,0,0,0],
[0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],
[0,0,1,0,0,0,0,1,0,0,0,0,0,0,0],
[0,0,0,0,0,0,1,0,1,0,0,0,0,0,0],
[0,0,0,0,0,0,0,1,0,0,0,0,0,0,0],
[0,0,1,0,0,0,0,0,0,0,1,0,1,0,0],
[0,0,0,0,0,0,0,0,0,1,0,1,0,0,0],
[0,0,0,0,0,0,0,0,0,0,1,0,0,0,0],
[0,0,0,0,0,0,0,0,0,1,0,0,1,0,0],
[0,0,0,0,0,0,0,0,0,0,0,1,0,1,0],
[0,0,0,0,0,0,0,0,0,0,0,0,1,0,0]]
A = torch.from_numpy(np.asarray(A)).to(device)
# build the GCN model
model = GGCN(A, train_tensor.size(3), args.num_classes,
             [train_tensor.size(3), train_tensor.size(3)*3], [train_tensor.size(3)*3, 16, 32, 64],
             args.feat_dims, args.frame_nums, args.dropout_rate)
# print([train_tensor.size(3), train_tensor.size(3)*3], [train_tensor.size(3)*3, 16, 32, 64])
if device.type == 'cuda':
    model.cuda()
# count the model parameters
num_params = 0
for p in model.parameters():
    num_params += p.numel()
# print(model)
# loss, optimizer, and learning-rate decay
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = args.learning_rate,
betas=[args.beta1, args.beta2], weight_decay = args.weight_decay)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma = 0.1)
best_epoch = 0
best_acc = 0
def train():
    global best_epoch, best_acc
    # resume training from a saved checkpoint
    if args.start_epoch:
        # model-*.pkl stores the full model object, so load it and copy its weights
        checkpoint = torch.load(os.path.join(args.model_path, 'model-%d.pkl'%(args.start_epoch)))
        model.load_state_dict(checkpoint.state_dict())
    # Training
    for epoch in range(args.start_epoch, args.num_epochs):
        train_loss = 0
        train_acc = 0
        scheduler.step() # steps the LR once per epoch (recent PyTorch expects this after optimizer.step())
        model.train() # model.train() enables BatchNorm/Dropout; model.eval() disables them
        for i, x in enumerate(train_loader):
            logit = model(x[0].float()) # model output
            target = train_label[i] # ground-truth label (indexing works because batch_size=1 and shuffle=False)
            loss = criterion(logit, target.view(1).long()) # compute the loss
            model.zero_grad() # clear the gradients once per batch
            loss.backward() # backpropagate to compute gradients
            optimizer.step() # update the weights from the gradients
            train_loss += loss.item() # accumulate per-epoch loss
            train_acc += accuracy(logit, target.view(1).long()) # accumulate per-epoch accuracy
        print('[epoch',epoch+1,'] Train loss:',train_loss/(i+1), 'Train Acc:',train_acc/(i+1))
        # save a checkpoint (the full model object)
        if not os.path.exists(args.model_path):
            os.makedirs(args.model_path)
        if (epoch+1) % 20 ==0:
            torch.save(model, os.path.join(args.model_path, 'model-%d.pkl'%(epoch+1)))
        # validate during training
        if (epoch+1) % args.val_step == 0:
            model.eval()
            val_loss = 0
            val_acc = 0
            with torch.no_grad():
                for i, x in enumerate(valid_loader):
                    logit = model(x[0].float())
                    target = valid_label[i]
                    val_loss += criterion(logit, target.view(1).long()).item()
                    val_acc += accuracy(logit, target.view(1).long())
            if (val_acc/(i+1)) >= best_acc: # keep the model whenever validation accuracy improves
                best_epoch = epoch+1
                torch.save(model.state_dict(), os.path.join(args.model_path, 'best_model-%d.pkl'%(best_epoch)))
                best_acc = (val_acc/(i+1))
            print('Val loss:',val_loss/(i+1), 'Val Acc:',val_acc/(i+1))
def test():
    global best_epoch
    # best_model-*.pkl stores a state_dict saved at the best validation epoch
    model.load_state_dict(torch.load(os.path.join(args.model_path,
                                                  'best_model-%d.pkl'%(best_epoch))))
    print("load model from 'best_model-%d.pkl'"%(best_epoch))
    model.eval()
    test_loss = 0
    test_acc = 0
    with torch.no_grad():
        for i, x in enumerate(test_loader):
            start=time.time()
            logit = model(x[0].float())
            target = test_label[i]
            test_loss += criterion(logit, target.view(1).long()).item()
            test_acc += accuracy(logit, target.view(1).long())
            end=int((time.time()-start)*1000) # per-sample inference time in ms
            # print('pred:',torch.max(logit, 1)[1].float()
            #       .cpu().numpy(), 'true:',target.cpu().numpy(),'time:',end, 'index:',i)
    print('Test loss:',test_loss/(i+1), 'Test Acc:',test_acc/(i+1))
train()
test.py
import os
import numpy as np
import torch
import torch.optim as optim
import torch.utils.data as data
import time
from model import *
from metric import accuracy
from test_config import get_args
args = get_args()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train_tensor, train_label = torch.load(args.train_path)
valid_tensor, valid_label = torch.load(args.valid_path)
test_tensor , test_label = torch.load(args.test_path)
train_loader = data.DataLoader(data.TensorDataset(train_tensor.to(device)),
batch_size = args.batch_size, shuffle=False)
valid_loader = data.DataLoader(data.TensorDataset(valid_tensor.to(device)),
batch_size = args.batch_size, shuffle=False)
test_loader = data.DataLoader(data.TensorDataset(test_tensor.to(device)),
batch_size = args.batch_size, shuffle=False)
train_label = train_label.to(device)
valid_label = valid_label.to(device)
test_label = test_label.to(device)
# Split the video name from the label
def get_label_vid(data):
vid=[]
label=[]
for i in data:
label.append(int(i[2]))
name=str(int(i[0]))+'_'+str(int(i[1]))+'.mp4'
vid.append(name)
labels=torch.from_numpy(np.array(label))
return labels,np.array(vid)
# train_label, train_vid = get_label_vid(train_label)
# valid_label, vaild_vid = get_label_vid(valid_label)
# test_label, test_vid = get_label_vid(test_label)
# print(test_label.shape,test_vid.shape)
A = [[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0],
[1,0,1,0,0,0,0,0,0,0,0,0,0,0,0],
[0,1,0,1,0,0,1,0,0,1,0,0,0,0,0],
[0,0,1,0,1,0,0,0,0,0,0,0,0,0,0],
[0,0,0,1,0,1,0,0,0,0,0,0,0,0,0],
[0,0,0,0,1,0,0,0,0,0,0,0,0,0,0],
[0,0,1,0,0,0,0,1,0,0,0,0,0,0,0],
[0,0,0,0,0,0,1,0,1,0,0,0,0,0,0],
[0,0,0,0,0,0,0,1,0,0,0,0,0,0,0],
[0,0,1,0,0,0,0,0,0,0,1,0,1,0,0],
[0,0,0,0,0,0,0,0,0,1,0,1,0,0,0],
[0,0,0,0,0,0,0,0,0,0,1,0,0,0,0],
[0,0,0,0,0,0,0,0,0,1,0,0,1,0,0],
[0,0,0,0,0,0,0,0,0,0,0,1,0,1,0],
[0,0,0,0,0,0,0,0,0,0,0,0,1,0,0]]
A = torch.from_numpy(np.asarray(A)).to(device)
model = GGCN(A, train_tensor.size(3), args.num_classes,
[train_tensor.size(3), train_tensor.size(3)*3], [train_tensor.size(3)*3, 16, 32, 64],
args.feat_dims, args.frame_nums, args.dropout_rate)
if device == 'cuda':
model.cuda()
num_params = 0
for p in model.parameters():
    num_params += p.numel()
# print(model)
# print('The number of parameters: {}'.format(num_params))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = args.learning_rate,
betas=[args.beta1, args.beta2], weight_decay = args.weight_decay)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma = 0.1)
best_epoch = 0
best_acc = 0
def test():
    global best_epoch
    # model-*.pkl stores the full model object saved during training, so load it directly
    model = torch.load(os.path.join(args.model_path, 'model-%d.pkl'%(best_epoch)))
    print("load model from 'model-%d.pkl'"%(best_epoch))
    model.eval() # disable Dropout during testing
    test_loss = 0
    test_acc = 0
    with torch.no_grad():
        for i, x in enumerate(test_loader):
            start=time.time()
            logit = model(x[0].float())
            target = test_label[i]
            test_loss += criterion(logit, target.view(1).long()).item()
            test_acc += accuracy(logit, target.view(1).long())
            end=int((time.time()-start)*1000) # per-sample inference time in ms
            # print('pred:',torch.max(logit, 1)[1].float().cpu().numpy(), 'true:',target.cpu().numpy(),'time:',end, 'index:',i)
    print('Test loss:',test_loss/(i+1), 'Test Acc:',test_acc/(i+1))
if __name__ == '__main__':
    # if args.mode == 'train':
    #     train()
    # elif args.mode == 'test':
    #     best_epoch = args.test_epoch
    #     test()
    # train()
    best_epoch = 100
    test()
test--模拟现网模型集成_v3.py
(simulated production integration: predict n clips, fuse the weighted scores, advance the window frame by frame)
import sys
import os
import dlib
import cv2
from PIL import Image
import numpy as np
import time
import imutils
import torch
def test(model_path, test_data):
    # NOTE: reloading the model on every call is wasteful; in production, load it once outside the loop
    model=torch.load(model_path)
    print("load model...")
    model.eval()
    with torch.no_grad():
        output = model(test_data.float())
    logit=output.data.cpu().numpy() # raw scores for this clip
    pred = torch.max(output.data, 1)[1].cpu().numpy()
    print("prediction:",logit,pred)
    return logit,pred
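# If rule 2 should sum true probabilities rather than raw logits, convert before returning
# (a sketch; softmax over the class dimension):
#   probs = torch.nn.functional.softmax(output, 1).cpu().numpy()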
def crop(img, point_face):
    top=point_face[24][1] # y of face landmark 24
    mins=top-200 # keep up to 200 px above it
    if mins<=0:
        mins=0
    h=img.shape[0]
    w=img.shape[1]
    img2=img[mins:h,0:w]
    return img2
def face_detect(pic):
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor('test/class/shape_predictor_68_face_landmarks.dat')
    img = np.copy(pic)
    # img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # rects: the detected faces
    rects = detector(img, 1)
    if len(rects)!=1:
        # print(len(rects),"face detection fail!")
        return -1,-1
    landmarks = np.matrix([[p.x, p.y] for p in predictor(img,rects[0]).parts()])
    point_face=[]
    for idx, point in enumerate(landmarks):
        x=point[0, 0]
        y=point[0, 1]
        point_face.append((x,y))
        # # draw the landmarks
        # cv2.circle(img, (x,y), 2, (0, 255, 0), thickness=-1, lineType=cv2.FILLED)
        # cv2.putText(img, str(idx), (x,y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 1,cv2.LINE_AA)
    return img, point_face
def hand_detect(pic, img_face):
    protoFile = "test/class/hand/pose_deploy.prototxt"
    weightsFile = "test/class/hand/pose_iter_102000.caffemodel"
    nPoints = 22
    POSE_PAIRS = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], [10, 11], [11, 12],
                  [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
    # NOTE: reloading the Caffe model on every frame is expensive; hoist this out of the loop in production
    net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
    img_hand=np.copy(img_face) # canvas for drawing the hand keypoints
    frame = np.copy(pic) # input for hand detection
    frameWidth = frame.shape[1]
    frameHeight = frame.shape[0]
    aspect_ratio = frameWidth / frameHeight
    threshold = 0.1
    t = time.time()
    # input image dimensions for the network
    inHeight = 368
    inWidth = int(((aspect_ratio * inHeight) * 8) // 8)
    inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)
    net.setInput(inpBlob)
    output = net.forward()
    # print("time taken by network : {:.3f}".format(time.time() - t))
    # Empty list to store the detected keypoints
    point_hand = []
    for i in range(nPoints):
        # confidence map for this keypoint
        probMap = output[0, i, :, :]
        probMap = cv2.resize(probMap, (frameWidth, frameHeight))
        # cv2.minMaxLoc returns the min/max values of the map and their locations
        minVal, prob, minLoc, point = cv2.minMaxLoc(probMap) # take the most confident location
        # keypoint coordinates
        x,y=(int(point[0]), int(point[1]))
        point_hand.append((x,y,prob))
        # # draw the keypoints
        # cv2.circle(img_hand, (x,y), 2, (255, 255, 0), thickness=-1, lineType=cv2.FILLED)
        # cv2.putText(img_hand, str(i), (x,y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), 1,cv2.LINE_AA)
    return img_hand,point_hand
def get_feature(point_face, point_hand,img):
    circle_center=point_face[30] # nose tip: the stable origin for relative coordinates
    # 1. keep only the 15 key points (8 face + 7 hand)
    face_key_point=[36,45,2,14,39,42,30,57]
    hand_key_point=[8,12,16,6,10,14,9]
    line=''
    for i in face_key_point:
        x=point_face[i][0]-circle_center[0]
        y=point_face[i][1]-circle_center[1]
        confidence=1
        line+=str(x)+' '+str(y)+' '+str(confidence)+' '
        # draw the face points
        cv2.circle(img, (point_face[i][0],point_face[i][1]), 2, (255, 255, 0), thickness=-1, lineType=cv2.FILLED)
        cv2.putText(img, str(i), (point_face[i][0],point_face[i][1]), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), 1,cv2.LINE_AA)
    for i in hand_key_point:
        x=point_hand[i][0]-circle_center[0]
        y=point_hand[i][1]-circle_center[1]
        confidence=point_hand[i][2]
        line+=str(x)+' '+str(y)+' '+str(confidence)+' '
        # draw the hand points
        cv2.circle(img, (point_hand[i][0],point_hand[i][1]), 2, (0, 255, 0), thickness=-1, lineType=cv2.FILLED)
        cv2.putText(img, str(i), (point_hand[i][0],point_hand[i][1]), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), 1,cv2.LINE_AA)
    return img,line
def count_pass(point):
    n=0
    points=point[8:] # the 7 hand points (after the 8 face points)
    for data in points:
        confidence=float(data[2])
        # print(confidence)
        if confidence>=0.1:
            n+=1
    # # class-3 data skips the check and passes directly
    # if label == 3:
    #     n=7
    # Face sanity check: reject the frame if the detected face is deformed
    w=int(point[3][0])-int(point[2][0]) # face width: point 14 minus point 2
    if w<=50:
        n=-1
    return n
model_path='model/test9/clear/2+3+4+5+6_v1/model-100.pkl'
save_path = "test/vedio/save/"
file_root = 'test/vedio/1/11_10.mp4'
cap = cv2.VideoCapture(file_root)
hasFrame, frame = cap.read()
w = frame.shape[1]
h = frame.shape[0]
interval_mask=0
nums=64
frames=[0]*nums # rolling buffer of the last nums raw frames
feature=[0]*nums # rolling buffer of per-frame keypoints
pass_mask=[0]*nums # rolling buffer of per-frame pass flags
pred_list=[]
pred_list2=[]
# main loop: pull frames from the video
n=0
while (hasFrame):
    hasFrame, frame = cap.read() # read the next video frame
    old_frame=np.copy(frame)
    if hasFrame==True:
        times=int(n/nums) # window (group) index
        index=(n%nums) # position within the window
        n += 1
        print("group {}, frame {}".format(times+1,index+1))
        # after predicting one sample, wait 5 groups before predicting again
        if interval_mask==1:
            interval_time=times-old_time # groups elapsed since the last prediction
            if interval_time<=1:
                print("in cool-down: {}/5 groups, frame {}.".format(interval_time, index+1))
                continue
            else:
                interval_mask=0
        h,w,_=frame.shape
        frame=cv2.resize(frame,(int(w*0.5),int(h*0.5)))
        img_face, point_face=face_detect(frame) # face detection
        if point_face ==-1:
            n=n-1
            continue
        else:
            img=crop(frame, point_face)
            img=cv2.resize(img,(500,700))
            img_face2, point_face2=face_detect(img)
            if point_face2 ==-1:
                n=n-1
                continue
            else:
                img_hand, point_hand=hand_detect(img, img_face2) # hand detection
                draw_img,line=get_feature(point_face2, point_hand,img)
                # keypoint data
                line=line.strip().split(" ")
                point = list(map(float, line))
                point = np.reshape(np.asarray(point), (-1,3))
                # check whether the frame passes the confidence threshold
                pass_nums=count_pass(point)
                if pass_nums>=0:
                    flag=1
                    print("frame passed:",pass_nums,point.shape,np.sum(pass_mask)+1)
                elif pass_nums==-1:
                    n=n-1
                    print("frame rejected: the face is deformed!")
                    continue
                else:
                    # NOTE: unreachable as written; a stricter first check (e.g. pass_nums>=4) would make this branch meaningful
                    flag=0
                    print("frame rejected:",pass_nums,point.shape)
                # record flag, keypoints and raw frame
                pass_mask[index]=flag
                feature[index]=point
                frames[index]=old_frame
                # check whether the whole window passes
                if index==nums-1: # or len(pass_nums)==nums-1
                    pass_nums=np.sum(pass_mask) # number of frames that passed
                    print("frames passed: {}".format(pass_nums))
                    if pass_nums>=32:
                        print("[window accepted!]")
                        # predict on the window
                        test_data = torch.from_numpy(np.stack(feature, 0))
                        test_data=test_data.unsqueeze(0)
                        logit, pred=test(model_path, test_data)
                        pred_list.append(logit)
                        pred_list2.append(pred[0])
                        print(test_data.size())
                        # record the predicted clip for inspection
                        vid=file_root.split("/")[-1].split(".")[0]
                        videoWriter =cv2.VideoWriter(
                            save_path+'{}-({})-{}.mp4'.format(vid,len(pred_list2),pred[0]),
                            cv2.VideoWriter_fourcc(*"mp4v"),30,(w, h))
                        for frame in frames:
                            videoWriter.write(frame)
                        # slide the window back by back frames
                        back=5
                        n=n-back
                        pass_mask[0:nums-back]=pass_mask[back:nums]
                        feature[0:nums-back]=feature[back:nums]
                        frames[0:nums-back]=frames[back:nums]
                        # interval_mask=1 # start the cool-down
                        # old_time=times
                        # reset the last back buffer slots
                        for i in range(nums-back,nums):
                            pass_mask[i]=0
                            feature[i]=0
                            frames[i]=0
                        # # clear the buffers
                        # frames=[0]*nums
                        # feature=[0]*nums
                        # pass_mask=[0]*nums
                        # interval_mask=1 # start the cool-down
                        # old_time=times
                    else:
                        # drop the oldest frame and keep sliding
                        back=1
                        pass_mask[0:nums-back]=pass_mask[back:nums]
                        feature[0:nums-back]=feature[back:nums]
                        frames[0:nums-back]=frames[back:nums]
                        # reset the last buffer slot
                        for i in range(nums-back,nums):
                            pass_mask[i]=0
                            feature[i]=0
                            frames[i]=0
                        n=n-back
                        print("[window rejected, keep going!]")
                    if len(pred_list)==5:
                        pred_list=np.array(pred_list)
                        pred_list=np.sum(pred_list,axis=0) # rule 2: sum the per-clip score vectors
                        result=np.argmax(pred_list,1) # label with the highest summed score
                        result2=np.argmax(np.bincount(pred_list2)) # rule 1: majority vote
                        print("done. result:{}-{}, result2:{}-{}".format(pred_list, result, pred_list2, result2))
                        break
                    # # clear the buffers
                    # pred_list=[]
                    # pred_list2=[]
                    # frames=[0]*nums
                    # feature=[0]*nums
                    # pass_mask=[0]*nums
                    # interval_mask=1 # start the cool-down
                    # old_time=times
cap.release()
config.py
import argparse
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument('--mode', type=str, default='train')
parser.add_argument('--test_epoch',type=int, default=80)
parser.add_argument('--start_epoch',type=int, default=0)
parser.add_argument('--num_epochs',type=int, default=100)
parser.add_argument('--val_step',type=int, default=20)
model_forder = "2+3+4+5+6_v3"
data_forder = "2+3+4+5+6_v3"
parser.add_argument('--train_path', type=str, default='dataset/test9/clear/'+str(data_forder)+'/train.pkl')
parser.add_argument('--valid_path', type=str, default='dataset/test9/clear/'+str(data_forder)+'/valid.pkl')
parser.add_argument('--test_path', type=str, default='dataset/test9/clear/'+str(data_forder)+'/test.pkl')
parser.add_argument('--model_path', type=str, default='model/test9/clear/'+str(model_forder)+'/')
    parser.add_argument('--batch_size', type=int, default=1)
    parser.add_argument('--learning_rate',type=float, default=0.01)
    parser.add_argument('--beta1',type=float, default=0.5)
    parser.add_argument('--beta2',type=float, default=0.99)
    parser.add_argument('--dropout_rate',type=float, default=0.5)
    parser.add_argument('--weight_decay',type=float, default=0.0)
    parser.add_argument('--frame_nums',type=int, default=64) # frame_nums -> fc input size: 32->192, 64->960, 128->2496
    parser.add_argument('--num_classes',type=int, default=4)
    parser.add_argument('--feat_dims',type=int, default=13)
args = parser.parse_args()
return args
test_config.py
import argparse
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument('--mode', type=str, default='train')
parser.add_argument('--test_epoch',type=int, default=80)
parser.add_argument('--start_epoch',type=int, default=0)
parser.add_argument('--num_epochs',type=int, default=100)
parser.add_argument('--val_step',type=int, default=20)
model_forder = "2+3+4+5+6_v3"
data_forder = "2+3+4+5+6_v3"
parser.add_argument('--train_path', type=str, default='dataset/test9/clear/'+str(data_forder)+'/train.pkl')
parser.add_argument('--valid_path', type=str, default='dataset/test9/clear/'+str(data_forder)+'/valid.pkl')
parser.add_argument('--test_path', type=str, default='dataset/test9/clear/'+str(data_forder)+'/test.pkl')
parser.add_argument('--model_path', type=str, default='model/test9/clear/'+str(model_forder)+'/')
    parser.add_argument('--batch_size', type=int, default=1)
    parser.add_argument('--learning_rate',type=float, default=0.01)
    parser.add_argument('--beta1',type=float, default=0.5)
    parser.add_argument('--beta2',type=float, default=0.99)
    parser.add_argument('--dropout_rate',type=float, default=0.5)
    parser.add_argument('--weight_decay',type=float, default=0.0)
    parser.add_argument('--frame_nums',type=int, default=64) # frame_nums -> fc input size: 32->192, 64->960, 128->2496
    parser.add_argument('--num_classes',type=int, default=4)
    parser.add_argument('--feat_dims',type=int, default=13)
args = parser.parse_args()
return args
model.py
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from layer import GraphConvolution, StandConvolution, StandRecurrent
class GGCN(nn.Module):
def __init__(self, adj, num_v, num_classes, gc_dims, sc_dims, feat_dims, frame_nums, dropout=0.5):
super(GGCN, self).__init__()
        terminal_cnt = 5
        actor_cnt = 1
        self.frame_nums=frame_nums
        # torch.eye: identity matrix of shape [adj.size(0), adj.size(0)]
        # detach(): split off from the autograd graph; shares data but carries no gradient (avoids blowing up GPU memory)
        adj = adj + torch.eye(adj.size(0)).to(adj).detach() # add self-loops, [15,15]
        ident = torch.eye(adj.size(0)).to(adj) # [15,15]
        zeros = torch.zeros(adj.size(0), adj.size(1)).to(adj) # [15,15]
        a=torch.cat([adj, ident, zeros], 1) # [15,15]x3=[15,45]
        b=torch.cat([ident, adj, ident], 1) # [15,15]x3=[15,45]
        c=torch.cat([zeros, ident, adj], 1) # [15,15]x3=[15,45]
        self.adj = torch.cat([a,b,c], 0).float() # [15,45]x3=[45,45] block adjacency over 3 consecutive frames
        # nn.Parameter: registers the tensor as a trainable model parameter
        # randn: standard-normal random tensor (mean 0, variance 1)
        self.terminal = nn.Parameter(torch.randn(terminal_cnt, actor_cnt, feat_dims)) # [5,1,13]
        self.gcl = GraphConvolution(gc_dims[0]+feat_dims, gc_dims[1], num_v, dropout=dropout) # input 3+13, output 9
        self.conv= StandConvolution(sc_dims, num_classes, dropout=dropout) # standard-convolution head
        # self.conv= StandRecurrent(sc_dims, num_classes, dropout=dropout) # LSTM head
        nn.init.xavier_normal_(self.terminal) # Xavier normal initialization
    def forward(self, x):
        # F.interpolate(input, size, mode='nearest'): resamples the last dimension to the given size
        head_la = F.interpolate(torch.stack([self.terminal[0],self.terminal[1]],2), 6) # [1,13,6]
        head_ra = F.interpolate(torch.stack([self.terminal[0],self.terminal[2]],2), 6) # [1,13,6]
        lw_ra = F.interpolate(torch.stack([self.terminal[3],self.terminal[4]],2), 6) # [1,13,6]
        # print(lw_ra.shape)
        # concatenate five [1,13,3] blocks along dim 2: node_features = [1,13,15]; x = [1, frame_nums, 15, 3]
        node_features = torch.cat([
            (head_la[:,:,:3] + head_ra[:,:,:3])/2, # [1,13,3] mean of the first halves of head_la and head_ra
            torch.stack((lw_ra[:,:,2], lw_ra[:,:,1], lw_ra[:,:,0]), 2), # [1,13,3] first half of lw_ra, reversed
            lw_ra[:,:,3:], head_la[:,:,3:], head_ra[:,:,3:]], 2).to(x) # [1,13,3] each: the second halves
        # permute: reorder dims; unsqueeze: insert a size-1 dim; repeat: tile along the given dims
        # shapes: [1,13,15] -> [1,15,13] -> [1,1,15,13] -> [1,frame_nums,15,13]
        node_features=node_features.permute(0,2,1).unsqueeze(1).repeat(1,self.frame_nums,1,1)
        # [1,frame_nums,15,3] + [1,frame_nums,15,13] = [1,frame_nums,15,16]
        x = torch.cat((x, node_features), 3)
        # stack each frame with its two successors: [1,frame_nums-2,15,16]x3 = [1,frame_nums-2,45,16]
        concat_seq = torch.cat([x[:,:-2], x[:,1:-1], x[:,2:]], 2)
        # print(self.adj.shape, concat_seq.shape)
        multi_conv = self.gcl(self.adj, concat_seq)
        logit = self.conv(multi_conv)
        return logit
layer.py
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# graph-convolution layer
class GraphConvolution(nn.Module):
    def __init__(self, input_dim, output_dim, num_vetex, act=F.relu, dropout=0.5, bias=True):
        super(GraphConvolution, self).__init__()
        self.alpha = 1.
        self.act = act
        self.dropout = nn.Dropout(dropout)
        # keep .to(device) inside nn.Parameter(...) so the attribute stays a registered Parameter
        self.weight = nn.Parameter(torch.randn(input_dim, output_dim).to(device)) # [16,9]
        if bias:
            self.bias = nn.Parameter(torch.randn(output_dim).to(device))
        else:
            self.bias = None
        for w in [self.weight]:
            nn.init.xavier_normal_(w)
    # symmetric normalization used by the GCN propagation rule: D^-1/2 * M * D^-1/2
    def normalize(self, m):
        rowsum = torch.sum(m, 0)
        r_inv = torch.pow(rowsum, -0.5)
        r_mat_inv = torch.diag(r_inv).float()
        m_norm = torch.mm(r_mat_inv, m)
        m_norm = torch.mm(m_norm, r_mat_inv)
        return m_norm
    def forward(self, adj, x):
        x = self.dropout(x) # [1,frame_nums-2,45,16]
        adj_norm = self.normalize(adj) # [45,45]
        # torch.mm: if mat1 is n x m and mat2 is m x p, the output is n x p
        sqr_norm = self.normalize(torch.mm(adj,adj)) # [45,45]
        m_norm = self.alpha*adj_norm + (1.-self.alpha)*sqr_norm # [45,45]
        # torch.einsum keeps the computation inside the autograd graph (np.einsum would detach it and stop gradients)
        x_tmp = torch.einsum('abcd,de->abce', x, self.weight) # [1,T,45,16] x [16,9] = [1,T,45,9]
        x_out = torch.einsum('ij,abid->abjd', m_norm, x_tmp) # [1,T,45,9]
        if self.bias is not None:
            x_out = x_out + self.bias
        x_out = self.act(x_out)
        return x_out
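    # Note: with self.alpha = 1 the mix above reduces to the plain 1-hop rule,
    # x_out = act(normalize(A) . x . W + b), i.e. the standard GCN propagation step.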
# classification head built from standard convolutions
class StandConvolution(nn.Module):
    def __init__(self, dims, num_classes, dropout):
        super(StandConvolution, self).__init__()
        # conv output size: h' = (h - kernel_size + 2*padding) / stride + 1
        # e.g. x = [10,16,30,32] (h=30, w=32) with kernel (3,2), stride (2,1), padding 0:
        #   h' = (30 - 3 + 2*0)/2 + 1 = 14
        #   w' = (32 - 2 + 2*0)/1 + 1 = 31
        #   batch = 10, out_channels = 33, so y = [10, 33, 14, 31]
self.num_classes=num_classes
self.dropout = nn.Dropout(dropout)
self.conv = nn.Sequential(
nn.Conv2d(dims[0], dims[1], kernel_size=5, stride=2),
nn.InstanceNorm2d(dims[1]),
nn.ReLU(inplace=True),
# nn.AvgPool2d(5, stride=1),
nn.Conv2d(dims[1], dims[2], kernel_size=5, stride=2),
nn.InstanceNorm2d(dims[2]),
nn.ReLU(inplace=True),
#nn.AvgPool2d(3, stride=1),
nn.Conv2d(dims[2], dims[3], kernel_size=5, stride=2),
nn.InstanceNorm2d(dims[3]),
nn.ReLU(inplace=True),
#nn.AvgPool2d(3, stride=2)
).to(device)
        # self.fc = nn.Linear(dims[3]*3, num_classes).to(device)
        self.fc = nn.Linear(960, num_classes).to(device) # fc input size per frame_nums: 32->192, 64->960, 128->2496
    def forward(self, x):
        x = self.dropout(x.permute(0,3,1,2)) # [1,9,frame_nums-2,45]
        x_tmp = self.conv(x) # e.g. [1,64,5,3] for frame_nums=64
        # view: flatten in row-major order, then reshape
        x_tmp = x_tmp.view(x.size(0), -1) # e.g. [1,64,5,3] -> [1,960]
        # print(x_tmp.shape)
        x_out = self.fc(x_tmp) # [1,num_classes] class scores
return x_out
# LSTM head
class StandRecurrent(nn.Module):
def __init__(self, dims, num_classes, dropout):
super(StandRecurrent, self).__init__()
self.lstm = nn.LSTM(dims[0]*45, dims[1], batch_first=True,dropout=0.5).to(device)
self.fc = nn.Linear(dims[1], num_classes).to(device)
def forward(self, x):
x_tmp,_ = self.lstm(x.contiguous().view(x.size(0), x.size(1), -1))
x_out = self.fc(x_tmp[:,-1])
return x_out
metric.py
import torch
from sklearn.metrics import accuracy_score
def accuracy(preds, target):
    # torch.max(a, 1) returns (max values, argmax indices) along dim 1; take the indices as predicted labels
    preds = torch.max(preds, 1)[1].float()
    acc = accuracy_score(target.cpu().numpy(), preds.cpu().numpy()) # fraction of correct predictions
    # print(preds.cpu().numpy(), target.cpu().numpy())
    return acc