一, 大致框架
data-get(N, C, T, V, M)(已经包含时间和空间信息)(样本数,channel,时间帧数,num_node,人数)。
joint_data;- graph_spatial(A)- (agcn.py)model(A,B,C;AGCN) -(main.py)train - test- softmax score a
bone_data;- graph_spatial(A)- (agcn.py)model(A,B,C;AGCN) -(main.py)train - test- softmax score b。
(ensemble.py)a+b -> fused score , action label。
① 针对graph文件夹,其目的就是return A,即得到邻接矩阵。论文中Ak的大小为N×N,其中N即代码中的V(num_node)
class Graph:
    """Holder for the skeleton adjacency matrix ``A``.

    The constructor builds ``A`` for the requested labeling mode and caches it
    on the instance. ``tools``, ``num_node``, ``self_link``, ``inward`` and
    ``outward`` are looked up from the enclosing module and are not defined in
    this class.
    """

    def __init__(self, labeling_mode='spatial'):
        # Build the adjacency matrix once and keep it on the instance.
        self.A = self.get_adjacency_matrix(labeling_mode)
        ...  # the rest of the constructor is elided in these notes

    def get_adjacency_matrix(self, labeling_mode=None):
        """Return the adjacency matrix for ``labeling_mode``.

        Args:
            labeling_mode: ``None`` returns the cached ``self.A`` (so it is
                only usable once ``self.A`` has been set); ``'spatial'``
                builds a fresh matrix via ``tools.get_spatial_graph``.

        Returns:
            Whatever ``tools.get_spatial_graph`` returns, or the cached
            ``self.A``.

        Raises:
            ValueError: if ``labeling_mode`` is neither ``None`` nor
                ``'spatial'``.
        """
        if labeling_mode is None:
            return self.A
        if labeling_mode == 'spatial':
            return tools.get_spatial_graph(num_node, self_link, inward, outward)
        # Name the offending value so a misconfigured mode is easy to diagnose.
        raise ValueError('unsupported labeling_mode: {!r}'.format(labeling_mode))
tools.py
def get_spatial_graph(num_node, self_link, inward, outward):
    """Stack the self-link, inward and outward adjacency matrices.

    Args:
        num_node: number of graph vertices (V).
        self_link: edge list passed to ``edge2mat`` unnormalized.
        inward: list of ``(a, b)`` index pairs; converted with ``edge2mat``
            and then normalized with ``normalize_digraph``.
        outward: list of ``(a, b)`` index pairs; handled like ``inward``.

    Returns:
        ``np.stack`` of the three matrices, i.e. shape ``(3, V, V)``.
    """
    identity_part = edge2mat(self_link, num_node)
    # Only the two directed edge sets are normalized; the self-links are not.
    inward_part, outward_part = (
        normalize_digraph(edge2mat(edges, num_node))
        for edges in (inward, outward)
    )
    return np.stack((identity_part, inward_part, outward_part))
def edge2mat(link, num_node):
    """Convert an edge list into a dense 0/1 matrix.

    Args:
        link: iterable of ``(i, j)`` index pairs.
        num_node: number of vertices; the result is ``num_node x num_node``
            (the notes call this N in the paper and V in the code).

    Returns:
        A float ``numpy.ndarray`` of zeros with ``A[j, i] == 1`` for every
        ``(i, j)`` in ``link``. An empty ``link`` yields the all-zero matrix.
    """
    A = np.zeros((num_node, num_node))
    for i, j in link:
        # The pair (i, j) marks row j, column i (note the swapped order).
        A[j, i] = 1
    return A
def normalize_digraph(A):
    """Divide every column of ``A`` by that column's sum.

    Columns whose sum is not strictly positive are scaled by 0, so they come
    out as all zeros. This equals right-multiplying ``A`` by the diagonal
    matrix of reciprocal column sums, without materializing that matrix.

    Args:
        A: 2-D array-like (``V x V`` in these notes); any numeric dtype.

    Returns:
        A float ``numpy.ndarray`` with the same shape as ``A``.
    """
    A = np.asarray(A)
    col_sums = np.sum(A, 0)  # one sum per column
    scale = np.zeros(col_sums.shape)
    positive = col_sums > 0
    # Float reciprocal: ``x ** (-1)`` raises for integer numpy values.
    scale[positive] = 1.0 / col_sums[positive]
    # Broadcasting multiplies column j by scale[j].
    return A * scale
A determines whether there is a connection between two vertices; it represents the physical structure of the human body.
ntu_rgb_d.py(kinetics.py,num_node = 18,inward本身从0开始)
# Skeleton definition: vertex count and edge tables.
num_node = 25
# One (i, i) self-connection per joint.
self_link = list(zip(range(num_node), range(num_node)))
# Allowed joint-to-joint connections, written with 1-based joint indices.
inward_ori_index = [
    (1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5), (7, 6),
    (8, 7), (9, 21), (10, 9), (11, 10), (12, 11), (13, 1),
    (14, 13), (15, 14), (16, 15), (17, 1), (18, 17), (19, 18),
    (20, 19), (22, 23), (23, 8), (24, 25), (25, 12),
]
# Shift both endpoints down by one so indices start at 0.
inward = [(src - 1, dst - 1) for src, dst in inward_ori_index]
# Same edges with the endpoints swapped; together with `inward` this covers
# both directions of every connection.
outward = [(dst, src) for src, dst in inward]
neighbor = [*inward, *outward]
② 针对model文件夹,分别对应论文中 adaptive graph convolutional network > 4.1layer(unit_gcn,unit_tcn)4.2block(TCN_GCN_unit);4.3network(Model)
# For the temporal dimension, it is straightforward to perform the graph convolution similar to the classical convolution operation.
class unit_tcn(nn.Module):
    """Temporal unit: a ``(kernel_size x 1)`` ``Conv2d`` followed by ``BatchNorm2d``.

    Per the notes this is the Kt x 1 convolution applied to the C x T x N
    feature maps, i.e. the kernel spans only the first spatial axis of the
    4-D input.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels produced by the convolution.
        kernel_size: kernel extent along the convolved axis.
        stride: stride along the convolved axis.
    """

    def __init__(self, in_channels, out_channels, kernel_size=9, stride=1):
        super(unit_tcn, self).__init__()
        # Symmetric padding: with an odd kernel_size and stride 1 the length of
        # the convolved axis is unchanged.
        pad = (kernel_size - 1) // 2
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=(kernel_size, 1),
            padding=(pad, 0),
            stride=(stride, 1),
        )
        self.bn = nn.BatchNorm2d(out_channels)
        ...  # the rest of the constructor is elided in these notes

    def forward(self, x):
        """Apply the convolution and then batch normalization to ``x``."""
        return self.bn(self.conv(x))
class unit_gcn(nn.Module): #spatial GCN + bn + relu
def __init__(self, in_channels, out_channels, A, coff_embedding=4, num_subset=3, adaptive=True, attention=True):
super(unit_gcn, self).__init__()
inter_channels = out_channels // coff_embedding
num_jpts = A.shape[-1]
self.conv_d = nn.ModuleList() #容器 append
for i in range(self.num_subset):
self.conv_d.append(nn.Conv2d(in_channels, out_channels, 1))
if adaptive:
self.PA = nn.Parameter(torch.from_numpy(A.astype(np.float32)))
self.conv_a = nn.ModuleList()
self.conv_b = nn.ModuleList()
for i in range(self.num_subset):
self.conv_a.append(nn.Conv2d(in_channels, inter_channels, 1))
self.conv_b.append(nn.Conv2d(in_channels, inter_channels, 1))
else:
self.A = Variable(torch.from_numpy(A.astype(np.float32)), requires_grad=False)
self.adaptive = adaptive
if attention:
self.conv_ta = nn.Conv1d(out_channels, 1, 9, padding=4)
nn.init.constant_(self.conv_ta.weight, 0)
nn.init.cons