Neighborhood Sampling
import numpy as np

def sampling(src_nodes, sample_num, neighbor_table):
    '''
    Sample a fixed number of neighbors for each source node. Sampling is done
    with replacement, so when a node has fewer neighbors than sample_num the
    result contains duplicate nodes.
    :param src_nodes {list, ndarray}: list of source nodes
    :param sample_num {int}: number of neighbors to sample per node
    :param neighbor_table {dict}: mapping from a node to its neighbors
    :return: flat ndarray of all sampled node ids
    '''
    results = []
    for sid in src_nodes:
        # Draw sample_num neighbors of sid, with replacement.
        res = np.random.choice(neighbor_table[sid], size=(sample_num, ))
        results.append(res)
    return np.asarray(results).flatten()
def multihop_sampling(src_nodes, sample_nums, neighbor_table):
    '''
    Multi-hop sampling starting from the source nodes.
    :param src_nodes {list, np.ndarray}: source node ids
    :param sample_nums {list of int}: number of neighbors to sample at each hop
    :param neighbor_table {dict}: mapping from a node to its neighbors
    :return [list of ndarray]: the sampled nodes of each hop
    '''
    sampling_result = [src_nodes]
    for k, hopk_num in enumerate(sample_nums):
        # Hop k+1 samples neighbors of every node drawn at hop k.
        hopk_result = sampling(sampling_result[k], hopk_num, neighbor_table)
        sampling_result.append(hopk_result)
    return sampling_result
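
A minimal example on a made-up 5-node graph: starting from node 0 and sampling 2 first-hop and then 3 second-hop neighbors gives layers of size 1, 2 and 6 (duplicates are expected because sampling is with replacement):

# Hypothetical toy graph: node id -> list of neighbor ids
neighbor_table = {
    0: [1, 2],
    1: [0, 2, 3],
    2: [0, 1, 4],
    3: [1],
    4: [2],
}

hops = multihop_sampling([0], sample_nums=[2, 3], neighbor_table=neighbor_table)
for k, layer in enumerate(hops):
    print('hop', k, ':', layer)  # layer sizes 1, 2, 6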
GCN_ATT
import torch.nn as nn
import torch
import torch.nn.init as init
import scipy.sparse as sp
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from random import shuffle
class GraphConvolutionLayerWithAttention(nn.Module):
    '''A graph convolution layer with a learnable attention weight per node pair.'''
    def __init__(self, nodes_num, input_dim, output_dim, use_bias=True):
        '''
        :param nodes_num: int, number of nodes in each input graph
        :param input_dim: int, dimension of the input node features
        :param output_dim: int, dimension of the output node features
        :param use_bias: bool, optional, whether to add a bias term
        '''
        super(GraphConvolutionLayerWithAttention, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.use_bias = use_bias
        self.weight = nn.Parameter(torch.Tensor(input_dim, output_dim))
        self.attention = nn.Parameter(torch.Tensor(nodes_num, nodes_num))
        if self.use_bias:
            self.bias = nn.Parameter(torch.Tensor(output_dim))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        init.kaiming_uniform_(self.weight)
        init.kaiming_uniform_(self.attention)
        if self.use_bias:
            init.zeros_(self.bias)
    def forward(self, adjacency, input_feature):
        '''
        :param adjacency: torch.Tensor, dense adjacency matrix. A plain GCN layer
            could take a sparse tensor and use sparse matmul, but the element-wise
            product with the dense attention matrix below requires a dense
            adjacency here.
        :param input_feature: torch.Tensor, input node features
        :return: torch.Tensor, output node features
        '''
        support = torch.mm(input_feature, self.weight)
        # Re-weight each edge by its learned attention coefficient, then aggregate.
        output = torch.mm(self.attention * adjacency, support)
        if self.use_bias:
            output += self.bias
        return output
class GCN_ATT(nn.Module):
    def __init__(self, BatchNodesNum, input_dim):
        super(GCN_ATT, self).__init__()
        self.BatchNodesNum = BatchNodesNum
        self.gcn1 = GraphConvolutionLayerWithAttention(BatchNodesNum, input_dim, 16)
        self.gcn2 = GraphConvolutionLayerWithAttention(BatchNodesNum, 16, 2)

    def forward(self, adjacency, feature):
        h = F.relu(self.gcn1(adjacency, feature))
        logits = self.gcn2(adjacency, h)
        # The sample is classified by the 2-class logits of its first node.
        return logits[0]
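
As a quick shape check, the model can be driven with random dense inputs; the sizes here (100 nodes, 64 features) are placeholders, not values from the original data:

num_nodes, feat_dim = 100, 64            # placeholder sizes for illustration
net = GCN_ATT(num_nodes, feat_dim)
adj = torch.rand(num_nodes, num_nodes)   # stand-in dense adjacency
x = torch.rand(num_nodes, feat_dim)
print(net(adj, x).shape)                 # torch.Size([2]): the 2-class logits of node 0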
def LoadByIndices(url, Indices):
    '''Load url + str(idx) + '.npy' for every idx in Indices.'''
    BatchSamples = []
    for idx in Indices:
        file = url + str(idx) + '.npy'
        BatchSamples.append(np.load(file))
    return BatchSamples
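
Since LoadByIndices builds each file name by plain string concatenation, the url argument is really a path prefix; for example, with the sample prefix used later in this listing:

# Reads D:\Samples\Sample3.npy and D:\Samples\Sample7.npy
batch = LoadByIndices('D:\\Samples\\Sample', [3, 7])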
def normalization(adjacency):
    '''Compute L = A + I, i.e. add self-loops to the adjacency matrix.'''
    adjacency = sp.coo_matrix(adjacency)  # accept dense arrays from np.load as well
    adjacency = adjacency + sp.eye(adjacency.shape[0])
    return adjacency.tocoo()
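
Note that normalization only adds self-loops and leaves node degrees unscaled. If you also want the symmetric normalization D^-1/2 (A + I) D^-1/2 used by the standard GCN, a sketch (not part of the original pipeline) could look like this:

def symmetric_normalization(adjacency):
    '''Compute D^-1/2 (A + I) D^-1/2, the standard GCN normalization.'''
    adjacency = sp.coo_matrix(adjacency) + sp.eye(adjacency.shape[0])
    degree = np.asarray(adjacency.sum(axis=1)).flatten()
    d_inv_sqrt = np.zeros_like(degree, dtype=float)
    d_inv_sqrt[degree > 0] = degree[degree > 0] ** -0.5
    d_mat = sp.diags(d_inv_sqrt)
    return (d_mat @ adjacency @ d_mat).tocoo()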
learning_rate = 0.1
weight_decay = 5e-4
epochs = 200
device = "cuda" if torch.cuda.is_available() else "cpu"
# Placeholders: set these to the (fixed) node count of each sample graph and to
# the node-feature dimension of your data; the original listing omitted them.
batch_nodes_num = 100
input_dim = 64
model = GCN_ATT(batch_nodes_num, input_dim).to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
AllSampleNum = 20000
train_per = 0.3
train_sample_num = int(AllSampleNum * train_per)
indices = np.random.permutation(AllSampleNum)
train_indices = indices[:train_sample_num]
test_indices = indices[train_sample_num:]
# Path prefixes; LoadByIndices appends the sample index and '.npy'.
samples_url = 'D:\\Samples\\Sample'
adjacencys_url = 'D:\\Adjacencys\\Adjacencys'
y_url = 'D:\\y\\y'
def train():
    loss_history = []
    val_acc_history = []
    # Key the labels by their global sample index so train_y[idx] is well defined
    # (the original indexed a position-ordered list with global indices).
    train_y = dict(zip(train_indices, LoadByIndices(y_url, train_indices)))
    test_y = dict(zip(test_indices, LoadByIndices(y_url, test_indices)))
    for epoch in range(epochs):
        model.train()  # test() switches to eval mode, so switch back each epoch
        shuffle(train_indices)
        for idx in train_indices:
            tensor_x = torch.from_numpy(LoadByIndices(samples_url, [idx])[0]).float().to(device)
            normal_adjacency = normalization(LoadByIndices(adjacencys_url, [idx])[0])
            # The attention product needs a dense adjacency tensor.
            tensor_adjacency = torch.from_numpy(normal_adjacency.toarray()).float().to(device)
            logits = model(tensor_adjacency, tensor_x)
            # Assumes each y file stores a single class index.
            label = torch.as_tensor(train_y[idx], dtype=torch.long, device=device).view(1)
            loss = criterion(logits.unsqueeze(0), label)  # add a batch dimension
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        train_acc = test(train_indices, train_y)
        val_acc = test(test_indices, test_y)
        loss_history.append(loss.item())
        val_acc_history.append(val_acc)
        print('Epoch {:03d}: Loss {:.4f}, TrainAcc {:.4f}, ValAcc {:.4f}'.format(
            epoch, loss.item(), train_acc, val_acc))
    return loss_history, val_acc_history
def test(indices_now, y):
    model.eval()
    acc = []
    with torch.no_grad():
        for idx in indices_now:
            tensor_x = torch.from_numpy(LoadByIndices(samples_url, [idx])[0]).float().to(device)
            normal_adjacency = normalization(LoadByIndices(adjacencys_url, [idx])[0])
            tensor_adjacency = torch.from_numpy(normal_adjacency.toarray()).float().to(device)
            logits = model(tensor_adjacency, tensor_x)
            # logits is 1-D (the 2-class logits of one sample), so argmax directly.
            predict_y = logits.argmax()
            label = torch.as_tensor(y[idx], device=device).view(-1)[0]
            acc.append(torch.eq(predict_y, label).float().item())
    accuracy = np.mean(acc)
    return accuracy
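
Finally, a typical way to run the pipeline and inspect the two curves returned by train(); matplotlib is assumed here and is not imported in the original listing:

import matplotlib.pyplot as plt

loss_history, val_acc_history = train()

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(loss_history)
ax1.set_xlabel('epoch')
ax1.set_ylabel('training loss')
ax2.plot(val_acc_history)
ax2.set_xlabel('epoch')
ax2.set_ylabel('validation accuracy')
plt.show()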