多图融合
1. 论文题目Graph-Revised Convolutional Network
动机:
由于现实世界的图通常是不完整且嘈杂的,因此将它们视为真实信息(这是大多数 GCN 中的常见做法)不可避免地会导致次优解决方案。解决这个问题的现有努力要么涉及难以扩展的过度参数化模型,要么只是简单地重新加权观察到的边缘而不处理缺失边缘的问题。贡献:本文提出了一种名为图修正卷积网络 (GRCN) 的新颖框架,它避免了这两个极端。具体来说,引入了基于 GCN 的图修正模块,用于预测缺失边和修正边权重
利用节点相似度微调原始拓扑:计算相似度,选取 top-k,直接将两个矩阵相加后转变为拉普拉斯阵
图生成方法
相似度计算+正规化,直接相加
代码
def forward(self, input, Adj):
    """Combine the observed graph ``Adj`` with a top-K similarity graph.

    Node embeddings are computed with ``self._node_embeddings``, turned into
    a similarity graph with ``self.cal_similarity_graph``, sparsified, merged
    with ``Adj`` and finally normalized with ``self.normalize``.

    Args:
        input: Node features passed to ``self._node_embeddings``.
        Adj: Observed adjacency; a sparse tensor exposing ``indices()`` /
            ``values()`` when ``self.sparse`` is true, otherwise a tensor
            that supports ``+``.

    NOTE: this excerpt stops after the normalization step, so as written the
    function has no ``return`` statement and yields ``None``.
    """
    # The observed adjacency is treated as a constant: no gradient flows to it.
    Adj.requires_grad = False

    embeddings = self._node_embeddings(input, Adj, self.sparse)
    similarity = self.cal_similarity_graph(embeddings)

    if self.sparse:
        # Sparse path: concatenate the edge lists of both graphs and rebuild
        # a sparse tensor with the shape of the observed adjacency.
        knn_indices, knn_values = self._sparse_graph(similarity, self.K, self.sparse)
        merged_indices = torch.cat([Adj.indices(), knn_indices], dim=1)
        merged_values = torch.cat([Adj.values(), knn_values])
        merged = torch.sparse.FloatTensor(merged_indices, merged_values, Adj.size())
        merged = merged.to(self.device)
        Adj_new = self.normalize(merged, self.norm_mode, self.sparse)
    else:
        # Dense path: add the sparsified similarity graph to the observed one.
        pruned = self.sparse_graph(similarity, self.K, self.sparse)
        Adj_new = self.normalize(Adj + pruned, self.norm_mode)
topk
def sparse_graph(self, raw_graph, K, sparse):
    """Keep the K largest entries of every row of ``raw_graph``, symmetrized.

    Args:
        raw_graph: 2-D similarity matrix (it is indexed by row and column).
        K: Number of neighbours kept per row; converted with ``int``.
        sparse: When true, return an edge list instead of a dense matrix.

    Returns:
        ``sparse`` true: ``(inds, values)`` where ``inds`` is a ``2 x 2NK``
        index tensor holding every selected edge followed by its reverse and
        ``values`` the matching weights (repeated for the reverse edges).
        ``sparse`` false: ``raw_graph`` multiplied element-wise by a 0/1 mask
        that is also stored on ``self.mask``.

    Exits via ``exit(...)`` when ``self.reduce`` is not ``"knn"``.
    """
    if self.reduce != "knn":
        exit("wrong sparsification method")

    k = int(K)
    top_values, top_indices = raw_graph.topk(k=k, dim=-1)
    # print(top_values, top_indices)
    assert torch.sum(torch.isnan(top_values)) == 0
    assert torch.max(top_indices) < raw_graph.shape[1]

    if sparse:
        # Source node of each selected edge: row id repeated k times.
        rows = torch.arange(raw_graph.shape[0]).view(-1, 1).expand(-1, k)
        rows = rows.contiguous().view(1, -1)[0].to(self.device)
        cols = top_indices.view(1, -1)[0]
        flat_values = top_values.view(1, -1)[0]
        # Append the reversed edges so the edge list is symmetric.
        edge_index = torch.cat(
            [torch.stack([rows, cols]), torch.stack([cols, rows])], dim=1
        )
        edge_values = torch.cat([flat_values, flat_values])
        return edge_index, edge_values

    n_rows, n_cols = raw_graph.shape[0], raw_graph.shape[1]
    self.mask = torch.zeros(raw_graph.shape).to(self.device)
    # Mark (i, j) for every selected neighbour j of i, then the mirrored (j, i).
    self.mask[torch.arange(n_rows).view(-1, 1), top_indices] = 1.
    self.mask[top_indices, torch.arange(n_cols).view(-1, 1)] = 1.
    self.mask.requires_grad = False
    return raw_graph * self.mask
normalize
def normalize(self, adj, mode="sym", sparse=False):
    """Degree-normalize an adjacency matrix.

    Args:
        adj: Adjacency matrix; a sparse COO tensor when ``sparse`` is true,
            otherwise a dense 2-D tensor.
        mode: ``"sym"`` scales entry (i, j) by
            ``1 / (sqrt(d_i) + EOS) * 1 / (sqrt(d_j) + EOS)``;
            ``"row"`` scales row i by ``1 / (d_i + EOS)``,
            where ``d`` is the row sum of ``adj``.
        sparse: Select the sparse or the dense code path.

    Returns:
        The normalized adjacency in the same (dense / sparse) layout. The
        sparse result is moved to ``self.device``.

    Exits via ``exit(...)`` for any other ``mode``.
    """
    if sparse:
        adj = adj.coalesce()
        edge_index = adj.indices()
        # NOTE(review): the degree vector is indexed by node id below, which
        # presumably assumes every row has at least one stored entry - confirm.
        if mode == "sym":
            inv_sqrt_degree = 1. / (torch.sqrt(torch.sparse.sum(adj, dim=1).values()) + EOS)
            edge_scale = inv_sqrt_degree[edge_index[0]] * inv_sqrt_degree[edge_index[1]]
        elif mode == "row":
            inv_degree = 1. / (torch.sparse.sum(adj, dim=1).values() + EOS)
            edge_scale = inv_degree[edge_index[0]]
        else:
            exit("wrong norm mode")
        rescaled = torch.sparse.FloatTensor(edge_index, adj.values() * edge_scale, adj.size())
        return rescaled.to(self.device)
    else:
        if mode == "sym":
            inv_sqrt_degree = 1. / (torch.sqrt(adj.sum(dim=1, keepdim=False)) + EOS)
            return inv_sqrt_degree.unsqueeze(1) * adj * inv_sqrt_degree.unsqueeze(0)
        if mode == "row":
            inv_degree = 1. / (adj.sum(dim=1, keepdim=False) + EOS)
            return inv_degree.unsqueeze(1) * adj
        exit("wrong norm mode")
2. Iterative Deep Graph Learning for Graph Neural Networks: Better and Robust Node Embeddings
都是节点生成,没有多图融合,邻接矩阵稀疏度通过drop out 实现
链接: link
code: [link](https://github.com/hugochan/IDGL)
IDGL 的关键原理是基于更好的节点嵌入来学习更好的图结构。当学习到的图结构足够接近为下游预测任务优化的图时,我们的迭代方法会动态停止
图生成方法:
graph learner 多种相似度;AnchorGraphLearner 多种指标方法对比相似度,含有掩码
代码
node embedding 首先0.5drop out–>learn graph
def forward(self, node_features, init_adj=None):
    """Learn a graph from the node features and classify on it.

    Args:
        node_features: Input node feature tensor.
        init_adj: Optional initial adjacency forwarded to ``self.learn_graph``.

    Returns:
        ``(output, adj)``: log-softmax (over the last dimension) of the
        encoder output, and the learned adjacency after dropout.
    """
    # The same config key drives dropout on the features and on the adjacency.
    node_features = F.dropout(
        node_features,
        self.config.get('feat_adj_dropout', 0),
        training=self.training,
    )
    _, adj = self.learn_graph(
        self.graph_learner, node_features, self.graph_skip_conn, init_adj=init_adj
    )
    adj = F.dropout(
        adj,
        self.config.get('feat_adj_dropout', 0),
        training=self.training,
    )
    logits = self.encoder(node_features, adj)
    return F.log_softmax(logits, dim=-1), adj
raw graph is generated by node features
learn_graph函数中先进行锚点或普通节点生成原始邻接矩阵
raw_adj生成方法:
if self.graph_learn:
    # Anchor-based learner in scalable mode, full pairwise learner otherwise.
    if self.scalable_run:
        graph_learn_fun = AnchorGraphLearner
    else:
        graph_learn_fun = GraphLearner

    # First learner: built with `nfeat` as its first positional argument.
    self.graph_learner = graph_learn_fun(
        nfeat,
        config['graph_learn_hidden_size'],
        topk=config['graph_learn_topk'],
        epsilon=config['graph_learn_epsilon'],
        num_pers=config['graph_learn_num_pers'],
        metric_type=config['graph_metric_type'],
        device=self.device,
    )

    # Second learner: built with `hidden_size` as its first positional
    # argument. Its hidden size, top-k and epsilon fall back to the first
    # learner's settings when no "...2" key is present in the config.
    self.graph_learner2 = graph_learn_fun(
        hidden_size,
        config.get('graph_learn_hidden_size2', config['graph_learn_hidden_size']),
        topk=config.get('graph_learn_topk2', config['graph_learn_topk']),
        epsilon=config.get('graph_learn_epsilon2', config['graph_learn_epsilon']),
        num_pers=config['graph_learn_num_pers'],
        metric_type=config['graph_metric_type'],
        device=self.device,
    )
graph learner
class GraphLearner(nn.Module):
    """Build a similarity ("attention") matrix between the rows of a context.

    The similarity function is selected by ``metric_type``:
    ``'attention'``, ``'weighted_cosine'``, ``'gat_attention'``, ``'kernel'``,
    ``'transformer'`` or ``'cosine'``. The raw similarities can then be
    sparsified with an epsilon threshold and/or a per-row top-k selection.
    Entries that are removed are set to a metric-specific ``markoff_value``
    (``-INF`` or ``0``, see ``forward``).
    """

    def __init__(self, input_size, hidden_size, topk=None, epsilon=None, num_pers=16, metric_type='attention', device=None):
        """Create the parameters required by the chosen metric.

        Args:
            input_size: Feature dimension of the context rows.
            hidden_size: Projection size used by the learnable metrics.
            topk: If not ``None``, keep only the ``topk`` largest entries per row.
            epsilon: If not ``None``, keep only entries greater than ``epsilon``.
            num_pers: Number of perspectives (independent projections / weight
                vectors) averaged by the multi-perspective metrics.
            metric_type: Name of the similarity function (see class docstring).
            device: Device handed to ``to_cuda`` in ``build_knn_neighbourhood``.

        Raises:
            ValueError: If ``metric_type`` is not one of the supported names.
        """
        super(GraphLearner, self).__init__()
        self.device = device
        self.topk = topk
        self.epsilon = epsilon
        self.metric_type = metric_type

        if metric_type == 'attention':
            self.linear_sims = nn.ModuleList([nn.Linear(input_size, hidden_size, bias=False) for _ in range(num_pers)])
            print('[ Multi-perspective {} GraphLearner: {} ]'.format(metric_type, num_pers))
        elif metric_type == 'weighted_cosine':
            self.weight_tensor = torch.Tensor(num_pers, input_size)
            self.weight_tensor = nn.Parameter(nn.init.xavier_uniform_(self.weight_tensor))
            print('[ Multi-perspective {} GraphLearner: {} ]'.format(metric_type, num_pers))
        elif metric_type == 'gat_attention':
            self.linear_sims1 = nn.ModuleList([nn.Linear(input_size, 1, bias=False) for _ in range(num_pers)])
            self.linear_sims2 = nn.ModuleList([nn.Linear(input_size, 1, bias=False) for _ in range(num_pers)])
            self.leakyrelu = nn.LeakyReLU(0.2)
            print('[ GAT_Attention GraphLearner]')
        elif metric_type == 'kernel':
            self.precision_inv_dis = nn.Parameter(torch.Tensor(1, 1))
            self.precision_inv_dis.data.uniform_(0, 1.0)
            self.weight = nn.Parameter(nn.init.xavier_uniform_(torch.Tensor(input_size, hidden_size)))
        elif metric_type == 'transformer':
            self.linear_sim1 = nn.Linear(input_size, hidden_size, bias=False)
            # NOTE(review): linear_sim2 is created but forward() only uses
            # linear_sim1 (Q @ Q^T); kept so existing checkpoints still load.
            self.linear_sim2 = nn.Linear(input_size, hidden_size, bias=False)
        elif metric_type == 'cosine':
            pass
        else:
            raise ValueError('Unknown metric_type: {}'.format(metric_type))

        print('[ Graph Learner metric type: {} ]'.format(metric_type))

    def forward(self, context, ctx_mask=None):
        """Compute the (optionally sparsified) similarity matrix.

        Parameters
        :context, (batch_size, ctx_size, dim)
        :ctx_mask, (batch_size, ctx_size); entries that are 0 mark positions
            whose rows and columns are overwritten with the mark-off value.

        Returns
        :attention, (batch_size, ctx_size, ctx_size)
        """
        if self.metric_type == 'attention':
            attention = 0
            for linear in self.linear_sims:
                context_fc = torch.relu(linear(context))
                attention += torch.matmul(context_fc, context_fc.transpose(-1, -2))
            attention /= len(self.linear_sims)
            markoff_value = -INF

        elif self.metric_type == 'weighted_cosine':
            expand_weight_tensor = self.weight_tensor.unsqueeze(1)
            if len(context.shape) == 3:
                expand_weight_tensor = expand_weight_tensor.unsqueeze(1)
            # One re-weighted copy of the context per perspective, averaged below.
            context_fc = context.unsqueeze(0) * expand_weight_tensor
            context_norm = F.normalize(context_fc, p=2, dim=-1)
            attention = torch.matmul(context_norm, context_norm.transpose(-1, -2)).mean(0)
            markoff_value = 0

        elif self.metric_type == 'transformer':
            Q = self.linear_sim1(context)
            attention = torch.matmul(Q, Q.transpose(-1, -2)) / math.sqrt(Q.shape[-1])
            markoff_value = -INF

        elif self.metric_type == 'gat_attention':
            attention = []
            for linear1, linear2 in zip(self.linear_sims1, self.linear_sims2):
                a_input1 = linear1(context)
                a_input2 = linear2(context)
                attention.append(self.leakyrelu(a_input1 + a_input2.transpose(-1, -2)))
            attention = torch.mean(torch.stack(attention, 0), 0)
            markoff_value = -INF

        elif self.metric_type == 'kernel':
            dist_weight = torch.mm(self.weight, self.weight.transpose(-1, -2))
            attention = self.compute_distance_mat(context, dist_weight)
            attention = torch.exp(-0.5 * attention * (self.precision_inv_dis**2))
            markoff_value = 0

        elif self.metric_type == 'cosine':
            # F.normalize clamps the norm away from zero, so an all-zero
            # feature row yields zero similarity instead of NaN; matmul (rather
            # than mm) also accepts the batched input described above.
            context_norm = F.normalize(context, p=2, dim=-1)
            attention = torch.matmul(context_norm, context_norm.transpose(-1, -2)).detach()
            markoff_value = 0

        if ctx_mask is not None:
            # masked_fill_ requires a boolean mask in current PyTorch; build
            # "True where the position is masked out" explicitly instead of
            # the former uint8 expression `1 - ctx_mask.byte()`.
            masked_out = ctx_mask.byte() == 0
            attention = attention.masked_fill_(masked_out.unsqueeze(1), markoff_value)
            attention = attention.masked_fill_(masked_out.unsqueeze(-1), markoff_value)

        if self.epsilon is not None:
            attention = self.build_epsilon_neighbourhood(attention, self.epsilon, markoff_value)

        if self.topk is not None:
            attention = self.build_knn_neighbourhood(attention, self.topk, markoff_value)
        return attention

    def build_knn_neighbourhood(self, attention, topk, markoff_value):
        """Keep the ``topk`` largest entries per row; fill the rest with ``markoff_value``."""
        topk = min(topk, attention.size(-1))
        knn_val, knn_ind = torch.topk(attention, topk, dim=-1)
        weighted_adjacency_matrix = to_cuda((markoff_value * torch.ones_like(attention)).scatter_(-1, knn_ind, knn_val), self.device)
        return weighted_adjacency_matrix

    def build_epsilon_neighbourhood(self, attention, epsilon, markoff_value):
        """Keep entries strictly greater than ``epsilon``; replace the rest with ``markoff_value``."""
        # The mask is detached, so the threshold test itself carries no gradient.
        mask = (attention > epsilon).detach().float()
        weighted_adjacency_matrix = attention * mask + markoff_value * (1 - mask)
        return weighted_adjacency_matrix

    def compute_distance_mat(self, X, weight=None):
        """Return ``-2 * (X W) X^T + n_i + n_j`` with ``n = sum((X W) * X, dim=-1)``.

        ``W`` is ``weight`` when given, otherwise ``X`` is used unchanged.
        """
        if weight is not None:
            trans_X = torch.mm(X, weight)
        else:
            trans_X = X
        norm = torch.sum(trans_X * X, dim=-1)
        dists = -2 * torch.matmul(trans_X, X.transpose(-1, -2)) + norm.unsqueeze(0) + norm.unsqueeze(1)
        return dists
3.Heterogeneous Graph Structure Learning for Graph Neural Networks AAAI 2021
代码:[link](https://github.com/AndyJZhao/HGSL)
提出了一种新的框架 HGSL,该框架联合执行异构图结构学习和 GNN 参数学习以进行分类。考虑到异构图中不同关系的异质性,HGSL 分别生成每个关系子图。在每个生成的关系子图中,HGSL不仅通过生成特征相似度图来考虑特征相似度,而且通过生成特征传播图和语义图来考虑特征和语义中复杂的异构交互。然后,将这些图融合到学习到的异构图中,并与 GNN 一起针对分类目标进行优化
代码输入输出不是很清楚,可能要载入数据读一遍
4. AM-GCN: Adaptive Multi-channel Graph Convolutional Networks.
提出了一种用于半监督分类的自适应多通道图卷积网络 (AM-GCN)。中心思想是我们同时从节点特征、拓扑结构及其组合中提取特定的和共同的嵌入,并使用注意机制来学习嵌入的自适应重要性权重。
特征图(KNN)和拓扑图分别生成节点特征嵌入,注意力自适应融合
联合训练的训练结构可以借鉴一下
def train(model, epochs):
    """Run one joint training pass: classification + disparity + common loss.

    Relies on module-level ``optimizer``, ``features``, ``sadj``, ``fadj``,
    ``labels``, ``idx_train`` and ``config``. ``epochs`` is not used in the
    lines shown here, and this excerpt ends after ``loss.backward()``.
    """
    model.train()
    optimizer.zero_grad()

    output, _att, emb1, com1, com2, emb2, _emb = model(features, sadj, fadj)

    # Supervised loss on the training nodes only.
    loss_class = F.nll_loss(output[idx_train], labels[idx_train])
    # Mean of the two loss_dependence terms (specific vs. common embedding,
    # one per graph).
    dependence_topology = loss_dependence(emb1, com1, config.n)
    dependence_feature = loss_dependence(emb2, com2, config.n)
    loss_dep = (dependence_topology + dependence_feature) / 2
    # Consistency between the two common embeddings.
    loss_com = common_loss(com1, com2)

    loss = loss_class + config.beta * loss_dep + config.theta * loss_com
    acc = accuracy(output[idx_train], labels[idx_train])
    loss.backward()
forward函数,特征att
def forward(self, x, sadj, fadj):
    """Encode ``x`` on two graphs and fuse the embeddings with attention.

    Args:
        x: Node features.
        sadj: Structure (topology) graph.
        fadj: Feature graph.

    Returns:
        ``(output, att, emb1, com1, com2, emb2, emb)``: the MLP output, the
        attention values, the specific/common embeddings of both graphs and
        the fused embedding returned by ``self.attention``.
    """
    # Graph-specific encoders (SGCN1 / SGCN2) and one shared encoder (CGCN)
    # applied to both graphs.
    topo_specific = self.SGCN1(x, sadj)
    topo_common = self.CGCN(x, sadj)
    feat_common = self.CGCN(x, fadj)
    feat_specific = self.SGCN2(x, fadj)

    # Average of the two common embeddings.
    shared = (topo_common + feat_common) / 2

    # Attention over the three candidate embeddings, stacked along dim 1.
    candidates = torch.stack([topo_specific, feat_specific, shared], dim=1)
    fused, att = self.attention(candidates)

    return self.MLP(fused), att, topo_specific, topo_common, feat_common, feat_specific, fused
5.Graph Structure Learning with Variational Information Bottleneck
我们从信息论的角度提出了一种新的变分信息瓶颈引导图结构学习框架,即 VIB-GSL。 VIB-GSL 是首次尝试推进图结构学习的信息瓶颈 (IB) 原则,为挖掘底层任务相关关系提供了一个更优雅和通用的框架。
假设节点存在马尔科夫局部依赖;邻接矩阵边通过伯努利随机采样获得。网络学习伯努利参数
VIBGSL模型
def forward(self, graphs):
    """Re-learn the structure of every graph in a batch, then classify.

    Args:
        graphs: Batched graph object exposing ``num_graphs`` and
            ``to_data_list()``.

    Returns:
        ``((mu, std), logits, graphs_list, new_graphs_list)``: the parameters
        of the graph-level latent distribution, the classifier logits, the
        original per-graph list and the list of re-learned graphs.
    """
    num_sample = graphs.num_graphs
    graphs_list = graphs.to_data_list()

    # Learn a new feature matrix / adjacency for each graph separately.
    new_graphs_list = []
    for graph in graphs_list:
        x = graph.x.to(device)
        edge_index = graph.edge_index.to(device)
        raw_adj = to_dense_adj(edge_index)[0]
        new_feature, new_adj = self.learn_graph(
            node_features=x,
            graph_skip_conn=self.args.graph_skip_conn,
            graph_include_self=self.args.graph_include_self,
            init_adj=raw_adj,
        )
        new_edge_index, new_edge_attr = dense_to_sparse(new_adj)
        new_graphs_list.append(
            Data(x=new_feature, edge_index=new_edge_index, edge_attr=new_edge_attr)
        )

    # Re-batch all learned graphs into a single batch.
    loader = DataLoader(new_graphs_list, batch_size=len(new_graphs_list))
    batch_data = next(iter(loader))

    node_embs, _ = self.backbone_gnn(batch_data.x, batch_data.edge_index)
    graph_embs = global_mean_pool(node_embs, batch_data.batch)

    # First IB_size columns are the mean; the remaining columns are mapped
    # through softplus to obtain a positive std.
    mu = graph_embs[:, :self.IB_size]
    std = F.softplus(graph_embs[:, self.IB_size:] - self.IB_size, beta=1)
    new_graph_embs = self.reparametrize_n(mu, std, num_sample)

    logits = self.classifier(new_graph_embs)
    return (mu, std), logits, graphs_list, new_graphs_list
原始图作为初始,生成新图
def learn_graph(self, node_features, graph_skip_conn=None, graph_include_self=False, init_adj=None):
    """Learn a new graph and optionally blend it with the initial one.

    Args:
        node_features: Node features passed to ``self.graph_learner``.
        graph_skip_conn: Weight of ``init_adj`` in the blend. ``None`` or
            ``0.0`` disables the skip connection.
        graph_include_self: Only used when the skip connection is disabled;
            if true, the identity matrix is added to the learned adjacency.
        init_adj: Initial adjacency; must be provided when
            ``graph_skip_conn`` is a non-zero value.

    Returns:
        ``(new_feature, new_adj)`` as produced by ``self.graph_learner``,
        with ``new_adj`` post-processed as described above.
    """
    new_feature, new_adj = self.graph_learner(node_features)

    if graph_skip_conn in (0.0, None):
        if graph_include_self:
            # Create the identity on the adjacency's own device. Choosing the
            # device from torch.cuda.is_available() breaks when the model runs
            # on the CPU of a CUDA-capable machine.
            new_adj = new_adj + torch.eye(new_adj.size(0), device=new_adj.device)
    else:
        # Skip connection: convex combination of initial and learned graph.
        new_adj = graph_skip_conn * init_adj + (1 - graph_skip_conn) * new_adj

    return new_feature, new_adj
图生成方式:相似度对比
def forward(self, node_features):
    """Return the (optionally masked) features and the adjacency learned from them.

    When ``self.feature_denoise`` is true the features are first passed
    through ``self.mask_feature``; the adjacency is always computed by
    ``self.learn_adj`` on the features that are returned.
    """
    features = self.mask_feature(node_features) if self.feature_denoise else node_features
    return features, self.learn_adj(features)
6.SLAPS:self-Supervision Improves Structure Learning for Graph Neural Networks
在这项工作中,我们提出了使用自监督(SLAPS)同时学习邻接和 GNN 参数,这是一种通过自监督为推断图结构提供更多监督的方法。
KNN MLP生成原始图
邻接矩阵拓扑训练:添加噪声,降噪编码器自监督
图生成方法
def get_adj(self, h):
    """Generate an adjacency from ``h`` with ``self.graph_gen``.

    In sparse mode the generator output is returned unchanged; otherwise it
    is passed through ``symmetrize`` and then ``normalize`` (using
    ``self.normalization``) before being returned.
    """
    generated = self.graph_gen(h)
    if self.sparse:
        return generated
    symmetric = symmetrize(generated)
    return normalize(symmetric, self.normalization, self.sparse)
# Select the graph generator according to `gen_mode`; each is moved to the GPU.
if gen_mode == 0:
    # FullParam generator (three different kNN variants).
    self.graph_gen = FullParam(
        features, non_linearity, k, knn_metric, self.i, sparse
    ).cuda()
elif gen_mode == 1:
    # MLP generator operating on a feature embedding.
    in_dim = features.shape[1]
    hidden_dim = math.floor(math.sqrt(features.shape[1] * self.mlp_h))
    self.graph_gen = MLP(
        2, in_dim, hidden_dim, self.mlp_h, mlp_epochs, k, knn_metric,
        self.non_linearity, self.i, self.sparse, mlp_act,
    ).cuda()
elif gen_mode == 2:
    # MLP_Diag generator (lower computational cost).
    self.graph_gen = MLP_Diag(
        2, features.shape[1], k, knn_metric, self.non_linearity, self.i,
        sparse, mlp_act,
    ).cuda()
"""
embeddings = self.internal_forward(features)
embeddings = F.normalize(embeddings, dim=1, p=2)
similarities = cal_similarity_graph(embeddings)
similarities = top_k(similarities, self.k + 1)
similarities = apply_non_linearity(similarities, self.non_linearity, self.i)
return similarities
similarity_graph = torch.mm(node_embeddings, node_embeddings.t())
"""
end-to-end 模型
两个损失函数
# Two separate Adam optimizers: optimizer1 updates model1 with the
# adjacency-specific learning rate / weight decay, optimizer2 updates model2.
optimizer1 = torch.optim.Adam(model1.parameters(), lr=args.lr_adj, weight_decay=args.w_decay_adj)
optimizer2 = torch.optim.Adam(model2.parameters(), lr=args.lr, weight_decay=args.w_decay)
# loss1 comes from the masked-feature objective on model1, which also
# returns the adjacency `Adj` that is reused below.
loss1, Adj = self.get_loss_masked_features(model1, features, mask, ogb, args.noise, args.loss)
# loss2 is the supervised loss of model2 evaluated on that adjacency.
loss2, accu = self.get_loss_learnable_adj(model2, train_mask, features, labels, Adj)
# Joint objective: loss1 weighted by args.lambda_ plus loss2.
loss = loss1 * args.lambda_ + loss2
# A single backward pass through the combined loss, then both optimizers step.
loss.backward()
optimizer1.step()
optimizer2.step()
GCN_DAE
def forward(self, x, adj_t):
    """Apply the stacked graph layers to ``x`` on a dropout-perturbed graph.

    Args:
        x: Node features.
        adj_t: Graph. In sparse mode it must expose ``edata['w']`` (edge
            weights, overwritten in place with their dropout); otherwise it
            is passed through ``self.dropout_adj``.

    Returns:
        Output of the last layer. Every earlier layer is followed by ReLU
        and feature dropout; the last layer is not.
    """
    if self.sparse:
        Adj = adj_t
        Adj.edata['w'] = F.dropout(
            Adj.edata['w'], p=self.dropout_adj_p, training=self.training
        )
    else:
        Adj = self.dropout_adj(adj_t)

    for conv in self.layers[:-1]:
        hidden = F.relu(conv(x, Adj))
        x = F.dropout(hidden, p=self.dropout, training=self.training)

    return self.layers[-1](x, Adj)
loss of feature
# Binary cross-entropy (computed from logits, averaged over all elements)
# between the predictions and the features, restricted to the entries
# selected by `indices`.
loss = F.binary_cross_entropy_with_logits(logits[indices], features[indices], reduction='mean')
GCN-C
def forward(self, x, adj_t):
    """Run the layer stack on ``x`` using a graph with dropout applied.

    Args:
        x: Node features.
        adj_t: Graph. When ``self.sparse`` is true its ``edata['w']`` edge
            weights are replaced in place by their dropout; otherwise the
            graph goes through ``self.dropout_adj``.

    Returns:
        The last layer's output. All preceding layers are followed by ReLU
        and dropout on the features.
    """
    if not self.sparse:
        Adj = self.dropout_adj(adj_t)
    else:
        Adj = adj_t
        dropped_weights = F.dropout(
            Adj.edata['w'], p=self.dropout_adj_p, training=self.training
        )
        Adj.edata['w'] = dropped_weights

    for layer in self.layers[:-1]:
        x = F.dropout(F.relu(layer(x, Adj)), p=self.dropout, training=self.training)

    final_layer = self.layers[-1]
    return final_layer(x, Adj)
LOSS
def get_loss_learnable_adj(self, model, mask, features, labels, Adj):
    """Classification loss and accuracy of ``model`` on the masked nodes.

    Args:
        model: Callable as ``model(features, Adj)`` returning class logits.
        mask: Index/mask selecting the nodes used for the loss.
        features: Node features.
        labels: Node labels.
        Adj: Adjacency handed to the model.

    Returns:
        ``(loss, accu)``: mean negative log-likelihood and the value of
        ``accuracy`` on the selected nodes.
    """
    log_probs = F.log_softmax(model(features, Adj), 1)
    selected_log_probs = log_probs[mask]
    selected_labels = labels[mask]
    nll = F.nll_loss(selected_log_probs, selected_labels, reduction='mean')
    return nll, accuracy(selected_log_probs, selected_labels)
two step
只训练GCN-DAE
knn-gcn
生成邻接矩阵
# Build the adjacency from `nearest_neighbors` (a NumPy result computed from
# the features with args.k neighbours and args.knn_metric) and move it to the GPU.
Adj = torch.from_numpy(nearest_neighbors(features, args.k, args.knn_metric)).cuda()
# Normalize it with the normalization scheme chosen on the command line.
Adj = normalize(Adj, args.normalization, args.sparse)
7.Towards Unsupervised Deep Graph Structure Learning
graph learner+model, 通过loss_gcl函数
如果是结构推断,直接生成单位矩阵,如果是结构重定义,使用原始图
# loss_gcl returns a pair; the second element is kept here as `Adj`.
loss, Adj = self.loss_gcl(model, graph_learner, features, anchor_adj)
锚点图+特征掩码后–》model 获得z1
特征–》graph learner–》learned adj +特征–》z2
z1,z2无监督对比
def calc_loss(x, x_aug, temperature=0.2, sym=True):
    """Contrastive loss between two views with row-aligned positive pairs.

    Args:
        x: 2-D embeddings of the first view, one row per sample.
        x_aug: 2-D embeddings of the second view; row ``i`` is the positive
            of row ``i`` of ``x``.
        temperature: Divisor applied to the cosine similarities before ``exp``.
        sym: If true, average the loss normalized over columns (``dim=0``)
            and over rows (``dim=1``); otherwise use only the row direction.

    Returns:
        Scalar tensor: mean of ``-log(pos / (sum - pos))``.
    """
    batch_size, _ = x.size()

    # Pairwise cosine similarity: dot products divided by the norm products.
    dots = torch.einsum('ik,jk->ij', x, x_aug)
    norm_products = torch.einsum('i,j->ij', x.norm(dim=1), x_aug.norm(dim=1))
    sim_matrix = torch.exp(dots / norm_products / temperature)

    # Positive pairs sit on the diagonal.
    pos_sim = sim_matrix[range(batch_size), range(batch_size)]

    def _directional_loss(dim):
        # Negatives are all other entries summed along `dim`.
        ratio = pos_sim / (sim_matrix.sum(dim=dim) - pos_sim)
        return -torch.log(ratio).mean()

    if not sym:
        return _directional_loss(1)
    return (_directional_loss(0) + _directional_loss(1)) / 2.0
训练过程
def loss_gcl(self, model, graph_learner, features, anchor_adj):
    """Contrastive loss between the anchor-graph view and the learned-graph view.

    Args:
        model: Encoder called as ``model(features, adj, branch)``; its first
            output is the embedding used in the loss. It must also provide
            ``calc_loss``.
        graph_learner: Callable mapping ``features`` to a learned adjacency.
        features: Node features (unmasked).
        anchor_adj: Adjacency used for the anchor view.

    Returns:
        ``(loss, learned_adj)``.

    Reads the module-level ``args`` for the masking rates, the ``sparse``
    flag and the contrast batch size.
    """

    def _augment(rate):
        # Mask features when a non-zero rate is configured, else deep-copy them.
        if rate:
            feat_mask, _ = get_feat_mask(features, rate)
            return features * (1 - feat_mask)
        return copy.deepcopy(features)

    # view 1: anchor graph
    features_v1 = _augment(args.maskfeat_rate_anchor)
    z1, _ = model(features_v1, anchor_adj, 'anchor')

    # view 2: learned graph (the learner itself sees the unmasked features)
    features_v2 = _augment(args.maskfeat_rate_learner)
    learned_adj = graph_learner(features)
    if not args.sparse:
        learned_adj = normalize(symmetrize(learned_adj), 'sym', args.sparse)
    z2, _ = model(features_v2, learned_adj, 'learner')

    # compute loss
    if not args.contrast_batch_size:
        return model.calc_loss(z1, z2), learned_adj

    num_nodes = features.shape[0]
    node_idxs = list(range(num_nodes))
    # random.shuffle(node_idxs)
    # Per-batch losses are weighted by the batch's share of all nodes.
    loss = sum(
        model.calc_loss(z1[batch], z2[batch]) * (len(batch) / num_nodes)
        for batch in split_batch(node_idxs, args.contrast_batch_size)
    )
    return loss, learned_adj
图生成方法:FGP_learner——knn近邻;att:embedding similarity
# Excerpt of a learner's forward body (the enclosing `def` is not shown).
# Embed the features with the learner's own network.
embeddings = self.internal_forward(features)
# L2-normalize every row (dim=1) of the embeddings.
embeddings = F.normalize(embeddings, dim=1, p=2)
# Pairwise similarity graph of the normalized embeddings.
similarities = cal_similarity_graph(embeddings)
# Sparsify with top_k using k + 1 (presumably the extra slot accounts for
# each node's self-similarity - TODO confirm).
similarities = top_k(similarities, self.k + 1)
# Apply the configured non-linearity to the remaining similarities.
similarities = apply_non_linearity(similarities, self.non_linearity, self.i)
return similarities
相似度计算
def cal_similarity_graph(node_embeddings):
    """Return the matrix of pairwise dot products between embedding rows.

    Args:
        node_embeddings: 2-D tensor with one embedding per row.

    Returns:
        ``node_embeddings @ node_embeddings^T``, a square matrix with one row
        and one column per embedding.
    """
    return node_embeddings.mm(node_embeddings.t())