DGL学习笔记01-DGL at a Glance
0 教程问题描述
该教程基于“Zachary的空手道俱乐部”问题。空手道俱乐部是一个社交网络,包括34个成员,并在俱乐部外有互动的成员之间建立成对链接。俱乐部随后分为两个社区,分别由教员(节点0)和俱乐部主席(节点33)领导。网络以如下方式可视化,并用颜色表示所属社区:
我们的任务是预测每个成员倾向于加入哪个社区(community)(0或33)。
1 创建图
import dgl
import numpy as np
def build_graph():
    """Build the Zachary karate-club graph as a DGL graph.

    The 78 (src, dst) pairs below are listed once; each pair is then added
    in both directions, so the returned graph holds 156 directed edges over
    node IDs 0-33.

    Returns:
        The graph object produced by ``dgl.graph((u, v))``.
    """
    src = np.array([1, 2, 2, 3, 3, 3, 4, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 10, 10,
                    10, 11, 12, 12, 13, 13, 13, 13, 16, 16, 17, 17, 19, 19, 21, 21,
                    25, 25, 27, 27, 27, 28, 29, 29, 30, 30, 31, 31, 31, 31, 32, 32,
                    32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33,
                    33, 33, 33, 33, 33, 33, 33, 33, 33, 33])
    dst = np.array([0, 0, 1, 0, 1, 2, 0, 0, 0, 4, 5, 0, 1, 2, 3, 0, 2, 2, 0, 4,
                    5, 0, 0, 3, 0, 1, 2, 3, 5, 6, 0, 1, 0, 1, 0, 1, 23, 24, 2, 23,
                    24, 2, 23, 26, 1, 8, 0, 24, 25, 28, 2, 8, 14, 15, 18, 20, 22, 23,
                    29, 30, 31, 8, 9, 13, 14, 15, 18, 19, 20, 22, 23, 26, 27, 28, 29, 30,
                    31, 32])
    # Append the reversed pairs so every link exists in both directions.
    u = np.concatenate([src, dst])
    v = np.concatenate([dst, src])
    return dgl.graph((u, v))


# Backward-compatible alias for the original, misspelled function name.
# The rest of this tutorial calls build_graph().
build_kerate_clud_graph = build_graph
注:np.concatenate([src, dst])是numpy中连接两个向量的操作。比如src= [1,2,3], dst = [3,1,2],那么u = np.concatenate([src, dst]) = [1,2,3,3,1,2],v = np.concatenate([dst, src]) = [3,1,2,1,2,3],我们把它们放在一起看
u = [1,2,3,3,1,2]
v = [3,1,2,1,2,3]
于是,它们的每一列 (u[i], v[i]) 就可以看成一条从 u[i] 指向 v[i] 的边。例如第一列表示边 1→3,第四列表示与之方向相反的边 3→1。
2 输出图的一些信息
# Build the graph and print its node and edge counts.
G = build_graph()
print("结点个数: ", G.number_of_nodes())  # label reads "number of nodes"
print("边的个数: ", G.number_of_edges())  # label reads "number of edges"
至此,一个极简单的图就创建好了。但是这个图还没有节点的特征和边的特征信息,后面的学习中我们会慢慢加进去。接下来,我们通过networkx和matplotlib将这个图进行可视化。
import networkx as nx
import matplotlib.pyplot as plt
# Convert the DGL graph to a networkx graph, then to an undirected one for drawing.
nx_G = G.to_networkx().to_undirected()
# Compute node positions with networkx's Kamada-Kawai layout function.
pos = nx.kamada_kawai_layout(nx_G)
# Draw with node labels; every node gets the same colour value [.7, .7, .7].
nx.draw(nx_G, pos, with_labels=True, node_color=[[.7, .7, .7]])
plt.show()
3 为节点和边添加特征
import torch.nn as nn
embed = nn.Embedding(34, 5)  # 34 nodes, each with a 5-dimensional feature
G.ndata['feat'] = embed.weight  # attach the features to the graph object under key 'feat'
将特征添加到graph对象中后,我们可以通过 G.ndata['feat'][i] 来访问编号为 i 的节点的特征(编号从 0 开始,因此“第2个节点”对应下标 1)
# Print the feature of the 2nd node (index 1)
print(G.ndata['feat'][1])
# Print the features of the 10th and 11th nodes (indices 9 and 10)
print(G.ndata['feat'][[9, 10]])
同样地,还可以通过edata来添加边的信息
# Edge features are attached through edata, just as node features go through ndata.
edge_feats = nn.Embedding(G.number_of_edges(), 2)  # one 2-dimensional vector per edge
G.edata['efeat'] = edge_feats.weight
# Print the feature of the 1st edge (index 0).
# The key must match the one stored above: 'efeat', not the misspelled 'efeta'.
print(G.edata['efeat'][0])
官方文档的后面就是关于如何定义图卷积层以及训练样本,但是我们现在初学就先不看了,反正后面也会学到。
源码:
import dgl
import numpy as np
import torch
import torch.nn as nn
import networkx as nx
import matplotlib.pyplot as plt
def build_graph():
    """Return the karate-club network as a DGL graph with edges in both directions.

    Each of the 78 listed (tail, head) pairs is inserted twice, once per
    direction, giving 156 directed edges over node IDs 0-33.
    """
    tails = np.array([
        1, 2, 2, 3, 3, 3, 4, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 10, 10,
        10, 11, 12, 12, 13, 13, 13, 13, 16, 16, 17, 17, 19, 19, 21, 21,
        25, 25, 27, 27, 27, 28, 29, 29, 30, 30, 31, 31, 31, 31, 32, 32,
        32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33,
        33, 33, 33, 33, 33, 33, 33, 33, 33, 33])
    heads = np.array([
        0, 0, 1, 0, 1, 2, 0, 0, 0, 4, 5, 0, 1, 2, 3, 0, 2, 2, 0, 4,
        5, 0, 0, 3, 0, 1, 2, 3, 5, 6, 0, 1, 0, 1, 0, 1, 23, 24, 2, 23,
        24, 2, 23, 26, 1, 8, 0, 24, 25, 28, 2, 8, 14, 15, 18, 20, 22, 23,
        29, 30, 31, 8, 9, 13, 14, 15, 18, 19, 20, 22, 23, 26, 27, 28, 29, 30,
        31, 32])
    # First half: tails -> heads. Second half: the same pairs reversed.
    sources = np.concatenate([tails, heads])
    targets = np.concatenate([heads, tails])
    edge_pair = (sources, targets)
    return dgl.graph(edge_pair)
# Build the graph and print its node and edge counts.
G = build_graph()
print("结点个数: ", G.number_of_nodes())  # label reads "number of nodes"
print("边的个数: ", G.number_of_edges())  # label reads "number of edges"
# Convert the DGL graph to a networkx graph, then to an undirected one for drawing.
nx_G = G.to_networkx().to_undirected()
# Compute node positions with networkx's Kamada-Kawai layout function.
pos = nx.kamada_kawai_layout(nx_G)
# Draw with node labels; every node gets the same colour value [.7, .7, .7].
nx.draw(nx_G, pos, with_labels=True, node_color=[[.7, .7, .7]])
plt.show()
# One learnable 5-dimensional feature per node. The row count is taken from the
# graph (34 for this graph) rather than hard-coded, so it always matches the
# number of nodes.
embed = nn.Embedding(G.number_of_nodes(), 5)
G.ndata['feat'] = embed.weight  # attach the features to the graph object under key 'feat'
# Print the feature of the 3rd node (index 2)
print(G.ndata['feat'][2])
# Print the features of the 10th and 11th nodes (indices 9 and 10)
print(G.ndata['feat'][[9, 10]])
# One learnable 2-dimensional feature per edge, stored under key 'efeat'.
edges_feats = nn.Embedding(G.number_of_edges(), 2)
G.edata['efeat'] = edges_feats.weight
# Print the feature of the 1st edge (index 0)
print(G.edata['efeat'][0])