2.1.2.1 神经网络模型搭建与训练
神经网络主要分为编码器、传播层、聚合器三个部分,下面分别对每个部分的结构和具体实现进行介绍:
1、网络编码器部分:
网络每次输入一对相同(正例)或者不相同(反例)的图,编码器负责将Genius提取出的这两个图的结点初始表示 $x_i$ 、边初始表示 $x_{ij}$ 进行编码,输出32维的结点向量 $h_i^{(0)}$ 和16维的边向量 $e_{ij}$ 。这个过程由两个多层感知机 $\mathrm{MLP}_{\mathrm{node}}$ 和 $\mathrm{MLP}_{\mathrm{edge}}$ 完成:
$$h_i^{(0)} = \mathrm{MLP}_{\mathrm{node}}(x_i), \quad \forall i \in V$$
$$e_{ij} = \mathrm{MLP}_{\mathrm{edge}}(x_{ij}), \quad \forall (i,j) \in E$$
其中 $\mathrm{MLP}_{\mathrm{node}}$ 、 $\mathrm{MLP}_{\mathrm{edge}}$ 均没有隐藏层,也没有激活函数,相当于向量空间的线性映射。实现时,我们定义了继承torch.nn.Module类的GraphEncoder类,主要方法如下:
构造函数__init__:记录初始结点表示与初始边表示的维度,以及编码后(即传播过程中使用的)结点向量与边向量的维度,并调用_build_model方法进行具体模型的构建。
def __init__(self,
             node_feature_dim,
             edge_feature_dim,
             node_hidden_sizes=None,
             edge_hidden_sizes=None,
             name='graph-encoder'):
    super(GraphEncoder, self).__init__()
    # this also handles the case of an empty list
    self._node_feature_dim = node_feature_dim
    self._edge_feature_dim = edge_feature_dim
    self._node_hidden_sizes = node_hidden_sizes if node_hidden_sizes else None
    self._edge_hidden_sizes = edge_hidden_sizes
    self._build_model()
_build_model函数:定义并构造GraphEncoder对象的两个成员变量:MLP1(即 $\mathrm{MLP}_{\mathrm{node}}$ )与MLP2(即 $\mathrm{MLP}_{\mathrm{edge}}$ ):
def _build_model(self):
    layer = []
    layer.append(nn.Linear(self._node_feature_dim, self._node_hidden_sizes[0]))
    for i in range(1, len(self._node_hidden_sizes)):
        layer.append(nn.ReLU())
        layer.append(nn.Linear(self._node_hidden_sizes[i - 1], self._node_hidden_sizes[i]))
    self.MLP1 = nn.Sequential(*layer)
    if self._edge_hidden_sizes is not None:
        layer = []
        layer.append(nn.Linear(self._edge_feature_dim, self._edge_hidden_sizes[0]))
        for i in range(1, len(self._edge_hidden_sizes)):
            layer.append(nn.ReLU())
            layer.append(nn.Linear(self._edge_hidden_sizes[i - 1], self._edge_hidden_sizes[i]))
        self.MLP2 = nn.Sequential(*layer)
    else:
        self.MLP2 = None
前向传播函数forward:在整个网络进行前向传播时,接收结点与边的初始表示,完成编码器部分的计算,返回结点与边的向量
def forward(self, node_features, edge_features=None):
    if self._node_hidden_sizes is None:
        node_outputs = node_features
    else:
        node_outputs = self.MLP1(node_features)
    if edge_features is None or self._edge_hidden_sizes is None:
        edge_outputs = edge_features
    else:
        edge_outputs = self.MLP2(edge_features)
    return node_outputs, edge_outputs
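下面给出GraphEncoder的一个最小调用示意。其中结点初始特征维数8、边初始特征维数4以及结点、边的数量均为演示用的假设值,实际维数由Genius提取出的ACFG特征决定;32与16则对应上文所述的结点向量与边向量维度:
import torch

# 假设上文定义的 GraphEncoder 类已可用
encoder = GraphEncoder(node_feature_dim=8,      # 假设的结点初始特征维数
                       edge_feature_dim=4,      # 假设的边初始特征维数
                       node_hidden_sizes=[32],  # 无隐藏层, 线性映射到 32 维结点向量 h_i^(0)
                       edge_hidden_sizes=[16])  # 线性映射到 16 维边向量 e_ij

x_node = torch.randn(10, 8)   # 10 个结点的初始表示 x_i
x_edge = torch.randn(20, 4)   # 20 条边的初始表示 x_ij
h0, e = encoder(x_node, x_edge)
print(h0.shape, e.shape)      # torch.Size([10, 32]) torch.Size([20, 16])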
2、网络传播层部分:
传播层由五个结构相同的layer迭代构成,第 $t$ 层( $1 \le t \le 5$ )将结点表示 $h_i^{(t)}$ 更新为 $h_i^{(t+1)}$ ,更新过程表示如下:
$$m_{j \to i} = f_{\mathrm{message}}\left(h_i^{(t)}, h_j^{(t)}, e_{ij}\right), \quad \forall (i,j) \in E_1 \cup E_2$$
$$\mu_{j \to i} = f_{\mathrm{match}}\left(h_i^{(t)}, h_j^{(t)}\right), \quad \forall i \in V_1, j \in V_2 \ \text{或} \ i \in V_2, j \in V_1$$
$$h_i^{(t+1)} = f_{\mathrm{node}}\left(h_i^{(t)}, \sum_j m_{j \to i}, \sum_{j'} \mu_{j' \to i}\right)$$
其中 $f_{\mathrm{message}}$ 为一个MLP,负责计算结点 $i$ 的图内信息传播, $m_{j \to i}$ 为图内结点 $j$ 向结点 $i$ 传递的信息; $f_{\mathrm{match}}$ 先通过 $h_i^{(t)}$ 与 $h_j^{(t)}$ 的欧几里得距离计算结点 $j$ 与结点 $i$ 的相似性(结点 $j$ 与结点 $i$ 不在同一个图中),再通过softmax函数对相似性进行归一化,最终输出的 $\mu_{j \to i}$ 为结点 $j$ 向结点 $i$ 传递的跨图比较信息,可以评价图间结点的相似度: $i$ 与 $j$ 越相似, $\mu_{j \to i}$ 越小; $f_{\mathrm{node}}$ 为一个GRU单元,由上一层输出的结点表征 $h_i^{(t)}$ 、结点 $i$ 的图内信息聚合 $\sum_j m_{j \to i}$ 、结点 $i$ 的图间信息聚合 $\sum_{j'} \mu_{j' \to i}$ 计算本层输出的结点表征 $h_i^{(t+1)}$ 。
这个更新过程体现了GMN相对于Gemini的优势:不仅考虑了结点 $i$ 所在图中邻边与邻接结点的信息传播,还考虑了另一图中所有结点与结点 $i$ 的比较信息。
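为便于理解 $f_{\mathrm{match}}$ 的计算,下面给出跨图匹配部分的一个简化示意实现。其中以负的欧氏距离平方作为相似度再做softmax归一化,与上文"先计算欧几里得距离、再归一化"的描述一致;函数名cross_graph_match为示意用的假设命名,实际实现细节可能与此不同:
import torch

def cross_graph_match(h_x, h_y):
    """跨图匹配信息的简化示意(假设的函数, 仅说明原理)。

    h_x: 图1的结点向量, 形状 [n1, d]
    h_y: 图2的结点向量, 形状 [n2, d]
    返回图1中每个结点汇总后的跨图匹配信息 sum_j mu_{j->i}, 形状 [n1, d]
    """
    # 以负的欧氏距离平方作为相似度 s_ij
    sim = -torch.cdist(h_x, h_y, p=2) ** 2   # [n1, n2]
    # softmax 归一化得到注意力权重 a_{j->i}
    a = torch.softmax(sim, dim=1)
    # sum_j mu_{j->i} = h_i - sum_j a_{j->i} * h_j, 两图结点越匹配, 该值越接近零向量
    return h_x - a @ h_y

# 示例: 两个图分别有 5 个和 7 个结点, 结点向量 32 维
mu = cross_graph_match(torch.randn(5, 32), torch.randn(7, 32))
print(mu.shape)  # torch.Size([5, 32])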
对于传播层,我们定义了两个类:GraphEmbeddingNet类继承torch.nn.Module,实现基础的图嵌入(即Gemini式)功能;GraphMatchingNet类继承GraphEmbeddingNet,在其基础上实现传播过程中的图间信息传递。
GraphEmbeddingNet主要数据成员和方法如下。__init__构造函数:接收GraphEncoder和GraphAggregator类对象,并传入结点表示维度、边表示维度、各MLP的隐藏层维度、传播层数等重要参数,最后调用模型构造成员函数build_model():
def __init__(self,
             encoder,
             aggregator,
             node_state_dim,
             edge_state_dim,
             edge_hidden_sizes,
             node_hidden_sizes,
             n_prop_layers,
             share_prop_params=False,
             edge_net_init_scale=0.1,
             node_update_type='residual',
             use_reverse_direction=True,
             reverse_dir_param_different=True,
             layer_norm=False,
             layer_class=GraphPropLayer,
             prop_type='embedding',
             name='graph-embedding-net'):
    super(GraphEmbeddingNet, self).__init__()
    self._encoder = encoder
    self._aggregator = aggregator
    self._node_state_dim = node_state_dim
    self._edge_state_dim = edge_state_dim
    self._edge_hidden_sizes = edge_hidden_sizes
    self._node_hidden_sizes = node_hidden_sizes
    self._n_prop_layers = n_prop_layers
    self._share_prop_params = share_prop_params
    self._edge_net_init_scale = edge_net_init_scale
    self._node_update_type = node_update_type
    self._use_reverse_direction = use_reverse_direction
    self._reverse_dir_param_different = reverse_dir_param_different
    self._layer_norm = layer_norm
    self._prop_layers = nn.ModuleList()
    self._layer_class = layer_class
    self._prop_type = prop_type
    self.build_model()
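按上文所述的维度(32维结点状态、16维边状态、5层传播层)以及后文打印出的网络结构,用该构造函数实例化网络的方式大致如下。其中encoder、aggregator分别为前文示例中的GraphEncoder对象和后文介绍的GraphAggregator对象,各隐藏层维度为根据打印结构推断的示意值;工程中实际使用的GraphMatchingNet继承自GraphEmbeddingNet,构造方式与此类似:
# 实例化示意(参数取值为示意, 以实际工程配置为准)
model = GraphEmbeddingNet(
    encoder=encoder,                     # GraphEncoder 对象
    aggregator=aggregator,               # GraphAggregator 对象
    node_state_dim=32,                   # 传播过程中的结点向量维度
    edge_state_dim=16,                   # 边向量维度
    edge_hidden_sizes=[64, 64],          # f_message 各层维度, 对应打印结构中的 80->64->64
    node_hidden_sizes=[64],              # 仅在 node_update_type 为 MLP 更新时使用
    n_prop_layers=5,                     # 5 层传播层
    node_update_type='gru',              # f_node 使用 GRU
    layer_class=GraphPropMatchingLayer,  # 带跨图匹配的传播层
    prop_type='matching')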
build_model函数:调用_build_layer()进行每一层传播层的构建:
def build_model(self):
    if len(self._prop_layers) < self._n_prop_layers:
        # build the layers
        for i in range(self._n_prop_layers):
            if i == 0 or not self._share_prop_params:
                layer = self._build_layer(i)
            else:
                layer = self._prop_layers[0]
            self._prop_layers.append(layer)
_build_layer函数:调用GraphPropMatchingLayer类的构造函数(它会调用父类GraphPropLayer::__init__),对一个传播层进行构造:
def _build_layer(self, layer_id):
    """Build one layer in the network."""
    return self._layer_class(
        self._node_state_dim,
        self._edge_state_dim,
        self._edge_hidden_sizes,
        self._node_hidden_sizes,
        edge_net_init_scale=self._edge_net_init_scale,
        node_update_type=self._node_update_type,
        use_reverse_direction=self._use_reverse_direction,
        reverse_dir_param_different=self._reverse_dir_param_different,
        layer_norm=self._layer_norm,
        prop_type=self._prop_type)
        # name='graph-prop-%d' % layer_id)
GraphPropLayer::__init__:传入层内各个结构的参数,并调用GraphPropLayer::build_model对每一个传播层进行构造;若layer_norm为真,最后给网络添加LayerNorm(层归一化)层。
def __init__(self,
             node_state_dim,
             edge_state_dim,
             edge_hidden_sizes,  # int
             node_hidden_sizes,  # int
             edge_net_init_scale=0.1,
             node_update_type='residual',
             use_reverse_direction=True,
             reverse_dir_param_different=True,
             layer_norm=False,
             prop_type='embedding',
             name='graph-net'):
    super(GraphPropLayer, self).__init__()
    self._node_state_dim = node_state_dim
    self._edge_state_dim = edge_state_dim
    self._edge_hidden_sizes = edge_hidden_sizes[:]
    # output size is node_state_dim
    self._node_hidden_sizes = node_hidden_sizes[:] + [node_state_dim]
    self._edge_net_init_scale = edge_net_init_scale
    self._node_update_type = node_update_type
    self._use_reverse_direction = use_reverse_direction
    self._reverse_dir_param_different = reverse_dir_param_different
    self._layer_norm = layer_norm
    self._prop_type = prop_type
    self.build_model()
    if self._layer_norm:
        # nn.LayerNorm 需要给定归一化的维度; 此处假设 layer_norm1 作用于聚合后的消息向量,
        # layer_norm2 作用于更新后的结点向量
        self.layer_norm1 = nn.LayerNorm(self._edge_hidden_sizes[-1])
        self.layer_norm2 = nn.LayerNorm(self._node_state_dim)
GraphPropLayer::build_model:此函数对每个传播层的 $f_{\mathrm{message}}$ (MLP)和 $f_{\mathrm{node}}$ (GRU)分别进行构建,并在GMN论文方法的基础上增加了 $f_{\mathrm{message}}^{\mathrm{reverse}}$ 模块。因为我们提取的ACFG是有向图,所以对于结点 $i$ , $f_{\mathrm{message}}$ 只会计算 $m_{j \to i}$ ( $j$ 为出结点, $i$ 为入结点)。但实际上,结点 $i$ 的特征不仅由入边决定,还由出边决定,所以我们在传播过程中增加了反向信息计算模块 $f_{\mathrm{message}}^{\mathrm{reverse}}$ ,该模块对结点 $i$ 计算的是 $m_{j \to i}$ ( $i$ 为出结点, $j$ 为入结点)。我们将 $f_{\mathrm{message}}^{\mathrm{reverse}}$ 与 $f_{\mathrm{message}}$ 的值简单相加,作为边 $(i,j)$ 或 $(j,i)$ 对结点 $i$ 的总信息传递。
此函数构建的网络结构如下:
ModuleList(
  (0): GraphPropMatchingLayer(
    (_message_net): Sequential(
      (0): Linear(in_features=80, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
    )
    (_reverse_message_net): Sequential(
      (0): Linear(in_features=80, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
    )
    (GRU): GRU(96, 32)
  )
  (1): GraphPropMatchingLayer(
    (_message_net): Sequential(
      (0): Linear(in_features=80, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
    )
    (_reverse_message_net): Sequential(
      (0): Linear(in_features=80, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
    )
    (GRU): GRU(96, 32)
  )
  (2): GraphPropMatchingLayer(
    (_message_net): Sequential(
      (0): Linear(in_features=80, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
    )
    (_reverse_message_net): Sequential(
      (0): Linear(in_features=80, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
    )
    (GRU): GRU(96, 32)
  )
  (3): GraphPropMatchingLayer(
    (_message_net): Sequential(
      (0): Linear(in_features=80, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
    )
    (_reverse_message_net): Sequential(
      (0): Linear(in_features=80, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
    )
    (GRU): GRU(96, 32)
  )
  (4): GraphPropMatchingLayer(
    (_message_net): Sequential(
      (0): Linear(in_features=80, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
    )
    (_reverse_message_net): Sequential(
      (0): Linear(in_features=80, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
    )
    (GRU): GRU(96, 32)
  )
)
GraphPropLayer::build_model源码如下:
def build_model(self):
    layer = []
    layer.append(nn.Linear(self._node_state_dim * 2 + self._edge_state_dim, self._edge_hidden_sizes[0]))
    for i in range(1, len(self._edge_hidden_sizes)):
        layer.append(nn.ReLU())
        layer.append(nn.Linear(self._edge_hidden_sizes[i - 1], self._edge_hidden_sizes[i]))
    self._message_net = nn.Sequential(*layer)
    # optionally compute message vectors in the reverse direction
    if self._use_reverse_direction:
        if self._reverse_dir_param_different:
            layer = []
            layer.append(nn.Linear(self._node_state_dim * 2 + self._edge_state_dim, self._edge_hidden_sizes[0]))
            for i in range(1, len(self._edge_hidden_sizes)):
                layer.append(nn.ReLU())
                layer.append(nn.Linear(self._edge_hidden_sizes[i - 1], self._edge_hidden_sizes[i]))
            self._reverse_message_net = nn.Sequential(*layer)
        else:
            self._reverse_message_net = self._message_net
    if self._node_update_type == 'gru':
        if self._prop_type == 'embedding':
            self.GRU = torch.nn.GRU(self._node_state_dim * 2, self._node_state_dim)
        elif self._prop_type == 'matching':
            self.GRU = torch.nn.GRU(self._node_state_dim * 3, self._node_state_dim)
    else:
        layer = []
        if self._prop_type == 'embedding':
            layer.append(nn.Linear(self._node_state_dim * 3, self._node_hidden_sizes[0]))
        elif self._prop_type == 'matching':
            layer.append(nn.Linear(self._node_state_dim * 4, self._node_hidden_sizes[0]))
        for i in range(1, len(self._node_hidden_sizes)):
            layer.append(nn.ReLU())
            layer.append(nn.Linear(self._node_hidden_sizes[i - 1], self._node_hidden_sizes[i]))
        self.MLP = nn.Sequential(*layer)
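build_model只负责构建 $f_{\mathrm{message}}$ 、 $f_{\mathrm{message}}^{\mathrm{reverse}}$ 与GRU等子模块,正向/反向消息的计算、按结点求和以及GRU结点更新发生在传播层的前向计算中。下面用一个简化的示意函数说明这一计算流程(prop_step为假设的函数名;实际实现中按结点求和由unsorted_segment_sum等辅助函数完成,此处用index_add代替):
import torch

def prop_step(layer, node_states, from_idx, to_idx, edge_features, cross_graph_msg):
    """单层传播的简化示意(假设的函数, 仅说明计算流程)。

    layer:           一个已构建好的传播层(含 _message_net、_reverse_message_net、GRU)
    node_states:     [n_nodes, 32]  当前结点表示 h^(t)
    from_idx/to_idx: [n_edges]      每条有向边的出结点 / 入结点下标(LongTensor)
    edge_features:   [n_edges, 16]  边向量 e_ij
    cross_graph_msg: [n_nodes, 32]  跨图匹配信息 sum_j mu_{j->i}
    """
    h_from, h_to = node_states[from_idx], node_states[to_idx]
    n_nodes = node_states.shape[0]

    # 正向消息: 沿边方向 j->i, 聚合到入结点 to_idx
    msg = layer._message_net(torch.cat([h_from, h_to, edge_features], dim=-1))
    # 反向消息: 逆边方向, 聚合到出结点 from_idx
    rev = layer._reverse_message_net(torch.cat([h_to, h_from, edge_features], dim=-1))

    # 图内入边消息按结点求和, 并与反向消息简单相加
    agg = torch.zeros(n_nodes, msg.shape[-1]).index_add(0, to_idx, msg)
    agg = agg.index_add(0, from_idx, rev)

    # GRU 结点更新: 输入为 [图内聚合消息(本文配置下为64维), 跨图匹配信息(32维)], 隐状态为 h^(t)
    gru_input = torch.cat([agg, cross_graph_msg], dim=-1).unsqueeze(0)  # [1, n_nodes, 96]
    _, h_next = layer.GRU(gru_input, node_states.unsqueeze(0))
    return h_next.squeeze(0)  # h^(t+1), 形状 [n_nodes, 32]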
3、网络聚合器部分:
网络的传播层对图中结点接收的图内信息、图间比较信息进行了聚合和迭代,以得到合适的结点向量,使得相似的结点在向量空间内的距离小于不相似的结点;而聚合器负责将图内这些结点向量所表示的信息进行聚合,得到合适的图嵌入,使得相似图的嵌入距离较小,结构差距较大的图的嵌入距离较大。
聚合器计算公式如下:
$$h_G = \mathrm{MLP}_G\left(\sum_{i \in V} \sigma\left(\mathrm{MLP}_{\mathrm{gate}}\left(h_i^{(5)}\right)\right) \odot \mathrm{MLP}\left(h_i^{(5)}\right)\right)$$
其中 $h_i^{(5)}$ 为经过传播层五轮迭代后,图 $G$ 内结点 $i$ 的向量表示。 $\mathrm{MLP}$ 各层维度为[32, 256],对各结点向量进行线性的维度映射; $\sigma$ 是Sigmoid函数, $\sigma(\mathrm{MLP}_{\mathrm{gate}}(h_i^{(5)}))$ 是门限向量; $\odot$ 表示向量按元素相乘。 $\sum_{i \in V} \sigma(\mathrm{MLP}_{\mathrm{gate}}(h_i^{(5)})) \odot \mathrm{MLP}(h_i^{(5)})$ 本质上是对 $\mathrm{MLP}(h_i^{(5)})$ 进行加权求和:利用Sigmoid函数的S形曲线以及在正负无穷处趋于饱和的性质,对结点向量中数值较小的维度进行限制与过滤,保留数值较大的维度,从而尽可能滤除无关信息。 $\mathrm{MLP}_G$ 各层维度为[256, 128],负责将图 $G$ 的结点信息加权和(256维)线性映射到图向量的维度(128维)。
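下面用几行PyTorch代码示意这一门控加权求和的计算过程,其中gate_mlp、content_mlp、graph_mlp为示意用的假设命名,维度按上文描述取值:
import torch
import torch.nn as nn

gate_mlp = nn.Linear(32, 256)     # 对应公式中的 MLP_gate
content_mlp = nn.Linear(32, 256)  # 对应公式中的 MLP
graph_mlp = nn.Linear(256, 128)   # 对应公式中的 MLP_G

h = torch.randn(10, 32)               # 图 G 中 10 个结点的 h_i^(5)
gates = torch.sigmoid(gate_mlp(h))    # 门限向量, 取值在 (0, 1) 之间
gated = gates * content_mlp(h)        # 按元素相乘, 过滤数值较小的维度
h_G = graph_mlp(gated.sum(dim=0))     # 先对图内结点求和, 再线性映射到 128 维图嵌入
print(h_G.shape)                      # torch.Size([128])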
我们定义了GraphAggregator(nn.Module)类来构建聚合器,其主要数据成员和方法如下:
GraphAggregator::__init__:构造函数,对聚合器对象的各MLP输入维度、聚合方式等数据成员进行赋值,并调用GraphAggregator::build_model()。
def __init__(self,
             node_hidden_sizes,
             graph_transform_sizes=None,
             input_size=None,
             gated=True,
             aggregation_type='sum',
             name='graph-aggregator'):
    super(GraphAggregator, self).__init__()
    self._node_hidden_sizes = node_hidden_sizes
    self._graph_transform_sizes = graph_transform_sizes
    self._graph_state_dim = node_hidden_sizes[-1]
    self._input_size = input_size
    # The last element is the size of the aggregated graph representation.
    self._gated = gated
    self._aggregation_type = aggregation_type
    self._aggregation_op = None
    self.MLP1, self.MLP2 = self.build_model()
GraphAggregator::build_model:构建 $\mathrm{MLP}$ (即MLP1)和 $\mathrm{MLP}_G$ (即MLP2):
def build_model(self):
    node_hidden_sizes = self._node_hidden_sizes
    if self._gated:
        node_hidden_sizes[-1] = self._graph_state_dim * 2
    layer = []
    layer.append(nn.Linear(self._input_size[0], node_hidden_sizes[0]))
    for i in range(1, len(node_hidden_sizes)):
        layer.append(nn.ReLU())
        layer.append(nn.Linear(node_hidden_sizes[i - 1], node_hidden_sizes[i]))
    MLP1 = nn.Sequential(*layer)
    if (self._graph_transform_sizes is not None and
            len(self._graph_transform_sizes) > 0):
        layer = []
        layer.append(nn.Linear(self._graph_state_dim, self._graph_transform_sizes[0]))
        for i in range(1, len(self._graph_transform_sizes)):
            layer.append(nn.ReLU())
            layer.append(nn.Linear(self._graph_transform_sizes[i - 1], self._graph_transform_sizes[i]))
        MLP2 = nn.Sequential(*layer)
    else:
        # 若不需要对图向量做进一步变换, 则不构建 MLP2
        MLP2 = None
    return MLP1, MLP2
GraphAggregator::forward:进行聚合器的前向传播
def forward(self, node_states, graph_idx, n_graphs):
    node_states_g = self.MLP1(node_states)
    if self._gated:
        gates = torch.sigmoid(node_states_g[:, :self._graph_state_dim])
        node_states_g = node_states_g[:, self._graph_state_dim:] * gates
    graph_states = unsorted_segment_sum(node_states_g, graph_idx, n_graphs)
    if self._aggregation_type == 'max':
        # reset everything that's smaller than -1e5 to 0.
        graph_states *= (graph_states > -1e5).float()
    # transform the reduced graph states further
    if (self._graph_transform_sizes is not None and
            len(self._graph_transform_sizes) > 0):
        graph_states = self.MLP2(graph_states)
    return graph_states
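forward中用到的unsorted_segment_sum负责按graph_idx把属于同一张图的结点向量求和,对应公式中的 $\sum_{i \in V}$ 。该辅助函数的实现不在本节展示的代码中,下面给出一个基于scatter_add_的最小实现示意,仅供理解其作用:
import torch

def unsorted_segment_sum(data, segment_ids, num_segments):
    """按 segment_ids(此处即结点所属图的编号)对 data 的行向量分组求和的示意实现。

    data:         [n_nodes, d] 门控后的结点向量
    segment_ids:  [n_nodes]    每个结点所属图的编号(0 .. num_segments-1)
    num_segments: 图的数量
    """
    result = torch.zeros(num_segments, data.shape[1], dtype=data.dtype, device=data.device)
    index = segment_ids.unsqueeze(-1).expand(-1, data.shape[1])
    return result.scatter_add_(0, index, data)

# 示例: 5 个结点分属 2 张图
data = torch.ones(5, 3)
ids = torch.tensor([0, 0, 1, 1, 1])
print(unsorted_segment_sum(data, ids, 2))
# tensor([[2., 2., 2.],
#         [3., 3., 3.]])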