I recently read and studied the Graph Kernel Network paper in detail. The formulas in the paper are easy to follow, and the authors also released the source code, but some symbols and custom functions in the code are not commented. By tracing the code back to the formulas in the paper, I have added some annotations, offered here for reference. Let us start with the theory.
Equation (7) transforms the dimension of the initial data, equation (8) is the kernel integral iteration, and (9) is the output transformation. Equation (8) is implemented via (10) below.
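The equations appeared as images in the original post; paraphrasing them from the paper in my own notation (so treat the exact form as a sketch, keeping the post's numbering), they are roughly:

$$v_0(x) = P\,a(x) + p \qquad (7)$$

a pointwise linear map lifting the d = 6 node features to d' = 64;

$$v_{t+1}(x) = \sigma\Big(W v_t(x) + \big(\mathcal{K}(a;\phi)\,v_t\big)(x)\Big), \quad \big(\mathcal{K}(a;\phi)\,v_t\big)(x) = \int_D \kappa_\phi\big(x, y, a(x), a(y)\big)\,v_t(y)\,\mathrm{d}y \qquad (8)$$

$$u(x) = Q\,v_T(x) + q \qquad (9)$$

and the message-passing approximation of the integral over the neighborhood N(x_i):

$$v_{t+1}(x_i) = \sigma\Big(W v_t(x_i) + \frac{1}{|N(x_i)|}\sum_{x_j \in N(x_i)} \kappa_\phi\big(e(x_i, x_j)\big)\,v_t(x_j)\Big) \qquad (10)$$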
Now for the code. The main pieces are the following functions and classes. First, the training setup:

import torch

# hyperparameters
width = 64          # lifted node-feature dimension d'
ker_width = 1024    # hidden width of the kernel network
depth = 6           # number of kernel integral iterations
edge_features = 6   # edge-feature dimension fed into the kernel network
node_features = 6   # raw node-feature dimension d

model = KernelNN(width, ker_width, depth, edge_features, node_features).cuda()

# learning_rate, scheduler_step, scheduler_gamma are hyperparameters defined
# elsewhere in the script
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=scheduler_step, gamma=scheduler_gamma)

# LpLoss and u_normalizer come from the authors' utilities: a relative Lp loss
# and a Gaussian normalizer for the target field u
myloss = LpLoss(size_average=False)
u_normalizer.cuda()

model.train()
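For context, here is a minimal sketch of the training loop this setup feeds into. It assumes a PyTorch Geometric train_loader of Data objects with fields x, edge_index, edge_attr, y, plus the epochs and batch_size variables and the decode method of the repo's Gaussian normalizer; the actual script may differ in details.

for epoch in range(epochs):
    train_l2 = 0.0
    for batch in train_loader:
        batch = batch.to('cuda')
        optimizer.zero_grad()
        out = model(batch)  # predicted u at every node
        # un-normalize before computing the relative L2 error
        loss = myloss(u_normalizer.decode(out.view(batch_size, -1)),
                      u_normalizer.decode(batch.y.view(batch_size, -1)))
        loss.backward()
        optimizer.step()
        train_l2 += loss.item()
    scheduler.step()
    print(epoch, train_l2 / len(train_loader.dataset))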
import torch
import torch.nn.functional as F

# in_width is the dimension of the raw data points, e.g. d = 6 in the paper
# width is the lifted dimension after fc1, e.g. d' = 64
# ker_in is the dimension of the edge features fed into the kernel NN, d = 6
# ker_width = 1024 is the hidden width of the kernel NN
# depth is the number of times the kernel layer is applied
class KernelNN(torch.nn.Module):
    def __init__(self, width, ker_width, depth, ker_in, in_width=1, out_width=1):
        super(KernelNN, self).__init__()
        self.depth = depth

        # fc1 lifts the raw node features from dimension in_width (d) to width (d')
        self.fc1 = torch.nn.Linear(in_width, width)

        # Build the kernel network. The list [ker_in, ker_width, ker_width, width**2]
        # is passed to DenseNet as `layers`, i.e. an MLP with two hidden layers of
        # width ker_width; ker_in (= 6) is the input dimension of the kernel, and
        # torch.nn.ReLU is passed as `nonlinearity`, applied after each hidden layer.
        kernel = DenseNet([ker_in, ker_width, ker_width, width**2], torch.nn.ReLU)
        # a single conv layer with shared weights, applied depth times in forward()
        self.conv1 = NNConv_old(width, width, kernel, aggr='mean')

        # fc2 projects the final node features back down to dimension 1
        self.fc2 = torch.nn.Linear(width, 1)

    def forward(self, data):
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
        x = self.fc1(x)                      # eq. (7): lifting
        for k in range(self.depth):          # eq. (10): depth kernel iterations
            x = F.relu(self.conv1(x, edge_index, edge_attr))
        x = self.fc2(x)                      # eq. (9): output projection
        return x
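A quick shape check with a toy graph (the node and edge counts are hypothetical, just to illustrate the tensor shapes):

from torch_geometric.data import Data

num_nodes, num_edges = 10, 40
data = Data(
    x=torch.randn(num_nodes, 6),                       # raw node features, d = 6
    edge_index=torch.randint(0, num_nodes, (2, num_edges)),
    edge_attr=torch.randn(num_edges, 6),               # edge features for the kernel NN
)
model = KernelNN(width=64, ker_width=1024, depth=6, ker_in=6, in_width=6)
out = model(data)
print(out.shape)  # torch.Size([10, 1]) -- one scalar u(x_i) per node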
Next, the building blocks needed to implement the Kernel-NN.

self.fc1 = torch.nn.Linear(in_width, width)

From the PyTorch documentation: this applies a linear transformation to the input data, y = xA^T + b, where in_features is the size of each input sample and out_features is the size of each output sample. Note that there is no activation function here.
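Concretely, with the values used above (a minimal sketch):

fc1 = torch.nn.Linear(6, 64)   # in_width = 6, width = 64
x = torch.randn(10, 6)         # 10 nodes, d = 6
print(fc1(x).shape)            # torch.Size([10, 64]) -- lifted to d' = 64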
import torch
import torch.nn as nn

# Despite the name, this is a plain MLP (no skip connections): alternating
# Linear layers and nonlinearities, sized according to `layers`.
class DenseNet(torch.nn.Module):
    def __init__(self, layers, nonlinearity, out_nonlinearity=None, normalize=False):
        super(DenseNet, self).__init__()
        self.n_layers = len(layers) - 1
        assert self.n_layers >= 1

        self.layers = nn.ModuleList()
        for j in range(self.n_layers):
            self.layers.append(nn.Linear(layers[j], layers[j + 1]))
            # nonlinearity (and optional batch norm) after every layer but the last
            if j != self.n_layers - 1:
                if normalize:
                    self.layers.append(nn.BatchNorm1d(layers[j + 1]))
                self.layers.append(nonlinearity())
        if out_nonlinearity is not None:
            self.layers.append(out_nonlinearity())

    def forward(self, x):
        for _, l in enumerate(self.layers):
            x = l(x)
        return x
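With the arguments used in KernelNN, the kernel network maps each edge-feature vector to a flattened d' x d' matrix. A quick check (the edge count is hypothetical):

kernel = DenseNet([6, 1024, 1024, 64**2], torch.nn.ReLU)
e = torch.randn(40, 6)     # 40 edges, 6 features each
print(kernel(e).shape)     # torch.Size([40, 4096]) -- later reshaped to (40, 64, 64)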
The following is the NNConv code from PyTorch Geometric (the `_old` copy kept in the authors' repository); the module is initialized with an input dimension, an output dimension, and NN (a neural network).

import torch
from torch.nn import Parameter
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.nn.inits import reset, uniform


class NNConv_old(MessagePassing):
    r"""The continuous kernel-based convolutional operator from the
    `"Neural Message Passing for Quantum Chemistry"
    <https://arxiv.org/abs/1704.01212>`_ paper.

    This convolution is also known as the edge-conditioned convolution from the
    `"Dynamic Edge-Conditioned Filters in Convolutional Neural Networks on
    Graphs" <https://arxiv.org/abs/1704.02901>`_ paper (see
    :class:`torch_geometric.nn.conv.ECConv` for an alias):

    .. math::
        \mathbf{x}^{\prime}_i = \mathbf{\Theta} \mathbf{x}_i +
        \sum_{j \in \mathcal{N}(i)} \mathbf{x}_j \cdot
        h_{\mathbf{\Theta}}(\mathbf{e}_{i,j}),

    where :math:`h_{\mathbf{\Theta}}` denotes a neural network, *i.e.*
    a MLP.

    Args:
        in_channels (int): Size of each input sample.
        out_channels (int): Size of each output sample.
        nn (torch.nn.Module): A neural network :math:`h_{\mathbf{\Theta}}` that
            maps edge features :obj:`edge_attr` of shape :obj:`[-1,
            num_edge_features]` to shape
            :obj:`[-1, in_channels * out_channels]`, *e.g.*, defined by
            :class:`torch.nn.Sequential`.
        aggr (string, optional): The aggregation scheme to use
            (:obj:`"add"`, :obj:`"mean"`, :obj:`"max"`).
            (default: :obj:`"add"`)
        root_weight (bool, optional): If set to :obj:`False`, the layer will
            not add the transformed root node features to the output.
            (default: :obj:`True`)
        bias (bool, optional): If set to :obj:`False`, the layer will not learn
            an additive bias. (default: :obj:`True`)
        **kwargs (optional): Additional arguments of
            :class:`torch_geometric.nn.conv.MessagePassing`.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 nn,
                 aggr='add',
                 root_weight=True,
                 bias=True,
                 **kwargs):
        super(NNConv_old, self).__init__(aggr=aggr, **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.nn = nn
        self.aggr = aggr

        if root_weight:
            self.root = Parameter(torch.Tensor(in_channels, out_channels))
        else:
            self.register_parameter('root', None)

        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        reset(self.nn)
        size = self.in_channels
        uniform(size, self.root)
        uniform(size, self.bias)

    def forward(self, x, edge_index, edge_attr):
        """"""
        x = x.unsqueeze(-1) if x.dim() == 1 else x
        pseudo = edge_attr.unsqueeze(-1) if edge_attr.dim() == 1 else edge_attr
        return self.propagate(edge_index, x=x, pseudo=pseudo)

    def message(self, x_j, pseudo):
        # self.nn maps each edge-feature vector to a flattened
        # in_channels x out_channels matrix: this is kappa_phi(e(x_i, x_j))
        weight = self.nn(pseudo).view(-1, self.in_channels, self.out_channels)
        # multiply each neighbor feature x_j by its edge-specific kernel matrix
        return torch.matmul(x_j.unsqueeze(1), weight).squeeze(1)

    def update(self, aggr_out, x):
        # add the transformed root-node features (the W v_t(x) term) and the bias
        if self.root is not None:
            aggr_out = aggr_out + torch.mm(x, self.root)
        if self.bias is not None:
            aggr_out = aggr_out + self.bias
        return aggr_out

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
                                   self.out_channels)
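To see how message realizes the kappa_phi(e(x_i, x_j)) * v_t(x_j) product in eq. (10), here is the same reshape-and-matmul written in plain torch (sizes are hypothetical):

import torch

num_edges, width = 40, 64
flat = torch.randn(num_edges, width * width)   # output of the kernel DenseNet
weight = flat.view(num_edges, width, width)    # one 64 x 64 matrix per edge
x_j = torch.randn(num_edges, width)            # neighbor features v_t(x_j)
msg = torch.matmul(x_j.unsqueeze(1), weight).squeeze(1)
print(msg.shape)  # torch.Size([40, 64]) -- then averaged over neighbors (aggr='mean')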
The remaining parts of the code handle data preprocessing; they feel rather messy, and I am still working through them...