此前已经写过 PyTorch 版本的 GAT 代码分析(见《GAT Pytorch代码分析》)。本文分析的是 TensorFlow 版本的 GAT 源码。
原论文:Graph Attention Networks
Github代码地址:Tensorflow | Pytorch | Keras
运行结果
运行GAT/execute_cora.py:
代码结构
.
├── data // Cora数据集
├── models // GAT模型定义 (gat.py)
├── pretrained // 预训练的模型
└── utils // 工具定义
超参设置
GAT/execute_cora.py:
# Training hyperparameters (GAT/execute_cora.py).
batch_size = 1
# NOTE(review): very large epoch cap; presumably training is ended earlier by
# early stopping -- confirm in the training loop.
nb_epochs = 100000
# NOTE(review): presumably the early-stopping patience (epochs without
# improvement before stopping) -- confirm in the training loop.
patience = 100
lr = 0.005 # learning rate
l2_coef = 0.0005 # weight decay
hid_units = [8] # numbers of hidden units per each attention head in each layer
n_heads = [8, 1] # additional entry for the output layer
# Passed to model.inference as `residual`.
residual = False
# Passed to model.inference as `activation`.
nonlinearity = tf.nn.elu
# The model class whose inference/loss/training functions are used.
model = GAT
数据导入
GAT源码使用的是cora数据集。cora的相关介绍可以参考Cora数据集介绍。
数据预处理部分和GCN源码相同,可参考《GCN代码分析》。
最终载入的数据中,adj表示2708篇文章之间的引用关系,形式为邻接矩阵;features表示1433个单词在2708篇文章中是否出现。
GAT/utils/process.py:
def load_data(dataset_str):
...
print(adj.shape)
print(features.shape)
>>>(2708, 2708)
>>>(2708, 1433)
其中feature是稀疏矩阵,参考SciPy教程 - 稀疏矩阵库scipy.sparse
模型
layers.py
定义attention层
def attn_head(seq, out_sz, bias_mat, activation, in_drop=0.0, coef_drop=0.0, residual=False):
    """Build a single graph-attention head and return its activated output.

    Args:
        seq: Input feature tensor. It must be rank 3, because the attention
            logits are built with a three-axis transpose; presumably laid out
            as (batch, nodes, features) -- confirm against the caller.
        out_sz: Number of output features produced by this head.
        bias_mat: Tensor added to the attention logits before the softmax.
            Presumably holds large negative values for non-neighbours so they
            are masked out -- confirm against the caller.
        activation: Callable applied to the final result.
        in_drop: Dropout rate for the input and for the transformed features
            (keep probability is ``1.0 - in_drop``). ``0.0`` disables it.
        coef_drop: Dropout rate for the attention coefficients
            (keep probability is ``1.0 - coef_drop``). ``0.0`` disables it.
        residual: If true, add a skip connection from the (possibly
            dropped-out) input to the output.

    Returns:
        ``activation`` applied to the attention-weighted, bias-added features.
    """
    with tf.name_scope('my_attn'):
        if in_drop != 0.0:
            seq = tf.nn.dropout(seq, 1.0 - in_drop)

        # Kernel-size-1 convolution without bias: the same linear map is
        # applied to every position along the second axis.
        seq_fts = tf.layers.conv1d(seq, out_sz, 1, use_bias=False)

        # simplest self-attention possible
        f_1 = tf.layers.conv1d(seq_fts, 1, 1)
        f_2 = tf.layers.conv1d(seq_fts, 1, 1)
        # f_1 has a trailing axis of size 1; transposing f_2 moves its size-1
        # axis to the middle, so the sum broadcasts to one score per pair.
        logits = f_1 + tf.transpose(f_2, [0, 2, 1])
        coefs = tf.nn.softmax(tf.nn.leaky_relu(logits) + bias_mat)

        if coef_drop != 0.0:
            coefs = tf.nn.dropout(coefs, 1.0 - coef_drop)
        if in_drop != 0.0:
            seq_fts = tf.nn.dropout(seq_fts, 1.0 - in_drop)

        # Weighted sum of the transformed features, followed by a learned bias.
        vals = tf.matmul(coefs, seq_fts)
        ret = tf.contrib.layers.bias_add(vals)

        # residual connection
        if residual:
            if seq.shape[-1] != ret.shape[-1]:
                # Feature sizes differ: project the input with a 1x1
                # convolution so it can be added to the output.
                ret = ret + tf.layers.conv1d(seq, ret.shape[-1], 1)
            else:
                ret = ret + seq

        return activation(ret)
gat.py
# Build the forward pass of the configured model and obtain the class logits.
logits = model.inference(
    ftr_in, nb_classes, nb_nodes, is_train, attn_drop, ffd_drop,
    bias_mat=bias_in,
    hid_units=hid_units,
    n_heads=n_heads,
    residual=residual,
    activation=nonlinearity,
)
class GAT(BaseGAttN):
    """Graph attention network assembled from ``layers.attn_head`` heads."""

    @staticmethod
    def inference(inputs, nb_classes, nb_nodes, training, attn_drop, ffd_drop,
                  bias_mat, hid_units, n_heads, activation=tf.nn.elu, residual=False):
        """Stack attention layers and return the output logits.

        Declared as a static method because it takes no ``self``; it can be
        called on the class (``GAT.inference(...)``) or on an instance.

        Args:
            inputs: Input features handed to the first layer's heads.
            nb_classes: Output size of every head in the output layer.
            nb_nodes: Not referenced in this function.
            training: Not referenced in this function.
            attn_drop: Forwarded to every head as ``coef_drop``.
            ffd_drop: Forwarded to every head as ``in_drop``.
            bias_mat: Forwarded unchanged to every head.
            hid_units: ``hid_units[i]`` is the output size of each head in
                hidden layer ``i``.
            n_heads: ``n_heads[i]`` is the number of heads in hidden layer
                ``i``; the last entry is the number of output-layer heads.
            activation: Activation used by the hidden-layer heads.
            residual: Whether hidden layers after the first use a residual
                connection. The first and the output layer never do.

        Returns:
            The mean of the output-layer heads' outputs.
        """
        # First hidden layer: n_heads[0] heads, concatenated on the last axis.
        attns = [
            layers.attn_head(inputs, bias_mat=bias_mat,
                             out_sz=hid_units[0], activation=activation,
                             in_drop=ffd_drop, coef_drop=attn_drop, residual=False)
            for _ in range(n_heads[0])
        ]
        h_1 = tf.concat(attns, axis=-1)

        # Remaining hidden layers; hid_units[i] is the per-head output size.
        for i in range(1, len(hid_units)):
            attns = [
                layers.attn_head(h_1, bias_mat=bias_mat,
                                 out_sz=hid_units[i], activation=activation,
                                 in_drop=ffd_drop, coef_drop=attn_drop, residual=residual)
                for _ in range(n_heads[i])
            ]
            h_1 = tf.concat(attns, axis=-1)

        # Output layer: identity activation, heads are averaged instead of
        # concatenated so the result keeps nb_classes features.
        out = [
            layers.attn_head(h_1, bias_mat=bias_mat,
                             out_sz=nb_classes, activation=lambda x: x,
                             in_drop=ffd_drop, coef_drop=attn_drop, residual=False)
            for _ in range(n_heads[-1])
        ]
        logits = tf.add_n(out) / n_heads[-1]

        return logits
base_gattn.py
GAT/models/base_gattn.py:
attention层的损失函数和训练函数
def loss(logits, labels, nb_classes, class_weights):
    """Return the mean class-weighted softmax cross-entropy.

    Args:
        logits: Unnormalised class scores.
        labels: Integer class labels (used both for ``tf.one_hot`` and as
            sparse labels for the cross-entropy).
        nb_classes: Depth of the one-hot encoding of ``labels``.
        class_weights: Per-class weights, multiplied with the one-hot labels.

    Returns:
        A tensor named ``xentropy_mean``: the mean of the weighted
        per-sample cross-entropy values.
    """
    # The one-hot mask selects, for each sample, the weight of its own class.
    label_mask = tf.one_hot(labels, nb_classes)
    sample_wts = tf.reduce_sum(tf.multiply(label_mask, class_weights), axis=-1)

    # Cross-entropy per sample, scaled by that sample's class weight.
    per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    weighted = tf.multiply(per_sample, sample_wts)

    return tf.reduce_mean(weighted, name='xentropy_mean')
training函数:最小化损失函数与L2正则项(weight decay)之和
def training(loss, lr, l2_coef):
    """Create the op that minimises ``loss`` plus an L2 weight-decay term.

    Args:
        loss: Scalar loss tensor to minimise.
        lr: Learning rate for the Adam optimizer.
        l2_coef: Coefficient applied to the summed L2 penalty.

    Returns:
        The training op returned by ``AdamOptimizer.minimize``.
    """
    # weight decay
    # TensorFlow variable names have the form 'scope/.../bias:0', so the full
    # name never equals a bare 'bias'. Compare only the last path component,
    # with the ':<index>' suffix removed, so the exclusion actually applies.
    exempt_names = {'bias', 'gamma', 'b', 'g', 'beta'}
    penalties = [
        tf.nn.l2_loss(v)
        for v in tf.trainable_variables()
        if v.name.split(':')[0].split('/')[-1] not in exempt_names
    ]
    # tf.add_n rejects an empty list; fall back to no penalty in that case.
    lossL2 = tf.add_n(penalties) * l2_coef if penalties else 0.0

    # optimizer
    opt = tf.train.AdamOptimizer(learning_rate=lr)

    # training op
    train_op = opt.minimize(loss + lossL2)

    return train_op
待续