GAT in Practice

This walkthrough builds a Graph Attention Network (GAT) in PyTorch and trains it on the Cora citation dataset, following the structure of the pyGAT reference code (the utils and models modules imported below).

Parameter Initialization
import os
import glob
import time
import random
import argparse
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from utils import load_data, accuracy
from models import GAT, SpGAT

parser = argparse.ArgumentParser()
parser.add_argument('--no-cuda', action='store_true', default=False, help='Disables CUDA training.')
parser.add_argument('--fastmode', action='store_true', default=False, help='Validate during training pass.')
parser.add_argument('--sparse', action='store_true', default=False, help='GAT with sparse version or not.')
parser.add_argument('--seed', type=int, default=72, help='Random seed.')
parser.add_argument('--epochs', type=int, default=10000, help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.005, help='Initial learning rate.')
parser.add_argument('--weight_decay', type=float, default=5e-4, help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden', type=int, default=8, help='Number of hidden units.')
parser.add_argument('--nb_heads', type=int, default=8, help='Number of head attentions.')
parser.add_argument('--dropout', type=float, default=0.6, help='Dropout rate (1 - keep probability).')
parser.add_argument('--alpha', type=float, default=0.2, help='Alpha for the leaky_relu.')
parser.add_argument('--patience', type=int, default=100, help='Patience for early stopping.')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
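With the flags in place, the script can be launched from the command line; any option left out falls back to the defaults above. For example (train.py is an assumed file name, borrowed from the pyGAT reference layout):

python train.py --epochs 1000 --patience 100 --no-cuda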
Loading Data
The loader below lives in utils.py. Besides the imports shown earlier, it needs scipy (import scipy.sparse as sp) and three helpers, encode_onehot, normalize_features, and normalize_adj, sketched after the listing.

def load_data(path="./data/cora/", dataset="cora"):
    """Load citation network dataset (Cora only, for now)."""
    print('Loading {} dataset...'.format(dataset))
    idx_features_labels = np.genfromtxt("{}{}.content".format(path, dataset), dtype=np.dtype(str))
    features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32)
    labels = encode_onehot(idx_features_labels[:, -1])

    # build the graph: map paper ids to row indices, then read the edge list
    idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
    idx_map = {j: i for i, j in enumerate(idx)}
    edges_unordered = np.genfromtxt("{}{}.cites".format(path, dataset), dtype=np.int32)
    edges = np.array(list(map(idx_map.get, edges_unordered.flatten())), dtype=np.int32).reshape(edges_unordered.shape)
    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                        shape=(labels.shape[0], labels.shape[0]), dtype=np.float32)

    # symmetrize the adjacency matrix (citations are directed; the graph is treated as undirected)
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

    features = normalize_features(features)
    adj = normalize_adj(adj + sp.eye(adj.shape[0]))  # add self-loops before normalizing

    idx_train = range(140)
    idx_val = range(200, 500)
    idx_test = range(500, 1500)

    adj = torch.FloatTensor(np.array(adj.todense()))
    features = torch.FloatTensor(np.array(features.todense()))
    labels = torch.LongTensor(np.where(labels)[1])
    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)
    return adj, features, labels, idx_train, idx_val, idx_test
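load_data depends on three helpers imported from utils.py but not listed above. The sketch below is one plausible implementation, consistent with how they are usually written in the pyGAT reference code; the exact versions in your utils.py may differ:

import numpy as np
import scipy.sparse as sp

def encode_onehot(labels):
    # map each class label (a string in Cora) to a one-hot row
    classes = sorted(set(labels))
    classes_dict = {c: np.identity(len(classes))[i, :] for i, c in enumerate(classes)}
    return np.array(list(map(classes_dict.get, labels)), dtype=np.int32)

def normalize_features(mx):
    # row-normalize the feature matrix: x_i <- x_i / sum(x_i)
    rowsum = np.array(mx.sum(1))
    r_inv = np.power(rowsum, -1.0).flatten()
    r_inv[np.isinf(r_inv)] = 0.0  # rows that sum to zero stay zero
    return sp.diags(r_inv).dot(mx)

def normalize_adj(mx):
    # symmetric normalization D^{-1/2} A D^{-1/2}; the caller adds self-loops first
    rowsum = np.array(mx.sum(1))
    r_inv_sqrt = np.power(rowsum, -0.5).flatten()
    r_inv_sqrt[np.isinf(r_inv_sqrt)] = 0.0
    d_inv_sqrt = sp.diags(r_inv_sqrt)
    return mx.dot(d_inv_sqrt).transpose().dot(d_inv_sqrt)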
Model Construction
class GraphAttentionLayer(nn.Module):
    """
    Simple GAT layer, similar to https://arxiv.org/abs/1710.10903
    """
    def __init__(self, in_features, out_features, dropout, alpha, concat=True):
        super(GraphAttentionLayer, self).__init__()
        self.dropout = dropout
        self.in_features = in_features
        self.out_features = out_features
        self.alpha = alpha
        self.concat = concat

        self.W = nn.Parameter(torch.empty(size=(in_features, out_features)))
        nn.init.xavier_uniform_(self.W.data, gain=1.414)
        self.a = nn.Parameter(torch.empty(size=(2 * out_features, 1)))
        nn.init.xavier_uniform_(self.a.data, gain=1.414)
        self.leakyrelu = nn.LeakyReLU(self.alpha)

    def forward(self, h, adj):
        Wh = torch.mm(h, self.W)  # (N, in_features) -> (N, out_features)
        a_input = self._prepare_attentional_mechanism_input(Wh)
        e = self.leakyrelu(torch.matmul(a_input, self.a).squeeze(2))  # e_ij = LeakyReLU(a^T [Wh_i || Wh_j])

        # mask non-edges with a large negative value so softmax assigns them ~0 weight
        zero_vec = -9e15 * torch.ones_like(e)
        attention = torch.where(adj > 0, e, zero_vec)
        attention = F.softmax(attention, dim=1)
        attention = F.dropout(attention, self.dropout, training=self.training)
        h_prime = torch.matmul(attention, Wh)

        if self.concat:
            return F.elu(h_prime)
        else:
            return h_prime

    def _prepare_attentional_mechanism_input(self, Wh):
        # build all N*N pairwise concatenations [Wh_i || Wh_j]
        N = Wh.size()[0]
        Wh_repeated_in_chunks = Wh.repeat_interleave(N, dim=0)
        Wh_repeated_alternating = Wh.repeat(N, 1)
        all_combinations_matrix = torch.cat([Wh_repeated_in_chunks, Wh_repeated_alternating], dim=1)
        return all_combinations_matrix.view(N, N, 2 * self.out_features)

    def __repr__(self):
        return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')'
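A note on memory: _prepare_attentional_mechanism_input materializes all N² pairwise concatenations, an (N, N, 2·out_features) tensor, which becomes prohibitive on large graphs. Since the attention vector splits as a = [a1; a2], the scores e_ij = LeakyReLU(a1ᵀWh_i + a2ᵀWh_j) can instead be computed by broadcasting two (N, 1) projections. A sketch of an equivalent replacement for the Wh/a_input/e computation in forward:

Wh = torch.mm(h, self.W)                                   # (N, out_features)
f_src = torch.matmul(Wh, self.a[:self.out_features, :])    # (N, 1): a1^T Wh_i
f_dst = torch.matmul(Wh, self.a[self.out_features:, :])    # (N, 1): a2^T Wh_j
e = self.leakyrelu(f_src + f_dst.T)                        # (N, N) by broadcasting

This needs O(N · out_features) extra memory instead of O(N² · out_features) and produces the same e as the concatenation-based version.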
class GAT(nn.Module):
    def __init__(self, nfeat, nhid, nclass, dropout, alpha, nheads):
        """Dense version of GAT."""
        super(GAT, self).__init__()
        self.dropout = dropout

        # nheads independent attention heads over the input features
        self.attentions = [GraphAttentionLayer(nfeat, nhid, dropout=dropout, alpha=alpha, concat=True) for _ in range(nheads)]
        for i, attention in enumerate(self.attentions):
            self.add_module('attention_{}'.format(i), attention)

        # single output head maps the concatenated features to class scores
        # (concat=False: no ELU inside the layer)
        self.out_att = GraphAttentionLayer(nhid * nheads, nclass, dropout=dropout, alpha=alpha, concat=False)

    def forward(self, x, adj):
        x = F.dropout(x, self.dropout, training=self.training)
        x = torch.cat([att(x, adj) for att in self.attentions], dim=1)  # (N, nhid * nheads)
        x = F.dropout(x, self.dropout, training=self.training)
        x = F.elu(self.out_att(x, adj))
        return F.log_softmax(x, dim=1)
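As a sanity check of the shapes, the model can be instantiated with Cora's feature and class dimensions (1433 input features, 7 classes; the full graph has 2708 nodes) and the default hyperparameters from the parser. The inputs below are placeholders, not real data:

import torch

model = GAT(nfeat=1433, nhid=8, nclass=7, dropout=0.6, alpha=0.2, nheads=8)
x = torch.rand(100, 1433)    # 100 dummy nodes with Cora-sized features
adj = torch.eye(100)         # dummy adjacency (self-loops only)
out = model(x, adj)
print(out.shape)             # torch.Size([100, 7]): per-node log-probabilities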
Training
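The training code below refers to model and optimizer, which must be created before it runs. A minimal setup consistent with the flags and classes defined above (SpGAT is the sparse variant selected by --sparse; its constructor is assumed to match GAT's):

adj, features, labels, idx_train, idx_val, idx_test = load_data()

if args.sparse:
    model = SpGAT(nfeat=features.shape[1], nhid=args.hidden, nclass=int(labels.max()) + 1,
                  dropout=args.dropout, alpha=args.alpha, nheads=args.nb_heads)
else:
    model = GAT(nfeat=features.shape[1], nhid=args.hidden, nclass=int(labels.max()) + 1,
                dropout=args.dropout, alpha=args.alpha, nheads=args.nb_heads)
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)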
if args.cuda:
    model.cuda()
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()
    idx_test = idx_test.cuda()
def train(epoch):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    acc_train = accuracy(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()

    if not args.fastmode:
        # evaluate on the validation set with a separate forward pass in eval mode
        model.eval()
        output = model(features, adj)

    loss_val = F.nll_loss(output[idx_val], labels[idx_val])
    acc_val = accuracy(output[idx_val], labels[idx_val])
    print('Epoch: {:04d}'.format(epoch + 1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'time: {:.4f}s'.format(time.time() - t))
    return loss_val.item()
def compute_test():
    model.eval()
    output = model(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))
# train with early stopping: checkpoint every epoch, stop after `patience`
# epochs without improvement in validation loss
t_total = time.time()
loss_values = []
bad_counter = 0
best = args.epochs + 1  # sentinel: any real validation loss will beat this
best_epoch = 0
for epoch in range(args.epochs):
    loss_values.append(train(epoch))
    torch.save(model.state_dict(), '{}.pkl'.format(epoch))
    if loss_values[-1] < best:
        best = loss_values[-1]
        best_epoch = epoch
        bad_counter = 0
    else:
        bad_counter += 1
    if bad_counter == args.patience:
        break

    # delete checkpoints older than the current best
    files = glob.glob('*.pkl')
    for file in files:
        epoch_nb = int(file.split('.')[0])
        if epoch_nb < best_epoch:
            os.remove(file)

# delete checkpoints newer than the best one
files = glob.glob('*.pkl')
for file in files:
    epoch_nb = int(file.split('.')[0])
    if epoch_nb > best_epoch:
        os.remove(file)

print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# restore the best checkpoint and evaluate on the test set
print('Loading {}th epoch'.format(best_epoch))
model.load_state_dict(torch.load('{}.pkl'.format(best_epoch)))
compute_test()