CS224W 2023 Winter Colab 3 练习记录,附原题
使用 PyCharm 完成;需安装 PyG(torch_geometric)、torch-sparse 和 torch-scatter,以及 Stanford 的 DeepSNAP。
import os
import torch_geometric
import torch
import torch_scatter
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric.nn as pyg_nn
import torch_geometric.utils as pyg_utils
from torch import Tensor
from typing import Union, Tuple, Optional
from torch_geometric.typing import (OptPairTensor, Adj, Size, NoneType,
OptTensor)
from torch.nn import Parameter, Linear
from torch_sparse import SparseTensor, set_diag
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.utils import remove_self_loops, add_self_loops, softmax
class GNNStack(torch.nn.Module):
    """Stack of graph convolution layers followed by a small MLP head.

    Args:
        input_dim: Size of the input node features.
        hidden_dim: Output size of every convolution layer (per head).
        output_dim: Size of the final prediction vector per node.
        args: Configuration object exposing ``model_type``, ``num_layers``,
            ``heads`` and ``dropout``.
        emb: When truthy, ``forward`` returns the raw output of the
            post-message-passing head instead of log-probabilities.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, args, emb=False):
        super(GNNStack, self).__init__()
        # Check the configuration before any layer is constructed.
        assert (args.num_layers >= 1), 'Number of layers is not >=1'

        conv_model = self.build_conv_model(args.model_type)
        self.convs = nn.ModuleList()
        self.convs.append(conv_model(input_dim, hidden_dim))
        # With multi-head attention the input of each following layer is
        # ``heads`` times the output size of the previous layer.
        for _ in range(args.num_layers - 1):
            self.convs.append(conv_model(args.heads * hidden_dim, hidden_dim))

        # post-message-passing
        self.post_mp = nn.Sequential(
            nn.Linear(args.heads * hidden_dim, hidden_dim),
            nn.Dropout(args.dropout),
            nn.Linear(hidden_dim, output_dim),
        )

        self.dropout = args.dropout
        self.num_layers = args.num_layers
        self.emb = emb

    def build_conv_model(self, model_type):
        """Return the convolution layer class selected by ``model_type``.

        Raises:
            ValueError: If ``model_type`` is neither 'GraphSage' nor 'GAT'.
        """
        # The class names are resolved only inside the matching branch, so a
        # layer class that is not defined is only an error when it is requested.
        if model_type == 'GraphSage':
            return GraphSage
        if model_type == 'GAT':
            # When applying GAT with num heads > 1, the input dimension of
            # each following conv layer and of the first post_mp Linear must
            # be ``hidden_dim * heads`` (see __init__).
            return GAT
        raise ValueError(
            "Unknown model_type {0!r}; expected 'GraphSage' or 'GAT'".format(model_type))

    def forward(self, data):
        """Run every conv layer (ReLU + dropout after each), then the head.

        ``data`` must expose ``x`` and ``edge_index``. Returns the head output
        when ``self.emb`` is truthy, otherwise its log-softmax over dim 1.
        """
        x, edge_index = data.x, data.edge_index

        for conv in self.convs:
            x = conv(x, edge_index)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)

        x = self.post_mp(x)

        if self.emb:
            return x

        return F.log_softmax(x, dim=1)

    def loss(self, pred, label):
        """Negative log-likelihood loss of ``pred`` against ``label``."""
        return F.nll_loss(pred, label)
class GraphSage(MessagePassing):
    """GraphSAGE-style layer: mean over neighbors plus a skip connection.

    The output for each node is ``lin_l(x) + lin_r(mean of neighbor x_j)``,
    optionally passed through ``F.normalize`` (the L-2 normalization asked
    for by the exercise).

    Args:
        in_channels: Size of the incoming node features.
        out_channels: Size of the produced node embeddings.
        normalize: Whether to normalize the output embeddings.
        bias: Accepted for interface compatibility; NOTE(review): it is not
            forwarded to the linear layers, which keep their default bias.
        **kwargs: Forwarded to ``MessagePassing.__init__``.
    """

    def __init__(self, in_channels, out_channels, normalize=True,
                 bias=False, **kwargs):
        super().__init__(**kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.normalize = normalize

        # lin_l transforms the embedding of the central node;
        # lin_r transforms the message aggregated from its neighbors.
        self.lin_l = nn.Linear(in_channels, out_channels)
        self.lin_r = nn.Linear(in_channels, out_channels)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialize both linear transformations."""
        for layer in (self.lin_l, self.lin_r):
            layer.reset_parameters()

    def forward(self, x, edge_index, size=None):
        """Propagate messages, add the skip connection, optionally normalize."""
        # 1. Message passing: only neighbor representations (x_j) are used.
        neighborhood = self.propagate(edge_index, size=size, x=x)
        neighbor_term = self.lin_r(neighborhood)
        # 2. Skip connection from the node's own previous-layer embedding.
        self_term = self.lin_l(x)
        combined = neighbor_term + self_term
        # 3. Normalization of the result, if requested.
        return F.normalize(combined) if self.normalize else combined

    def message(self, x_j):
        """Each neighbor passes its own representation unchanged."""
        return x_j

    def aggregate(self, inputs, index, dim_size=None):
        """Average the incoming messages per target node.

        ``self.node_dim`` is the axis along which nodes are indexed.
        """
        return torch_scatter.scatter(
            inputs,
            index,
            dim=self.node_dim,
            dim_size=dim_size,
            reduce='mean',
        )
import torch.optim as optim
def build_optimizer(args, params):
    """Create the optimizer and optional LR scheduler described by ``args``.

    Args:
        args: Configuration object exposing ``opt`` ('adam', 'sgd',
            'rmsprop' or 'adagrad'), ``lr``, ``weight_decay`` and
            ``opt_scheduler`` ('none', 'step' or 'cos'). The 'step' scheduler
            additionally reads ``opt_decay_step`` and ``opt_decay_rate``;
            'cos' reads ``opt_restart``.
        params: Iterable of parameters; only those with ``requires_grad``
            are handed to the optimizer.

    Returns:
        A ``(scheduler, optimizer)`` tuple; ``scheduler`` is ``None`` when
        ``args.opt_scheduler`` is 'none'.

    Raises:
        ValueError: If ``args.opt`` or ``args.opt_scheduler`` is not one of
            the supported names.
    """
    weight_decay = args.weight_decay
    trainable = [p for p in params if p.requires_grad]

    if args.opt == 'adam':
        optimizer = optim.Adam(trainable, lr=args.lr, weight_decay=weight_decay)
    elif args.opt == 'sgd':
        optimizer = optim.SGD(trainable, lr=args.lr, momentum=0.95, weight_decay=weight_decay)
    elif args.opt == 'rmsprop':
        optimizer = optim.RMSprop(trainable, lr=args.lr, weight_decay=weight_decay)
    elif args.opt == 'adagrad':
        optimizer = optim.Adagrad(trainable, lr=args.lr, weight_decay=weight_decay)
    else:
        # Previously fell through and crashed later with UnboundLocalError.
        raise ValueError("Unknown optimizer: {0!r}".format(args.opt))

    if args.opt_scheduler == 'none':
        return None, optimizer
    if args.opt_scheduler == 'step':
        scheduler = optim.lr_scheduler.StepLR(
            optimizer, step_size=args.opt_decay_step, gamma=args.opt_decay_rate)
    elif args.opt_scheduler == 'cos':
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.opt_restart)
    else:
        raise ValueError("Unknown scheduler: {0!r}".format(args.opt_scheduler))
    return scheduler, optimizer
import time
import networkx as nx
import numpy as np
import torch
import torch.optim as optim
from tqdm import trange
import pandas as pd
import copy
from torch_geometric.datasets import TUDataset
from torch_geometric.datasets import Planetoid
from torch_geometric.data import DataLoader
import torch_geometric.nn as pyg_nn
import matplotlib.pyplot as plt
def train(dataset, args):
    """Train a ``GNNStack`` for node classification and track test accuracy.

    Args:
        dataset: Graph dataset whose items expose ``train_mask`` and
            ``test_mask``, and which exposes ``num_node_features`` and
            ``num_classes``.
        args: Configuration object exposing ``batch_size``, ``hidden_dim``
            and ``epochs``, plus whatever ``GNNStack`` and
            ``build_optimizer`` read from it.

    Returns:
        ``(test_accs, losses, best_model, best_acc, test_loader)``: one
        accuracy and one loss entry per epoch, a deep copy of the model with
        the best test accuracy, that accuracy, and the evaluation loader.
    """
    print("Node task. test set size:", np.sum(dataset[0]['test_mask'].numpy()))
    print()
    # One unshuffled loader is shared by training and evaluation.
    loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False)
    test_loader = loader

    # build model
    model = GNNStack(dataset.num_node_features, args.hidden_dim, dataset.num_classes,
                     args)
    scheduler, opt = build_optimizer(args, model.parameters())

    # train
    losses = []
    test_accs = []
    best_acc = 0
    best_model = None
    for epoch in trange(args.epochs, desc="Training", unit="Epochs"):
        total_loss = 0
        model.train()
        for batch in loader:
            opt.zero_grad()
            pred = model(batch)
            label = batch.y
            # Only training nodes contribute to the loss.
            pred = pred[batch.train_mask]
            label = label[batch.train_mask]
            loss = model.loss(pred, label)
            loss.backward()
            opt.step()
            total_loss += loss.item() * batch.num_graphs
        total_loss /= len(loader.dataset)
        losses.append(total_loss)

        # Advance the learning-rate schedule once per epoch; without this the
        # configured scheduler would never change the learning rate.
        if scheduler is not None:
            scheduler.step()

        if epoch % 10 == 0:
            test_acc = test(test_loader, model)
            test_accs.append(test_acc)
            # Always keep the first evaluated model so best_model is not None
            # even when the accuracy never rises above the initial best_acc.
            if best_model is None or test_acc > best_acc:
                best_acc = test_acc
                best_model = copy.deepcopy(model)
        else:
            # Evaluation only runs every 10 epochs; repeat the last value so
            # test_accs stays aligned with losses.
            test_accs.append(test_accs[-1])

    return test_accs, losses, best_model, best_acc, test_loader
def test(loader, test_model, is_validation=False, save_model_preds=False, model_type=None):
test_model.eval()
correct = 0
# Note that Cora is only one graph!
for data in loader:
with torch.no_grad():
# max(dim=1) returns values, indices tuple; only need indices
pred = test_model(data).max(dim=1)[1]
label = data.y
mask = data.val_mask if is_validation else data.test_mask
# node classification: only evaluate on nodes in test set
pred = pred[mask]
label = label[mask]
if save_model_preds:
print("Saving Model Predictions for Model Type", model_type)
data = {}
data['pred'] = pred.view(-1).cpu().detach().numpy()
data['label'] = label.view(-1).cpu().detach().numpy()
df = pd.DataFrame(data=data)
# Save locally as csv
df.to_csv('CORA-Node-' + model_type + '.csv', sep=',', index=False)
correct += pred.eq(label).sum().item()
total = 0
for data in loader.dataset:
total += torch.sum(data.val_mask if is_validation else data.test_mask).item()
return correct / total
class objectview:
    """Expose the keys of a dict as attributes of an object.

    The given dict becomes the instance's ``__dict__`` itself (no copy), so
    attribute writes and dict writes are visible through both.
    """

    def __init__(self, d):
        object.__setattr__(self, "__dict__", d)
if 'IS_GRADESCOPE_ENV' not in os.environ:
    # Hyper-parameter sets to run; each one is trained for every model type
    # listed in the inner loop.
    experiment_configs = [
        {
            'model_type': 'GraphSage',
            'dataset': 'cora',
            'num_layers': 2,
            'heads': 1,
            'batch_size': 32,
            'hidden_dim': 32,
            'dropout': 0.5,
            'epochs': 500,
            'opt': 'adam',
            'opt_scheduler': 'none',
            'opt_restart': 0,
            'weight_decay': 5e-3,
            'lr': 0.01,
        },
    ]
    for args in experiment_configs:
        args = objectview(args)
        for model in ['GraphSage']:
            args.model_type = model

            # Match the dimension: GAT runs with two heads, others with one.
            args.heads = 2 if model == 'GAT' else 1

            if args.dataset != 'cora':
                raise NotImplementedError("Unknown dataset")
            dataset = Planetoid(root='/tmp/cora', name='Cora')

            test_accs, losses, best_model, best_acc, test_loader = train(dataset, args)

            print("Maximum test set accuracy: {0}".format(max(test_accs)))
            print("Minimum loss: {0}".format(min(losses)))

            # Run test for our best model to save the predictions!
            test(test_loader, best_model, is_validation=False,
                 save_model_preds=True, model_type=model)
            print()

            # Curves for every model type share one figure.
            plt.title(dataset.name)
            plt.plot(losses, label="training loss" + " - " + args.model_type)
            plt.plot(test_accs, label="test accuracy" + " - " + args.model_type)
        plt.legend()
        plt.show()