import os
import torch
from torch_geometric.data import InMemoryDataset, download_url
from torch_geometric.io import read_planetoid_data
class PlanetoidPubMed(InMemoryDataset):
    """The PubMed subset of the Planetoid citation-network benchmark.

    Nodes are scientific publications (500-dim TF-IDF features, 3 classes)
    and edges are citation links. Raw files are downloaded from the
    Planetoid repository on first use and cached under ``root``.

    Args:
        root (str): Root directory where ``raw/`` and ``processed/`` live.
        split (str): One of ``'public'`` (fixed Planetoid split),
            ``'full'`` (all non-val/test nodes train) or ``'random'``
            (resampled per-class train nodes).
        num_train_per_class (int): Training nodes per class for the
            ``'random'`` split.
        num_val (int): Validation nodes for the ``'random'`` split.
        num_test (int): Test nodes for the ``'random'`` split.
        transform (callable, optional): Applied on every access.
        pre_transform (callable, optional): Applied once before saving.
    """

    url = 'https://github.com/kimiyoung/planetoid/raw/master/data'

    def __init__(self, root, split="public", num_train_per_class=20,
                 num_val=500, num_test=1000, transform=None,
                 pre_transform=None):
        # The base class triggers download()/process() as needed before
        # the processed file is loaded here.
        super(PlanetoidPubMed, self).__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

        self.split = split
        assert self.split in ['public', 'full', 'random']

        if split == 'full':
            # Everything that is not validation/test becomes training data.
            data = self.get(0)
            data.train_mask.fill_(True)
            data.train_mask[data.val_mask | data.test_mask] = False
            self.data, self.slices = self.collate([data])
        elif split == 'random':
            # Re-draw the split: num_train_per_class training nodes per
            # class, then num_val / num_test nodes from the remainder.
            data = self.get(0)
            data.train_mask.fill_(False)
            for c in range(self.num_classes):
                idx = (data.y == c).nonzero(as_tuple=False).view(-1)
                idx = idx[torch.randperm(idx.size(0))[:num_train_per_class]]
                data.train_mask[idx] = True

            remaining = (~data.train_mask).nonzero(as_tuple=False).view(-1)
            remaining = remaining[torch.randperm(remaining.size(0))]

            data.val_mask.fill_(False)
            data.val_mask[remaining[:num_val]] = True

            data.test_mask.fill_(False)
            data.test_mask[remaining[num_val:num_val + num_test]] = True

            self.data, self.slices = self.collate([data])

    @property
    def raw_dir(self):
        return os.path.join(self.root, 'raw')

    @property
    def processed_dir(self):
        return os.path.join(self.root, 'processed')

    @property
    def raw_file_names(self):
        # Files the base class checks for before deciding to download.
        names = ['x', 'tx', 'allx', 'y', 'ty', 'ally', 'graph', 'test.index']
        return [f'ind.pubmed.{name}' for name in names]

    @property
    def processed_file_names(self):
        return 'data.pt'

    def download(self):
        for name in self.raw_file_names:
            download_url(f'{self.url}/{name}', self.raw_dir)

    def process(self):
        data = read_planetoid_data(self.raw_dir, 'pubmed')
        data = data if self.pre_transform is None else self.pre_transform(data)
        torch.save(self.collate([data]), self.processed_paths[0])

    def __repr__(self):
        # Fix: the original used self.name, which is never assigned on this
        # class (unlike torch_geometric's Planetoid) and would raise
        # AttributeError; use the class name instead.
        return f'{self.__class__.__name__}()'
dataset = PlanetoidPubMed('./input/Planetoid/PubMed')print(dataset.num_classes)# 3print(dataset[0].num_nodes)# 19717print(dataset[0].num_edges)# 88648print(dataset[0].num_features)# 500
构建PlanetoidPubMed的流程
首先检查数据原始文件是否已下载:
检查self.raw_dir目录下是否存在raw_file_names()属性方法返回的每个文件,
如有文件不存在,则调用download()方法执行原始文件下载。
其中self.raw_dir为os.path.join(self.root, 'raw')（PyG源码中写作osp.join，osp是os.path的别名）。
其次检查数据是否经过处理:
首先检查之前对数据做变换的方法:检查self.processed_dir目录下是否存在pre_transform.pt文件:如果存在,意味着之前进行过数据变换,则需加载该文件获取之前所用的数据变换的方法,并检查它与当前pre_transform参数指定的方法是否相同;如果不相同则会报出一个警告,“The pre_transform argument differs from the one used in ……”。
接着检查之前的样本过滤的方法:检查self.processed_dir目录下是否存在pre_filter.pt文件,如果存在,意味着之前进行过样本过滤,则需加载该文件获取之前所用的样本过滤的方法,并检查它与当前pre_filter参数指定的方法是否相同,如果不相同则会报出一个警告,“The pre_filter argument differs from the one used in ……”。其中self.processed_dir为osp.join(self.root, 'processed')。
import os
from torch_geometric.utils import negative_sampling
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.utils import train_test_split_edges
# Load Cora with row-normalized features and prepare it for link
# prediction: node labels/masks are dropped, then edges are split into
# train/val/test positive and negative sets.
dataset = Planetoid(root='./input/Cora', name="Cora",
                    transform=T.NormalizeFeatures())
data = dataset[0]
print(data.edge_index.shape)  # torch.Size([2, 10556])

# Remove node-classification attributes before the edge-level split.
data.train_mask = data.val_mask = data.test_mask = data.y = None
data = train_test_split_edges(data)
for key in data.keys:
    print(key, getattr(data, key).shape)
import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GATConv
class GAT(torch.nn.Module):
    """Two-layer GAT encoder producing node embeddings.

    Args:
        in_channels (int): Size of the input node features.
        hidden_channels (int): Size of the hidden representation.
        out_channels (int): Size of the output embedding.
        seed (int): Manual seed set at construction so weight
            initialization is reproducible.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, seed=2021):
        super(GAT, self).__init__()
        # Seed before layer creation so parameter init is deterministic.
        torch.manual_seed(seed)
        self.conv1 = GATConv(in_channels, hidden_channels)
        self.conv2 = GATConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Return node embeddings of shape [num_nodes, out_channels]."""
        x = self.conv1(x, edge_index)
        x = x.relu()
        # Dropout is active only in training mode.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return x