pytorch_geometric安装及工具包使用

GitHub地址:https://github.com/pyg-team/pytorch_geometric (pyg-team/pytorch_geometric: Graph Neural Network Library for PyTorch)

 一、安装

 下载

下载wheels文件:打开 data.pyg.org/whl/,选择与本机PyTorch版本和CUDA版本对应的目录,下载其中依赖包(如torch-scatter,torch-sparse等)的whl文件

使用pip install <下载的whl文件> 逐个安装这些依赖包

依赖包安装后,再使用pip install torch-geometric 安装主包

二、使用

1、任务1:KarateClub数据集,共34名会员,会员之间有社交关系;其中两个人有矛盾,要预测会员要站谁的队。

数据集说明:整个数据集是一个图,34个会员是34个点,会员之间的关系是边;edge_index中共156条边(每条无向关系按两个方向各存一次,即78条无向边);

目标:要对34个点进行分类

1)查询数据集

from torch_geometric.datasets import KarateClub

# Load the KarateClub dataset shipped with torch_geometric.datasets.
dataset = KarateClub()
data = dataset[0]  # Get the first graph object.

# Print a summary of the graph (attribute names with their tensor shapes).
print(data)
Data(x=[34, 34], edge_index=[2, 156], y=[34], train_mask=[34])
  • data.x:节点特征矩阵,形状为[num_nodes(样本个数), num_node_features(每个样本的特征维度)]

  • data.edge_index:COO格式的图连接关系,形状为[2, num_edges(边的个数)],类型为torch.long

  • data.edge_attr:边特征矩阵,形状为[num_edges, num_edge_features]

  • data.y:要训练的目标(可以具有任意形状),例如,形状为[num_nodes, *]的节点级目标,或形状为[1, *]的图级目标

  • data.pos:节点位置矩阵,形状为[num_nodes, num_dimensions]

  • train_mask:布尔掩码,标记哪些点有标签;只有这些点参与损失计算

edge_index:表示图的连接关系(start,end两个序列) - node features:每个点的特征 - node labels:每个点的标签 - train_mask:有的节点没有标签(用来表示哪些节点要计算损失)

edge_index

打印edge_index,结果如下

第一行是每条边的起点,第二行是对应的终点,两行的元素个数一样

tensor([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,
          1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,
          3,  3,  3,  3,  3,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,
          7,  7,  8,  8,  8,  8,  8,  9,  9, 10, 10, 10, 11, 12, 12, 13, 13, 13,
         13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 19, 20, 20, 21,
         21, 22, 22, 23, 23, 23, 23, 23, 24, 24, 24, 25, 25, 25, 26, 26, 27, 27,
         27, 27, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31,
         31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33,
         33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33],
        [ 1,  2,  3,  4,  5,  6,  7,  8, 10, 11, 12, 13, 17, 19, 21, 31,  0,  2,
          3,  7, 13, 17, 19, 21, 30,  0,  1,  3,  7,  8,  9, 13, 27, 28, 32,  0,
          1,  2,  7, 12, 13,  0,  6, 10,  0,  6, 10, 16,  0,  4,  5, 16,  0,  1,
          2,  3,  0,  2, 30, 32, 33,  2, 33,  0,  4,  5,  0,  0,  3,  0,  1,  2,
          3, 33, 32, 33, 32, 33,  5,  6,  0,  1, 32, 33,  0,  1, 33, 32, 33,  0,
          1, 32, 33, 25, 27, 29, 32, 33, 25, 27, 31, 23, 24, 31, 29, 33,  2, 23,
         24, 33,  2, 31, 33, 23, 26, 32, 33,  1,  8, 32, 33,  0, 24, 25, 28, 32,
         33,  2,  8, 14, 15, 18, 20, 22, 23, 29, 30, 31, 33,  8,  9, 13, 14, 15,
         18, 19, 20, 22, 23, 26, 27, 28, 29, 30, 31, 32]])
# edge_index has shape [2, num_edges]; transposing it prints one
# (start, end) node pair per row.
edge_index = data.edge_index
print(edge_index.t())

转置之后的结果(每一行是一条边的起点和终点,这里只截取了前13行):

tensor([[ 0,  1],
        [ 0,  2],
        [ 0,  3],
        [ 0,  4],
        [ 0,  5],
        [ 0,  6],
        [ 0,  7],
        [ 0,  8],
        [ 0, 10],
        [ 0, 11],
        [ 0, 12],
        [ 0, 13],
        [ 0, 17]]

  • edge_index:表示图的连接关系(start,end两个序列)
  • node features:每个点的特征
  • node labels:每个点的标签
  • train_mask:有的节点没有标签(用来表示哪些节点要计算损失)

 2)利用GCN进行分类

import torch
from torch.nn import Linear
from torch_geometric.nn import GCNConv

#定义类GCN
#点和边没有变化,变化的是点的特征维度
class GCN(torch.nn.Module):
    """Node classifier: three stacked GCNConv layers plus a linear head.

    The graph itself (nodes and edges) is never modified by the layers; only
    the per-node feature dimension changes from layer to layer.
    """

    def __init__(self):
        super().__init__()
        # Seed the RNG before any layer is created so that the initial
        # weights are the same on every run.
        torch.manual_seed(1234)
        # Feature widths: dataset.num_features -> 4 -> 4 -> 2. Each layer
        # only needs its input and output feature sizes.
        self.conv1 = GCNConv(dataset.num_features, 4)
        self.conv2 = GCNConv(4, 4)
        self.conv3 = GCNConv(4, 2)
        # Maps every node's 2-dimensional embedding to one score per class.
        self.classifier = Linear(2, dataset.num_classes)

    def forward(self, x, edge_index):
        """Return ``(out, h)`` for node features ``x`` and connectivity ``edge_index``.

        ``edge_index`` is expected in the [2, num_edges] layout used by
        ``data.edge_index``. ``h`` is the 2-dimensional embedding produced by
        the last convolution (after tanh); ``out`` is the classifier output
        computed from ``h``.
        """
        h = x
        # Every convolution sees the same edge_index; only the features are
        # updated, each time followed by a tanh activation.
        for conv in (self.conv1, self.conv2, self.conv3):
            h = conv(h, edge_index).tanh()

        # Classification layer on top of the final embedding.
        return self.classifier(h), h

# Instantiate the network and print its layer structure.
model = GCN()
print(model)

训练模型(注意:下面循环中调用的visualize_embedding在本文中没有给出定义,它是一个用于绘制节点嵌入的辅助函数,运行前需要先自行定义或导入,否则会报NameError)

import time

# Fresh model for training, plus the loss and optimizer that train() reads
# as module-level globals.
model = GCN()
criterion = torch.nn.CrossEntropyLoss()  # Define loss criterion.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)  # Define optimizer.

def train(data):
    """Perform one optimisation step on ``data`` and return ``(loss, h)``.

    Uses the module-level ``model``, ``criterion`` and ``optimizer``.
    ``h`` is the 2-dimensional node embedding returned by the model; it is
    passed back only so the caller can plot it.
    """
    optimizer.zero_grad()
    logits, embedding = model(data.x, data.edge_index)

    # Semi-supervised: only nodes selected by train_mask contribute to the
    # loss, so nodes without a label do not affect it.
    labelled = data.train_mask
    loss = criterion(logits[labelled], data.y[labelled])

    loss.backward()   # back-propagate
    optimizer.step()  # update the parameters
    return loss, embedding

for epoch in range(401):
    loss, h = train(data)
    if epoch % 10 != 0:
        continue
    # Plot the current embedding every 10th epoch.
    # NOTE(review): visualize_embedding is not defined anywhere in this
    # article; it has to be defined or imported before this loop runs.
    visualize_embedding(h, color=data.y, epoch=epoch, loss=loss)
    time.sleep(0.3)

2、任务2:数据集(MUTAG)中有多个图,针对每个图进行2分类

import torch
from torch_geometric.datasets import TUDataset #分子数据集:https://chrsmrrs.github.io/datasets/

# Load the MUTAG dataset through PyG's TUDataset class.
dataset = TUDataset(root='data/TUDataset', name='MUTAG')

# Dataset-level summary: number of graphs, node features and classes.
print()
print(f'Dataset: {dataset}:')
print('====================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[0]  # Get the first graph object.

print()
print(data)
print('=============================================================')

# Gather some statistics about the first graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')
from torch_geometric.loader import DataLoader

# train_dataset has to exist before the loader is built. Shuffle the graphs
# with a fixed seed (so the split is reproducible), then use the first 80%
# for training and keep the remainder as a held-out test set.
torch.manual_seed(12345)
shuffled_dataset = dataset.shuffle()
num_train = int(0.8 * len(shuffled_dataset))
train_dataset = shuffled_dataset[:num_train]
test_dataset = shuffled_dataset[num_train:]

# Mini-batch loaders; only the training loader reshuffles every epoch.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Inspect the mini-batches: each batch object reports how many graphs it
# contains via num_graphs.
for step, data in enumerate(train_loader):
    print(f'Step {step + 1}:')
    print('=======')
    print(f'Number of graphs in the current batch: {data.num_graphs}')
    print(data)
    print()
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    """Graph classifier: three GCNConv layers, mean-pool readout, linear head."""

    def __init__(self, hidden_channels):
        super().__init__()
        # Seed before creating the layers so initial weights are reproducible.
        torch.manual_seed(12345)
        self.conv1 = GCNConv(dataset.num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch):
        """Return one row of class scores per graph in the mini-batch.

        ``batch`` is forwarded to ``global_mean_pool`` to group node
        embeddings by graph.
        """
        # 1. Encode every node; no activation after the last convolution.
        node_emb = self.conv1(x, edge_index).relu()
        node_emb = self.conv2(node_emb, edge_index).relu()
        node_emb = self.conv3(node_emb, edge_index)

        # 2. Readout: average the node embeddings of each graph.
        graph_emb = global_mean_pool(node_emb, batch)  # [batch_size, hidden_channels]

        # 3. Output: dropout (active only while self.training) then the linear head.
        graph_emb = F.dropout(graph_emb, p=0.5, training=self.training)
        return self.lin(graph_emb)

# Build the model once just to print its layer structure.
model = GCN(hidden_channels=64)
print(model)
# Re-create the model that is actually trained, with its optimizer and loss;
# train() and test() read these as module-level globals.
model = GCN(hidden_channels=64)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

def train():
    """Train the module-level ``model`` for one pass over ``train_loader``."""
    model.train()  # training mode, so the dropout in forward() is active

    for batch in train_loader:  # one optimisation step per mini-batch
        logits = model(batch.x, batch.edge_index, batch.batch)
        print(logits.shape)
        # Compute the loss and derive gradients.
        criterion(logits, batch.y).backward()
        optimizer.step()       # update parameters from the gradients
        optimizer.zero_grad()  # clear gradients before the next batch

def test(loader):
    """Return the accuracy of the module-level ``model`` on ``loader``.

    Accuracy is the number of correctly classified graphs divided by
    ``len(loader.dataset)``.
    """
    model.eval()
    correct = 0
    # Evaluation needs no gradients; disabling autograd avoids building a
    # graph for every batch and leaves the predictions unchanged.
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the training/test dataset.
            out = model(data.x, data.edge_index, data.batch)
            # Pick the class with the highest score for each graph.
            pred = out.argmax(dim=1)
            correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.


# Train for 170 epochs, reporting accuracy on the training loader after each one.
for epoch in range(1, 171):
    train()
    train_acc = test(train_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}')

总结一下:

1、对点进行分类,如果只有部分点有标注信息,loss仅需要计算有标注的点的损失,即半监督学习。训练时,要保证模型最后的输出维度和分类的类别数一致

2、对图进行分类,其实还是对各个节点进行特征编码,只不过现在多了一步聚合操作,把各个节点特征汇总成全局特征就相当于得到了整个图的编码:torch_geometric.nn.global_mean_pool

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值