GCN实现节点分类任务

数据处理,数据集采用cora数据集

import dgl
from dgl.data import DGLDataset
import torch
import os
import pandas as pd
import torch
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
import torch.nn.functional as F

class CoraDataset(DGLDataset):
    """Cora citation data wrapped as a DGL dataset holding a single graph.

    ``process`` reads two tab-separated, header-less files:

    * the content file: first column is the raw node id, last column is the
      class label, every column in between is a feature;
    * the cites file: two columns of raw node ids, one edge per row.

    The resulting graph carries ``feat``, ``label``, ``train_mask``,
    ``val_mask`` and ``test_mask`` as node data.

    Args:
        content_path: Path to the node content file.
        cites_path: Path to the edge (citation) file.
    """

    def __init__(self, content_path=r'../data/cora/cora.content',
                 cites_path=r'../data/cora/cora.cites'):
        # The paths are stored before calling the base constructor because the
        # graph is expected to be ready right after construction, i.e. the base
        # class runs process() during __init__ (see the DGLDataset docs).
        self._content_path = content_path
        self._cites_path = cites_path
        super().__init__(name='cora')

    def process(self):
        """Read both files and build ``self.graph`` with features, labels and masks.

        Raises:
            ValueError: If the cites file references a node id that does not
                appear in the content file.
        """
        nodes_data = pd.read_csv(self._content_path, sep='\t', header=None)
        edges_data = pd.read_csv(self._cites_path, sep='\t', header=None)
        n_nodes = nodes_data.shape[0]

        # Raw ids are arbitrary; remap them to row positions 0..n_nodes-1.
        raw_ids = nodes_data.iloc[:, 0].tolist()
        idx_map = {raw_id: row for row, raw_id in enumerate(raw_ids)}

        def to_node_index(raw_endpoints):
            # Fail with the offending id instead of silently producing None.
            try:
                rows = [idx_map[raw] for raw in raw_endpoints]
            except KeyError as err:
                raise ValueError(
                    'edge file references unknown node id {!r}'.format(err.args[0])
                ) from err
            return torch.tensor(rows, dtype=torch.int64)

        edges_src = to_node_index(edges_data.iloc[:, 0].tolist())
        edges_dst = to_node_index(edges_data.iloc[:, 1].tolist())

        # Cast to float32: the raw columns are parsed as integers, while the
        # layers consuming 'feat' hold floating-point weights.
        node_features = torch.from_numpy(
            nodes_data.iloc[:, 1:-1].to_numpy(dtype=np.float32))

        # String class names -> integer codes (int64, as cross_entropy expects).
        label_codes = nodes_data.iloc[:, -1].astype('category').cat.codes
        node_labels = torch.from_numpy(label_codes.to_numpy().astype(np.int64))

        self.graph = dgl.graph((edges_src, edges_dst), num_nodes=n_nodes)
        self.graph.ndata['feat'] = node_features
        self.graph.ndata['label'] = node_labels

        # Positional 60% / 20% / 20% split in file order (not shuffled).
        n_train = int(n_nodes * 0.6)
        n_val = int(n_nodes * 0.2)
        train_mask = torch.zeros(n_nodes, dtype=torch.bool)
        val_mask = torch.zeros(n_nodes, dtype=torch.bool)
        test_mask = torch.zeros(n_nodes, dtype=torch.bool)
        train_mask[:n_train] = True
        val_mask[n_train:n_train + n_val] = True
        test_mask[n_train + n_val:] = True
        self.graph.ndata['train_mask'] = train_mask
        self.graph.ndata['val_mask'] = val_mask
        self.graph.ndata['test_mask'] = test_mask

    def __getitem__(self, i):
        """Return the single graph; the index ``i`` is ignored."""
        return self.graph

    def __len__(self):
        """Return 1: the dataset contains exactly one graph."""
        return 1




将数据导入 Neo4j,查看是否存在孤立节点

// Find nodes with in-degree 0 (no incoming relationship).
// The arrow direction matters: an undirected pattern such as ()-[]-(n)
// matches relationships in either direction and would only return nodes
// that have no relationships at all.
MATCH (n) WHERE NOT ()-->(n) RETURN n

// Find nodes with out-degree 0 (no outgoing relationship).
MATCH (n) WHERE NOT (n)-->() RETURN n

模型搭建及训练

from dgl.nn import GraphConv
import torch.nn as nn
import torch.nn.functional as F
import torch
import dgl
from DLG_GCN.dataprocess import CoraDataset
import matplotlib.pyplot as plt
import numpy as np


class GCN(nn.Module):
    """Two-layer graph convolutional network.

    Applies ``GraphConv`` -> ReLU -> ``GraphConv`` and returns the output of
    the second convolution as-is (no softmax is applied here).
    """

    def __init__(self, in_feats, h_feats, num_classes):
        """Create the two layers: ``in_feats -> h_feats -> num_classes``."""
        super().__init__()
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, num_classes)

    def forward(self, g, in_feat):
        """Run both convolutions on graph ``g`` starting from ``in_feat``.

        Returns the output of the second ``GraphConv`` layer.
        """
        hidden = F.relu(self.conv1(g, in_feat))
        return self.conv2(g, hidden)




def _plot_training_curves(losses, val_accs, test_accs, out_dir):
    """Save the per-epoch loss/accuracy curves as PNG files in ``out_dir`` and show them."""
    import os

    # savefig does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)
    epochs = np.arange(len(losses))

    # Combined figure: loss on the left axis, accuracies on the right (twin) axis,
    # so the two differently-scaled quantities each get their own y scale.
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()
    ln1 = ax1.plot(epochs, losses, 'r', label='loss')
    ln2 = ax2.plot(epochs, test_accs, 'g', label='test_acc')
    ln3 = ax2.plot(epochs, val_accs, 'b', label='val_acc')
    ax1.set_xlabel('iteration')
    ax1.set_ylabel('training loss')
    ax2.set_ylabel('accuracy')
    # One legend for the lines of both axes; placed on ax2, which is drawn on top.
    ax2.legend(ln1 + ln2 + ln3, ["train_loss", "test_acc", "val_acc"], loc='upper left')
    ax1.grid(True)
    fig.savefig(os.path.join(out_dir, "train_val_test.png"))

    # One separate figure per curve.
    single_curves = (
        ("train.png", losses, 'r', 'loss'),
        ("val.png", val_accs, 'g', 'val_accuracy'),
        ("test.png", test_accs, 'b', 'test_accuracy'),
    )
    for file_name, series, color, label in single_curves:
        plt.figure()
        plt.plot(epochs, series, color, label=label)
        plt.legend()
        plt.grid(True)
        plt.savefig(os.path.join(out_dir, file_name))

    plt.show()


def train(g, model, lr=0.0005, epochs=1000, out_dir="./mydata"):
    """Train ``model`` on graph ``g`` with Adam, then plot the training curves.

    The loss is the cross entropy over the nodes selected by ``train_mask``.
    Validation and test accuracy are tracked every epoch, and the test accuracy
    at the epoch with the best validation accuracy is reported.

    Args:
        g: Graph whose node data provides ``feat``, ``label``, ``train_mask``,
            ``val_mask`` and ``test_mask``.
        model: Module called as ``model(g, features)`` that returns per-node logits.
        lr: Learning rate for the Adam optimizer.
        epochs: Number of full-graph training iterations.
        out_dir: Directory the PNG plots are written to (created if missing).
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    best_val_acc = 0.0
    best_test_acc = 0.0

    loss_history = []
    val_acc_history = []
    test_acc_history = []

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']

    for epoch in range(epochs):
        logits = model(g, features)
        pred = logits.argmax(1)

        loss = F.cross_entropy(logits[train_mask], labels[train_mask])

        # Keep plain Python floats in the histories: storing the tensors would
        # hold on to each epoch's autograd graph, and they are plotted later.
        loss_value = loss.item()
        val_acc = (pred[val_mask] == labels[val_mask]).float().mean().item()
        test_acc = (pred[test_mask] == labels[test_mask]).float().mean().item()

        loss_history.append(loss_value)
        val_acc_history.append(val_acc)
        test_acc_history.append(test_acc)

        # Model selection on validation accuracy; report the matching test accuracy.
        if best_val_acc < val_acc:
            best_val_acc = val_acc
            best_test_acc = test_acc

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % 5 == 0:
            print('In epoch {}, loss: {:.3f}, val acc: {:.3f} (best {:.3f}), test acc: {:.3f} (best {:.3f})'.format(
                epoch, loss_value, val_acc, best_val_acc, test_acc, best_test_acc))

    _plot_training_curves(loss_history, val_acc_history, test_acc_history, out_dir)

# Build the dataset and take its single graph.
cora_data = CoraDataset()
graph = cora_data[0]

# Convert to a bidirected graph, carrying the node data over to the new graph.
g = dgl.to_bidirected(graph, copy_ndata=True)

# Input width comes from the feature matrix; 16 hidden units, 7 output classes.
model = GCN(in_feats=g.ndata['feat'].shape[1], h_feats=16, num_classes=7)

train(g, model)


训练结果

 

 

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值