SDNE Graph Neural Network: Xiao Hei's Hands-on Attempt

1. Parameter setup

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader,Dataset
import torch.optim as optim
from argparse import ArgumentParser,ArgumentDefaultsHelpFormatter
import networkx as nx
def parse_args():
    parser = ArgumentParser(formatter_class = ArgumentDefaultsHelpFormatter,conflict_handler = 'resolve')
    # input edge-list file
    parser.add_argument('--input',default = './data/cora/cora_edgelist.txt',help = 'Input graph file')
    # output file for the trained embeddings
    parser.add_argument('--output',default = './data/cora/Vec.emb',help = 'Output representation file')
    # parallelism
    parser.add_argument('--workers',default = 8,type = int,help = 'Number of parallel processes')
    # treat the graph as weighted or unweighted
    parser.add_argument('--weighted',action = 'store_true',default = False,help = 'Treat graph as weighted')
    # number of training epochs
    parser.add_argument('--epochs',default = 100,type = int,help = 'The training epochs of SDNE.')
    # dropout rate
    parser.add_argument('--dropout',default = 0.5,type = float,help = 'Dropout rate (1 - keep probability)')
    parser.add_argument('--weight-decay',type = float,default = 5e-4,help = 'Weight for L2 loss on embedding matrix')
    # learning rate
    parser.add_argument('--lr',default = 0.001,type = float,help = 'learning rate')
    # model hyperparameter: balances the first-order and second-order proximity losses
    parser.add_argument('--alpha',default = 1e-2,type = float,help = 'alpha is a hyperparameter in SDNE')
    # counters graph sparsity: reconstruction weight on nonzero adjacency entries
    parser.add_argument('--beta',default = 5.,type = float,help = 'beta is a hyperparameter in SDNE.')
    parser.add_argument('--mu1',default = 1e-5,type = float,help = 'mu1 is a hyperparameter in SDNE')
    parser.add_argument('--mu2',default = 1e-4,type = float,help = 'mu2 is a hyperparameter in SDNE')
    # batch size
    parser.add_argument('--bs',default = 100,type = int,help = 'batch size of SDNE')
    # width of the autoencoder's first layer
    parser.add_argument('--nhid0',default = 1000,type = int,help = 'The first dim.')
    # width of the autoencoder's second layer (the embedding dimension)
    parser.add_argument('--nhid1',default = 128,type = int,help = 'The second dim.')
    # step size of the learning-rate scheduler
    parser.add_argument('--step_size',default = 10,type = int,help = 'The step size for lr.')
    # decay factor of the learning-rate scheduler (must be float, not int)
    parser.add_argument('--gamma',default = 0.9,type = float,help = 'The gamma for lr')
    args = parser.parse_args(args = [])
    return args
args = parse_args()
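
Since parse_args is called with args = [], the real command line is ignored and the defaults above always apply, which keeps the script runnable inside a notebook. A minimal sketch for inspecting, and if needed overriding, the parsed hyperparameters:

# Inspect the parsed hyperparameters; because parse_args(args = []) ignores
# sys.argv, override an attribute directly instead of passing command-line flags.
print(vars(args))
# e.g. args.bs = 64    # would change the batch size for the rest of the script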

2. Graph-reading function

def Read_graph(file_name):
    edges = np.loadtxt(file_name).astype(np.int32)
    min_index = edges.min()
    max_index = edges.max()
    assert min_index in [0,1]   # node IDs must start at 0 or 1
    if min_index == 0:
        num_nodes = max_index + 1
    else:
        num_nodes = max_index
    Adj = np.zeros([num_nodes,num_nodes]).astype(np.int32)
    G = nx.Graph()
    for t in range(edges.shape[0]):
        G.add_edge(edges[t][0],edges[t][1])
        if min_index == 0:
            Adj[edges[t][0]][edges[t][1]] = 1
            Adj[edges[t][1]][edges[t][0]] = 1
        else:
            # 1-based node IDs: shift down to 0-based matrix indices
            Adj[edges[t][0]-1][edges[t][1]-1] = 1
            Adj[edges[t][1]-1][edges[t][0]-1] = 1
    Adj = torch.FloatTensor(Adj)
    return G,Adj,num_nodes
# G,Adj,num_nodes = Read_graph(args.input)
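
A quick way to convince yourself that Read_graph builds a symmetric adjacency matrix is to run it on a throwaway toy graph; the file name below is made up for illustration:

# Sanity check on a tiny 4-node path graph (hypothetical throwaway file).
toy_edges = np.array([[0,1],[1,2],[2,3]])
np.savetxt('toy_edgelist.txt',toy_edges,fmt = '%d')
G_toy,Adj_toy,n_toy = Read_graph('toy_edgelist.txt')
assert n_toy == 4
assert torch.equal(Adj_toy,Adj_toy.t())    # undirected graph -> symmetric matrix
assert Adj_toy.sum().item() == 2 * 3       # each of the 3 edges is stored twice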

3. Model definition

class MNN(nn.Module):
    def __init__(self,node_size,nhid0,nhid1,dropout,alpha):
        super(MNN,self).__init__()
        # encoder layers
        self.encoder0 = nn.Linear(node_size,nhid0)
        self.encoder1 = nn.Linear(nhid0,nhid1)
        # decoder layers
        self.decoder0 = nn.Linear(nhid1,nhid0)
        self.decoder1 = nn.Linear(nhid0,node_size)
        # dropout and the loss-balancing weight alpha
        self.dropout = nn.Dropout(dropout)
        self.alpha = alpha
    def forward(self,adj_batch,adj_mat,adj_b):
        # encode: [batch_size,node_size] -> [batch_size,nhid1]
        t0 = F.leaky_relu(self.encoder0(adj_batch))
        t0 = F.leaky_relu(self.encoder1(t0))
        embedding = t0
        # decode back to [batch_size,node_size]
        t0 = F.leaky_relu(self.decoder0(t0))
        t0 = F.leaky_relu(self.decoder1(t0))
        # first-order proximity loss: sum_ij s_ij * ||y_i - y_j||^2, written in the
        # expanded form ||y_i||^2 + ||y_j||^2 - 2*y_i.y_j
        # [batch_size,1]
        embedding_norm = torch.sum(embedding * embedding,dim = 1,keepdim = True)
        # [batch_size,batch_size]
        embedding_norm_ij = embedding_norm + embedding_norm.transpose(0,1)
        L_1 = torch.sum(adj_mat * (embedding_norm_ij - 2 * torch.mm(embedding,embedding.transpose(0,1))))
        # second-order proximity loss: squared reconstruction error reweighted by B
        L_2 = torch.sum(((adj_batch - t0) * adj_b) ** 2)
        return L_1,self.alpha * L_2,L_1 + self.alpha * L_2
    def savector(self,adj_batch):
        t0 = F.leaky_relu(self.encoder0(adj_batch))
        embedding = F.leaky_relu(self.encoder1(t0))
        return embedding    # return the embedding, not the first-layer activation
'''
model = MNN(3,500,128,0.3,0.6)
adj_batch = torch.Tensor([[0,1,1],[1,0,0],[0,0,1]]).float()
adj_mat = torch.Tensor([[0,1,1],[1,0,0],[0,0,1]]).float()
b_mat = torch.Tensor([[0,1,1],[1,0,0],[0,0,1]]).float()
L_1,L_2,L_1_2 = model(adj_batch,adj_mat,b_mat)
model.savector(adj_batch)
'''
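
The first-order term in forward is the classic sum_ij s_ij * ||y_i - y_j||^2, expanded so that no [batch, batch, dim] tensor of pairwise differences is ever materialized. A minimal numerical check of that identity (random shapes, chosen arbitrarily):

# Verify that the expanded form equals the direct pairwise-distance form.
emb = torch.randn(5,3)                      # fake embeddings
s = torch.rand(5,5)                         # fake similarity weights
norm = torch.sum(emb * emb,dim = 1,keepdim = True)
expanded = torch.sum(s * (norm + norm.t() - 2 * torch.mm(emb,emb.t())))
direct = torch.sum(s * torch.cdist(emb,emb) ** 2)
assert torch.allclose(expanded,direct,atol = 1e-4)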

4. Dataset definition

class DataLoad(Dataset):
    def __init__(self,num_nodes,Adj):
        super(DataLoad,self).__init__()
        self.num_nodes = num_nodes
        self.Adj = Adj
    def __getitem__(self,index):
        return index
    def __len__(self):
        return self.num_nodes
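
Note that __getitem__ returns nothing but the node index; the actual adjacency rows are sliced out inside the training loop below. Wrapped in a DataLoader, each batch is therefore just a 1-D tensor of node ids, as in this minimal sketch with a hypothetical 10-node graph:

# Each batch is a 1-D tensor of node indices, later used to slice rows of Adj.
demo_loader = DataLoader(DataLoad(10,torch.zeros(10,10)),batch_size = 4,shuffle = True)
print(next(iter(demo_loader)))    # e.g. tensor([7, 2, 5, 0])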

5. Optimizer definition and data preparation

# data pipeline
G, Adj, Node = Read_graph(args.input)
Data = DataLoad(Node,Adj)
Data = DataLoader(Data,batch_size = args.bs,shuffle = True)
# model setup
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = MNN(Node,args.nhid0,args.nhid1,args.dropout,args.alpha)
model = model.to(device)
model.train()
# optimizer and learning-rate scheduler
opt = optim.Adam(model.parameters(),lr = args.lr)
scheduler = optim.lr_scheduler.StepLR(opt,step_size = args.step_size,gamma = args.gamma)
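
With step_size = 10 and gamma = 0.9, StepLR multiplies the learning rate by 0.9 every 10 epochs, i.e. lr(epoch) = lr0 * 0.9 ** (epoch // 10). A throwaway sketch of the schedule (using a dummy optimizer so the real one above is untouched):

# StepLR: lr(epoch) = base_lr * gamma ** (epoch // step_size)
dummy_opt = optim.Adam([torch.zeros(1,requires_grad = True)],lr = args.lr)
dummy_sched = optim.lr_scheduler.StepLR(dummy_opt,step_size = args.step_size,gamma = args.gamma)
for _ in range(30):
    dummy_sched.step()
print(dummy_opt.param_groups[0]['lr'])    # 0.001 * 0.9 ** 3 = 0.000729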

6. Training and saving the model

for epoch in range(1,args.epochs+1):
    loss_sum,loss_L1,loss_L2,loss_reg = 0,0,0,0
    for index in Data:
        adj_batch = Adj[index]                # [batch_size,node_size]
        adj_mat = adj_batch[:,index]          # [batch_size,batch_size]
        b_mat = torch.ones_like(adj_batch)    # [batch_size,node_size]
        b_mat[adj_batch != 0] = args.beta     # weight beta on observed edges
        adj_batch,adj_mat,b_mat = adj_batch.to(device),adj_mat.to(device),b_mat.to(device)

        # compute the loss and optimize
        opt.zero_grad()
        L_1st,L_2nd,L_all = model(adj_batch,adj_mat,b_mat)
        # L1 + L2 regularization over all model parameters
        L_reg = 0
        for parameter in model.parameters():
            L_reg += args.mu1 * torch.sum(torch.abs(parameter)) + args.mu2 * torch.sum(parameter * parameter)
        L_loss = L_all + L_reg
        L_loss.backward()
        opt.step()
        # bookkeeping (use .item() so the autograd graph is not kept alive)
        loss_sum += L_loss.item()
        loss_L1 += L_1st.item()
        loss_L2 += L_2nd.item()
        loss_reg += L_reg.item()
    scheduler.step()
    print('loss for epoch %d is:' % epoch)
    print('loss_sum is %f' % loss_sum)
    print('loss_L1 is %f' % loss_L1)
    print('loss_L2 is %f' % loss_L2)
    print('loss_reg is %f' % loss_reg)
model.eval()
embedding = model.savector(Adj.to(device))
np.savetxt(args.output,embedding.cpu().detach().numpy())
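
The b_mat built in the loop above is what counters the sparsity problem that --beta was introduced for: reconstruction errors on observed edges are weighted beta = 5 times (beta squared = 25 after squaring), which stops the autoencoder from scoring well by simply reconstructing all zeros. A tiny numeric illustration:

# beta reweights reconstruction errors on the (rare) nonzero entries.
row = torch.tensor([[0.,1.,0.,1.]])              # one adjacency row with two edges
recon = torch.zeros_like(row)                    # the "lazy" all-zero reconstruction
b = torch.ones_like(row)
b[row != 0] = args.beta
plain = torch.sum((row - recon) ** 2)            # 2.0  -- unweighted error
weighted = torch.sum((b * (row - recon)) ** 2)   # 2 * beta ** 2 = 50.0
print(plain.item(),weighted.item())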

7. Visualizing the embeddings (SDNE-compressed)

import numpy as np
import pylab
from sklearn.manifold import TSNE
X = np.loadtxt(args.output)
labels_data = np.loadtxt('./data/cora/cora_labels.txt', dtype = int)   # np.int was removed in NumPy 1.24
tsne = TSNE(n_components = 2,init = 'random')
Y = tsne.fit_transform(X)    # the saved file holds raw embeddings with no id column, so use all columns
pylab.scatter(Y[:,0],Y[:,1],20,labels_data[:,1])
pylab.show()

[Figure: t-SNE scatter plot of the SDNE embeddings, colored by Cora class label]
