python实现的基于NMF的多图聚类算法

该博客介绍了基于Python实现的两种多图聚类算法:NMF(非负矩阵分解)和Mjnmf。首先,详细讲解了NMF的基本原理和实现步骤,包括图的邻接矩阵表示、相似度矩阵计算以及迭代更新过程。然后,介绍了Mjnmf算法,该算法融合了高阶相似性和图的结构信息。文中提供了完整的代码实现,并给出了数据预处理和迭代更新的方法。最后,给出了数据集的链接。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

python实现的基于NMF的多图聚类算法的封装

代码均由论文复现



前言

怕忘记干了这个事,后面再来补充说明


参考论文

  • Identification of multi-layer networks community by fusing nonnegative matrix factorization and topological structural information
  • Graph Learning for Multiview Clustering

一、NMF

代码

# -*- coding: utf-8 -*-


import os
import sys
import numpy as np
import networkx as nx
import sklearn.preprocessing as nor


def graphsToMatrix(graphs):
  """
    ## 返回图的临接矩阵表示
  """
  if isinstance(graphs, list):
    return [np.asarray(nx.adjacency_matrix(g,weight='weight').todense(),dtype='float32') for g in graphs]
  else:
    return np.asarray(nx.adjacency_matrix(graphs,weight='weight').todense(),dtype='float32')


def getRandomData(xNum,yNum):
  """
    # 获取随机的数据
  """
  return np.random.rand(xNum,yNum)


class NMF(object):
  # 最小分母
  minTrueNum = 10 ** -8
  maxIterNum = 1000
  alpha = 10 ** -2
  # 聚类的数量
  k = 4
  # 节点的数量
  nodesNum = 0
  layerNum = 0
  # 图数据的存储
  graphs = []
  # 图的临接矩阵表示
  wItems = []
  #相似度矩阵
  sItems = []
  # 左侧的综合矩阵
  B = []
  # 右侧的系数矩阵
  fItems = []
  # 错误的分数
  j = [100 ** 4]

  def __init__(self, graphs, k):
    self.graphs = graphs
    self.wItems = graphsToMatrix(graphs)
    self.sItems = [self.simplify(matrix) for matrix in self.wItems]
    # self.wItems = self.sItems
    
    self.k = k
    self.nodesNum  = graphs[0].number_of_nodes()
    self.layerNum = len(graphs)
    self.__initialize()
  
  def __initialize(self):
    self.B = getRandomData(self.nodesNum, self.k)
    self.fItems = [getRandomData(self.k,self.nodesNum) for i in range(self.layerNum)]
  
  def calCulateErr(self):
    """
      ## 计算本轮的误差值
    """
    itemErr = sum([np.sum((w - self.B @k) ** 2) for w,k in zip(self.wItems,self.fItems)])
    self.j.append(itemErr)
    return itemErr

  def updateB(self):
    """
      ## 迭代更新B矩阵
    """
    molecule = sum([w @ f.T for w,f in zip(self.wItems,self.fItems)])
    denominator = self.B @ sum([f @ f.T for f in self.fItems])
    self.B = self.B * (molecule / np.maximum(denominator, self.minTrueNum))

  def updateK(self):
    """
      ## 更新右侧的系数矩阵
    """
    molecules = [self.B.T @ w for w in self.wItems]
    denominators = [self.B.T @ self.B @ f for f in self.fItems]
    self.fItems = [f * (k1 / np.maximum(k2, self.minTrueNum)) for k1,k2,f in zip(molecules, denominators,self.fItems)]
    # self.fItems = [f * ((self.B.T @ w) / np.maximum(self.B.T @ self.B @ f , self.minTrueNum)) 
    #               for f,w in zip(self.fItems, self.wItems)]

  def simplify(self, w):
    """
      # 计算获得相似度矩阵
    """
    cs = nor.normalize(w,axis=0,norm="l2")
    re = cs.T @cs
    np.fill_diagonal(re, 1)
    return re

  def fit(self):
    """
      ## 运行进行迭代更新
    """
    for i in range(self.maxIterNum):
      self.updateB()
      self.updateK()
      errRate = self.calCulateErr()
      # print(f"iter:: {i} err -> {errRate}")
      if errRate < self.alpha:
        print(f"end iter in {i} 轮 -> errRate::{errRate}")


  def getClusterLabel(self):
    """
      ## 获得得到的分类标签
    """
    labels = np.argmax(self.B, 1)
    return labels.tolist()

# from tools import getGraphData

# def runTest():
#   graphs = getGraphData("CELE")
#   m = NMF(graphs, 5)

#   m.fit()
#   print(m.getClusterLabel())


if __name__ == '__main__':
  # runTest()

  pass



二、Mjnmf

代码

# -*- coding: utf-8 -*-

from logging import error
from numpy.linalg import inv
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from sklearn import metrics
import scipy.sparse as sp
import sklearn.preprocessing as nor
from copy import deepcopy
from scipy.io import loadmat
from munkres import  Munkres
from sklearn.cluster import KMeans
import networkx as nx
# import loadData

def getRandomData(xNum,yNum):
  """
    # 获取随机的数据
  """
  return np.random.rand(xNum,yNum)

def read_adj(filename):
    """
        读取txt文件获取每一重图中的信息
        return :
        ``graphs`` -> 代表每重图对应的邻接矩阵的表示 
        ``nodes`` -> 每一重图中的点
        ``layers`` -> 不同的layer对应的id
    """
    df = pd.read_csv(filename, header=None, sep=' ', names=['layerID', 'source', 'target', 'weight'], dtype='int32')
    layers = list(set(df['layerID'].tolist()))
    graphs = []
    nodes = []
    for i in layers:
        g = nx.from_pandas_edgelist(df[df['layerID'] == i], source='source', target='target',edge_attr='weight')
        nodelist=sorted(g.nodes())
        L = np.asarray(nx.adjacency_matrix(g, nodelist=nodelist,weight='weight').todense(),dtype='float32')
        graphs.append(L)
        nodes.append(nodelist)
    return graphs, nodes,layers

class Mjnmf(object):
  """
    # Mjnmf 方法的封装
  """
  lower_control = 10 ** -10
  nodeNum = 0
  layerNum = 3
  l = 4
  theta = 0.1
  beta = 0.1
  alpha = 1
  lamda = 10
  iterNum = 700
  errorradio = 10 ** -6
  k = 4
  dataSet = []
  WItems =[]
  fItems = []
  h = []
  b = []
  wGroups = []
  uItems = []
  sItems = []
  errItems = []
  e = [10 **7] 
  
  def __init__(self,dataSet, k = 4, layerNum =None , nodeNum = None):
    """
      # 设置初始化的数据
    """
    self.dataSet = dataSet
    if layerNum == None: self.layerNum = dataSet.shape[0]
    else : self.layerNum = layerNum
    if nodeNum == None: self.nodeNum = dataSet[0].shape[1]
    else :self.nodeNum = nodeNum

    print(f"初始化 layerNum{self.layerNum} :: nodeNum{self.nodeNum}")
    self.k = k
    self.WItems = [self.constructkernal(item) for item in dataSet ]
    self.__runInitial()

  def __runInitial(self):
    self.fItems = [getRandomData(self.nodeNum,self.k) for i in range(self.nodeNum)  ]
    self.h = getRandomData(self.nodeNum,self.k)
    self.b = getRandomData(self.nodeNum,self.k)
    self.wGroups = [self.high_order(WItem) for WItem in self.WItems]

    self.sItems = [self.similarity(adj) for adj in self.dataSet]
    for index,item in enumerate(self.sItems):
      np.fill_diagonal(self.sItems[index],1)
    self.uItems = [self.cal_sim(sItem) for sItem in self.sItems]
    self.wGroups = [self.cal_sim(wItem) for wItem in self.wGroups]

  def caldistance(self,adj):
    """
        ## 计算获得距离矩阵
    """
    '''numberoffeature = (adj.shape)[0]'''
    pingfanghe = np.sum(adj ** 2,0)
    jiaocha = adj.T @ adj
    distance = np.sqrt(pingfanghe.reshape(self.nodeNum,1) + pingfanghe.reshape(1,self.nodeNum)- 2 * jiaocha)
    return distance
  
  def constructkernal(self,adjcency):
    """初始化得到 W 对应的矩阵"""
    dis = self.caldistance(adjcency)
    sig = np.median(dis)
    '''sig = 1'''
    fenzi = dis ** 2
    fenmu = max(2 *(sig ** 2), self.lower_control)
    wkernal = np.exp(-fenzi / fenmu)
    np.fill_diagonal(wkernal,0)
    return wkernal

  def high_order(self,W):
    P = np.zeros((self.nodeNum,self.nodeNum))
    temp = 1
    for i in range(self.l):
        if i == 0:
            A = deepcopy(W)
        else:
            A = A @ W
        temp *= (i+1)
        P += ((A * (self.theta ** i)) / temp)
    return P

  def similarity(self,W):
    cs = nor.normalize(W,axis=0,norm="l2")
    re = cs.T @cs
    return re

  def cal_sim(self,s):
    """
      对矩阵进行归一化处理
    """
    re = nor.normalize(s,axis=1,norm="l1")
    return re
  
  def b_update(self,B):
    fenzi = sum([w @ f for w,f in zip(self.wGroups, self.fItems)])
    fenmu = B @ sum([f.T @ f for f in self.fItems])
    B = B * (fenzi / np.maximum(fenmu, self.lower_control))
    return B

  def f_update(self,w,u,f):
    fenzi = w.T @ self.b + self.beta * (u @ self.h)    
    fenmu = f @ (self.b.T @ self.b) + self.beta * f
    '''f = f * np.sqrt(fenzi / np.maximum(fenmu, lower_control))'''
    f = f * (fenzi / np.maximum(fenmu, self.lower_control))
    return f
  
  def h_update(self,H):
    uuh = 2 * self.beta * sum([u.T@(u @ H) for u in self.uItems])
    sh = 4 * self.alpha * sum([s.T@ H for s in self.sItems])
    uf = 2 * self.beta * sum([u.T @ f for u,f in zip(self.uItems,self.fItems)])
    
    hhh = 8 * (self.layerNum * self.alpha + self.lamda) * (H @ (H.T @ H))
    fenzi = -uuh + np.sqrt((uuh * uuh) + (2*hhh)* (sh + uf + 4 * self.lamda * H))    
    H = H * np.sqrt(fenzi / np.maximum(hhh, self.lower_control))
    return H
  
  def u_update(self,u,f):
    fenzi = f @ (self.h.T)
    fenmu = u @ self.h @ (self.h.T)
    u = u * (fenzi / np.maximum(fenmu, self.lower_control))
    return u

  def error(self,w,f,s,u):
    e1 =  np.sum((w - self.b @ f.T)**2) 
    e2 =  self.alpha * np.sum((s - self.h @ self.h.T) ** 2) 
    e3 =  self.beta * np.sum((f - u @ self.h)**2)
    '''e4 =  lamda * np.linalg.norm(h.T @ h - np.eye(k),2)'''
    err = e1 + e2 + e3 
    return err
  
  def runIter(self):
    """
      # 运行进行迭代
    """
    for i in range(self.iterNum):
      self.b= self.b_update(self.b)
      self.fItems = [ self.f_update(w,u,f) for w,u,f in zip(self.wGroups,self.uItems, self.fItems)]
      self.uItems = [self.u_update(u,f) for u,f in zip(self.uItems,self.fItems)]
      self.h = self.h_update(self.h)

      self.errItems = [self.error(w,f,s,u) for w,f,s,u in zip(self.wGroups,self.fItems, self.sItems, self.uItems)]
      self.e.append(sum(self.errItems))

      # print(f"iter::{i} -> {self.e[-1]}")
      if abs(self.e[-1] - self.e[-2]) / self.e[-1] <= self.errorradio:
        print(f"结束于 {i}")
        break
  
  def getRes(self):
    """
      # 获得聚类的结果
    """
    ans = np.argmax(self.h, 1)
    return ans

def dataTrans(fileName):
  graphs_adj, nodesF, _ = read_adj('./DataSet/' + fileName + '.txt')
  return graphs_adj, nodesF

def main(fileName:str,clusterNum:int):
  """
    :params
    :``fileName`` 对应的文件名
    :``clusterNum`` 对应的文件名
  """
  data, nodes = dataTrans(fileName)
  d = np.array(data)
  m = Mjnmf(d, k = clusterNum)
  m.runIter()
  return m.getRes().tolist(), nodes[0]


if __name__ == '__main__':
  print(main("CELE"))

数据

网盘
提取码: es4o

总结

有时间来补充非负矩阵分解的知识

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值