针对karate_club数据集,做了谱聚类。由于是2-way clustering,比较简单,得到了图的新的表示空间之后,没有做k-means,仅仅针对正规化后的拉普拉斯矩阵的第二小特征值所对应的特征向量的各个分量做了符号判断,这和《A Tutorial on Spectral Clustering》一文中的描述一致。
引用了numpy scipy matplotlib networkx包
#coding=utf-8
#MSC means Multiple Spectral Clustering
import numpy as np
import scipy as sp
import scipy.linalg as linalg
import networkx as nx
import matplotlib.pyplot as plt
def getNormLaplacian(W):
    """Return the symmetric normalized graph Laplacian of a weight matrix.

    Computes the degree matrix D = diag(d_1, ..., d_n), where d_i is the
    sum of row i of W, then the unnormalized Laplacian L = D - W, and
    finally Lbar = D^(-1/2) L D^(-1/2).

    Args:
        W: square 2-D weight (adjacency) matrix (w_ij).

    Returns:
        Lbar, a matrix with the same shape as W.

    Vertices with zero degree (isolated nodes) get an all-zero row and
    column in Lbar instead of triggering a singular-matrix error.
    """
    degrees = np.asarray(W, dtype=float).sum(axis=1)
    L = np.diag(degrees) - W
    # D is diagonal, so D^(-1/2) is simply 1/sqrt(d_i) on the diagonal; no
    # matrix inversion is needed.  Zero degrees are mapped to 0 because the
    # matching row/column of L is already zero, so the scale factor for it
    # is irrelevant and 0 avoids a division by zero.
    inv_sqrt_degrees = np.zeros_like(degrees)
    nonzero = degrees != 0
    inv_sqrt_degrees[nonzero] = 1.0 / np.sqrt(degrees[nonzero])
    Dn = np.diag(inv_sqrt_degrees)
    Lbar = np.dot(np.dot(Dn, L), Dn)
    return Lbar
def getKSmallestEigVec(Lbar,k):
"""input
"matrix Lbar and k
"return
"k smallest eigen values and their corresponding eigen vectors
"