谱聚类推导过程较为繁琐,本文不作介绍,重点在于使用python语言实现算法。
谱聚类步骤:
1.建无向图,获取邻接矩阵W(对称矩阵)
2.计算拉普拉斯矩阵 L = D - W,其中D为对角阵(度矩阵),其第i个对角元素为W第i行所有元素之和
3.对L进行特征分解,获取最小的k个特征值对应的特征向量(此处k为聚类数量;注意下文代码中聚类数量记为n,而k表示KNN的近邻个数)
4.将这k个特征向量作为列向量,构造成矩阵V(大小为 样本数×k)
5.对 i=1,…,N(N为样本数),令 y_i 为V的第i行,即第i个样本在特征空间中的表示
6.利用K-means对yi进行聚类
代码:
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
class spectralclustering():
    """Unnormalised spectral clustering on a k-nearest-neighbour graph.

    The pipeline is: pairwise Euclidean distances -> symmetric KNN
    affinity matrix W -> Laplacian L = D - W -> eigenvectors of the
    smallest eigenvalues -> K-means on the rows of the eigenvector matrix.
    """

    def __init__(self, k, n):
        """Store the hyper-parameters.

        Args:
            k: number of nearest neighbours linked to each point when the
                graph is built.
            n: number of clusters (also the number of eigenvectors kept).
        """
        self.k = k
        self.n = n
        self.__labels = None

    def _knn_affinity(self, x):
        """Return the symmetric KNN affinity matrix W for the samples in x.

        An entry is 1 when both points list each other among their k
        nearest neighbours, 0.5 when only one of them does, and 0 otherwise.
        """
        x = np.asarray(x, dtype=float)
        N = len(x)
        if N == 0:
            raise ValueError("x must contain at least one sample")
        points = x.reshape(N, -1)

        # Pairwise Euclidean distances, computed in one vectorised step.
        diff = points[:, None, :] - points[None, :, :]
        A = np.sqrt(np.sum(diff ** 2, axis=-1))

        # A point must never be selected as its own neighbour.  Pushing the
        # diagonal to +inf guarantees that even when duplicate points make
        # several distances equal to zero.
        np.fill_diagonal(A, np.inf)

        # Each point has at most N - 1 other points to link to.
        k = max(0, min(int(self.k), N - 1))
        neighbours = np.argsort(A, axis=1, kind='stable')[:, :k]

        W = np.zeros((N, N))
        W[np.arange(N)[:, None], neighbours] = 1.0
        # The KNN relation is directed; averaging with the transpose makes
        # the graph undirected (W symmetric).
        return (W + W.T) / 2

    def _embedding(self, W):
        """Return the spectral embedding: one row per sample, self.n columns."""
        D = np.diag(np.sum(W, axis=1))  # degree matrix
        L = D - W                       # unnormalised graph Laplacian
        # L is real and symmetric, so eigh applies: it returns real
        # eigenvalues in ascending order with matching eigenvector columns.
        _, eigenvector = np.linalg.eigh(L)
        return eigenvector[:, :self.n]

    def fit(self, x):
        """Cluster the samples in x and remember the resulting labels.

        Args:
            x: array-like with one sample per row.

        Raises:
            ValueError: if x contains no samples.
        """
        W = self._knn_affinity(x)
        V = self._embedding(W)
        # K-means is delegated to scikit-learn rather than re-implemented.
        k_means = KMeans(init='k-means++', n_clusters=self.n, tol=1e-6)
        k_means.fit(V)
        self.__labels = k_means.labels_

    def predict(self, data):
        """Return a copy of the labels computed by the last call to fit.

        The data argument is not used: labels exist only for the samples
        that were passed to fit.
        """
        return np.copy(self.__labels)
# Try the clustering on one of sklearn's public toy datasets.
def generate_dataset(N=300, noise=0.07, random_state=42, visualize=False):
    """Build a two-moons sample set with sklearn's make_moons.

    Args:
        N: passed to make_moons as the number of samples.
        noise: passed to make_moons as the noise level.
        random_state: passed to make_moons as the random seed.
        visualize: when true, show a scatter plot of the samples coloured
            by the labels make_moons returned.

    Returns:
        The sample coordinates; the generated labels are not returned.
    """
    from sklearn.datasets import make_moons

    points, truth = make_moons(N, noise=noise, random_state=random_state)
    if not visualize:
        return points

    figure, axes = plt.subplots(figsize=(16, 9))
    axes.set_title(
        'Test Dataset for Spectral Clustering',
        fontsize=18,
        fontweight='demi',
    )
    axes.scatter(points[:, 0], points[:, 1], c=truth, s=50, cmap='viridis')
    plt.show()
    return points
if __name__ == '__main__':
    # Demo: cluster a toy dataset and plot each cluster in its own colour.
    # create dataset:
    n = 2  # number of clusters
    k = 5  # nearest neighbours per point in the KNN graph
    X = generate_dataset(visualize=False)
    # spectral clustering estimation:
    sc = spectralclustering(k, n)
    sc.fit(X)
    category = sc.predict(X)
    # visualize:
    color = ['red', 'blue', 'green', 'cyan', 'magenta']
    # Iterate over the n clusters with a dedicated index so the KNN
    # parameter k is not overwritten.
    for idx in range(n):
        members = X[category == idx]
        plt.scatter(
            members[:, 0],
            members[:, 1],
            c=color[idx % len(color)],  # wrap around if n exceeds the palette
            label=f'Cluster{idx:02d}',
        )
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.legend()
    plt.title('Spectral Clustering Testcase')
    plt.show()
聚类效果: