模糊C均值聚类算法是一种常用的无监督分类方法,被广泛地应用于数据挖掘领域,主要融合了K均值聚类算法和模糊理论,算法步骤如下所示:
Python实现
import torch
import random
import numpy as np
from sklearn import datasets
def _fcm_centers(U, data, m):
    """Return the (C, S) cluster centres weighted by memberships ``U ** m``."""
    weights = U.pow(m)
    # torch.mm keeps the original failure mode for non-2-D ``data``.
    return torch.mm(weights, data) / weights.sum(dim=1, keepdim=True)


def _fcm_memberships(C, data, m, eps):
    """Return the (C, N) membership matrix for the given cluster centres."""
    # Squared Euclidean distance between every centre and every sample.
    diff = C.unsqueeze(1) - data.unsqueeze(0)  # (C, N, S)
    D = torch.clamp(diff.pow(2).sum(dim=2), min=eps)
    # u_ij is proportional to D_ij ** (-1 / (m - 1)); for m == 2 this is 1 / D_ij.
    inv = D.pow(-1.0 / (m - 1.0))
    return inv / inv.sum(dim=0, keepdim=True)


def FCM(data, N_way, iteration, m=2.0, tol=0.0):
    """Cluster ``data`` with the fuzzy C-means algorithm.

    Memberships are initialised randomly with ``torch.rand`` and then
    the centre update and the membership update are alternated.

    Args:
        data: 2-D tensor of shape (N, S) with N samples and S features.
            Non-floating-point tensors are converted to the default float
            dtype; floating-point tensors keep their dtype and device.
        N_way: Number of clusters.
        iteration: Maximum number of membership/centre updates to perform.
        m: Fuzzifier, must be greater than 1. The default of 2 matches the
            membership formula the function has always used.
        tol: Optional early-stopping threshold. When positive, iteration
            stops as soon as the largest absolute change of any membership
            between two consecutive updates is below ``tol``. The default
            of 0 disables early stopping.

    Returns:
        A tuple ``(U, C)``: ``U`` is the (N_way, N) membership matrix whose
        columns each sum to 1, and ``C`` is the (N_way, S) matrix of cluster
        centres computed from that ``U``.

    Raises:
        ValueError: If ``m <= 1`` or if ``N_way`` exceeds the number of
            samples.
    """
    if m <= 1:
        raise ValueError("fuzzifier m must be greater than 1")
    if not data.is_floating_point():
        data = data.to(torch.get_default_dtype())

    N = data.shape[0]
    if N_way > N:
        raise ValueError("N_way must not exceed the number of samples")

    # Clamping distances at machine epsilon keeps the memberships finite
    # when a sample coincides with a cluster centre.
    eps = torch.finfo(data.dtype).eps

    # Random initial memberships, normalised so every sample's column sums to 1.
    U = torch.rand(N_way, N, dtype=data.dtype, device=data.device)
    U = U / U.sum(dim=0, keepdim=True)

    C = _fcm_centers(U, data, m)
    for _ in range(int(iteration)):
        U_next = _fcm_memberships(C, data, m, eps)
        # The convergence test is skipped entirely unless the caller asks
        # for it, so the default path never forces a host synchronisation.
        converged = tol > 0 and float((U_next - U).abs().max()) < tol
        U = U_next
        C = _fcm_centers(U, data, m)
        if converged:
            break
    return U, C
if __name__ == '__main__':
    # Demo: cluster the Iris data set into three fuzzy clusters and report
    # how many samples agree with the ground-truth classes.
    iris = datasets.load_iris()
    data = torch.from_numpy(iris.data.astype(np.float32))
    label = iris.target

    U, C = FCM(data, 3, 100)

    # Hard assignment: index of the cluster with the highest membership
    # for every sample (covers all samples instead of a hard-coded 150).
    pred = torch.max(U, 0)[1]
    for pre in pred:
        print(pre)

    # Cluster indices are arbitrary, so comparing them directly with the
    # class labels is meaningless. Map each cluster to the class that is
    # most frequent among its members and count the samples that match.
    pred_np = pred.numpy()
    count = 0
    for k in np.unique(pred_np):
        members = label[pred_np == k]
        count += int(np.bincount(members).max())
    print(count)