借助 scikit-learn(sklearn)库实现 K-Means 聚类,并计算各聚类数对应的轮廓系数:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
# Candidate numbers of clusters to evaluate: k = 2, 3, ..., 19.
K = range(2, 20)
# Silhouette coefficient obtained for each k, in the same order as K.
coef = []
for k in K:
    # Build and fit a k-means model with k clusters on `feature` (defined
    # elsewhere); the fixed seed keeps the clustering reproducible.
    km = KMeans(n_clusters=k, random_state=0).fit(feature)
    # Compute the silhouette coefficient on a random subset of the data
    # (sample_size=1000). The subset is seeded too, so the resulting curve
    # is identical between runs instead of varying with the random draw.
    score = silhouette_score(
        feature, km.labels_, sample_size=1000, random_state=0
    )
    coef.append(score)
可视化轮廓系数随聚类数 K 的变化:
import matplotlib.pyplot as plt
%matplotlib inline
# Line chart of the sweep: cluster counts K on the x-axis, the matching
# silhouette coefficients coef on the y-axis.
plt.plot(K, coef)
plt.show()