![](https://img-blog.csdnimg.cn/20200911132335937.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MTcxMjgwOA==,size_16,color_FFFFFF,t_70)
![](https://img-blog.csdnimg.cn/20200911132513167.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MTcxMjgwOA==,size_16,color_FFFFFF,t_70)
![](https://img-blog.csdnimg.cn/20200911132923492.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MTcxMjgwOA==,size_16,color_FFFFFF,t_70)
![](https://img-blog.csdnimg.cn/20200911132943967.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MTcxMjgwOA==,size_16,color_FFFFFF,t_70)
![](https://img-blog.csdnimg.cn/20200911133318738.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MTcxMjgwOA==,size_16,color_FFFFFF,t_70)
from sklearn.cluster import KMeans
# Number of clusters to look for.
k = 4
model = KMeans(n_clusters=k, random_state=420)

# Approach 1: call fit() on its own; the fitted estimator then exposes the
# cluster label of every training sample through its `labels_` attribute.
model.fit(data)
SSE = model.inertia_               # sum of squared distances to the closest center
centers = model.cluster_centers_   # one row of coordinates per cluster center
y_pred = model.labels_

# Approach 2: fit and predict in a single call (the original note recommends
# this for small data sets).
y_pred_2 = model.fit_predict(data)

# Approach 3: fit first, then call predict() separately (the original note
# recommends this for large data sets). Here the same `data` is used for
# both steps.
y_pred_3 = model.fit(data).predict(data)
![](https://img-blog.csdnimg.cn/20200911133426893.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MTcxMjgwOA==,size_16,color_FFFFFF,t_70)
![](https://img-blog.csdnimg.cn/20200911133511277.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MTcxMjgwOA==,size_16,color_FFFFFF,t_70)
![](https://img-blog.csdnimg.cn/20200911133538601.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MTcxMjgwOA==,size_16,color_FFFFFF,t_70)
![](https://img-blog.csdnimg.cn/20200911133608697.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MTcxMjgwOA==,size_16,color_FFFFFF,t_70)
![](https://img-blog.csdnimg.cn/20200911133634333.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MTcxMjgwOA==,size_16,color_FFFFFF,t_70)
![](https://img-blog.csdnimg.cn/2020091113394360.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MTcxMjgwOA==,size_16,color_FFFFFF,t_70)
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_samples, silhouette_score
# Silhouette analysis for KMeans: for each candidate number of clusters,
# draw a per-cluster silhouette plot (left) next to a scatter plot of the
# clustered samples and their centroids (right).

# Prepare the data: keep only the sample matrix returned by make_blobs()
# (element 0 of its result) and name the two feature columns.
data_x = pd.DataFrame(make_blobs()[0], columns=['x1', 'x2'])

# Blank rows left between consecutive cluster bands in the silhouette plot.
# The y-axis limit below reserves (i + 1) * 10 extra rows for exactly these
# gaps, so the bands must actually be spaced by this amount.
cluster_gap = 10

for i in range(2, 5):
    # Figure setup: one row, two panels.
    fig, (ax1, ax2) = plt.subplots(1, 2)
    fig.set_size_inches(18, 7)
    # Only the [-0.1, 1] part of the silhouette range is displayed.
    ax1.set_xlim([-0.1, 1])
    # Room for every sample plus the gaps around the i cluster bands.
    ax1.set_ylim(0, data_x.shape[0] + (i + 1) * cluster_gap)

    # Clustering
    model = KMeans(n_clusters=i, random_state=0).fit(data_x)
    labels = model.labels_
    y_pred = pd.DataFrame(labels)
    # Pass the 1-D label array (not the single-column DataFrame) to the
    # metric functions, which expect one label per sample.
    silhouette_all = silhouette_score(data_x, labels)
    silhouette_one = pd.DataFrame(silhouette_samples(data_x, labels))
    data = pd.concat([data_x, y_pred, silhouette_one], axis=1)
    data.columns = ['x1', 'x2', 'y_pred', 'silh']

    # Left panel: silhouette plot, one horizontal band per cluster.
    y_lower = cluster_gap
    for j in range(i):
        # Silhouette values of cluster j, sorted in descending order.
        silh = data.loc[data['y_pred'] == j, 'silh'].sort_values(
            ascending=False)
        size_silh = silh.shape[0]
        y_upper = y_lower + size_silh
        color = cm.nipy_spectral(float(j) / i)
        # Fill between x = 0 (the default second boundary) and the
        # silhouette value of each sample in the cluster.
        ax1.fill_betweenx(np.arange(y_lower, y_upper),
                          silh,
                          facecolor=color,
                          alpha=0.7)
        # Write the cluster label at the vertical middle of its band.
        ax1.text(-0.05, y_lower + 0.5 * size_silh, str(j))
        # Start the next band above this one, leaving a blank gap.
        y_lower = y_upper + cluster_gap

    ax1.set_title("The silhouette plot for the various clusters.")
    ax1.set_xlabel("The silhouette coefficient values")
    ax1.set_ylabel("Cluster label")
    # Vertical line at the mean silhouette score over all samples.
    ax1.axvline(x=silhouette_all, color="red", linestyle="--")

    # Right panel: scatter plot of the samples, coloured by cluster.
    colors = cm.nipy_spectral(labels.astype(float) / i)
    ax2.scatter(data['x1'], data['x2'], c=colors)
    # Mark every cluster centroid. Indexing with the inner-loop variable
    # `j` here would draw only the last centroid.
    centers = model.cluster_centers_
    ax2.scatter(centers[:, 0], centers[:, 1], marker='x',
                c="red", alpha=1, s=200)

    ax2.set_title("The visualization of the clustered data.")
    ax2.set_xlabel("Feature space for the 1st feature")
    ax2.set_ylabel("Feature space for the 2nd feature")

    plt.suptitle(("For n_clusters = %d "
                  "The average silhouette_score is : %f" % (i, silhouette_all)),
                 fontsize=25, fontweight='bold')
![](https://img-blog.csdnimg.cn/20200911134025434.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MTcxMjgwOA==,size_16,color_FFFFFF,t_70)
![](https://img-blog.csdnimg.cn/20200911134054857.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MTcxMjgwOA==,size_16,color_FFFFFF,t_70)
![](https://img-blog.csdnimg.cn/20200911134118864.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MTcxMjgwOA==,size_16,color_FFFFFF,t_70)
![](https://img-blog.csdnimg.cn/20200911134153428.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MTcxMjgwOA==,size_16,color_FFFFFF,t_70)
![](https://img-blog.csdnimg.cn/20200911134212614.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MTcxMjgwOA==,size_16,color_FFFFFF,t_70)
![](https://img-blog.csdnimg.cn/20200911134238826.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80MTcxMjgwOA==,size_16,color_FFFFFF,t_70)