一、原理步骤:
例1:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
#用scipy求解距离
from scipy.spatial.distance import cdist
# Use the SimHei sans-serif font so that the Chinese axis labels and title
# in this example can be rendered by matplotlib.
# The string keys/values must use plain ASCII quotes; typographic quotes
# (‘ ’) are not valid Python string delimiters.
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = ['SimHei']
# Draw two well-separated synthetic clusters. Each call returns a (2, 10)
# array of uniform samples: the first from the 0.5-1.5 range, the second
# from the 3.5-4.5 range.
cluster1 = np.random.uniform(low=0.5, high=1.5, size=(2, 10))
cluster2 = np.random.uniform(low=3.5, high=4.5, size=(2, 10))
# Join the two (2, 10) arrays side by side into a (2, 20) array, then
# transpose so that every row of X is one 2-D sample: X has shape (20, 2).
X = np.concatenate((cluster1, cluster2), axis=1).T
print(X)
# Show the raw samples as black dots on a new figure with a grid and both
# axes fixed to the range 0-5.
plt.figure()
scatter_ax = plt.gca()
scatter_ax.axis([0, 5, 0, 5])
scatter_ax.grid(True)
scatter_ax.plot(X[:, 0], X[:, 1], "k.")
# Elbow method: fit K-means for k = 1..9 and record, for each k, the mean
# distortion -- the average Euclidean distance from a sample to its nearest
# cluster centre.
K = range(1, 10)
meandistortions = []
for k in K:
    kmeans = KMeans(n_clusters=k)
    kmeans.fit(X)
    # cdist yields an (n_samples, k) distance matrix; the row-wise minimum
    # is each sample's distance to its closest centre.
    nearest = np.min(cdist(X, kmeans.cluster_centers_, "euclidean"), axis=1)
    meandistortions.append(nearest.sum() / X.shape[0])

# Draw the curve on its own figure: the figure opened earlier for the raw
# samples has its axes fixed to [0, 5] via plt.axis, so plotting there
# would hide every point with k > 5.
plt.figure()
plt.plot(K, meandistortions, 'bx-')
plt.xlabel('k')
plt.ylabel('平均畸变程度')
plt.title('用肘部法则来确定最佳的K值')
plt.show()
例2:
import numpy as np
from sklearn.cluster import KMeans
from sklearn import metrics
import matplotlib.pyplot as plt
# Use the SimHei sans-serif font so that the Chinese titles in this example
# can be rendered by matplotlib.
plt.rcParams.update({
    'font.family': ['sans-serif'],
    'font.sans-serif': ['SimHei'],
})
# One 8x10-inch figure; select the first cell of its 3-row, 2-column grid.
plt.figure(figsize=(8, 10))
plt.subplot(321)
# Coordinates of the sample points.
x1 = np.array(range(10))
x2 = np.array([1, 3, 2, 2, 8, 6, 7, 6, 7, 1, 2, 1, 1, 3])
# NOTE(review): x1 holds 10 values but x2 holds 14. Only the common prefix
# can be paired into points, so do that explicitly rather than relying on
# zip() silently discarding the surplus values. Confirm whether x1 was
# meant to list 14 coordinates as well.
n_samples = min(len(x1), len(x2))
# X has one (x1, x2) sample per row: shape (n_samples, 2).
X = np.column_stack((x1[:n_samples], x2[:n_samples]))
# Fill the current subplot with the unclustered samples; both axes span
# 0-10, the same limits the per-k panels use.
sample_ax = plt.gca()
sample_ax.set_xlim([0, 10])
sample_ax.set_ylim([0, 10])
sample_ax.set_title("样本")
sample_ax.scatter(X[:, 0], X[:, 1])
# One colour/marker pair per cluster label; eight entries cover the largest
# k tried below.
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'b']
markers = ['o', 's', 'D', 'v', '^', 'p', '*', '+']
# Cluster counts to compare. Panel 1 of the 3x2 grid already shows the raw
# samples, so the clustered panels occupy positions 2..6.
tests = [2, 3, 4, 5, 8]
for subplot_counter, t in enumerate(tests, start=2):
    plt.subplot(3, 2, subplot_counter)
    kmeans_model = KMeans(n_clusters=t).fit(X)
    labels = kmeans_model.labels_
    # Draw each cluster in one call, taking the points from X (the array
    # that was actually clustered) so points and labels always line up.
    for label in np.unique(labels):
        members = X[labels == label]
        plt.plot(members[:, 0], members[:, 1], color=colors[label],
                 marker=markers[label], ls='None')
    plt.xlim([0, 10])
    plt.ylim([0, 10])
    # Silhouette coefficient of this clustering, shown in the panel title.
    score = metrics.silhouette_score(X, labels, metric='euclidean')
    plt.title('K = %s, 轮廓系数 = %.03f' % (t, score))
plt.show()