import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
# Candidate cluster counts to evaluate: k = 2 through 10 inclusive.
k_values = range(2, 11)
# Load the samples to cluster from the Excel workbook.
# NOTE(review): the whole frame is handed to KMeans unchanged further down,
# so the sheet is presumably all-numeric with no missing values - confirm.
data1 = pd.read_excel('data4.xlsx')
# Elbow method: fit KMeans once per candidate k and record the inertia
# (within-cluster sum of squared distances, SSE). The "elbow" of the
# resulting curve suggests a reasonable number of clusters.
sse = []
for k in k_values:
    # Fixed random_state keeps the centroid initialisation reproducible.
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(data1)
    sse.append(kmeans.inertia_)

# Plot SSE against k so the elbow can be identified visually.
plt.figure(figsize=(10, 6))
plt.plot(k_values, sse, 'bx-')
plt.xlabel('k')
plt.ylabel('SSE')
plt.title('The Elbow Method showing the optimal k')
plt.show()
# Silhouette coefficient: fit KMeans once per candidate k and score the
# resulting labelling. Higher mean silhouette means better-separated clusters.
# silhouette_score comes from sklearn.metrics (imported at the top of the
# file); it needs at least 2 clusters, which k_values starting at 2 satisfies.
sil_scores = []
for k in k_values:
    # Same random_state as the elbow pass, so both diagnostics are
    # computed on identical clusterings for each k.
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(data1)
    score = silhouette_score(data1, kmeans.labels_)
    sil_scores.append(score)

# Plot the mean silhouette score against k.
plt.figure(figsize=(10, 6))
plt.plot(k_values, sil_scores, 'bx-')
plt.xlabel('k')
plt.ylabel('Silhouette Score')
plt.title('Silhouette Score showing the optimal k')
plt.show()

# Pick the k with the highest silhouette score; list.index returns the first
# match, so ties resolve to the smallest such k.
k_opt_sil = k_values[sil_scores.index(max(sil_scores))]
print(k_opt_sil)