from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
# Load the data.
# NOTE(review): the file name contains a space before ".csv" -- confirm it
# matches the file on disk.
data = pd.read_csv('nba .csv')
# Preview the first rows; as a bare expression this is only displayed in an
# interactive session (notebook/REPL), not when run as a script.
data.head()
# Scale the features.
# MinMaxScaler with default settings rescales every column to the [0, 1] range.
minmax_scaler = MinMaxScaler()
# Every column except the first is used as a feature.
# NOTE(review): presumably column 0 is a non-numeric identifier (it is what
# gets printed per cluster later) -- confirm against the CSV.
X = minmax_scaler.fit_transform(data.iloc[:, 1:])
# Preview the first five scaled rows (only displayed in an interactive session).
X[:5]
# Elbow method: fit K-Means for each candidate cluster count and plot the
# resulting inertia against k.
# NOTE: no random_state is passed to KMeans, so the curve can differ slightly
# from run to run.
k_range = range(2, 10)
loss = []
for i in k_range:
    model = KMeans(n_clusters=i).fit(X)
    # inertia_ is the fitted model's loss value for this k.
    loss.append(model.inertia_)

plt.plot(k_range, loss)
plt.xlabel('k')
plt.ylabel('loss')
plt.show()
# Fit the final model with the chosen number of clusters.
# NOTE(review): k = 4 is presumably read off the elbow plot -- confirm.
k = 4
model = KMeans(n_clusters=k).fit(X)
# Attach each row's cluster label to the original data.
data['clusters'] = model.labels_
# Preview the labelled data (only displayed in an interactive session).
data.head()
# Print the members of each cluster.
for i in range(k):
    print('clusters:', i)
    # First-column values of the rows assigned to cluster i.
    label_data = data[data['clusters'] == i].iloc[:, 0]
    print(label_data.values)