聚类处理的是无标签数据,因此属于无监督学习。
整体代码:
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
# Load the table and show the first rows as a quick sanity check.
data = pd.read_csv('nba.csv')
print(data.head())

# Min-max normalisation (not z-score standardisation): every feature column
# is rescaled independently using MinMaxScaler's default settings.
# The first column is left out of the features.
# NOTE(review): presumably it holds an identifier such as the player name -
# confirm against nba.csv.
feature_columns = data.iloc[:, 1:]
minmax_scaler = MinMaxScaler()
X = minmax_scaler.fit_transform(feature_columns)
print(X[:5])
# Elbow method: fit K-Means once per candidate k and record the inertia
# (within-cluster sum of squared distances) so that the bend in the curve
# can be read off the plot.
k_candidates = range(2, 10)
# random_state pins the centroid initialisation so the curve is reproducible
# between runs; n_init is spelled out because its default value differs
# between scikit-learn releases.
loss = [
    KMeans(n_clusters=n_clusters, n_init=10, random_state=0).fit(X).inertia_
    for n_clusters in k_candidates
]
# The same range object drives both the fits and the x-axis, so the two
# cannot drift out of sync.
plt.plot(k_candidates, loss)
plt.xlabel('k')
plt.ylabel('loss')
plt.show()
# Number of clusters used for the final model.
# NOTE(review): presumably chosen from the elbow plot - confirm.
k = 4
# Seeded so that cluster ids and membership are identical on every run;
# n_init is explicit because its default differs between scikit-learn
# releases.
model = KMeans(n_clusters=k, n_init=10, random_state=0).fit(X)
# Attach each row's cluster label to the original (unscaled) table.
data['clusters'] = model.labels_
print(data.head())
# For every cluster id, print the first-column values of the rows assigned
# to that cluster.
for cluster_id in range(k):
    print('clusters:', cluster_id)
    in_cluster = data['clusters'] == cluster_id
    label_data = data[in_cluster].iloc[:, 0]
    print(label_data.values)