只是做个测试,最终聚类出来的结果实际应用意义不大。大家可以用sklearn中的鸢尾花(iris)数据集进行测试。
我的数据集样式如下:
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import pandas as pd
# Load the columns at positions 2 and 3 of the CSV, reading it without a
# header row; these two columns are the features that get clustered.
x_sky_train = pd.read_csv('datadatadata1.csv', header=None, usecols=[2, 3])

# Convert the loaded DataFrame into a NumPy array and show its contents
# and its shape.
x_sky_train1 = x_sky_train.values
print(x_sky_train1)
print(x_sky_train1.shape)

# Build the clusterer (six clusters).
clf = KMeans(n_clusters=6)

# Fit the clusterer on the data and get the cluster index of every sample.
y_sky_train = clf.fit_predict(x_sky_train1)

# Cluster label of each sample, as stored on the fitted estimator.
label_clf = clf.labels_
print(label_clf)

# Cluster centers, kept in the df_center DataFrame.
center = clf.cluster_centers_
df_center = pd.DataFrame(data=center, columns=['x', 'y'])

# Attach the labels to the data: each row is indexed by its cluster label.
df = pd.DataFrame(data=x_sky_train1, index=label_clf, columns=['x', 'y'])

# Rows whose cluster label is 0.
df1 = df.loc[df.index == 0]