sklearn 官方文档(KMeans):
http://scikit-learn.org/dev/modules/generated/sklearn.cluster.KMeans.html
# K-Means Algorithm
import numpy as np
from sklearn.cluster import KMeans
def loadData(file, n_rows=1514, n_cols=5):
    """Load the leading numeric window of a comma-separated file.

    The file is first read as text and only the selected window is converted
    to floats, so cells outside that window do not have to be numeric.

    Args:
        file: Path (or file-like object) accepted by ``np.loadtxt``.
        n_rows: Maximum number of leading rows to keep; ``None`` keeps all.
        n_cols: Maximum number of leading columns to keep; ``None`` keeps all.

    Returns:
        A 2-D float array holding at most ``n_rows`` x ``n_cols`` values.

    Raises:
        ValueError: If a cell inside the selected window is not a number.
    """
    # Builtin ``str``/``float`` replace the ``np.str``/``np.float`` aliases,
    # which no longer exist in current NumPy releases.
    # ``ndmin=2`` keeps a single-row file two-dimensional so the slice below
    # is always valid.
    raw = np.loadtxt(file, dtype=str, delimiter=",", ndmin=2)
    return raw[:n_rows, :n_cols].astype(float)
if __name__ == '__main__':
    # Script entry point: cluster the rows of the CSV into two groups with
    # K-Means, then collect the rows belonging to each group.
    handledData = loadData("handled300001.CSV")

    estimator = KMeans(n_clusters=2)
    # fit_predict fits the model and returns the cluster index of every row
    # (the same values the estimator exposes as ``labels_``).
    prelabel = estimator.fit_predict(handledData)
    centroids = estimator.cluster_centers_
    # ``estimator.inertia_`` is also available after fitting if the
    # within-cluster sum of squared distances is needed.

    # KMeans only reports labels, so split the samples by label here.
    # Boolean masks select whole rows; the previous per-row
    # ``list.append(a, b, c, d, e)`` call raised TypeError because ``append``
    # accepts exactly one argument.
    res1 = handledData[prelabel == 0]  # rows assigned to cluster 0
    res2 = handledData[prelabel == 1]  # rows assigned to cluster 1

    print(prelabel, '\n', centroids)
我看了官网的文档,貌似 sklearn 的 KMeans 不能直接输出各个 cluster,所以补充了后面的 res1 和 res2 来分别存储两个 cluster。