满意答案
JJW7890
2016.09.08
采纳率:50% 等级:11
已帮助:5140人
# -*- coding: utf-8 -*-
"""Cluster tab-separated feature rows with k-means and persist the model.

Reads ``c:/test/final.dat``, treats every column from index 3 onward as a
float feature, fits a 9-cluster KMeans model, prints the fit diagnostics,
and round-trips the fitted model through ``c:/km.pkl``.
"""
from sklearn.cluster import KMeans
import numpy

try:
    # Prefer the standalone package: the copy vendored under
    # ``sklearn.externals`` is not available in recent scikit-learn releases.
    import joblib
except ImportError:
    # Older scikit-learn installs that only provide the vendored copy.
    from sklearn.externals import joblib

# ``with`` guarantees the file handle is closed even if parsing fails.
# Blank lines are skipped: they would otherwise yield an empty feature row
# and make the feature matrix ragged.
with open('c:/test/final.dat', 'r') as final:
    data = [line.strip().split('\t') for line in final if line.strip()]

# The first three columns are not used as features; the rest must parse as
# floats (a non-numeric cell raises ValueError).
feature = [[float(x) for x in row[3:]] for row in data]

# Fit the k-means model.
clf = KMeans(n_clusters=9)
s = clf.fit(feature)
print(s)

# The 9 cluster centres.
print(clf.cluster_centers_)

# The cluster index assigned to each sample.
print(clf.labels_)

# Inertia, used to judge whether the number of clusters is suitable: smaller
# means tighter clusters; pick the cluster count at the elbow point.
print(clf.inertia_)

# Predict the cluster of each sample.
print(clf.predict(feature))

# Save the model.
joblib.dump(clf, 'c:/km.pkl')

# Load the saved model.
# NOTE: joblib.load unpickles the file; only load model files you trust.
clf = joblib.load('c:/km.pkl')

# Disabled elbow-method sweep: evaluates inertia for a range of cluster
# counts so the elbow point can be chosen. Uncomment to run.
#
# for i in range(5, 30, 1):
#     clf = KMeans(n_clusters=i)
#     s = clf.fit(feature)
#     print('%s %s' % (i, clf.inertia_))
00分享举报