import pandas as pd
from sklearn.cluster import KMeans
def standardize(data):
    """Return the column-wise z-score of *data*: (value - mean) / std.

    Columns whose standard deviation is exactly 0 (constant columns) are
    mapped to 0 instead of NaN, so that the result can still be fed to a
    clustering model.

    Args:
        data: numeric ``pandas.DataFrame``; its index and columns are kept.

    Returns:
        A float ``pandas.DataFrame`` with the same index and columns.
    """
    spread = data.std()
    # Dividing by a zero spread would produce NaN for the whole column;
    # dividing by 1 leaves the already-zero centred values untouched.
    spread = spread.replace(0, 1.0)
    return 1.0 * (data - data.mean()) / spread


# Keep the pipeline behind the main guard so importing this module never
# triggers file I/O or model fitting.
if __name__ == '__main__':
    inputfile = 'data.xlsx'    # source workbook; must contain an 'ID' column
    outputfile = 'data1.xlsx'  # destination workbook with the cluster labels
    k = 3                      # number of clusters
    iteration = 500            # upper bound on KMeans iterations per run

    data = pd.read_excel(inputfile, index_col='ID')
    data_zs = standardize(data)

    # `n_jobs` was deprecated in scikit-learn 0.23 and removed in 1.0, so it
    # is no longer passed. `n_init=10` pins the historical default so results
    # do not silently change with the library version.
    model = KMeans(n_clusters=k, n_init=10, max_iter=iteration)
    model.fit(data_zs)

    # Summary table: one row per cluster, centre coordinates followed by the
    # number of samples assigned to that cluster (aligned on cluster id).
    cluster_sizes = pd.Series(model.labels_).value_counts()
    cluster_centers = pd.DataFrame(model.cluster_centers_)
    summary = pd.concat([cluster_centers, cluster_sizes], axis=1)
    print(summary)

    # Attach each row's cluster label to the original (unscaled) data. The
    # label Series reuses data.index so the concat aligns row by row.
    labels = pd.Series(model.labels_, index=data.index)
    labelled = pd.concat([data, labels], axis=1)
    # The extra column header means "cluster category".
    labelled.columns = list(data.columns) + [u'聚类类别']
    labelled.to_excel(outputfile)
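# Note: at first I left out this call:
#     model.fit(data_zs)
# Without it the script fails with an error saying the KMeans object has no
# `labels_` attribute, because `labels_` is only set once the model is fitted.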
# On the second run the error was:
#     raise ImportError('[joblib] Attempting to do parallel computing '
# ImportError: [joblib] Attempting to do parallel computing without protecting your import on a system that does not support forking. To use parallel-computing in a script, you must protect your main loop using "if __name__ == '__main__'". Please see the joblib documentation on Parallel for more information
Traceback (most recent call last):