三维绘图工具包:from mpl_toolkits.mplot3d import Axes3D
聚类分析:
from pandas import read_csv
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from sklearn import metrics
# ---------------------------------------------------------------------------
# K-means clustering of the wine data set.
# Pipeline: load CSV -> standardize features -> PCA to 3 components ->
# KMeans with 3 clusters -> print clustering metrics -> 3D scatter plot.
# ---------------------------------------------------------------------------

# Load the data. Because `names` is supplied, pandas treats every row of the
# file as data (no header row is consumed).
# NOTE(review): this assumes the CSV stores the 13 feature columns first and
# the class label last -- confirm against the actual file layout.
filename = 'wine.csv'
names = ['Alcohol', 'MailicAcid', 'Ash', 'AlclinityOfAsh', 'Magnesium', 'TotalPhenols',
         'Flavanoids', 'Nonflay', 'Proa', 'hue', 'OD280', 'Color', 'prol', 'class']
dataset = read_csv(filename, names=names)
# Re-map the class labels 1/2/3 to 0/1/2.
dataset['class'] = dataset['class'].replace(to_replace=[1, 2, 3], value=[0, 1, 2])
array = dataset.values
X = array[:, 0:13]  # the 13 feature columns
y = array[:, 13]    # the 'class' column; used only to score the clustering

# Dimensionality reduction: standardize once, then keep 3 principal components.
# (The data was previously standardized twice -- StandardScaler followed by
# scale() -- and the second pass was redundant.)
pca = PCA(n_components=3)
X_scale = StandardScaler().fit_transform(X)
X_reduce = pca.fit_transform(X_scale)

# Model training. n_init and random_state are pinned so the printed scores are
# reproducible across runs and across scikit-learn versions (the default value
# of n_init differs between releases).
model = KMeans(n_clusters=3, n_init=10, random_state=0)
model.fit(X_reduce)
labels = model.labels_

# Print the clustering quality scores, in this order: homogeneity,
# completeness, V-measure, adjusted Rand index, adjusted mutual information,
# and the mean silhouette coefficient over all samples.
print('%.3f %.3f %.3f %.3f %.3f %.3f' % (
    metrics.homogeneity_score(y, labels),
    metrics.completeness_score(y, labels),
    metrics.v_measure_score(y, labels),
    metrics.adjusted_rand_score(y, labels),
    metrics.adjusted_mutual_info_score(y, labels),
    metrics.silhouette_score(X_reduce, labels)))

# Plot the clustered samples in the 3D PCA space, colored by cluster label.
fig = plt.figure()
# Create the 3D axes through the figure so they are actually attached to it;
# constructing Axes3D(fig, ...) directly leaves the figure empty on recent
# Matplotlib releases. The file-level Axes3D import is still what registers
# the '3d' projection on old Matplotlib versions.
ax = fig.add_axes([0, 0, .95, 1], projection='3d', elev=48, azim=134)
# np.float was removed from NumPy; the builtin float is the exact equivalent.
ax.scatter(X_reduce[:, 0], X_reduce[:, 1], X_reduce[:, 2], c=labels.astype(float))
plt.show()
运行结果(依次为:同质性、完整性、V-measure、调整兰德指数、调整互信息、平均轮廓系数):
0.864 0.858 0.861 0.880 0.856 0.453