用ARI(Adjusted Rand Index,调整兰德指数)来评价K-Means聚类性能。
代码如下:
# -*- coding: utf-8 -*-
"""
Created on Tue May 22 14:49:02 2018
@author: eagle
"""
# =============================================================================
# K均值算法实现的数据聚类
# =============================================================================
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn import metrics
# Load the train and test splits of the UCI "optdigits" data set. The files
# carry no header row, so pandas labels the columns with integers.
digits_train = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/optdigits/optdigits.tra', header=None)
digits_test = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/optdigits/optdigits.tes', header=None)

# Columns 0..63 are used as the feature vector and column 64 as the reference
# label for each sample.
feature_columns = np.arange(64)
label_column = 64

X_train = digits_train[feature_columns]
y_train = digits_train[label_column]  # kept for callers; not used for fitting
X_test = digits_test[feature_columns]
y_test = digits_test[label_column]

# Cluster into 10 groups (presumably one per digit class 0-9 -- confirm
# against the data set description).
# n_init and random_state are pinned explicitly: without random_state the
# centroid initialisation (and therefore the printed score) differs between
# runs, and the default value of n_init is not the same across scikit-learn
# releases.
kmeans = KMeans(n_clusters=10, n_init=10, random_state=0)
kmeans.fit(X_train)
y_pred = kmeans.predict(X_test)

# Evaluate the K-Means clustering with the Adjusted Rand Index (ARI), which
# compares the predicted cluster ids against the reference labels.
print(metrics.adjusted_rand_score(y_test, y_pred))