运用sklearn的KMeans进行聚类数据分析。
聚类:即把相似的元素归入同一类(簇),使同一类内的元素尽量相似、不同类之间的元素尽量不同。
from sklearn.cluster import Birch
from sklearn.cluster import KMeans
# Cluster the Pokemon on their Attack and Defense columns with K-Means and
# show the result as a scatter plot with one legend entry per cluster.
#
# NOTE: DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
# same NumPy array on both old and new pandas versions.
X = pokemon[['Attack', 'Defense']].values
clf = KMeans(n_clusters=4)  # run the clustering
y_pred = clf.fit_predict(X)
x = [n[0] for n in X]
y = [n[1] for n in X]

# Draw one scatter series per cluster. A single scatter call produces a single
# artist, so passing four labels to plt.legend() would only ever show "A".
for cluster_id, cluster_name in enumerate(['A', 'B', 'C', 'D']):
    members = y_pred == cluster_id  # boolean mask of rows in this cluster
    plt.scatter(X[members, 0], X[members, 1], marker='o', label=cluster_name)

plt.title('Pokemon Data')
plt.xlabel('Attack')
plt.ylabel('Defense')
plt.legend()
plt.show()
优化散点图:
# Cluster the Pokemon on Attack/Defense with K-Means and plot every cluster
# with its own colour/marker so the groups are easy to tell apart.
#
# NOTE: DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
# same NumPy array on both old and new pandas versions.
X = pokemon[['Attack', 'Defense']].values
# Only two columns are used here. To cluster on more columns, select them
# instead and reduce the result to two dimensions before plotting:
#   X = pokemon[['Total', 'HP', 'Attack', 'Defense',
#                'Sp. Atk', 'Sp. Def', 'Speed']].values
#   from sklearn.decomposition import PCA
#   pca = PCA(n_components=2)
#   newData = pca.fit_transform(X)   # then plot newData instead of X
clf = KMeans(n_clusters=4)
y_pred = clf.fit_predict(X)

# (legend label, matplotlib format string) for cluster ids 0..3.
# The marker lives in the format string; combining e.g. 'or' with marker="x"
# defines the marker twice and triggers a warning on newer matplotlib.
# NOTE(review): clusters B and D both use the 'o' marker (they differ only by
# colour), as in the original figure — pick another marker if distinct shapes
# are wanted.
cluster_styles = [
    ('A', 'xr'),
    ('B', 'og'),
    ('C', '*b'),
    ('D', 'oy'),
]

plt.figure(figsize=(10, 10))
for cluster_id, (cluster_name, fmt) in enumerate(cluster_styles):
    # Rows of X that K-Means assigned to this cluster.
    members = X[y_pred == cluster_id]
    plt.plot(members[:, 0], members[:, 1], fmt, label=cluster_name)

plt.legend(fontsize=10)
plt.title('Pokemon Data')
plt.xlabel('Attack')
plt.ylabel('Defense')
plt.show()
对多列数据进行聚类: