from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
# Min-max scale the raw values, cluster them with k-means, and assemble a
# DataFrame holding both the samples and the fitted cluster centers.
# NOTE(review): `value` is not defined in the visible part of the file; it is
# assumed to be an array-like of numbers created earlier.
X = np.array(value)
X_max = np.max(X)
X_min = np.min(X)
# Guard against constant input: a zero range would otherwise yield 0/0 = NaN,
# which KMeans rejects. Using 1 maps every value to 0, the same convention
# sklearn's MinMaxScaler applies to constant features.
X_range = X_max - X_min
if X_range == 0:
    X_range = 1
X = (X - X_min) / X_range
# KMeans expects a 2-D (n_samples, n_features) array; use a single feature column.
X = X.reshape(-1, 1)

model = KMeans(
    n_clusters=9,
    init='k-means++',
    n_init=10,
    max_iter=300,
    random_state=0,
)
model.fit(X)

# Samples, indexed by the cluster label assigned to each one.
X_out = pd.DataFrame(X, index=model.labels_)
# Fitted cluster centers (default index 0..n_clusters-1, i.e. the cluster label).
X_out_center = pd.DataFrame(model.cluster_centers_)
# Put the centers after the samples so both go through t-SNE together; the
# centers cannot be embedded separately. pd.concat replaces DataFrame.append,
# which was removed in pandas 2.0, and keeps both indexes unchanged.
X_outwithcenter = pd.concat([X_out, X_out_center])
# Reduce X_outwithcenter to two dimensions and visualize the result:
# Reduce the data with t-SNE and display the clustering result.
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

tsne = TSNE()
# fit_transform returns the embedding directly (the same array stored in
# tsne.embedding_), so use the return value instead of discarding it.
embedding = tsne.fit_transform(X_outwithcenter)
# Re-attach the original index (the cluster labels); the t-SNE output is a
# plain array and does not carry it.
X_tsne = pd.DataFrame(embedding, index=X_outwithcenter.index)

# One color per cluster label, in label order (0..8).
cluster_colors = [
    'lightgreen',
    'orange',
    'lightblue',
    'black',
    'chocolate',
    'cyan',
    'azure',
    'bisque',
    'burlywood',
]

# Split the embedded points by cluster label and draw each group in its color.
for label, color in enumerate(cluster_colors):
    members = X_tsne[X_tsne.index == label]
    plt.scatter(members[0], members[1], c=color, marker='o')

# The cluster centers were appended after the samples, so they are the last
# rows of X_tsne; take as many rows as the fitted model has centers.
n_centers = len(model.cluster_centers_)
d = X_tsne.tail(n_centers)
plt.scatter(d[0], d[1], c='yellow', s=150, marker='*')
plt.show()