# coding=gbk
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
# data = pd.read_excel('lactose-lactuse-scatter.xlsx', engine='openpyxl')
# # Select all of the data
# x = data.iloc[:]
#
# # 5 clusters, 1 free point
# mod = KMeans(n_clusters=5, random_state=1)
# # Fit the model on the training data X and predict the labels of X
# y_pre = mod.fit_predict(x)
#
# # Print the clustering result
# r1 = pd.Series(mod.labels_).value_counts()
# r2 = pd.DataFrame(mod.cluster_centers_)
# r = pd.concat([r2,r1],axis=1)
# r.columns=['lactose','lactutose','juleishumu']
# print(r)
#
# #plt.figure(figsize=(20,8),dpi=80)
# # Pick the two columns to plot by position
# plt.scatter(x.iloc[:,0],x.iloc[:,1],marker='o',c=y_pre)
# plt.show()
# First pass: split every row of the score sheet into 10 K-Means clusters.
scores = pd.read_excel('score1000.xlsx', engine='openpyxl')
# All rows and all columns are used as the feature matrix.
features = scores.iloc[:]
kmeans = KMeans(n_clusters=10, random_state=1)
# fit_predict trains the model and returns the cluster label of each row.
labels = kmeans.fit_predict(features)

# Summary table: one row per cluster, centre coordinates followed by size.
cluster_sizes = pd.Series(kmeans.labels_).value_counts()
cluster_centres = pd.DataFrame(kmeans.cluster_centers_)
summary = pd.concat([cluster_centres, cluster_sizes], axis=1)
# Three names are assigned, so the sheet must hold exactly two columns.
# 'juleishumu' is presumably pinyin for "number per cluster" - confirm.
summary.columns = ['total_score', 'interface_delta_B', 'juleishumu']
print(summary)

# The first two columns (selected by position) are the plot axes.
first_axis = features.iloc[:, 0]
second_axis = features.iloc[:, 1]

# Plot 1: default colormap, round markers, coloured by cluster label.
plt.scatter(first_axis, second_axis, marker='o', c=labels)
plt.show()

# Plot 2: `marker` sets the point shape, `c` the colour values, `cmap` maps
# those values to colours and `s` is the marker area. No show() follows
# here, so this figure is displayed by the next plt.show() call.
plt.scatter(first_axis, second_axis, marker=".", c=labels, cmap='rainbow', s=10)
# Reference list of matplotlib colormap names, grouped by category.
# Each entry is a (category, [colormap names]) pair.
cmaps = [
    (
        "Perceptually Uniform Sequential",
        ["viridis", "plasma", "inferno", "magma"],
    ),
    (
        "Sequential",
        [
            "Greys", "Purples", "Blues", "Greens", "Oranges", "Reds",
            "YlOrBr", "YlOrRd", "OrRd", "PuRd", "RdPu", "BuPu",
            "GnBu", "PuBu", "YlGnBu", "PuBuGn", "BuGn", "YlGn",
        ],
    ),
    (
        "Sequential (2)",
        [
            "binary", "gist_yarg", "gist_gray", "gray", "bone", "pink",
            "spring", "summer", "autumn", "winter", "cool", "Wistia",
            "hot", "afmhot", "gist_heat", "copper",
        ],
    ),
    (
        "Diverging",
        [
            "PiYG", "PRGn", "BrBG", "PuOr", "RdGy", "RdBu",
            "RdYlBu", "RdYlGn", "Spectral", "coolwarm", "bwr", "seismic",
        ],
    ),
    (
        "Qualitative",
        [
            "Pastel1", "Pastel2", "Paired", "Accent",
            "Dark2", "Set1", "Set2", "Set3",
            "tab10", "tab20", "tab20b", "tab20c",
        ],
    ),
    (
        "Miscellaneous",
        [
            "flag", "prism", "ocean", "gist_earth", "terrain", "gist_stern",
            "gnuplot", "gnuplot2", "CMRmap", "cubehelix", "brg", "hsv",
            "gist_rainbow", "rainbow", "jet", "nipy_spectral", "gist_ncar",
        ],
    ),
]
# The `c` argument can be any of: ['c', 'b', 'g', 'r', 'm', 'y', 'k', 'w']
#   b -- blue
#   c -- cyan
#   g -- green
#   k -- black
#   m -- magenta
#   r -- red
#   w -- white
#   y -- yellow
# Label each point with its index number
# Second pass: cluster the score sheet into 5 groups and draw a scatter plot
# in which every point is labelled with its row position.
file_name = pd.read_excel('score1000.xlsx', engine='openpyxl')
# All rows and all columns are used as the feature matrix.
data = file_name.iloc[:]
mod = KMeans(n_clusters=5, random_state=1)
# fit_predict trains the model and returns the cluster label of each row.
y_pre = mod.fit_predict(data)

# Summary table (built but not printed in this pass).
r1 = pd.Series(mod.labels_).value_counts()  # number of rows in each cluster
r2 = pd.DataFrame(mod.cluster_centers_)  # centre coordinates of each cluster
r = pd.concat([r2, r1], axis=1)
# Three names are assigned, so the sheet must hold exactly two columns.
# 'juleishumu' is presumably pinyin for "number per cluster" - confirm.
r.columns = ['total_score', 'interface_delta_B', 'juleishumu']

# The first two columns (selected by position) are the plot axes.
x = data.iloc[:, 0]
y = data.iloc[:, 1]
print(x, y)

plt.figure(figsize=(10, 10), dpi=160)
plt.scatter(x, y, marker=".", c=y_pre, cmap='rainbow', s=16)
# Walk the points themselves rather than a hard-coded np.arange(1165):
# the label count then always matches the data, numpy (never imported in
# this file) is not needed, and access is positional instead of relying on
# the Series index containing the labels 0..N-1.
for i, (x_val, y_val) in enumerate(zip(x, y)):
    plt.annotate(str(i), (x_val, y_val))
plt.show()