说明:本代码应用于第14届认证杯网络大赛第二阶段,用于对K-means聚类得到的子群(聚类中心)进行系统聚类合并,以减少子群数量,从而减少企业需要设置的停车场个数。
1.code
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
@Time : 2021/5/16 0:16
@Author : kingback
@Site :
@File : 空间聚类.py
@Software: PyCharm
'''
import numpy as np
import pandas as pd
from scipy.io import loadmat
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram,linkage
# K-means cluster centres. XX is plotted on the axis labelled "经度"
# (longitude) and YY on the axis labelled "纬度" (latitude).
XX = [34.76108187, 34.79126601, 34.79680209, 34.82143107, 34.79023412,
      34.8082334, 34.79403493, 34.83429745, 34.74926851, 34.80076535,
      34.78867012, 34.80912569, 34.79873351, 34.76074887, 34.79976415,
      34.79455934, 34.78818356, 34.77236293, 34.77626906, 34.81784023]
YY = [32.04071004, 32.08160757, 32.04856103, 32.11032905, 32.06010103,
      32.05781723, 32.1072213, 32.12024152, 32.03931188, 32.11593169,
      32.09503919, 32.05016822, 32.06604083, 32.05270476, 32.12952825,
      32.12143757, 32.11441352, 32.06610536, 32.04912527, 32.12071612]

# Height of the horizontal line drawn across the dendrogram to mark where the
# tree is cut into merged groups.
CUT_DISTANCE = 0.0125


def build_centers(xs=None, ys=None):
    """Pair up two coordinate sequences into one array of points.

    Args:
        xs: first coordinate of every point; defaults to ``XX``.
        ys: second coordinate of every point; defaults to ``YY``.

    Returns:
        A 2-column numpy array with one row ``[x, y]`` per point.
    """
    xs = XX if xs is None else xs
    ys = YY if ys is None else ys
    return np.column_stack([xs, ys])


def cluster_centers(points, method="average"):
    """Run hierarchical clustering on the points.

    Args:
        points: array with one observation per row.
        method: scipy linkage method; "average" is the group-average method.

    Returns:
        The linkage matrix produced by ``scipy.cluster.hierarchy.linkage``.
    """
    return linkage(points, method)


def plot_centers(points):
    """Scatter-plot the cluster centres and annotate each with its row index."""
    _, ax = plt.subplots(figsize=(8, 8))
    ax.set_aspect('equal')  # same scale on both axes
    # The label gives plt.legend() an artist to show; without one matplotlib
    # warns and the legend is empty.
    plt.scatter(points[:, 0], points[:, 1], marker='*', s=100, c='black',
                label="K-means聚类中心")
    for idx, (x, y) in enumerate(points):
        # xy is the annotated point; xytext nudges the label off the marker.
        plt.annotate(str(idx), xy=(x, y), xytext=(x + 0.001, y + 0.001))
    plt.legend(loc=1)
    plt.title("K-means聚类结果", size=16)
    plt.xlabel("经度", size=15)
    plt.ylabel("纬度", size=15)
    plt.yticks(fontproperties='Times New Roman', size=13)
    plt.xticks(fontproperties='Times New Roman', size=13)
    plt.show()


def plot_dendrogram(z, cut=CUT_DISTANCE):
    """Draw the dendrogram for linkage matrix ``z`` with a cut line at ``cut``."""
    plt.subplots(figsize=(8, 8))
    # No explicit labels are passed, so leaves are labelled by observation
    # index, the same numbers annotated in the scatter plot.
    dendrogram(z, leaf_font_size=14)
    plt.title("系统聚类结果", size=16)
    plt.xlabel("子群标签", size=15)
    plt.ylabel("距离", size=15)
    plt.yticks(fontproperties='Times New Roman', size=13)
    plt.xticks(fontproperties='Times New Roman', size=13)
    plt.axhline(y=cut, c='black')  # cut line separating the merged groups
    plt.show()


def main():
    """Plot the K-means centres, then their hierarchical-clustering dendrogram."""
    # Use a font with Chinese glyphs and keep the minus sign renderable.
    plt.rcParams['font.sans-serif'] = ['KaiTi']
    plt.rcParams['axes.unicode_minus'] = False

    centers = build_centers()
    plot_centers(centers)
    plot_dendrogram(cluster_centers(centers))


if __name__ == "__main__":
    main()
2.效果图
系统聚类树状图中,横坐标表示子群标签,纵坐标表示子群之间的合并距离;图中的水平线(y=0.0125)为分类线。