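# After running Spark's k-means and obtaining the cluster centers, compute the
# distance from every point to the center of its cluster; the distances are
# used to pick the most typical (closest-to-center) points of each cluster.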
import pandas as pd
import sklearn.preprocessing as preproc
import matplotlib.pyplot as plt
import numpy as np
CENTER_CSV = 'D:\\data\\center\\center.csv'
FEN_CSV = 'D:\\data\\fenxi\\fenxi.csv'


def distances_to_center(center_features, member_features):
    """Return the Euclidean distance of each member row to the cluster center.

    Args:
        center_features: The center's feature vector, either 1-D ``(d,)`` or
            2-D with a single row ``(1, d)``.
        member_features: 2-D array-like ``(n, d)`` with one row per point.

    Returns:
        A 1-D float ``numpy`` array of length ``n``.

    Raises:
        ValueError: If the values are not numeric or the shapes cannot be
            broadcast against each other.
    """
    # DataFrame.values yields an object array when column dtypes are mixed;
    # cast once up front so all arithmetic runs on plain floats.
    center = np.asarray(center_features, dtype=float)
    members = np.asarray(member_features, dtype=float)
    diff = members - center
    return np.sqrt(np.sum(diff ** 2, axis=1))


def main():
    """Write one CSV per cluster with each point's distance to its center.

    Reads the cluster centers and the clustered points from ``CENTER_CSV`` and
    ``FEN_CSV``. For every cluster id listed in the centers file, the matching
    points are copied, a column ``uu_<cluster>`` holding the distance to the
    center is appended, and the result is saved as ``resutl_<cluster>.csv``.
    """
    df_center = pd.read_csv(CENTER_CSV, delimiter=',')
    df_fen = pd.read_csv(FEN_CSV, delimiter=',')

    for cluster_id in df_center['cluster']:
        # NOTE(review): feature columns are selected by position. This assumes
        # the first column of center.csv and the first and last columns of
        # fenxi.csv are not features (presumably id / cluster label), and that
        # the remaining columns line up one-to-one - confirm against the data.
        center_features = df_center.loc[df_center['cluster'] == cluster_id].values[:, 1:]

        members = df_fen.loc[df_fen['cluster'] == cluster_id].copy()
        # Slice before the distance column is appended so it is never
        # mistaken for a feature.
        member_features = members.values[:, 1:-1]

        members["uu_" + str(cluster_id)] = distances_to_center(center_features, member_features)
        # The "resutl" spelling is kept on purpose so existing output file
        # names do not change.
        members.to_csv("D:\\data\\center\\resutl_" + str(cluster_id) + ".csv", encoding="utf_8_sig")


if __name__ == "__main__":
    main()