# -*- coding: utf-8 -*-
"""
Created on Mon Feb 18 14:59:53 2019
@author: Administrator
"""
#from pyclust import KMedoids  # kept (disabled) so the pyclust implementation can be switched in
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import random
def im_txt(file):
    """Load a numeric text file as a 2-D ``float32`` array.

    Args:
        file: Path or file-like object accepted by ``numpy.loadtxt``.

    Returns:
        A ``float32`` array with one row per line of the file.
    """
    # ndmin=2 keeps one row per sample even when the file holds a single
    # line or a single column; without it numpy squeezes the result to 1-D
    # and ``len(data)`` would no longer be the number of samples.
    return np.loadtxt(file, dtype=np.float32, ndmin=2)
def out_txt(outfile, line):
    """Write every item of ``line`` to ``outfile``, one item per text line.

    Args:
        outfile: Path of the file to create or overwrite.
        line: Iterable of items; each is converted with ``str`` before
            being written.

    Raises:
        OSError: If ``outfile`` cannot be opened for writing. Failures that
            happen while writing are reported on stdout instead of raised.
    """
    # The context manager closes the file on every path, including errors.
    with open(outfile, "w") as f:
        try:
            f.writelines(str(item) + "\n" for item in line)
            # Flush inside the try so buffered write errors are reported
            # here rather than when the file is closed.
            f.flush()
        except (OSError, ValueError):
            # Best effort: saving must not abort the run, but only real
            # write/encoding failures are swallowed (not KeyboardInterrupt).
            print("分类数据未保存!!!!")
def initianlize_centers(n_clusters):
    """Pick ``n_clusters`` distinct random sample indices as initial centers.

    Reads the module-level ``n_data`` (number of samples), which must have
    been assigned before this function is called.

    Args:
        n_clusters: Number of cluster centers to draw.

    Returns:
        A list of ``n_clusters`` distinct integers in ``[0, n_data - 1]``,
        e.g. ``[101, 205, 5, 3, 7]``.

    Raises:
        ValueError: If ``n_clusters`` is negative or larger than ``n_data``
            (there are not enough distinct samples to choose from).
    """
    # Sampling without replacement yields distinct indices directly and
    # cannot loop forever when more centers than samples are requested.
    return random.sample(range(n_data), n_clusters)
def clus_process(centers,data):
    """Assign every sample to its nearest cluster center.

    Args:
        centers: Positions in ``data`` of the samples acting as centers.
        data: Sequence of sample vectors.

    Returns:
        A list with one entry per sample: the position within ``centers``
        of the center at the smallest Euclidean distance (the first such
        position when several centers are equally close).
    """
    center_ids = np.array(centers)
    labels = []
    for sample in data:
        # Euclidean distance from this sample to each center, in center order.
        distances = [
            np.sqrt(np.sum(np.square(sample - data[center_idx])))
            for center_idx in center_ids
        ]
        # The position of the smallest distance is the owning cluster.
        labels.append(distances.index(min(distances)))
    return labels
def chose_centers(result_clusters, n_clusters):
    """Draw one random member of every cluster as its candidate new center.

    Args:
        result_clusters: Cluster label of every sample; entry ``j`` is the
            label of sample ``j``.
        n_clusters: Number of clusters; labels ``0 .. n_clusters - 1`` are
            considered, other labels are ignored.

    Returns:
        A list of ``n_clusters`` sample indices; entry ``i`` is the new
        center of cluster ``i``.

    Raises:
        ValueError: If a cluster is empty and no unused sample is left to
            reseed it.
    """
    # Group sample positions by label in one pass over the labels.
    members = {}
    for sample_idx, label in enumerate(result_clusters):
        members.setdefault(label, []).append(sample_idx)

    centers = []
    empty_slots = []
    for cluster in range(n_clusters):
        candidates = members.get(cluster, [])
        if candidates:
            centers.append(random.choice(candidates))
        else:
            # An empty cluster has nothing to sample from. Keep the
            # diagnostic output and fill the slot after the loop, instead
            # of reusing the previous cluster's pick (or failing on an
            # unbound name when the first cluster is the empty one).
            print("sample bug")
            print(candidates)
            centers.append(None)
            empty_slots.append(cluster)

    if empty_slots:
        # Reseed empty clusters with samples that are not centers already,
        # so the returned centers stay distinct.
        taken = set(centers)
        spare = [idx for idx in range(len(result_clusters)) if idx not in taken]
        if len(spare) < len(empty_slots):
            raise ValueError(
                "cannot reseed %d empty cluster(s): only %d unused sample(s)"
                % (len(empty_slots), len(spare))
            )
        for slot, idx in zip(empty_slots, random.sample(spare, len(empty_slots))):
            centers[slot] = idx
    return centers
def count_E(centers_new,data,result_clusters_new):
    """Compute the clustering cost for a set of centers and labels.

    Args:
        centers_new: Positions in ``data`` of the cluster centers.
        data: Sequence of sample vectors.
        result_clusters_new: Cluster label of every sample.

    Returns:
        The sum, over every cluster and each sample labelled with it, of
        the Euclidean distance between the sample and that cluster's
        center. ``0`` when no sample matches any cluster.
    """
    # Accumulate cluster by cluster, samples in order within each cluster.
    return sum(
        np.sqrt(np.sum(np.square(data[sample_idx] - data[center_idx])))
        for cluster, center_idx in enumerate(centers_new)
        for sample_idx in range(len(data))
        if result_clusters_new[sample_idx] == cluster
    )
def KMedoids(n_clusters, data, max_iter):
    """Cluster ``data`` around medoids with a randomized search.

    Starting from random centers, each round draws one random member of
    every current cluster as a candidate center, re-assigns all samples to
    the candidates and keeps them only if the total cost decreases.

    Args:
        n_clusters: Number of clusters.
        data: Sequence of sample vectors.
        max_iter: Limit on the stall counter. The counter is reset whenever
            a round improves the cost, and the search ends once the counter
            exceeds ``max_iter``.

    Returns:
        ``(centers, result_clusters)``: the sample indices of the accepted
        centers and the cluster label of every sample for those centers.
    """
    # Initial random centers and the clustering they induce.
    centers = initianlize_centers(n_clusters)
    result_clusters = clus_process(centers, data)
    # Start from the real cost of the initial solution rather than a fixed
    # constant, so candidates are always compared against something valid.
    E = count_E(centers, data, result_clusters)

    xie = 0  # rounds counted since the last improvement
    while xie <= max_iter:
        centers_new = chose_centers(result_clusters, n_clusters)
        # Cluster with the candidate centers so that the labels, the cost
        # and the returned result all refer to the same centers.
        result_clusters_new = clus_process(centers_new, data)
        E_new = count_E(centers_new, data, result_clusters_new)
        # Accept the candidates only when the cost decreases.
        if E_new < E:
            centers = centers_new
            result_clusters = result_clusters_new
            E = E_new
            print("价值函数为:%s"%E)
            print("聚类中心:%s"%centers)
            xie = 0
        xie = xie + 1
        # Progress output every 10 counted rounds.
        if xie % 10 == 0:
            print(xie)
    return centers, result_clusters
def randomcolor(x):
    """Generate ``x`` distinct random hexadecimal colour codes.

    Args:
        x: Number of colours to generate.

    Returns:
        A list of ``x`` unique strings of the form ``"#RRGGBB"``, each
        digit drawn from ``1-9`` and ``A-F``.

    Raises:
        ValueError: If ``x`` exceeds the number of distinct codes that can
            be built from the digit set.
    """
    # Same digit set as before: '0' is not part of it.
    hex_digits = "123456789ABCDEF"
    if x > len(hex_digits) ** 6:
        raise ValueError("cannot generate %d distinct colours" % x)

    colors = []
    seen = set()  # constant-time duplicate check
    while len(colors) < x:
        color = "#" + "".join(random.choice(hex_digits) for _ in range(6))
        if color not in seen:
            seen.add(color)
            colors.append(color)
    return colors
def main():
    """Cluster the samples in ``text.txt``, plot them and save the labels.

    Loads the data, computes a t-SNE embedding for plotting, runs
    ``KMedoids`` with ``k = 18``, draws a scatter plot coloured by cluster
    and writes the cluster label of every sample to a text file.
    """
    global n_data
    file = "text.txt"
    data = im_txt(file)
    # initianlize_centers reads the sample count from this module global.
    n_data = len(data)

    # Embedding used only for visualisation; clustering runs on ``data``.
    # NOTE(review): recent scikit-learn releases rename ``n_iter`` to
    # ``max_iter`` - confirm against the installed version.
    data_TSNE = TSNE(learning_rate=100,n_iter=5000).fit_transform(data)

    plt.figure(figsize=(12,8))
    # Number of clusters.
    k = 18
    centers, result_clusters = KMedoids(k, data, 10)
    palette = randomcolor(k)
    # One colour per sample, looked up by its cluster label.
    colors = [palette[label] for label in result_clusters]
    plt.subplot(222)
    # NOTE(review): this is set after the figure was created; it presumably
    # only affects figures created later - confirm the intent.
    plt.rcParams['figure.dpi'] = 300
    plt.scatter(data_TSNE[:,0],data_TSNE[:,1],s=10,c=colors)
    # The title previously had no placeholder, so ``k`` never appeared in it.
    plt.title('K-medoids Result of k={}'.format(k))
    out_txt("分类数数(ture).txt",result_clusters)
# Run the whole pipeline. There is no ``if __name__ == "__main__"`` guard,
# so this call also executes when the module is imported.
main()