#普通k_means,结束条件为迭代次数
def _k_means_normal(data,iter_num,center_num,dis):
print(len(data))
#随机选择质心
print('初始化质心')
indexs = list(range(len(data)))
random.shuffle(indexs)
init_centroids_index = indexs[:center_num]
centroids = data[init_centroids_index,:]
#确定种类编号
levels = list(range(center_num))
print('分配种类编号')
print('开始迭代')
sample_target=[]
for i in range(iter_num):
new_centroids=[[] for i in range(center_num)]
new_centroids_num=[0 for i in range(center_num)]
sample_target=[]
#遍历数据
for sample in data:
#计算距离,由距离该数据最近的质心,确定该点所属类别
distances = [dis(sample,centroid) for centroid in centroids]
cur_level = np.argmin(distances)
sample_target.append(cur_level)
#统计,方便迭代完成后重新计算质心
new_centroids_num[cur_level]+=1
if len(new_centroids[cur_level]) < 1:
new_centroids[cur_level] = sample
else:
new_centroids[cur_level] = new_centroids[cur_level]+sample
#重新划分质心
centroids = []
for centroid, num in zip(new_centroids,new_centroids_num):
centroids.append([item/num for item in centroid])
centroids = np.array(centroids)
print('结束')
# print(sample_target)
return sample_target
def k_means(data,iter_num=10,center_num=3,cls='normal',dis=eu_distance):
    """Run k-means clustering on ``data`` using the variant named by ``cls``.

    Only the ``'normal'`` variant is handled here; it forwards to
    ``_k_means_normal`` with the same iteration count, centroid count and
    distance function. Any other ``cls`` value yields ``None``.
    """
    if cls != 'normal':
        # No other variant is dispatched in this function.
        return None
    return _k_means_normal(data, iter_num, center_num, dis=dis)