K-Means聚类思想
- 随机选K个点作为中心
- 根据剩下点与选出的K个中心点的距离,归入最近的类
- 对每个类,重新计算该类中所有点的均值,作为该类新的中心
- 重复第2、3步(归类与更新中心),直至聚类中心不再发生改变
python实现:
import numpy as np
from sklearn.cluster import KMeans
def loadData(filePath):
    """Load labelled numeric rows from a comma-separated text file.

    Each non-blank line is expected to look like ``name,v1,v2,...``: the
    first field is kept as a label and every remaining field is parsed
    with ``float``.

    Args:
        filePath: Path of the text file to read.

    Returns:
        A ``(retData, retCityName)`` tuple. ``retData`` is a list with one
        list of floats per row; ``retCityName`` holds the first-column
        labels in the same order.

    Raises:
        OSError: If the file cannot be opened for reading.
        ValueError: If a field after the first is not a valid float.
    """
    retData = []
    retCityName = []
    # Read-only mode: the file is never written, so write permission must
    # not be required. The context manager closes the handle even when
    # parsing raises.
    with open(filePath, 'r') as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # A blank (e.g. trailing) line would otherwise add an empty
                # label and an empty feature row, making the data ragged.
                continue
            # Split the label (first field) from the numeric fields.
            items = line.split(',')
            retCityName.append(items[0])
            retData.append([float(value) for value in items[1:]])
    return retData, retCityName
# Script entry point: cluster the rows loaded from 'city.txt' into three groups.
# NOTE(review): the statements below sit at column 0 in this copy; the body's
# indentation appears to have been lost and must be restored before running.
if __name__=='__main__':
# data: one list of floats per row; cityName: the first-column labels, same order.
data,cityName = loadData('city.txt')
km = KMeans(n_clusters=3)# request three clusters
label = km.fit_predict(data)# fit the model and get one cluster index per row
# Sum each cluster centre's coordinates, giving one total per cluster.
# NOTE(review): presumably the columns are expense categories, so this is the
# total spending of each centre -- confirm against the data file. `expenses`
# is not used in the lines visible here.
expenses = np.sum(km.cluster_centers_,axis=1)