import numpy as np
from sklearn.cluster import KMeans
def loadData(filePath):
    """Read a comma-separated text file of labelled numeric rows.

    Every non-blank line is expected to have the form ``name,v1,v2,...``.
    The first field is kept as a string label; each remaining field is
    converted with ``float``. Blank lines (for example a trailing newline
    at the end of the file) are skipped so they cannot produce an empty
    label with an empty, ragged data row.

    Args:
        filePath: Path of the text file to read. It is decoded with the
            platform default encoding, as before.

    Returns:
        A ``(retCityName, retData)`` tuple: the list of first-column labels
        and a parallel list of ``list[float]`` rows, in file order.

    Raises:
        OSError: If the file cannot be opened for reading.
        ValueError: If a value field cannot be parsed as a float.
    """
    retCityName = []
    retData = []
    # Read-only mode is sufficient (the data is never written back), and the
    # ``with`` block guarantees the handle is closed even if parsing fails.
    with open(filePath, 'r') as fr:
        for line in fr:
            # strip() drops surrounding whitespace, including the newline.
            line = line.strip()
            if not line:
                continue
            # split(',') cuts the line at every comma.
            items = line.split(',')
            retCityName.append(items[0])
            retData.append([float(value) for value in items[1:]])
    return retCityName, retData
if __name__ == '__main__':
    # Single source of truth for k: it drives both the KMeans model and the
    # number of per-cluster name buckets, so the two can never disagree.
    N_CLUSTERS = 3

    cityName, data = loadData('31省市居民家庭消费水平-city.txt')

    # n_init is the number of times k-means is run with different centroid
    # seeds (see the scikit-learn KMeans documentation).
    km = KMeans(n_clusters=N_CLUSTERS, n_init=100)
    # Fit the model and get the cluster index assigned to each input row.
    label = km.fit_predict(data)

    # Sum of each centroid's coordinates (one total per cluster); presumably
    # the total expenditure of a cluster's "average" city - confirm against
    # the column meaning of the data file.
    expenses = np.sum(km.cluster_centers_, axis=1)
    print(km.cluster_centers_)
    # Column-wise sum across all centroids.
    print(np.sum(km.cluster_centers_, axis=0))
    print(expenses)

    # Group the row labels by the cluster each row was assigned to.
    CityCluster = [[] for _ in range(N_CLUSTERS)]
    for name, cluster_id in zip(cityName, label):
        CityCluster[cluster_id].append(name)

    for total, members in zip(expenses, CityCluster):
        print("Expenses:%.2f" % total)
        print(members)
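# K-means notes (Kmeans笔记)
# Scraped page footer: "latest recommended article published on 2024-07-09 17:33:15" (最新推荐文章于 2024-07-09 17:33:15 发布)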