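# Classifying the urban household consumption levels of 31 provinces.
# Uses the k-means algorithm with its default distance, i.e. Euclidean distance.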
import numpy as np
from sklearn.cluster import KMeans
def loadData(filePath):
    """Read a comma-separated data file into feature rows and row names.

    Each non-blank line is expected to look like ``name,v1,v2,...``: the
    first field is kept as a string and every remaining field is converted
    to ``float``.

    Args:
        filePath: Path of the text file to read.

    Returns:
        A tuple ``(retData, retCityName)`` where ``retData`` is a list of
        lists of floats (one inner list per line) and ``retCityName`` is the
        list of first fields, in file order.

    Raises:
        OSError: If the file cannot be opened for reading.
        ValueError: If a field after the first cannot be parsed as a float.
    """
    retData = []
    retCityName = []
    # Read-only mode is enough (the original 'r+' needlessly required write
    # permission), and the context manager guarantees the handle is closed.
    # The platform default encoding is kept so existing data files decode
    # exactly as before.
    with open(filePath, 'r') as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # Skip blank lines (e.g. a trailing newline at end of file);
                # otherwise they would yield an empty name and an empty row.
                continue
            items = line.split(",")
            retCityName.append(items[0])
            retData.append([float(value) for value in items[1:]])
    return retData, retCityName
if __name__ == '__main__':
    # Number of clusters (i.e. cluster centres). Kept in one place so the
    # KMeans model and the per-cluster buckets below can never disagree.
    n_clusters = 3

    data, cityName = loadData('city.txt')  # load feature rows and city names
    km = KMeans(n_clusters=n_clusters)
    # Fit the cluster centres and get the cluster index assigned to each row.
    label = km.fit_predict(data)
    # Sum the coordinates of each cluster centre: one total per cluster.
    expenses = np.sum(km.cluster_centers_, axis=1)

    # One bucket of city names per cluster, indexed by cluster label.
    CityCluster = [[] for _ in range(n_clusters)]
    for cluster_index, city in zip(label, cityName):
        CityCluster[cluster_index].append(city)

    for expense, cities in zip(expenses, CityCluster):
        print("Expenses:%.2f" % expense)  # summed centre coordinates of the cluster
        print(cities)  # cities assigned to the cluster