import numpy as np
from sklearn.cluster import KMeans
def loadData(filePath):
    """Load per-city consumption records from a comma-separated text file.

    Every non-blank line is expected to contain a city name followed by one
    or more numeric fields, e.g. ``Beijing,2959.19,730.79``.

    Args:
        filePath: Path of the text file to read.

    Returns:
        A tuple ``(retData, retCityName)``: ``retData`` is a list with one
        list of floats per city, and ``retCityName`` holds the city names in
        the same (file) order.

    Raises:
        OSError: If the file cannot be opened for reading.
        ValueError: If a field after the city name is not a valid number.
    """
    retData = []      # numeric consumption fields, one row per city
    retCityName = []  # city names, aligned with retData
    # Read-only mode is enough here, and the context manager guarantees the
    # handle is closed even if parsing raises.
    with open(filePath, 'r') as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # Skip blank lines (e.g. a trailing newline at end of file)
                # so they do not produce an empty name and an empty row.
                continue
            items = line.split(",")
            retCityName.append(items[0])  # first field is the city name
            retData.append([float(value) for value in items[1:]])
    return retData, retCityName
if __name__ == '__main__':
    # Number of clusters. Kept in one place so the KMeans model and the
    # per-cluster city lists below can never disagree.
    n_clusters = 5

    # Load the numeric rows and the matching city names.
    data, cityName = loadData('city.txt')

    # init selects the centroid initialisation method; max_iter caps the
    # number of iterations of a single run.
    km = KMeans(n_clusters=n_clusters, init='k-means++', max_iter=300)

    # fit_predict() computes the cluster centres and returns, for each row
    # of data, the index of the cluster it was assigned to.
    label = km.fit_predict(data)

    # Sum the coordinates of each cluster centre: one total per cluster.
    expenses = np.sum(km.cluster_centers_, axis=1)
    print(expenses)

    # Group city names by the cluster index assigned to them.
    CityCluster = [[] for _ in range(n_clusters)]
    for cluster_id, name in zip(label, cityName):
        CityCluster[cluster_id].append(name)

    # Report each cluster's total followed by its member cities.
    for total, cities in zip(expenses, CityCluster):
        print("Express:%.2f " % total)
        print(cities)
# Python machine learning application: the K-means algorithm
# (Web page residue: "latest recommended article published 2023-12-24 10:10:57")