# -*- coding: utf-8 -*-
"""
@Author: xxx
@Features of this code:利用聚类找出24、48、72小时雨型
"""
import numpy as np
import pandas as pd
import glob
# import matplotlib.pyplot as plt
from sklearn.cluster import KMeans # 导入K均值聚类算法
# from pylab import *
# Read data
def read_data(filename):
    """Load a whitespace-separated numeric text file into a 2-D float array.

    Each non-blank line of the file becomes one row. Rows that have fewer
    values than the longest row are padded on the right with NaN. After
    parsing, a column of zeros is prepended and the last column is dropped,
    so the result has the same number of columns as the longest input row.

    Args:
        filename: Path of the text file to read.

    Returns:
        A float ``numpy.ndarray`` of shape ``(n_rows, n_cols)``; an empty
        ``(0, 0)`` array when the file contains no data lines.

    Raises:
        ValueError: If a token cannot be converted to ``float``.
    """
    # The context manager guarantees the handle is closed even on errors.
    with open(filename, 'r') as f:
        rows = [line.split() for line in f]
    # Blank lines carry no data; skipping them keeps the row count honest.
    rows = [tokens for tokens in rows if tokens]

    width = max((len(tokens) for tokens in rows), default=0)
    # Start from NaN so that short (ragged) rows end up NaN-padded.
    data_fun = np.full((len(rows), width), np.nan)
    for r, tokens in enumerate(rows):
        data_fun[r, :len(tokens)] = [float(tok) for tok in tokens]

    # The zero column is sized from the data instead of a fixed 30 rows.
    data_fun = np.hstack([np.zeros([len(rows), 1]), data_fun])
    data_fun = np.delete(data_fun, -1, 1)  # drop the last column
    return data_fun
# Convert each row into a normalised rainfall distribution over `points` bins
def get_pre_h(data, points):
    """Aggregate each row's samples into ``points`` bins and normalise them.

    For every row, the values from column index 4 onward are taken, NaN
    entries are removed, and the remaining samples are split into ``points``
    consecutive bins of ``len(samples) // points`` samples each (any
    trailing remainder is ignored). Each bin is summed and the bins are then
    divided by their total so that a row sums to 1.

    NOTE(review): the original comment said the samples start at column 3,
    but the code slices from index 4 -- confirm which one is intended.

    Args:
        data: 2-D array-like of floats, one event per row.
        points: Number of bins to produce per row.

    Returns:
        A float ``numpy.ndarray`` of shape ``(n_rows, points)``. Rows whose
        binned total is zero (no rainfall, or fewer samples than ``points``)
        are returned as all zeros instead of NaN.
    """
    data = np.asarray(data, dtype=float)
    n_rows = np.size(data, 0)
    data_h1 = np.zeros([n_rows, points])
    for r in range(n_rows):
        samples = data[r, 4:]
        # Boolean indexing drops NaN padding without relying on how
        # np.delete interprets a boolean mask.
        samples = samples[~np.isnan(samples)]
        delta = len(samples) // points
        # One row per bin; summing along axis 1 yields the bin totals.
        bins = samples[:delta * points].reshape(points, delta).sum(axis=1)
        total = bins.sum()
        # Guard against 0/0, which would otherwise fill the row with NaN.
        if total != 0:
            data_h1[r, :] = bins / total
    return data_h1
# K-means cluster analysis
def k_m(h, k):
    """Cluster the rows of ``h`` into ``k`` groups with K-means.

    Args:
        h: 2-D array-like of samples, one sample per row.
        k: Number of clusters.

    Returns:
        A tuple ``(k_cst1, k_cst2)`` of DataFrames:
        ``k_cst1`` joins, column-wise, the number of samples in each cluster
        with the cluster centres; ``k_cst2`` joins, column-wise, each
        sample's cluster label with the sample itself.
    """
    model = KMeans(n_clusters=k)  # K-means estimator with k clusters
    model.fit(h)  # train the model

    labels = model.labels_
    sizes = pd.Series(labels).value_counts()  # samples per cluster
    centers = pd.DataFrame(model.cluster_centers_)  # cluster centres

    # axis=1 joins side by side (axis=0 would stack vertically).
    k_cst1 = pd.concat([sizes, centers], axis=1)
    k_cst2 = pd.concat([pd.Series(labels), pd.DataFrame(h)], axis=1)
    return k_cst1, k_cst2
if __name__ == '__main__':
    # Process every .txt file in the current working directory.
    for fn in glob.glob(r'*.txt'):
        data_input = read_data(fn)

        # Cluster the rows into 5 groups; the per-sample labels
        # (cluster_type) are computed but not written out.
        cluster_count_center, cluster_type = k_m(data_input, 5)
        class_name = ['第1类', '第2类', '第3类', '第4类', '第5类']
        cluster_count_center.index = class_name

        # The output name keeps the first 48 characters of the input name
        # (the top30_1h naming); the top30_2h variant used fn[0:51] instead.
        out_name = fn[0:48] + '聚类.xlsx'
        cluster_count_center.to_excel(out_name)
        print(out_name + '保存完毕!')
# Source page title: "雨型K均值聚类" (K-means clustering of rainfall patterns)
# Source page footer: "最新推荐文章于 2024-08-09 00:05:38 发布"