数据为纽约市的交通进出情况(一个 txt 文件记录进,一个 txt 文件记录出),已知一年 365 天 × 24 小时的数据,想用 HA(Historical Average,历史平均)来预测,并计算 MAE 和 RMSE 来评估预测的准确性。
我的 read() 函数用来拼接两个文件里的数据(一般只读取其中一个文件的数据即可),返回的是一个大矩阵。
我的 HA 是按周来计算的,第一周不参与预测评价。从第 8 天开始,以之前每周同一天、同一小时的数据取平均做预测。
比如今天是周二,则累加之前所有周的周二在同一小时的数据,再取平均值。
import numpy as np
from Train_Validate import *
import math
def _load_int_matrix(path):
    """Parse a comma-separated text file of integers into a 2-D int array."""
    rows = []
    # The context manager closes the file even if a row fails to parse.
    with open(path, "r") as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            rows.append([int(token) for token in line.split(",")])
    return np.array(rows, dtype=int)


def read(file1="tensor_year_hour_lease.txt", file2="tensor_year_hour_return.txt"):
    """Load the two data files and join them column-wise.

    Each file holds one comma-separated row of integers per line. The two
    matrices are stacked horizontally, so row ``k`` of the result is row ``k``
    of ``file1`` followed by row ``k`` of ``file2``. (The original author
    notes a result of 8760 x 188 for their data set.)

    Args:
        file1: Path of the first file; defaults to the lease data file.
        file2: Path of the second file; defaults to the return data file.

    Returns:
        A 2-D integer ``numpy.ndarray``.

    Raises:
        OSError: If either file cannot be opened.
        ValueError: If a value is not an integer, or the two files do not
            have the same number of rows.
    """
    return np.hstack((_load_int_matrix(file1), _load_int_matrix(file2)))
def ha(day, hour, data=None):
    """Historical-average (HA) forecast for a single hourly slot.

    The forecast for row ``day * 24 + hour`` is the mean of the rows at the
    same hour and the same position within the week in every *earlier* week.

    Args:
        day: Zero-based day index (day 7 is the first day that has a full
            week of history behind it).
        hour: Hour of the day, 0-23.
        data: Optional 2-D array with one row per hour. When omitted the
            matrix is loaded with ``read()`` on every call, so callers that
            forecast many slots should load it once and pass it in.

    Returns:
        A 1-D float array holding one forecast per column of ``data``.

    Raises:
        ValueError: If ``hour`` is outside 0-23 or ``day`` is earlier than 7
            (no complete earlier week to average).
        IndexError: If ``data`` is too short to contain the history rows.
    """
    if not 0 <= hour < 24:
        raise ValueError("hour must be in the range 0-23")
    # Number of complete earlier weeks. Deriving this from the elapsed hours
    # with a ceiling (as the previous code did) counts the current week too
    # and pulls the row being predicted into its own forecast.
    weeks = day // 7
    if weeks < 1:
        raise ValueError("day must be at least 7 to have a week of history")
    if data is None:
        data = read()
    data = np.asarray(data)
    hours_per_week = 7 * 24
    # Row of the matching weekday/hour inside the first week.
    first_row = (day % 7) * 24 + hour
    history_rows = first_row + hours_per_week * np.arange(weeks)
    return data[history_rows].mean(axis=0)
def ergodic():
    """Forecast every hourly slot from day index 7 to 364 and score the result.

    Calls ``ha(day, hour)`` for each slot in chronological order, printing the
    ``day hour`` pair as progress output, then hands the collected forecasts
    to ``res`` for evaluation.
    """
    forecasts = []
    # One flat pass over the hourly slots; divmod recovers (day, hour).
    for slot in range(7 * 24, 365 * 24):
        day, hour = divmod(slot, 24)
        print(day, hour)
        forecasts.append(ha(day, hour))
    res(forecasts)
def res(predect, data=None, warmup=168):
    """Print and return the MAE and RMSE of a set of forecasts.

    Args:
        predect: Forecasts, one row per evaluated hourly slot (array-like).
        data: Optional full ground-truth matrix with one row per hour. When
            omitted it is loaded with ``read()``.
        warmup: Number of leading rows of ``data`` that are not evaluated;
            the default of 168 skips the first week (7 * 24 hours).

    Returns:
        A ``(mae, rmse)`` tuple of floats. Both values are also printed.

    Raises:
        ValueError: If the forecasts and the evaluated part of the ground
            truth do not have the same shape.
    """
    predect = np.asarray(predect, dtype=float)
    if data is None:
        data = read()
    real = np.asarray(data)[warmup:]
    # Reject mismatched shapes up front: broadcasting could otherwise turn a
    # wrongly sized forecast into silently meaningless metrics.
    if predect.shape != real.shape:
        raise ValueError(
            "shape mismatch: predictions %s vs ground truth %s"
            % (predect.shape, real.shape)
        )
    error = predect - real
    mae = np.average(np.absolute(error))
    rmse = np.average(error ** 2) ** 0.5
    print("MAE:", mae)
    print("RMSE:", rmse)
    return float(mae), float(rmse)
# Script entry point: run the full forecast-and-evaluate pass only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    ergodic()