工具包
使用GluonTS工具包,主页:https://ts.gluon.ai/
github页面:https://github.com/awslabs/gluon-ts/
使用代码
以下为非官方代码,是我学习时编写的测试代码,并尽可能添加了注释:
核心代码:
import pandas as pd
import matplotlib.pyplot as plt
from gluonts.dataset.common import ListDataset
from gluonts.mx.trainer import Trainer
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.evaluation.backtest import make_evaluation_predictions
def probabilistic_predict(train_se: pd.Series, test_se: pd.Series, freq, *,
                          context_length=100, num_hidden_dimensions=None,
                          epochs=10, learning_rate=1e-3,
                          num_batches_per_epoch=100, num_samples=100):
    """Train a SimpleFeedForwardEstimator and forecast the test horizon.

    The model is trained on ``train_se`` only and then evaluated with
    ``make_evaluation_predictions`` on the concatenation of ``train_se`` and
    ``test_se``; the prediction length equals ``len(test_se)``.

    Args:
        train_se: Training observations. Its first index value is used as
            the ``start`` of both datasets.
        test_se: Observations that directly follow ``train_se``; its length
            defines the forecast horizon.
        freq: Frequency passed to ``ListDataset`` and the estimator
            (the demo in this file uses ``'D'``).
        context_length: ``context_length`` passed to the estimator.
        num_hidden_dimensions: Hidden layer sizes passed to the estimator;
            ``None`` means ``[100]``.
        epochs: Number of training epochs passed to ``Trainer``.
        learning_rate: Learning rate passed to ``Trainer``.
        num_batches_per_epoch: Batches per epoch passed to ``Trainer``.
        num_samples: Number of sample paths requested from
            ``make_evaluation_predictions``.

    Returns:
        A ``(ts_entry, forecast_entry)`` tuple: the first item of each of
        the two iterators returned by ``make_evaluation_predictions``.

    Raises:
        ValueError: If ``train_se`` or ``test_se`` is empty.
    """
    # Fail early with a clear message instead of an IndexError on
    # ``train_se.index[0]`` or a zero-length prediction horizon.
    if len(train_se) == 0:
        raise ValueError("train_se must contain at least one observation")
    if len(test_se) == 0:
        raise ValueError("test_se must contain at least one observation")

    if num_hidden_dimensions is None:
        num_hidden_dimensions = [100]

    series_start = train_se.index[0]
    prediction_length = len(test_se)

    # GluonTS consumes datasets in this ListDataset format.
    training_data = ListDataset(
        [
            {
                "start": series_start,  # first timestamp of the training set
                "target": train_se,
            }
        ],
        freq=freq,
    )
    test_data = ListDataset(
        [
            {
                # The target below is train + test, so the series still
                # starts at the first *training* timestamp.
                "start": series_start,
                "target": pd.concat([train_se, test_se]),
            }
        ],
        freq=freq,
    )

    # Build the estimator.
    estimator = SimpleFeedForwardEstimator(
        num_hidden_dimensions=list(num_hidden_dimensions),
        prediction_length=prediction_length,
        context_length=context_length,
        freq=freq,
        trainer=Trainer(
            epochs=epochs,
            learning_rate=learning_rate,
            num_batches_per_epoch=num_batches_per_epoch,
        ),
    )
    predictor = estimator.train(training_data)  # run training

    # Evaluate the trained predictor on the train + test series.
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=test_data,
        predictor=predictor,
        num_samples=num_samples,  # sample paths used for evaluation
    )

    # Only one series was supplied, so take the single entry of each
    # iterator (same consumption order as before: series, then forecasts).
    tss = list(ts_it)
    forecasts = list(forecast_it)
    return tss[0], forecasts[0]
绘图函数:
def plot_prob_forecasts(ts_entry, forecast_entry, plot_length=100,
                        prediction_intervals=(50.0, 90.0),
                        save_path='save_fig.png'):
    """Plot the observed series together with a probabilistic forecast.

    The figure is written to ``save_path`` and then displayed with
    ``plt.show()``.

    Args:
        ts_entry: Observed series; only its last ``plot_length`` points are
            drawn.
        forecast_entry: Forecast object whose ``plot`` method accepts
            ``prediction_intervals`` and ``color``.
        plot_length: Number of trailing observations to draw.
        prediction_intervals: Interval levels (in percent) handed to
            ``forecast_entry.plot`` and used for the legend labels.
        save_path: File the figure is saved to.
    """
    # Interval labels are listed in reverse order of ``prediction_intervals``.
    interval_labels = [f"{k}% prediction interval" for k in prediction_intervals]
    legend = ["observations", "median prediction"] + interval_labels[::-1]

    # The figure handle itself is not needed; only the axes are used.
    _, ax = plt.subplots(1, 1, figsize=(20, 10))
    ts_entry[-plot_length:].plot(ax=ax)  # observed time series
    # No axes are passed here, so the forecast is drawn through pyplot.
    forecast_entry.plot(prediction_intervals=prediction_intervals, color='g')
    plt.grid(which="both")
    plt.legend(legend, loc="upper left")
    plt.savefig(save_path)
    plt.show()
使用:
if __name__ == '__main__':
    import numpy as np

    # Simulate a daily time series: 365 random integers in [0, 10), one per
    # day from 2021-01-01 through 2021-12-31.
    random_values = np.random.randint(0, 10, size=(365, 1))
    daily_index = pd.date_range('2021-01-01', periods=365, freq='D')
    time_df = pd.DataFrame(random_values, columns=['data'], index=daily_index)

    # Training set: 2021-01-01 .. 2021-10-31; test set: 2021-11-01 .. 2021-12-31.
    train_part = time_df.truncate(after='2021-10-31')
    test_part = time_df.truncate(before='2021-11-01')
    train = train_part.iloc[:, 0]
    test = test_part.iloc[:, 0]

    # Train, forecast, then plot the result.
    ts_entry, forecast_entry = probabilistic_predict(train, test, freq='D')
    plot_prob_forecasts(ts_entry, forecast_entry)
绘制出的效果图如下:
由于是随机数,因此可预测性并不高,效果自然不会太好。