使用Keras搭建LSTM模型进行多变量时间序列预测(一)

以UCI空气质量数据集(Beijing PM2.5 Data Set,即下文的 PRSA_data.csv)为例,原始数据可在 UCI Machine Learning Repository 下载。
使用前1小时的数据预测后1小时的PM2.5浓度(Timestep=1)。
参考:https://blog.csdn.net/tMb8Z9Vdm66wH68VX1/article/details/78463811
特征列表:

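| Column Name | Meaning | Column Name | Meaning |
| --- | --- | --- | --- |
| No | 行号 | TEMP | 温度 |
| year | 年份 | PRES | 气压 |
| month | 月份 | cbwd | 组合风向 |
| day | 日 | Iws | 累计风速 |
| hour | 小时 | Is | 累积降雪时间 |
| pm2.5 | PM2.5 浓度(label) | Ir | 累积降雨时间 |
| DEWP | 露点 | | |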

数据加载和预处理

import os
import sys
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf  #tf版本为1.13
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings('ignore')

# Read the raw air-quality CSV from the working directory and preview it.
raw_csv_path = './PRSA_data.csv'
data = pd.read_csv(raw_csv_path)
data.head()

在这里插入图片描述

# Merge the separate year/month/day/hour columns into a single hourly period.
date = pd.PeriodIndex(
    year=data["year"],
    month=data["month"],
    day=data["day"],
    hour=data['hour'],
    freq="H",
)
data['date'] = date

# The row number and the individual time fields are redundant once `date`
# exists, so remove them in a single call and index the frame by `date`.
drop_cols = ['No','year','month','day','hour']
data.drop(drop_cols, axis=1, inplace=True)
data.set_index('date', inplace=True)

# Give the eight remaining columns short, readable names.
data.columns = [
    'pollution', 'dew', 'temp', 'press',
    'wnd_dir', 'wnd_spd', 'snow', 'rain',
]
data.head()

在这里插入图片描述

# Handle missing values.
# Assign the filled column back instead of calling fillna(inplace=True) on the
# `data['pollution']` selection: the in-place call operates on an intermediate
# object and is not guaranteed to update `data` (it has no effect under pandas
# Copy-on-Write).
data['pollution'] = data['pollution'].fillna(0)
# Discard the first 24 hourly rows.
data = data.iloc[24:]
data.head()
# Save the cleaned table; as before, the date index is not written to the file.
data.to_csv('pollution.csv', index=False)

在这里插入图片描述

数据可视化

# Draw each listed column in its own panel, stacked vertically in one figure
# (wnd_dir is not in the list).
plt_cols = ['pollution','dew','temp','press','wnd_spd','snow','rain']
values = data[plt_cols].values
n_panels = len(plt_cols)
plt.figure(figsize=(12, 10))
# `values.T` yields one 1-D series per column, in the same order as plt_cols.
for panel, (col_name, series) in enumerate(zip(plt_cols, values.T), start=1):
    plt.subplot(n_panels, 1, panel)
    plt.plot(series)
    # Put the column name inside the panel, on the right-hand side.
    plt.title(col_name, y=0.5, loc='right')
plt.show()

在这里插入图片描述

Demo1

# Build the supervised-learning table used by the LSTM.
new_data = data.copy()
# The label `y` is the pollution value of the following hour.
new_data['y'] = new_data['pollution'].shift(-1)
# The last row has no following hour (its label is NaN after the shift); drop it.
new_data = new_data.iloc[:-1].copy()

# Encode the wind direction as integers.  The encoding has to be applied to
# `new_data`, the frame that is scaled below: encoding `data` after the copy
# above would leave `new_data['wnd_dir']` as strings and break the scaling.
lbl = LabelEncoder()
new_data['wnd_dir'] = lbl.fit_transform(new_data['wnd_dir'])

# Min-max scale every column (following the reference blog, `y` is scaled too).
# Each column gets its own scaler; they are kept so values can be inverted later.
# NOTE(review): the scalers are fitted on the full data set, including the rows
# later used for testing.
scalers = {}
for col in new_data.columns:
    scalers[col] = MinMaxScaler()
    new_data[col] = scalers[col].fit_transform(new_data[[col]].values).ravel()
# Keep `mms` bound to the label's scaler, as it was after the original loop
# (where `y` is the last column processed).
mms = scalers['y']
new_data.head()

在这里插入图片描述

# Split into features and label, then into training and test sets.
y = new_data.pop('y')
X = new_data
# Train on the first year of hourly rows and test on everything after it;
# a random split with train_test_split would also be possible.
train_size = 365*24*1
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]
# Insert a time axis of length 1 so the arrays are (samples, 1, features).
X_train = X_train.values[:, np.newaxis, :]
X_test = X_test.values[:, np.newaxis, :]
print("X_train:",X_train.shape,"y_train:",y_train.shape) # X_train: (8760, 1, 8) y_train: (8760,)
print("X_test:",X_test.shape,"y_test:",y_test.shape) # X_test: (35039, 1, 8) y_test: (35039,)

构建训练模型

# Use the same hyper-parameters as the reference blog.
from tensorflow import keras
model = keras.models.Sequential()
# One LSTM layer with 50 units; input shape is (timesteps, features).
model.add(keras.layers.LSTM(50,input_shape=(X_train.shape[1],X_train.shape[2])))
# Single output: the scaled pollution value of the next hour.
model.add(keras.layers.Dense(1))
model.compile(loss='mae',optimizer='adam')
history = model.fit(X_train,y_train,
                    epochs = 50,
                    batch_size = 72,
                    validation_data = (X_test,y_test))
# (training log omitted)


# Visualise the loss.
def plot_learning_curves(history, label, epochs, min_value, max_value,title):
    """Plot the training and validation curves of one recorded metric.

    Args:
        history: object returned by ``model.fit``; its ``history`` dict is read.
        label: key of the metric in ``history.history``; the validation curve
            is read from ``'val_' + label``.
        epochs: upper limit of the x-axis.
        min_value: lower limit of the y-axis.
        max_value: upper limit of the y-axis.
        title: title of the figure.
    """
    # Named `curves` rather than `data` so the global DataFrame is not shadowed.
    curves = {
        'train': history.history[label],
        'test': history.history['val_'+label],
    }
    pd.DataFrame(curves).plot(figsize=(8, 5))
    plt.grid(True)
    plt.axis([0, epochs, min_value, max_value])
    plt.title(title)
    plt.show()


# NOTE(review): the x-axis stops at 30 although 50 epochs were trained above.
plot_learning_curves(history, 'loss', 30, 0, 0.06,'MAE')

在这里插入图片描述

# Evaluate the predictions on the original (un-scaled) pollution scale.
# No scaler was saved explicitly during normalisation; `mms` is the one left
# over from the scaling step, which the author uses as the label's scaler.

# --- training set ---
pred_train = model.predict(X_train)
inv_train = mms.inverse_transform(pred_train)
# The scaler expects a column vector, so add a trailing axis to the labels.
train_y_true = mms.inverse_transform(y_train.values[:, np.newaxis])
train_mse = mean_squared_error(train_y_true, inv_train)
rmse = np.sqrt(train_mse)
print("train rmse:",rmse) # train rmse:30.686416544450598

# --- test set ---
pred_test = model.predict(X_test)
inv_pred = mms.inverse_transform(pred_test)
test_y_true = mms.inverse_transform(y_test.values[:, np.newaxis])
test_mse = mean_squared_error(test_y_true, inv_pred)
rmse = np.sqrt(test_mse)
print("test rmse:",rmse)  # test rmse: 26.48025156818094

测试集误差居然比训练集误差还要小一些,不知道原博所说的 RMSE 3.836 是怎么得到的。

评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值