目录
紧接上一篇气温预测-单特征,我们再来实现一下利用多特征进行气温预测。
为了避免麻烦,部分数据处理过程不再赘述,详情参考单特征一篇。
一、预测将来某一个时间点的气温
第一步,取出数据
# Select the three feature columns that feed the model.
features_considered = ['p (mbar)', 'T (degC)', 'rho (g/m**3)']
features = df[features_considered]
features.index = df['Date Time']
features.head()

# Inspect the data: one subplot per feature.
features.plot(subplots=True)

# Standardise every column with the mean and standard deviation of the
# training portion only (rows before TRAIN_SPLIT).
dataset = features.values
data_mean = np.mean(dataset[:TRAIN_SPLIT], axis=0)
data_std = np.std(dataset[:TRAIN_SPLIT], axis=0)
dataset = (dataset - data_mean) / data_std
第二步,制作训练集和验证集
# Sliding-window sample builder for multivariate time series.
def multivariate_data(dataset, target, start_index, end_index,
                      history_size, target_size, step,
                      single_step=False):
    """Cut a time series into (history window, label) pairs.

    Args:
        dataset: Array indexed by time step along axis 0; every history
            window is taken from it.
        target: Sequence of the values to predict, aligned with ``dataset``
            along the time axis.
        start_index: First time step that may appear in a history window.
        end_index: Exclusive upper bound for the window end position ``i``.
            ``None`` means "as far as the data allows", i.e.
            ``len(dataset) - target_size``. Larger values are clamped to
            that same limit so that every label stays inside ``target``.
        history_size: Number of past time steps spanned by each window.
        target_size: How far ahead the label lies (single-step mode) or how
            many future values make up the label (multi-step mode).
        step: Stride used to subsample the time steps inside a window.
        single_step: If true, each label is the single value
            ``target[i + target_size]``; otherwise it is the slice
            ``target[i:i + target_size]``.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays, one entry per window.
    """
    data = []
    labels = []

    # The first usable window ends history_size steps after start_index.
    start_index = start_index + history_size

    # Windows ending past this position would need labels beyond the data.
    last_valid_end = len(dataset) - target_size
    if end_index is None or end_index > last_valid_end:
        end_index = last_valid_end

    for i in range(start_index, end_index):
        # Every `step`-th time step of the history_size steps preceding i.
        indices = range(i - history_size, i, step)
        data.append(dataset[indices])

        if single_step:
            labels.append(target[i + target_size])
        else:
            labels.append(target[i:i + target_size])

    return np.array(data), np.array(labels)
# The training window covers 5 days of history; with one record every
# 10 minutes that is 5 * 24 * 6 = 720 records.
# STEP = 6 keeps every 6th record inside the window, so each sample holds
# 720 / 6 = 120 time steps (indices range(0, 720, 6)).
# The label is the temperature 12 hours ahead, i.e. 12 * 6 = 72 records later.
past_history = 720
future_target = 72
STEP = 6

# Training windows come from the rows before TRAIN_SPLIT.
x_train_single, y_train_single = multivariate_data(
    dataset=dataset,
    target=dataset[:, 1],
    start_index=0,
    end_index=TRAIN_SPLIT,
    history_size=past_history,
    target_size=future_target,
    step=STEP,
    single_step=True,
)

# Validation windows come from the rows after TRAIN_SPLIT.
x_val_single, y_val_single = multivariate_data(
    dataset=dataset,
    target=dataset[:, 1],
    start_index=TRAIN_SPLIT,
    end_index=None,
    history_size=past_history,
    target_size=future_target,
    step=STEP,
    single_step=True,
)
# Wrap the arrays in tf.data pipelines: cache, shuffle, batch and repeat
# without end.
train_data_single = (
    tf.data.Dataset.from_tensor_slices((x_train_single, y_train_single))
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .repeat()
)
# NOTE(review): the validation pipeline is shuffled as well - confirm that
# this is intended.
val_data_single = (
    tf.data.Dataset.from_tensor_slices((x_val_single, y_val_single))
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .repeat()
)
第三步,搭建网络模型
# One LSTM layer followed by a single-unit dense head that emits the
# predicted value; the input shape is taken from the training windows.
single_step_model = tf.keras.models.Sequential([
    keras.layers.LSTM(32, input_shape=x_train_single.shape[-2:]),
    keras.layers.Dense(1),
])
single_step_model.compile(loss='mae', optimizer=tf.optimizers.RMSprop())
第四步,提前预测输出维度
# Run one validation batch through the model before training to confirm the
# shape of its output.
for x, y in val_data_single.take(1):
    print(single_step_model.predict(x).shape)
第五步,训练模型
# Both datasets repeat without end, so the step counts decide how many
# batches make up one training epoch and one validation pass.
single_step_history = single_step_model.fit(
    train_data_single,
    steps_per_epoch=EVALUATION_INTERVAL,
    epochs=EPOCHS,
    validation_steps=50,
    validation_data=val_data_single,
)
第六步,训练结果展示
def plot_train_history(history, title):
    """Plot training and validation loss per epoch on one figure.

    Args:
        history: Object whose ``history`` attribute maps ``'loss'`` and
            ``'val_loss'`` to per-epoch sequences, such as the value
            returned by the model's ``fit`` call.
        title: Title drawn above the figure.
    """
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    epochs = range(len(loss))

    plt.figure()

    plt.plot(epochs, loss, 'b', label='Training loss')
    plt.plot(epochs, val_loss, 'r', label='Validation loss')
    plt.title(title)
    plt.legend()

    plt.show()
# Show how the single-step model's losses evolved during training.
plot_train_history(
    history=single_step_history,
    title='Single Step Training and validation loss',
)
第七步,预测结果与真实值差异展示
# For three validation batches, plot the first sample: its temperature
# history (feature column 1), the true label and the model's prediction.
for x, y in val_data_single.take(3):
    # The meaning of the 12 is fixed by show_plot, which is defined outside
    # this section; presumably the label's offset on the time axis - confirm.
    plot = show_plot([x[0][:, 1].numpy(), y[0].numpy(),
                      single_step_model.predict(x)[0]],
                     12, 'Single Step Prediction')
    plot.show()
二、预测接下来一个时间片段的气温
第一步,构建数据集
# Each label is now the next 72 temperature readings instead of one value.
future_target = 72

# Training windows come from the rows before TRAIN_SPLIT.
x_train_multi, y_train_multi = multivariate_data(
    dataset=dataset,
    target=dataset[:, 1],
    start_index=0,
    end_index=TRAIN_SPLIT,
    history_size=past_history,
    target_size=future_target,
    step=STEP,
)

# Validation windows come from the rows after TRAIN_SPLIT.
x_val_multi, y_val_multi = multivariate_data(
    dataset=dataset,
    target=dataset[:, 1],
    start_index=TRAIN_SPLIT,
    end_index=None,
    history_size=past_history,
    target_size=future_target,
    step=STEP,
)
# Wrap the arrays in tf.data pipelines: cache, shuffle, batch and repeat
# without end.
train_data_multi = (
    tf.data.Dataset.from_tensor_slices((x_train_multi, y_train_multi))
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .repeat()
)
# NOTE(review): the validation pipeline is shuffled as well - confirm that
# this is intended.
val_data_multi = (
    tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .repeat()
)
第二步,数据集展示
def multi_step_plot(history, true_future, prediction):
    """Plot a history window with its true and (optionally) predicted future.

    Args:
        history: 2-D window of past observations; column 1 is the series
            that gets drawn.
        true_future: Sequence of the actual future values.
        prediction: Sequence of predicted future values. Pass ``None`` or an
            all-zero array (e.g. ``np.array([0])``) to draw no prediction.
    """
    plt.figure(figsize=(12, 6))
    # create_time_steps is defined outside this section; presumably it
    # yields the x positions of the history points - confirm.
    num_in = create_time_steps(len(history))
    num_out = len(true_future)
    # Future points are spaced 1/STEP apart on the x axis.
    future_steps = np.arange(num_out) / STEP

    plt.plot(num_in, np.array(history[:, 1]), label='History')
    plt.plot(future_steps, np.array(true_future), 'bo', label='True Future')
    # An all-zero array is the caller's "nothing to draw" marker; None is
    # accepted as well instead of failing on the missing .any() attribute.
    if prediction is not None and np.any(prediction):
        plt.plot(future_steps, np.array(prediction),
                 'ro', label='Predicted Future')
    plt.legend(loc='upper left')
    plt.show()
# Show the first sample of one training batch; the all-zero array stands in
# for "no prediction yet", so only history and true future are drawn.
for x, y in train_data_multi.take(1):
    multi_step_plot(x[0], y[0], np.array([0]))
第三步,搭建网络模型
# Two stacked LSTM layers followed by a dense head that emits one value per
# future time step.
multi_step_model = tf.keras.Sequential()
# return_sequences=True hands the full output sequence to the second LSTM.
multi_step_model.add(keras.layers.LSTM(32,
                                       return_sequences=True,
                                       input_shape=x_train_multi.shape[-2:]))
multi_step_model.add(keras.layers.LSTM(16,
                                       activation='relu'))
# The output width follows future_target (72 here) so the model stays in
# sync with the label length if the forecast horizon is changed.
multi_step_model.add(keras.layers.Dense(future_target))
multi_step_model.compile(optimizer=tf.optimizers.RMSprop(clipvalue=1.0),
                         loss='mae')
第四步,提前预测输出维度
# Run one validation batch through the model before training to confirm the
# shape of its output.
for x, y in val_data_multi.take(1):
    print(multi_step_model.predict(x).shape)
第五步,训练网络模型
# Both datasets repeat without end, so the step counts decide how many
# batches make up one training epoch and one validation pass.
multi_step_history = multi_step_model.fit(
    train_data_multi,
    steps_per_epoch=EVALUATION_INTERVAL,
    epochs=EPOCHS,
    validation_steps=50,
    validation_data=val_data_multi,
)
第六步,展示loss变化过程
# Show how the multi-step model's losses evolved during training.
plot_train_history(
    history=multi_step_history,
    title='Multi Step Training and validation loss',
)
第七步,展示预测结果与真实值的差异
# For three validation batches, plot the first sample's history, true future
# and predicted future.
for x, y in val_data_multi.take(3):
    # multi_step_plot already calls plt.show(), so no further show() call is
    # needed here.
    multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])