一、了解
数据集:气温数据,多种指标
任务目标:预测未来某一时间点的气温/未来某一时间片段的气温
数据集下载链接如下:jena_climate_2009_2016
二、实战--单特征
第一步,加载数据集
# Libraries used throughout this tutorial: the snippets below reference
# np, pd, plt, tf and keras, so they are imported once here.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

# Load the Jena climate CSV from the local datasets directory.
df = pd.read_csv('./datasets/jena_climate_2009_2016.csv')
# Preview the first rows.
df.head()
此数据集每隔十分钟记录一条数据,每条数据包含多个指标
第二步,构建序列数据函数
def univariate_data(dataset, start_index, end_index,
                    history_size, target_size):
    """Build sliding-window samples and labels from a single-feature series.

    For every position ``i`` in ``[start_index + history_size, end_index)``
    the sample is the ``history_size`` values immediately before ``i``
    (``dataset[i - history_size:i]``) and the label is
    ``dataset[i + target_size]``.  Note that ``i`` is the position right
    after the window, not the window's first element.

    Args:
        dataset: 1-D sequence of values (NumPy array or plain list).
        start_index: Position of the first value allowed in a window.
        end_index: Exclusive upper bound for ``i``.  ``None`` means
            ``len(dataset) - target_size`` so that every label exists.
            An explicit value is used as given, so the caller must keep
            ``end_index + target_size <= len(dataset)``.
        history_size: Number of past values in each window.
        target_size: Offset of the label from ``i``; 0 selects the value
            immediately following the window.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays.  When at least one window
        is produced, ``data`` has shape ``(n, history_size, 1)`` and
        ``labels`` has ``n`` entries.
    """
    first_target = start_index + history_size
    if end_index is None:
        end_index = len(dataset) - target_size

    data = []
    labels = []
    for i in range(first_target, end_index):
        # A plain slice avoids building an index array for every window
        # and also works when dataset is a list.
        window = dataset[i - history_size:i]
        # Reshape from (history_size,) to (history_size, 1).
        data.append(np.reshape(window, (history_size, 1)))
        labels.append(dataset[i + target_size])
    return np.array(data), np.array(labels)
第三步,取出数据DF
# Use the first 300,000 rows for training; the remainder is for validation.
TRAIN_SPLIT = 300000

# Single-feature series: keep only the temperature column, indexed by timestamp.
uni_data = df['T (degC)']
uni_data.index = df['Date Time']
uni_data.head()

# Plot the selected feature.
uni_data.plot(subplots=True)

# Continue with the raw temperature values.
uni_data = uni_data.values
print(uni_data.shape)

# Standardize using the mean and std of the training portion only.
train_portion = uni_data[:TRAIN_SPLIT]
uni_train_mean = train_portion.mean()
uni_train_std = train_portion.std()
uni_data = (uni_data - uni_train_mean) / uni_train_std

# Each window holds 20 records; the label is the reading right after it.
univariate_past_history = 20
univariate_future_target = 0

x_train_uni, y_train_uni = univariate_data(
    uni_data, 0, TRAIN_SPLIT,
    univariate_past_history, univariate_future_target)
x_val_uni, y_val_uni = univariate_data(
    uni_data, TRAIN_SPLIT, None,
    univariate_past_history, univariate_future_target)

# Shapes of the resulting arrays.
for split_array in (x_train_uni, y_train_uni, x_val_uni, y_val_uni):
    print(split_array.shape)
第四步,时间序列数据展示
def create_time_steps(length):
    """Return the relative time steps ``[-length, ..., -1]`` for a window.

    The current step counts as 0, so the most recent history entry sits at
    -1 and the oldest at ``-length``.  A ``length`` of 0 gives an empty list.
    """
    return list(range(-length, 0))
def show_plot(plot_data, delta, title):
    """Plot a history window with its target and an optional prediction.

    Args:
        plot_data: Sequence of arrays. Entry 0 is the history window; later
            entries are drawn as 'True Future' and 'Model Prediction'.
        delta: x position of the future point(s); a falsy value means 0.
        title: Title of the figure.

    Returns:
        The ``plt`` module, so the caller can call ``.show()`` on it.
    """
    series_labels = ['History', 'True Future', 'Model Prediction']
    series_markers = ['.-', 'rx', 'go']
    history_steps = create_time_steps(plot_data[0].shape[0])
    future = delta or 0

    plt.title(title)
    for idx, series in enumerate(plot_data):
        if idx == 0:
            # The history window is drawn as a line over the negative steps.
            plt.plot(history_steps, series.flatten(),
                     series_markers[idx], label=series_labels[idx])
            continue
        # Every later entry is drawn as a marker at the future position.
        plt.plot(future, series, series_markers[idx],
                 markersize=10, label=series_labels[idx])
    plt.legend()
    plt.xlim([history_steps[0], (future + 5) * 2])
    plt.xlabel('Time-Step')
    return plt
# Visualize the first training window together with its label.
first_window, first_label = x_train_uni[0], y_train_uni[0]
show_plot([first_window, first_label], 0, 'Sample Example')
第五步,用from_tensor_slices创建Tensor类型训练数据和验证数据
# Batch size and shuffle-buffer size for the tf.data pipelines.
BATCH_SIZE = 256
BUFFER_SIZE = 10000

# Training pipeline: cache, shuffle, batch, then repeat.
train_univariate = tf.data.Dataset.from_tensor_slices((x_train_uni, y_train_uni))
train_univariate = train_univariate.cache()
train_univariate = train_univariate.shuffle(BUFFER_SIZE)
train_univariate = train_univariate.batch(BATCH_SIZE)
train_univariate = train_univariate.repeat()

# Validation pipeline: batch and repeat, without shuffling.
val_univariate = (
    tf.data.Dataset.from_tensor_slices((x_val_uni, y_val_uni))
    .batch(BATCH_SIZE)
    .repeat()
)
第六步,用 LSTM模型 预测气温
# A minimal model: one LSTM layer with 8 units followed by a one-unit Dense layer.
# Every Keras object is reached through tf.keras, matching the Sequential
# container, so this snippet depends only on the `tf` name.
simple_lstm_model = tf.keras.models.Sequential([
    # The input shape is the last two dimensions of x_train_uni, i.e. (20, 1).
    tf.keras.layers.LSTM(8, input_shape=x_train_uni.shape[-2:]),
    tf.keras.layers.Dense(1),
])

# Train with the Adam optimizer on mean absolute error.
simple_lstm_model.compile(optimizer=tf.keras.optimizers.Adam(),
                          loss='mae')
simple_lstm_model.summary()
运行结果如下:
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
lstm_1 (LSTM)                (None, 8)                 320
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 9
=================================================================
Total params: 329
Trainable params: 329
Non-trainable params: 0
第七步,训练前先用模型对一个 batch 的验证数据做预测,检查输出形状
# Run the model on one validation batch and print the shape of its output.
for batch_x, _ in val_univariate.take(1):
    batch_pred = simple_lstm_model.predict(batch_x)
    print(batch_pred.shape)
第八步,训练模型
# To keep training fast, each epoch runs only 200 steps.
EVALUATION_INTERVAL = 200
EPOCHS = 10
# Number of validation batches evaluated at the end of each epoch.
VALIDATION_STEPS = 50

simple_lstm_model.fit(
    train_univariate,
    epochs=EPOCHS,
    steps_per_epoch=EVALUATION_INTERVAL,
    validation_data=val_univariate,
    validation_steps=VALIDATION_STEPS,
)
第九步,用训练好的模型预测结果
# Show predictions for a few validation samples.
# Each x is one batch of 256 windows; only its first window is plotted.
for x, y in val_univariate.take(3):
    history = x[0].numpy()
    true_future = y[0].numpy()
    predicted = simple_lstm_model.predict(x)[0]
    plot = show_plot([history, true_future, predicted],
                     0, 'Simple LSTM model')
    plot.show()
后面,将用多特征来训练网络模型