基于TensorFlow的双层BiDirection_Attention_LSTM的北京PM2.5数据集预测
通过预测北京PM2.5数据集,你会学到:
传统的线性模型难以解决多变量或多输入问题,而神经网络如LSTM则擅长于处理多个变量的问题,该特性使其有助于解决时间序列预测问题。
在接下来的这篇博客中,你将学会如何利用深度学习框架TF搭建LSTM模型来处理多个变量的时间序列预测问题。
读完这篇博客,你将掌握:
1. 如何将原始数据转化为适合处理时序预测问题的数据格式;
2. 如何准备数据并搭建双层双向+Attention的LSTM来处理时序预测问题;
3. 如何利用模型预测。
1.数据集内容
2.数据处理
利用Pandas处理数据集,PM2.5数据集可以在这里下载:
[Beijing PM2.5 Data Set](https://archive.ics.uci.edu/ml/datasets/Beijing+PM2.5+Data)
def PreData():
    """Clean the raw PM2.5 CSV and write the processed copy.

    Reads ``config.preprocess_file_path``, removes the ``No`` column and every
    row that contains a missing value, writes the result to
    ``config.process_file_path`` and prints a summary of the saved file.
    """
    import os

    df = pd.read_csv(config.preprocess_file_path)
    # 'No' is removed along axis=1, i.e. it is a column, not a row.
    df.drop('No', axis=1, inplace=True)
    # Drop every row that contains at least one NA value.
    df.dropna(axis=0, how='any', inplace=True)
    # Replace any NaN that is still present with 0.0.
    df.fillna(axis=0, value=0.0, inplace=True)
    # to_csv does not create missing directories, so make sure the target
    # directory exists before writing.
    out_dir = os.path.dirname(config.process_file_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # index=False keeps the pandas row index out of the file.
    df.to_csv(config.process_file_path, index=False)
    # Read the file back to show what was actually stored.
    new_pd = pd.read_csv(config.process_file_path)
    print(new_pd.info())
    print(new_pd.head(5))
#准备训练和测试的批次为100的数据
def _append_batches(features, targets):
    """Cut one data split into scaled batches and append them to the global lists.

    Args:
        features: DataFrame slice with ``config.input_train_dim`` columns.
        targets: DataFrame slice with the matching target column.
    """
    batch_size = config.batch_size
    # Only full batches are emitted: the reshape below needs exactly
    # batch_size rows, so a short trailing remainder is skipped.
    usable_rows = len(features) - len(features) % batch_size
    for begin in range(0, usable_rows, batch_size):
        x_batch = features[begin:begin + batch_size].values
        y_batch = targets[begin:begin + batch_size].values
        # Feature column 3 is turned into integer codes before scaling
        # (presumably the categorical wind-direction column -- confirm
        # against the CSV).
        x_batch[:, 3] = sp.LabelEncoder().fit_transform(x_batch[:, 3])
        # NOTE: the scaler is re-fitted on every batch, first on the
        # features and then on the targets, so values are normalised
        # per batch rather than over the whole data set.
        scaler = sp.MinMaxScaler(feature_range=(0, 1))
        x_batch = scaler.fit_transform(x_batch)
        y_batch = scaler.fit_transform(y_batch)
        x_batch = np.reshape(x_batch, [batch_size, config.input_train_dim, 1])
        train_x_list.append(x_batch.tolist())
        label_y_list.append(y_batch.tolist())


def getTrain_Test():
    """Load the processed CSV and fill ``train_x_list`` / ``label_y_list``.

    Rows 0-39999 form the training split and rows 40000-41699 the test split;
    columns 5:12 are the features and column 4 is the target. Which split is
    batched depends on ``FLAGS.isPreData_isTrain_isPredict`` (1 = training
    split, 2 = test split); any other value leaves the lists untouched.
    """
    df = pd.read_csv(config.process_file_path)
    weather_data = df.iloc[0:40000, 5:12]
    test_data = df.iloc[40000:41700, 5:12]
    weather_pm = df.iloc[0:40000, 4:5]
    test_pm = df.iloc[40000:41700, 4:5]
    print(weather_data)
    print(type(weather_pm))
    print(len(weather_pm))
    mode = FLAGS.isPreData_isTrain_isPredict
    if mode == 2:
        # Predict mode: batch the held-out test rows.
        _append_batches(test_data, test_pm)
    elif mode == 1:
        # Train mode: batch the training rows.
        _append_batches(weather_data, weather_pm)
3.需要导入的包以及参数配置
import tensorflow as tf
import pandas as pd
import sklearn.preprocessing as sp
import numpy as np
# Command-line flags; the values are read through FLAGS.<name>.
FLAGS = tf.flags.FLAGS

# Run mode: 0 = PreData, 1 = Train, 2 = Predict.
tf.flags.DEFINE_integer(
    'isPreData_isTrain_isPredict',
    1,
    '0为PreData,1为Train,2为Predict'
)
# 0 disables the attention layer, 1 enables it.
tf.flags.DEFINE_integer(
    'isAttention',
    1,
    '0为关闭Attention机制,1为启用Attention机制'
)
# Selects which of the two attention variants is built.
tf.flags.DEFINE_integer(
    'is_featureMap',
    0,
    '0为每个特征都Attention权重,1为每个featureMap一个权重'
)
class Config():
    """Static settings shared by preprocessing, batching and the model."""

    # Raw CSV read by PreData.
    preprocess_file_path = './PRSA_data_2010.1.1-2014.12.31.csv'
    # Cleaned CSV written by PreData and read by getTrain_Test.
    process_file_path = './PMcsv/bjpm.csv'
    # Number of rows per batch.
    batch_size = 100
    # Not referenced by the code shown here.
    loop_time = [128, 128]
    # Number of feature columns per row.
    input_train_dim = 7
    # Number of target columns per row.
    input_label_dim = 1
    # Hidden units of every LSTM cell.
    lstm_units = 128
# Single shared settings object.
config = Config()

# Filled by getTrain_Test(): one list entry per batch.
train_x_list, label_y_list = [], []

# Both placeholders take exactly one batch per step (leading dimension 1).
input_data = tf.placeholder(
    dtype=tf.float32,
    shape=[1, config.batch_size, config.input_train_dim, 1])
input_label = tf.placeholder(
    dtype=tf.float32,
    shape=[1, config.batch_size, config.input_label_dim])
4.下面上LSTM模型了
def Bi_lstm():
    """Build the two-layer bidirectional LSTM and return the prediction tensor.

    The single batch held by ``input_data`` is reshaped to
    [batch_size, input_train_dim, 1] and passed through two stacked
    bidirectional LSTM layers. The forward and backward halves of the second
    layer are summed; depending on ``FLAGS.isAttention`` the result is reduced
    either by taking the last time step (0) or by ``Attention`` (otherwise)
    before the fully connected output layer(s).

    Returns:
        Tensor of shape [batch_size, input_label_dim].
    """
    # Dropout is a training-time regulariser; keep it off in every other mode.
    is_training = FLAGS.isPreData_isTrain_isPredict == 1

    def _dropout_cell(input_keep, output_keep):
        # Every direction gets its own LSTMCell. Wrapping one shared cell
        # object for both directions either ties their weights or raises,
        # depending on the TensorFlow version.
        cell = tf.nn.rnn_cell.LSTMCell(num_units=config.lstm_units)
        if not is_training:
            input_keep = output_keep = 1.0
        return tf.nn.rnn_cell.DropoutWrapper(
            cell, input_keep_prob=input_keep, output_keep_prob=output_keep)

    # [1, batch, dim, 1] ===> [batch, dim, 1]
    layer_input = tf.reshape(
        input_data, [config.batch_size, config.input_train_dim, 1])

    with tf.variable_scope('lstm1'):
        # [batch, dim, 1] ===> two tensors of [batch, dim, lstm_units]
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            _dropout_cell(0.95, 0.95), _dropout_cell(0.95, 0.95),
            layer_input, dtype=tf.float32)
        # ===> [batch, dim, 2 * lstm_units]
        layer_input = tf.concat(outputs, 2)

    with tf.variable_scope('lstm2'):
        # [batch, dim, 2 * lstm_units] ===> two tensors of [batch, dim, lstm_units]
        outputs2, _ = tf.nn.bidirectional_dynamic_rnn(
            _dropout_cell(0.90, 0.80), _dropout_cell(0.90, 0.80),
            layer_input, dtype=tf.float32)
        # ===> [batch, dim, 2 * lstm_units]
        bi_concat_tensor = tf.concat(outputs2, 2)

    # Sum the forward and backward features: [batch, dim, lstm_units].
    forward, backward = tf.split(bi_concat_tensor, 2, 2)
    lstm_value = forward + backward

    if FLAGS.isAttention == 0:
        # Keep only the last time step: [batch, lstm_units].
        last_step = lstm_value[:, -1, :]
        with tf.variable_scope("full_connect"):
            # [lstm_units, input_label_dim]
            weight = w_init(config.lstm_units, config.input_label_dim)
            bias = b_init(config.input_label_dim)
            # [batch, input_label_dim]
            predict = tf.nn.xw_plus_b(last_step, weight, bias)
            return predict
    else:
        # Attention collapses the time axis: [batch, lstm_units].
        attention = Attention(lstm_value)
        with tf.variable_scope("full_connect1"):
            # [batch, lstm_units] x [lstm_units, 256] = [batch, 256]
            weight = w_init(config.lstm_units, 256)
            bias = b_init(256)
            full_value = tf.nn.xw_plus_b(attention, weight, bias)
        with tf.variable_scope("full_connect2"):
            # [batch, 256] x [256, input_label_dim] = [batch, input_label_dim]
            weight = w_init(256, config.input_label_dim)
            bias = b_init(config.input_label_dim)
            predict = tf.nn.xw_plus_b(full_value, weight, bias)
            return predict
5.Attention机制
这里我准备了两套:1.基于特征图的Attention
2.基于每一个特征值的Attention
def Attention(input):
    """Collapse the time axis of the LSTM output with learned attention weights.

    Two variants are selected by ``FLAGS.is_featureMap``:
    0 scores all time steps from the flattened sequence, any other value
    scores every time step from its own feature vector.

    Args:
        input: Tensor that is reshaped here as
            [batch_size, input_train_dim, lstm_units].

    Returns:
        Tensor of shape [batch_size, lstm_units].
    """
    batch_size = config.batch_size
    steps = config.input_train_dim
    units = config.lstm_units

    tanh_output = tf.tanh(input)
    if FLAGS.is_featureMap == 0:
        weight = tf.Variable(
            tf.truncated_normal(shape=[units * steps, steps]))
        flat = tf.reshape(tanh_output, [batch_size, units * steps])
        # [batch, steps * units] x [steps * units, steps] = [batch, steps]
        scores = tf.matmul(flat, weight)
    else:
        weight = tf.Variable(tf.truncated_normal(shape=[units, 1]))
        flat = tf.reshape(tanh_output, [batch_size * steps, units])
        # [batch * steps, units] x [units, 1] = [batch * steps, 1].
        # The scores must be brought back to [batch, steps] before the
        # softmax; a softmax over the size-1 axis would make every weight 1.
        scores = tf.reshape(tf.matmul(flat, weight), [batch_size, steps])

    # One weight per time step, normalised over the steps of each sample.
    alpha = tf.nn.softmax(scores)
    # [batch, units, steps] x [batch, steps, 1] = [batch, units, 1]
    context = tf.matmul(tf.transpose(input, [0, 2, 1]),
                        tf.reshape(alpha, [batch_size, steps, 1]))
    # Squeeze only the trailing axis so the batch axis can never be removed.
    context = tf.tanh(tf.squeeze(context, axis=[2]))
    if FLAGS.is_featureMap == 0 and FLAGS.isPreData_isTrain_isPredict == 1:
        # Dropout belongs to this variant only and only while training.
        context = tf.nn.dropout(context, keep_prob=0.9)
    return context
6.训练模型
def train_lstm():
    """Build the graph, then train (mode 1) or evaluate a checkpoint (mode 2).

    The mode is read from ``FLAGS.isPreData_isTrain_isPredict``. ``loss`` is
    the summed squared error of one batch; ``acc`` is, despite its name, the
    mean absolute error of that batch. Training runs 20 passes over
    ``train_x_list`` and writes summaries to ``./Pm_summary/``.
    """
    getTrain_Test()
    predict = Bi_lstm()
    error = tf.reshape(predict, [-1]) - tf.reshape(input_label, [-1])
    loss = tf.reduce_sum(tf.square(error))
    acc = tf.reduce_mean(tf.abs(error))
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = tf.train.AdamOptimizer().minimize(loss)
    saver = tf.train.Saver()
    init_op = tf.global_variables_initializer()
    tf.summary.scalar("loss", loss)
    tf.summary.scalar("acc", acc)
    merge = tf.summary.merge_all()

    checkpoint_path = "C:\\Users\\76152\\PycharmProjects\\LSTM\\check\\check"
    report = '第%d轮,第%d批次,loss===》%f,预测误差为=====》%f'
    mode = FLAGS.isPreData_isTrain_isPredict

    def _feed(batch_index):
        # The placeholders expect a leading dimension of 1 around the batch.
        return {input_data: [train_x_list[batch_index]],
                input_label: [label_y_list[batch_index]]}

    with tf.Session() as sess:
        sess.run(init_op)
        if mode == 1:
            file_write = tf.summary.FileWriter("./Pm_summary/", sess.graph)
            # Monotonic step counter so later epochs do not reuse the
            # summary steps of earlier ones.
            global_step = 0
            try:
                for epoch in range(20):
                    # Visit every batch, including the last one.
                    for batch_index in range(len(train_x_list)):
                        # One run per step: the reported loss/acc come from
                        # the same forward pass that produced the update.
                        _, loss_val, acc_val, merged = sess.run(
                            [train_op, loss, acc, merge],
                            feed_dict=_feed(batch_index))
                        global_step += 1
                        print(report % (epoch, batch_index + 1,
                                        loss_val, acc_val))
                        file_write.add_summary(merged, global_step)
                    # Checkpoint after every epoch so an interrupted run
                    # still leaves the latest weights on disk.
                    saver.save(sess, checkpoint_path)
            finally:
                # Flush pending summary events and release the file.
                file_write.close()
        if mode == 2:
            saver.restore(sess, checkpoint_path)
            for batch_index in range(len(train_x_list)):
                loss_val, acc_val = sess.run([loss, acc],
                                             feed_dict=_feed(batch_index))
                print(report % (0, batch_index + 1, loss_val, acc_val))
if __name__ == '__main__':
    # Mode 0 is the preprocessing mode of the isPreData_isTrain_isPredict
    # flag; modes 1 and 2 (train / predict) are handled inside train_lstm().
    if FLAGS.isPreData_isTrain_isPredict == 0:
        PreData()
    else:
        train_lstm()
7.完整代码在这里
import tensorflow as tf
import pandas as pd
import sklearn.preprocessing as sp
import numpy as np
# Command-line flags; the values are read through FLAGS.<name>.
FLAGS = tf.flags.FLAGS

# Run mode: 0 = PreData, 1 = Train, 2 = Predict.
tf.flags.DEFINE_integer(
    'isPreData_isTrain_isPredict',
    1,
    '0为PreData,1为Train,2为Predict'
)
# 0 disables the attention layer, 1 enables it.
tf.flags.DEFINE_integer(
    'isAttention',
    1,
    '0为关闭Attention机制,1为启用Attention机制'
)
# Selects which of the two attention variants is built.
tf.flags.DEFINE_integer(
    'is_featureMap',
    0,
    '0为每个特征都Attention权重,1为每个featureMap一个权重'
)
class Config():
    """Static settings shared by preprocessing, batching and the model."""

    # Raw CSV read by PreData.
    preprocess_file_path = './PRSA_data_2010.1.1-2014.12.31.csv'
    # Cleaned CSV written by PreData and read by getTrain_Test.
    process_file_path = './PMcsv/bjpm.csv'
    # Number of rows per batch.
    batch_size = 100
    # Not referenced by the code shown here.
    loop_time = [128, 128]
    # Number of feature columns per row.
    input_train_dim = 7
    # Number of target columns per row.
    input_label_dim = 1
    # Hidden units of every LSTM cell.
    lstm_units = 128
# Single shared settings object.
config = Config()

# Filled by getTrain_Test(): one list entry per batch.
train_x_list, label_y_list = [], []

# Both placeholders take exactly one batch per step (leading dimension 1).
input_data = tf.placeholder(
    dtype=tf.float32,
    shape=[1, config.batch_size, config.input_train_dim, 1])
input_label = tf.placeholder(
    dtype=tf.float32,
    shape=[1, config.batch_size, config.input_label_dim])
def w_init(shape0: int, shape1: int):
    """Create a weight variable of shape [shape0, shape1].

    The initial values are drawn with ``tf.random_normal`` using its default
    parameters.
    """
    weight = tf.Variable(tf.random_normal([shape0, shape1]))
    return weight
def b_init(shape: int):
    """Create a float32 bias variable of length ``shape`` filled with 0.01."""
    bias = tf.Variable(tf.constant(0.01, dtype=tf.float32, shape=[shape]))
    return bias
def PreData():
    """Clean the raw PM2.5 CSV and write the processed copy.

    Reads ``config.preprocess_file_path``, removes the ``No`` column and every
    row that contains a missing value, writes the result to
    ``config.process_file_path`` and prints a summary of the saved file.
    """
    import os

    df = pd.read_csv(config.preprocess_file_path)
    # 'No' is removed along axis=1, i.e. it is a column, not a row.
    df.drop('No', axis=1, inplace=True)
    # Drop every row that contains at least one NA value.
    df.dropna(axis=0, how='any', inplace=True)
    # Replace any NaN that is still present with 0.0.
    df.fillna(axis=0, value=0.0, inplace=True)
    # to_csv does not create missing directories, so make sure the target
    # directory exists before writing.
    out_dir = os.path.dirname(config.process_file_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # index=False keeps the pandas row index out of the file.
    df.to_csv(config.process_file_path, index=False)
    # Read the file back to show what was actually stored.
    new_pd = pd.read_csv(config.process_file_path)
    print(new_pd.info())
    print(new_pd.head(5))
def _append_batches(features, targets):
    """Cut one data split into scaled batches and append them to the global lists.

    Args:
        features: DataFrame slice with ``config.input_train_dim`` columns.
        targets: DataFrame slice with the matching target column.
    """
    batch_size = config.batch_size
    # Only full batches are emitted: the reshape below needs exactly
    # batch_size rows, so a short trailing remainder is skipped.
    usable_rows = len(features) - len(features) % batch_size
    for begin in range(0, usable_rows, batch_size):
        x_batch = features[begin:begin + batch_size].values
        y_batch = targets[begin:begin + batch_size].values
        # Feature column 3 is turned into integer codes before scaling
        # (presumably the categorical wind-direction column -- confirm
        # against the CSV).
        x_batch[:, 3] = sp.LabelEncoder().fit_transform(x_batch[:, 3])
        # NOTE: the scaler is re-fitted on every batch, first on the
        # features and then on the targets, so values are normalised
        # per batch rather than over the whole data set.
        scaler = sp.MinMaxScaler(feature_range=(0, 1))
        x_batch = scaler.fit_transform(x_batch)
        y_batch = scaler.fit_transform(y_batch)
        x_batch = np.reshape(x_batch, [batch_size, config.input_train_dim, 1])
        train_x_list.append(x_batch.tolist())
        label_y_list.append(y_batch.tolist())


def getTrain_Test():
    """Load the processed CSV and fill ``train_x_list`` / ``label_y_list``.

    Rows 0-39999 form the training split and rows 40000-41699 the test split;
    columns 5:12 are the features and column 4 is the target. Which split is
    batched depends on ``FLAGS.isPreData_isTrain_isPredict`` (1 = training
    split, 2 = test split); any other value leaves the lists untouched.
    """
    df = pd.read_csv(config.process_file_path)
    weather_data = df.iloc[0:40000, 5:12]
    test_data = df.iloc[40000:41700, 5:12]
    weather_pm = df.iloc[0:40000, 4:5]
    test_pm = df.iloc[40000:41700, 4:5]
    print(weather_data)
    print(type(weather_pm))
    print(len(weather_pm))
    mode = FLAGS.isPreData_isTrain_isPredict
    if mode == 2:
        # Predict mode: batch the held-out test rows.
        _append_batches(test_data, test_pm)
    elif mode == 1:
        # Train mode: batch the training rows.
        _append_batches(weather_data, weather_pm)
def Bi_lstm():
    """Build the two-layer bidirectional LSTM and return the prediction tensor.

    The single batch held by ``input_data`` is reshaped to
    [batch_size, input_train_dim, 1] and passed through two stacked
    bidirectional LSTM layers. The forward and backward halves of the second
    layer are summed; depending on ``FLAGS.isAttention`` the result is reduced
    either by taking the last time step (0) or by ``Attention`` (otherwise)
    before the fully connected output layer(s).

    Returns:
        Tensor of shape [batch_size, input_label_dim].
    """
    # Dropout is a training-time regulariser; keep it off in every other mode.
    is_training = FLAGS.isPreData_isTrain_isPredict == 1

    def _dropout_cell(input_keep, output_keep):
        # Every direction gets its own LSTMCell. Wrapping one shared cell
        # object for both directions either ties their weights or raises,
        # depending on the TensorFlow version.
        cell = tf.nn.rnn_cell.LSTMCell(num_units=config.lstm_units)
        if not is_training:
            input_keep = output_keep = 1.0
        return tf.nn.rnn_cell.DropoutWrapper(
            cell, input_keep_prob=input_keep, output_keep_prob=output_keep)

    # [1, batch, dim, 1] ===> [batch, dim, 1]
    layer_input = tf.reshape(
        input_data, [config.batch_size, config.input_train_dim, 1])

    with tf.variable_scope('lstm1'):
        # [batch, dim, 1] ===> two tensors of [batch, dim, lstm_units]
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            _dropout_cell(0.95, 0.95), _dropout_cell(0.95, 0.95),
            layer_input, dtype=tf.float32)
        # ===> [batch, dim, 2 * lstm_units]
        layer_input = tf.concat(outputs, 2)

    with tf.variable_scope('lstm2'):
        # [batch, dim, 2 * lstm_units] ===> two tensors of [batch, dim, lstm_units]
        outputs2, _ = tf.nn.bidirectional_dynamic_rnn(
            _dropout_cell(0.90, 0.80), _dropout_cell(0.90, 0.80),
            layer_input, dtype=tf.float32)
        # ===> [batch, dim, 2 * lstm_units]
        bi_concat_tensor = tf.concat(outputs2, 2)

    # Sum the forward and backward features: [batch, dim, lstm_units].
    forward, backward = tf.split(bi_concat_tensor, 2, 2)
    lstm_value = forward + backward

    if FLAGS.isAttention == 0:
        # Keep only the last time step: [batch, lstm_units].
        last_step = lstm_value[:, -1, :]
        with tf.variable_scope("full_connect"):
            # [lstm_units, input_label_dim]
            weight = w_init(config.lstm_units, config.input_label_dim)
            bias = b_init(config.input_label_dim)
            # [batch, input_label_dim]
            predict = tf.nn.xw_plus_b(last_step, weight, bias)
            return predict
    else:
        # Attention collapses the time axis: [batch, lstm_units].
        attention = Attention(lstm_value)
        with tf.variable_scope("full_connect1"):
            # [batch, lstm_units] x [lstm_units, 256] = [batch, 256]
            weight = w_init(config.lstm_units, 256)
            bias = b_init(256)
            full_value = tf.nn.xw_plus_b(attention, weight, bias)
        with tf.variable_scope("full_connect2"):
            # [batch, 256] x [256, input_label_dim] = [batch, input_label_dim]
            weight = w_init(256, config.input_label_dim)
            bias = b_init(config.input_label_dim)
            predict = tf.nn.xw_plus_b(full_value, weight, bias)
            return predict
def train_lstm():
    """Build the graph, then train (mode 1) or evaluate a checkpoint (mode 2).

    The mode is read from ``FLAGS.isPreData_isTrain_isPredict``. ``loss`` is
    the summed squared error of one batch; ``acc`` is, despite its name, the
    mean absolute error of that batch. Training runs 20 passes over
    ``train_x_list`` and writes summaries to ``./Pm_summary/``.
    """
    getTrain_Test()
    predict = Bi_lstm()
    error = tf.reshape(predict, [-1]) - tf.reshape(input_label, [-1])
    loss = tf.reduce_sum(tf.square(error))
    acc = tf.reduce_mean(tf.abs(error))
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = tf.train.AdamOptimizer().minimize(loss)
    saver = tf.train.Saver()
    init_op = tf.global_variables_initializer()
    tf.summary.scalar("loss", loss)
    tf.summary.scalar("acc", acc)
    merge = tf.summary.merge_all()

    checkpoint_path = "C:\\Users\\76152\\PycharmProjects\\LSTM\\check\\check"
    report = '第%d轮,第%d批次,loss===》%f,预测误差为=====》%f'
    mode = FLAGS.isPreData_isTrain_isPredict

    def _feed(batch_index):
        # The placeholders expect a leading dimension of 1 around the batch.
        return {input_data: [train_x_list[batch_index]],
                input_label: [label_y_list[batch_index]]}

    with tf.Session() as sess:
        sess.run(init_op)
        if mode == 1:
            file_write = tf.summary.FileWriter("./Pm_summary/", sess.graph)
            # Monotonic step counter so later epochs do not reuse the
            # summary steps of earlier ones.
            global_step = 0
            try:
                for epoch in range(20):
                    # Visit every batch, including the last one.
                    for batch_index in range(len(train_x_list)):
                        # One run per step: the reported loss/acc come from
                        # the same forward pass that produced the update.
                        _, loss_val, acc_val, merged = sess.run(
                            [train_op, loss, acc, merge],
                            feed_dict=_feed(batch_index))
                        global_step += 1
                        print(report % (epoch, batch_index + 1,
                                        loss_val, acc_val))
                        file_write.add_summary(merged, global_step)
                    # Checkpoint after every epoch so an interrupted run
                    # still leaves the latest weights on disk.
                    saver.save(sess, checkpoint_path)
            finally:
                # Flush pending summary events and release the file.
                file_write.close()
        if mode == 2:
            saver.restore(sess, checkpoint_path)
            for batch_index in range(len(train_x_list)):
                loss_val, acc_val = sess.run([loss, acc],
                                             feed_dict=_feed(batch_index))
                print(report % (0, batch_index + 1, loss_val, acc_val))
# [200,7,128]
def Attention(input):
    """Collapse the time axis of the LSTM output with learned attention weights.

    Two variants are selected by ``FLAGS.is_featureMap``:
    0 scores all time steps from the flattened sequence, any other value
    scores every time step from its own feature vector.

    Args:
        input: Tensor that is reshaped here as
            [batch_size, input_train_dim, lstm_units].

    Returns:
        Tensor of shape [batch_size, lstm_units].
    """
    batch_size = config.batch_size
    steps = config.input_train_dim
    units = config.lstm_units

    tanh_output = tf.tanh(input)
    if FLAGS.is_featureMap == 0:
        weight = tf.Variable(
            tf.truncated_normal(shape=[units * steps, steps]))
        flat = tf.reshape(tanh_output, [batch_size, units * steps])
        # [batch, steps * units] x [steps * units, steps] = [batch, steps]
        scores = tf.matmul(flat, weight)
    else:
        weight = tf.Variable(tf.truncated_normal(shape=[units, 1]))
        flat = tf.reshape(tanh_output, [batch_size * steps, units])
        # [batch * steps, units] x [units, 1] = [batch * steps, 1].
        # The scores must be brought back to [batch, steps] before the
        # softmax; a softmax over the size-1 axis would make every weight 1.
        scores = tf.reshape(tf.matmul(flat, weight), [batch_size, steps])

    # One weight per time step, normalised over the steps of each sample.
    alpha = tf.nn.softmax(scores)
    # [batch, units, steps] x [batch, steps, 1] = [batch, units, 1]
    context = tf.matmul(tf.transpose(input, [0, 2, 1]),
                        tf.reshape(alpha, [batch_size, steps, 1]))
    # Squeeze only the trailing axis so the batch axis can never be removed.
    context = tf.tanh(tf.squeeze(context, axis=[2]))
    if FLAGS.is_featureMap == 0 and FLAGS.isPreData_isTrain_isPredict == 1:
        # Dropout belongs to this variant only and only while training.
        context = tf.nn.dropout(context, keep_prob=0.9)
    return context
if __name__ == '__main__':
    # Mode 0 is the preprocessing mode of the isPreData_isTrain_isPredict
    # flag; modes 1 and 2 (train / predict) are handled inside train_lstm().
    if FLAGS.isPreData_isTrain_isPredict == 0:
        PreData()
    else:
        train_lstm()
8.loss以及acc
9.总结
效果一般,LSTM可能并不是很适合这个任务。如果哪位同学降低了预测误差(即代码中的acc,实际是平均绝对误差),欢迎告诉我!