Time-series data:
Within each lookback window, observations are sampled at a fixed interval (step), while the start points of two consecutive windows in the same batch are adjacent rows.
See also: "TensorFlow - generating batched data - method 3" (Zhou_Dao's blog on CSDN), which covers the same kind of time series.
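To make that sampling pattern concrete, here is a minimal standalone sketch (not part of the script below; lookback and step match the values used later): within one window every step-th point is kept, and two adjacent samples in a batch start exactly one row apart.

lookback, step = 1440, 6
for row in (1440, 1441, 1442):          # three adjacent window end points
    indices = list(range(row - lookback, row, step))
    print(row, indices[:3], '...', indices[-1])
# 1440 [0, 6, 12] ... 1434
# 1441 [1, 7, 13] ... 1435   (each window starts exactly one row later)
# 1442 [2, 8, 14] ... 1436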
import numpy as np
from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop
import matplotlib.pyplot as plt
def get_data():
    fname = 'jena_climate_2009_2016.csv'
    with open(fname, 'r', encoding='UTF-8') as f:
        data = f.read()
    lines = data.split('\n')
    header = lines[0].split(',')
    lines = [line for line in lines[1:] if line]  # skip a possible trailing empty line
    print(len(lines), header)
    # drop the first column (the timestamp); keep the 14 numeric features
    all_data = np.zeros((len(lines), len(header) - 1))
    for i, line in enumerate(lines):
        values = [float(x) for x in line.split(',')[1:]]
        all_data[i, :] = values
    # normalize with statistics from the first 200,000 rows (the training range)
    mean = all_data[:200000].mean(axis=0)
    all_data -= mean
    std = all_data[:200000].std(axis=0)
    all_data /= std
    return all_data
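# Optional sanity check (a sketch; assumes the CSV is present locally). The Jena
# dataset has 420,551 data rows and 14 numeric columns once the timestamp is
# dropped, and the first 200,000 rows should have roughly zero mean after
# normalization. Uncomment to verify:
# _data = get_data()
# print(_data.shape)                           # expected: (420551, 14)
# print(_data[:200000].mean(axis=0).round(3))  # expected: ~0 for every column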
def generator(data, lookback, delay, min_index, max_index,
              shuffle=False, batch_size=128, step=6):
    if max_index is None:
        max_index = len(data) - delay - 1
    i = min_index + lookback
    while True:
        # choose the end rows of this batch's windows
        if shuffle:
            rows = np.random.randint(min_index + lookback, max_index, size=batch_size)
        else:
            if i + batch_size >= max_index:
                i = min_index + lookback
            rows = np.arange(i, min(i + batch_size, max_index))
            i += len(rows)
        # one batch holds len(rows) windows of lookback // step timesteps each;
        # with lookback=1440 and step=6 that is 240 hourly points covering 10 days
        samples = np.zeros((len(rows), lookback // step, data.shape[-1]))
        targets = np.zeros((len(rows),))  # 1-D: one target temperature per window
        for j, row in enumerate(rows):
            # use the past 10 days (sampled hourly) to predict the value
            # `delay` steps (24 hours) after the window's end
            indices = range(row - lookback, row, step)
            samples[j] = data[indices]
            targets[j] = data[row + delay][1]  # column 1 is the temperature
        yield samples, targets
lookback = 1440   # 10 days of history (one observation every 10 minutes)
step = 6          # keep one observation per hour
delay = 144       # predict the temperature 24 hours into the future
batch_size = 128
all_data = get_data()
train_gen = generator(all_data,
lookback=lookback,
delay=delay,
min_index=0,
max_index=200000,
shuffle=True,
step=step,
batch_size=batch_size)
val_gen = generator(all_data,
lookback=lookback,
delay=delay,
min_index=200001,
max_index=300000,
step=step,
batch_size=batch_size)
test_gen = generator(all_data,
lookback=lookback,
delay=delay,
min_index=300001,
max_index=None,
step=step,
batch_size=batch_size)
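# Usage sketch for the generators above (shapes assume lookback=1440, step=6,
# batch_size=128 and the 14-column Jena data; 1440 // 6 = 240 hourly timesteps):
#   samples, targets = next(train_gen)
#   samples.shape  -> (128, 240, 14)
#   targets.shape  -> (128,)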
# number of batches needed to see the full validation / test ranges once
val_steps = (300000 - 200001 - lookback) // batch_size
test_steps = (len(all_data) - 300001 - lookback) // batch_size
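# Worked numbers for the constants above (assuming the full 420,551 rows):
#   val_steps  = (300000 - 200001 - 1440) // 128 = 769  batches per validation pass
#   test_steps = (420551 - 300001 - 1440) // 128 = 930  batches per test pass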
# way1: a common-sense, non-machine-learning baseline - predict that the
# temperature 24 hours from now equals the last measured temperature
def evaluate_naive_method():
    batch_maes = []
    for _ in range(val_steps):
        samples, targets = next(val_gen)
        # last hourly measurement of the temperature (column 1) in each window
        preds = samples[:, -1, 1]
        mae = np.mean(np.abs(preds - targets))
        batch_maes.append(mae)
    print(np.mean(batch_maes))
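# The MAE above is in normalized units. A sketch for converting it back to
# degrees Celsius (assumes get_data were changed to also return the per-column
# std; column 1 is 'T (degC)'). The book this section follows reports roughly
# 0.29, i.e. about 2.57 degC:
# celsius_mae = np.mean(batch_maes) * std[1]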
# way2: a single GRU layer
def GRU():
model = Sequential()
model.add(layers.GRU(32, input_shape=(None, all_data.shape[-1])))
    model.add(layers.Dense(1))  # the final prediction is a single scalar
model.compile(optimizer=RMSprop(), loss='mae')
    history = model.fit_generator(train_gen,
                                  steps_per_epoch=500,  # batches drawn from the generator per epoch
                                  epochs=20,
                                  validation_data=val_gen,
                                  validation_steps=val_steps)
return history
# way3: GRU with dropout regularization
def GRU_drop_out():
model = Sequential()
model.add(layers.GRU(32,
dropout=0.2,
recurrent_dropout=0.2,
input_shape=(None, all_data.shape[-1])))
model.add(layers.Dense(1))
model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen,
steps_per_epoch=500,
epochs=40,
validation_data=val_gen,
validation_steps=val_steps)
return history
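# Note (an assumption about common Keras/TensorFlow behavior, not stated in the
# original): using recurrent_dropout typically disables the fast cuDNN kernel,
# so these dropout runs train noticeably slower on a GPU.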
# way4: stacked GRU layers with dropout regularization
def stack_GRU_dropout():
model = Sequential()
    model.add(layers.GRU(32,
                         dropout=0.1,
                         recurrent_dropout=0.5,
                         return_sequences=True,  # intermediate layer must return the full sequence
                         input_shape=(None, all_data.shape[-1])))
model.add(layers.GRU(64, activation='relu',
dropout=0.1,
recurrent_dropout=0.5))
model.add(layers.Dense(1))
model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen,
steps_per_epoch=500,
epochs=40,
validation_data=val_gen,
validation_steps=val_steps)
return history
# way5: bidirectional RNN (note: this wraps a GRU, so the name reflects that)
def bid_GRU():
model = Sequential()
model.add(layers.Bidirectional(
layers.GRU(32), input_shape=(None, all_data.shape[-1])))
model.add(layers.Dense(1))
model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen,
steps_per_epoch=500,
epochs=40,
validation_data=val_gen,
validation_steps=val_steps)
return history
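# A related experiment from the same chapter: feeding sequences in reversed
# chronological order. A minimal wrapper sketch (reverse_order_generator is a
# hypothetical helper, not part of the original script):
def reverse_order_generator(gen):
    # flip each batch along the time axis; targets stay unchanged
    for samples, targets in gen:
        yield samples[:, ::-1, :], targets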
# way6 (from chapter 6.4): pure 1D convnet
def con1d():
model = Sequential()
    model.add(layers.Conv1D(32, 5, activation='relu', input_shape=(None, all_data.shape[-1])))
    model.add(layers.MaxPooling1D(3))
    model.add(layers.Conv1D(32, 5, activation='relu'))
    model.add(layers.MaxPooling1D(3))
    model.add(layers.Conv1D(32, 5, activation='relu'))
    model.add(layers.GlobalMaxPooling1D())
    model.add(layers.Dense(1))
model.compile(optimizer=RMSprop(), loss='mae')
    history = model.fit_generator(train_gen, steps_per_epoch=500, epochs=20,
                                  validation_data=val_gen, validation_steps=val_steps)
return history
# way7 (from chapter 6.4): Conv1D as a cheap downsampler, followed by a GRU
def temperature_con1d_GRU():
    step = 3        # finer resolution: one point every 30 minutes
    lookback = 720  # shorter history: 5 days
    delay = 144     # still predicting 24 hours ahead
train_gen = generator(all_data,
lookback=lookback,
delay=delay,
min_index=0,
max_index=200000,
shuffle=True,
step=step)
val_gen = generator(all_data,
lookback=lookback,
delay=delay,
min_index=200001,
max_index=300000,
step=step)
test_gen = generator(all_data,
lookback=lookback,
delay=delay,
min_index=300001,
max_index=None,
step=step)
    # recompute the steps for the shorter lookback (batch_size defaults to 128)
    val_steps = (300000 - 200001 - lookback) // 128
    test_steps = (len(all_data) - 300001 - lookback) // 128
model = Sequential()
model.add(layers.Conv1D(32, 5, activation='relu', input_shape=(None, all_data.shape[-1])))
model.add(layers.MaxPooling1D(3))
model.add(layers.Conv1D(32, 5, activation='relu')) # 直接加GRU
model.add(layers.GRU(32, dropout=0.1, recurrent_dropout=0.5))
model.add(layers.Dense(1))
model.summary()
    model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen, steps_per_epoch=500, epochs=20,
validation_data=val_gen, validation_steps=val_steps)
return history
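# Layer-by-layer output shapes for the model above (a sketch assuming an input
# of 720 // 3 = 240 timesteps; with input_shape=(None, ...) model.summary()
# prints None for the length):
#   Conv1D(32, 5)    -> (batch, 236, 32)   # 240 - 5 + 1
#   MaxPooling1D(3)  -> (batch, 78, 32)    # 236 // 3
#   Conv1D(32, 5)    -> (batch, 74, 32)    # 78 - 5 + 1
#   GRU(32)          -> (batch, 32)
#   Dense(1)         -> (batch, 1)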
def test_performance():
history = con1d()
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
test_performance()