Python深度学习(7):温度预测

本节主要是将上一节学习到的RNN,LSTM,GRU用于温度预测问题,设一个baseline,然后不断进行优化。
数据集网址:耶拿天气数据集
下载后解压到当前目录即可。

数据集介绍

此为耶拿2009-2016的数据,每10分钟有一条数据,总共有420551行数据

基本思路

预处理阶段
编写一个Python生成器,以当前浮点数数组作为输入,并从最近的数据中生成数据批量,同时生成未来的目标温度。
将数据标准化,即每个时间序列减去平均值,除以标准差
建立一个常识性的基准方法,并使用平均绝对误差(MAE)来评估各方法
网络训练
在这个阶段不断优化网络结构
(1)使用DNN
(2)使用基于GRU的网络作为基准
(3)使用RNN stack
(4)使用双向GRU
(5)使用一维CNN
(6)使用一维卷积基和GRU结合

代码详解

import os
from matplotlib import pyplot as plt
import numpy as np
from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop


data_dir = './jena_climate_2009_2016.csv/jena_climate_2009_2016.csv'  # path to the raw CSV

# Read the whole CSV as text; `with` guarantees the file is closed even if
# reading fails (the original open/read/close leaked the handle on error).
with open(data_dir) as f:
    data = f.read()

lines = data.split('\n')          # split into rows
header = lines[0].split(',')      # first row is the column header
# Drop the header row AND any empty line (a trailing newline leaves an empty
# string after split, which would crash the float conversion below).
lines = [line for line in lines[1:] if line]

print(header)
print(len(lines))

# Parse every record into a float matrix, dropping the first (timestamp) column.
float_data = np.zeros((len(lines), len(header) - 1))
for i, line in enumerate(lines):
    float_data[i, :] = [float(x) for x in line.split(',')[1:]]

# Disabled exploratory visualization: plots the full temperature series
# (column 1) and then the first 10 days (1440 ten-minute steps). Kept as a
# reference; not executed.
'''
temp = float_data[:, 1]
plt.figure()
plt.plot(range(len(temp)), temp)
plt.legend()

plt.figure()
plt.plot(range(1440), temp[:1440])
plt.show()
'''
# Standardize every feature in place, using statistics computed from the
# first 200,000 rows only (the training portion), so no information from the
# validation/test ranges leaks into preprocessing.
mean = float_data[:200000].mean(axis=0)
float_data -= mean
# NOTE: std is taken after centering; the standard deviation of centered data
# equals that of the raw data, so the result is identical.
std = float_data[:200000].std(axis=0)
float_data /= std
#数据生成器
def generator(data, lookback, delay, min_index, max_index, shuffle=False, batch_size=128, step=6):
    if max_index is None:
        max_index = len(data) - delay - 1
    i = min_index = lookback
    while 1:
        if shuffle: #打乱顺序
            rows = np.random.randint(min_index + lookback, max_index, size=batch_size)
        else:
            if i + batch_size >= max_index:
                i = min_index + lookback #超过记录序号时,从头开始
            rows = np.arange(i, min(i+batch_size, max_index))
            i += len(rows)
        samples = np.zeros((len(rows), lookback // step, data.shape[-1]))
        targets = np.zeros((len(rows), ))
        for j, row in enumerate(rows):
            indices = range(rows[j] - lookback, rows[j], step)
            samples[j] = data[indices]
            targets[j] = data[rows[j] + delay][1]
        yield samples, targets

lookback = 1440   # look back over the previous 10 days of observations
step = 6          # sample one data point per hour (every 6th 10-min record)
delay = 144       # target is the temperature 24 hours in the future
batch_size = 128

# The three generators share the same windowing configuration and differ
# only in which slice of the data they draw from (train / val / test).
_gen_kwargs = dict(lookback=lookback, delay=delay, step=step, batch_size=batch_size)
train_gen = generator(float_data, min_index=0, max_index=200000,
                      shuffle=True, **_gen_kwargs)
val_gen = generator(float_data, min_index=200001, max_index=300000,
                    **_gen_kwargs)
test_gen = generator(float_data, min_index=300001, max_index=None,
                     **_gen_kwargs)
# Number of batches needed to cover each evaluation set exactly once.
val_steps = (300000 - 200001 - lookback) // batch_size
test_steps = (len(float_data) - 300001 - lookback) // batch_size
# Compute the MAE of a common-sense baseline.
def evaluate_naive_method():
    """Evaluate the naive "temperature stays the same" baseline.

    For every validation batch, predict that the temperature `delay` steps
    ahead equals the last observed (normalized) temperature (column 1), and
    print the MAE averaged over all validation batches.
    """
    maes = []
    for _ in range(val_steps):
        samples, targets = next(val_gen)
        # last timestep, temperature column, compared against the true target
        maes.append(np.mean(np.abs(samples[:, -1, 1] - targets)))
    print('mae=', np.mean(maes))

#evaluate_naive_method()
# The six alternative models below are kept inside triple-quoted strings so
# they are not executed; uncomment exactly ONE section at a time to train it.
# Each section trains against the same train/validation generators and binds
# the result to `history`, which the plotting code at the bottom consumes.

# (1) Densely connected baseline: flatten the window and apply a small MLP.
'''
#密集连接模型DNN
model = Sequential()
model.add(layers.Flatten(input_shape=[lookback // step, float_data.shape[-1]]))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(1))
model.compile(optimizer=RMSprop(), loss = 'mae')
history = model.fit_generator(train_gen, steps_per_epoch=500, epochs=20,
                              validation_data=val_gen, validation_steps=val_steps)
'''

# (2) Single GRU layer with dropout — the recurrent baseline.
'''
#基于GRU的模型
model  = Sequential()
model.add(layers.GRU(32, dropout = 0.2, recurrent_dropout = 0.2, input_shape=(None, float_data.shape[-1])))
model.add(layers.Dense(1))

model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen, steps_per_epoch=500, epochs=20,
                              validation_data=val_gen, validation_steps=val_steps)
'''
# (3) Stacked recurrent layers: the first GRU returns full sequences so the
#     second GRU can consume them.
'''
#循环差堆叠
model  = Sequential()
model.add(layers.GRU(32, dropout = 0.1, recurrent_dropout = 0.5, return_sequences=True, input_shape=(None, float_data.shape[-1])))
model.add(layers.GRU(64, activation='relu',dropout = 0.1, recurrent_dropout = 0.5))
model.add(layers.Dense(1))

model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen, steps_per_epoch=500, epochs=20,
                              validation_data=val_gen, validation_steps=val_steps)
'''

# (4) Bidirectional GRU: processes each window forwards and backwards.
'''
#使用双向GRU
model = Sequential()
model.add(layers.Bidirectional(layers.GRU(32), input_shape=(None, float_data.shape[-1])))
model.add(layers.Dense(1))
model.compile(optimizer=RMSprop(), loss = 'mae')
history = model.fit_generator(train_gen, steps_per_epoch=500, epochs=40,
                              validation_data=val_gen, validation_steps=val_steps)
'''
# (5) Pure 1-D convolutional model with max-pooling.
'''
#使用一维CNN
model = Sequential()
model.add(layers.Conv1D(32, 5, activation='relu', input_shape=(None, float_data.shape[-1])))
model.add(layers.MaxPooling1D(3))
model.add(layers.Conv1D(32, 5, activation='relu'))
model.add(layers.MaxPooling1D(3))
model.add(layers.Conv1D(32, 5, activation='relu'))
model.add(layers.GlobalMaxPool1D())
model.add(layers.Dense(1))
model.compile(optimizer=RMSprop(), loss = 'mae')
history = model.fit_generator(train_gen, steps_per_epoch=500, epochs = 20,
                              validation_data=val_gen, validation_steps=val_steps)
'''

# (6) 1-D convolutional front-end (cheap downsampling) feeding a GRU.
'''
#一维卷积基与GRU融合
model  = Sequential()
model.add(layers.Conv1D(32, 5, activation='relu', input_shape=(None, float_data.shape[-1])))
model.add(layers.MaxPooling1D(3))
model.add(layers.Conv1D(32, 5, activation='relu'))
model.add(layers.GRU(32, dropout = 0.1, recurrent_dropout = 0.5))
model.add(layers.Dense(1))

model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen, steps_per_epoch=500, epochs=20,
                              validation_data=val_gen, validation_steps=val_steps)
'''
# Plot training curves. `history` only exists if one of the model sections
# above has been uncommented and run; as shipped they are all disabled, so
# guard the plotting to avoid crashing with a NameError.
if 'history' in globals():
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    epochs = range(1, len(loss) + 1)

    plt.figure()
    plt.plot(epochs, loss, 'bo', label='training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and Validation loss')
    plt.legend()

    plt.show()

运行结果就不一一展示了,书上讲这个例子主要是让我们学习一下平时如何慢慢调整baseline。

  • 7
    点赞
  • 137
    收藏
    觉得还不错? 一键收藏
  • 12
    评论
评论 12
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值