LSTM Explained with an Example

For details, see the LSTM basics tutorial. The script below builds a stacked LSTM in TensorFlow 1.x that learns to predict the next step of a multivariate time series from a 40-step lookback window, then restores the trained network, runs it in closed loop, and plots the predicted and actual trajectories together with the prediction error.


# LSTM prediction example
# -*- coding: utf-8 -*-
import numpy as np
import numpy.matlib  # required for np.matlib.repmat used below
import tensorflow as tf
import sklearn.preprocessing as sk
import sklearn.metrics as sm
import time
import sys
import os
import matplotlib
from matplotlib import rc
import matplotlib.pyplot as pl
import random
import pickle

def getData():
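    # Load the raw series from ./Data/data.npy, keep every 10th time step, and drop the last column.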
    with open('./Data/data.npy', 'rb') as file_:
        data = np.load(file_)
    return data[::10,:-1]

def scaleData(X):
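    # Standardize each feature to zero mean and unit variance; return the fitted scaler
    # so the transform can be inverted later.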
    scaler = sk.StandardScaler()
    scaler = scaler.fit(X)
    X_scaled = scaler.transform(X)
    return X_scaled, scaler

def generateData(X, lookback_time, pred_time):
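    # Slide a window over the series: each input is a lookback_time-step window,
    # each target is the following pred_time step(s).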
    dataX, dataY = [], []
    for i in range(len(X) - lookback_time):
        dataX.append(X[i:i+lookback_time,:])
        dataY.append(X[i+lookback_time:i+lookback_time+pred_time,:])
    return np.array(dataX), np.array(dataY)

def computeLossFunction(Y, prediction, loss_weight, lambda_loss_amount):
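    # Weighted mean squared error over the output components (weights loss_weight, normalized
    # by their sum) plus an L2 penalty of strength lambda_loss_amount on all trainable variables.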
    # tf.nn.l2_loss(t) computes sum(t**2) / 2
    l2 = lambda_loss_amount*sum(tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables())
    loss_ = tf.matmul(tf.reduce_mean(tf.squared_difference(prediction, Y), 0), loss_weight)/sum(loss_weight)
    loss = loss_[0][0] + l2
    return loss

def computeNumberOfModelParameters():
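    # Count the total number of trainable parameters in the current graph.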
    total_parameters = 0
    for variable in tf.trainable_variables():
        shape = variable.get_shape()
        variable_parameters = 1
        for dim in shape:
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    return total_parameters

cpu_time1 = time.time()

#Create folders for saving
save_dir = ['./Data/', './Figures/', './Network/']
for path in save_dir:
    if os.path.exists(path) is False:
        os.makedirs(path)
    
print('#'*10 + ' Generate data for train and test' + '#'*10)
data = getData()
data_train = data[:200000]
data_val = data[200000:220000]
data_mean = np.mean(data_train, axis=0)
datakc = data_train - np.matlib.repmat(data_mean, data_train.shape[0], 1)
Ek = np.mean(datakc**2, 0)
loss_weights = np.float32(np.reshape(np.sqrt(Ek), (-1, 1)))

input_size = data.shape[1]
output_size = input_size
lookback_time = 40
pred_time = 1
hidden_size = 20
number_of_layers = 5


X_train, Y_train = generateData(data_train, lookback_time, pred_time)
X_train_reshaped = np.reshape(X_train, (X_train.shape[0]*X_train.shape[1], input_size))
Y_train_reshaped = np.reshape(Y_train, (Y_train.shape[0]*Y_train.shape[1], output_size))
X_train_scaled, scaler_input = scaleData(X_train_reshaped)
Y_train_scaled, scaler_output = scaleData(Y_train_reshaped)
X_train = np.reshape(X_train_scaled, (X_train.shape[0], X_train.shape[1], input_size))
Y_train = np.reshape(Y_train_scaled, (Y_train.shape[0], Y_train.shape[1], output_size))

X_val, Y_val = generateData(data_val, lookback_time, pred_time)
X_val_reshaped = np.reshape(X_val, (X_val.shape[0]*X_val.shape[1], input_size))
Y_val_reshaped = np.reshape(Y_val, (Y_val.shape[0]*Y_val.shape[1], output_size))
X_val_scaled = scaler_input.transform(X_val_reshaped)
Y_val_scaled = scaler_output.transform(Y_val_reshaped)
X_val = np.reshape(X_val_scaled, (X_val.shape[0], X_val.shape[1], input_size))
Y_val = np.reshape(Y_val_scaled, (Y_val.shape[0], Y_val.shape[1], output_size))

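# Switch: train and save a new network when Is_training is True; otherwise restore a
# saved network and run a closed-loop test.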
#Is_training = True
Is_training = False

if Is_training:

    batch_size = 100
    print('#'*10 + ' Create the graph' + '#'*10)
          
    tf.reset_default_graph()
    X = tf.placeholder(tf.float32, [None, lookback_time, input_size], name='input_x')
    Y = tf.placeholder(tf.float32, [None, pred_time, output_size], name='input_y')
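    # The per-layer (c, h) LSTM states are fed through a single placeholder of shape
    # [number_of_layers, 2, batch, hidden_size] and unpacked into LSTMStateTuples.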
    states = tf.placeholder(tf.float32, [number_of_layers, 2, None, hidden_size], name='lstm_states')
    tuple_states = tuple([tf.contrib.rnn.LSTMStateTuple(states[i][0], states[i][1]) for i in range(number_of_layers)])
    lam = tf.placeholder(tf.float32, name='lam')

    with tf.variable_scope('lstm'):
        lstm_cells = []
        for _ in range(number_of_layers):
            lstm_cells.append(tf.contrib.rnn.BasicLSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True, activation=tf.nn.softsign))
    
        multi_lstm = tf.contrib.rnn.MultiRNNCell(cells=lstm_cells, state_is_tuple=True)
        # Use a separate name for the returned state so the 'states' placeholder fed in the training loop is not shadowed.
        lstm_outputs, final_states = tf.nn.dynamic_rnn(multi_lstm, X, dtype=tf.float32, initial_state=tuple_states)
        
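    # Dense output layer: map the last LSTM output to a (pred_time, input_size) prediction.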
    with tf.variable_scope('output_lstm'):
        weights = tf.get_variable('weights', shape=[hidden_size*pred_time, input_size], initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.get_variable('biases', shape=[pred_time, input_size], initializer=tf.constant_initializer(0.1))
    
        output_lstm = tf.matmul(lstm_outputs[:,-1,:], weights)  # last time-step output; shapes match because pred_time = 1
        output_lstm = tf.reshape(output_lstm, [tf.shape(output_lstm)[0], pred_time, input_size])
        prediction  = output_lstm + biases
    #0.95: 5e-6:2.5==>5e-7:5.6==>5e-8:7.3==>5e-9:7.3==>1e-6:2.5
    loss = computeLossFunction(Y, prediction, loss_weights, lam)
    loss_val = computeLossFunction(Y, prediction, loss_weights, 0.0)   
    
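    # Exponentially decaying learning rate; global_step is fed with the current epoch inside the training loop.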
    learning_rate = 0.001
    decay_rate = 0.98
    global_step = tf.Variable(0, trainable=False)
    lr_adam = tf.train.exponential_decay(learning_rate, global_step, 1, decay_rate, staircase=False)
    
    optimizer = tf.train.AdamOptimizer(lr_adam)
    train = optimizer.minimize(loss)
    
    optimizer_train = tf.train.GradientDescentOptimizer(lr_adam)
    train_train = optimizer_train.minimize(loss)
    
    total_parameters = computeNumberOfModelParameters()
    print('# Number of parameters: {:}'.format(total_parameters))

    saver = tf.train.Saver(max_to_keep=10000)
    tf.add_to_collection('prediction', prediction)
    sess = tf.Session()
    
    no_of_batches = int(np.ceil(np.shape(X_train)[0]/batch_size))
    #n_train_per_epoch = X_train.shape[0]/4.0
    epoches = 250
    p = epoches #Patience

    Ni = [-9]
    for i in Ni:
        v = 0
        beta_ = 1*pow(10, i)
        parameters = '_LB=' + str(lookback_time) + '_HS=' + str(hidden_size) + '_beta_=' + str(beta_)
        print('\n# lambda:', beta_)
        print('#'*10 + ' Training' + '#'*10)
        init = tf.global_variables_initializer()
        sess.run(init)
        ep = 1
        k = 0
        v = 1e10
        resX, resLB, resLV = [], [], []
        while (ep<=epoches and k<=p):
            k_train = 1
            j_trained_before = 0
            ptr = 0
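            # Only a random subset of batches triggers a gradient step: train runs when
            # j - j_trained_before reaches k_train, which is redrawn from 1..4 after each update.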
            for j in range(no_of_batches-1):
                states_batch = np.zeros((number_of_layers, 2, batch_size, hidden_size))
                X_train_batch = X_train[ptr:ptr+batch_size,:,:]
                Y_train_batch = Y_train[ptr:ptr+batch_size,:,:]
                
                feed_dict = {X:X_train_batch, Y:Y_train_batch, states:states_batch, global_step:ep, lam:beta_}
                if (j - j_trained_before == k_train):
                    j_trained_before = j
                    k_train = random.randrange(1, 5)
                    sess.run(train, feed_dict = feed_dict)
    
                ptr += batch_size
    
            loss_batch = sess.run(loss, feed_dict=feed_dict)
            
#            states_val = np.zeros((number_of_layers, 2, X_val.shape[0], hidden_size))
#            loss_epoch = sess.run(loss_val, feed_dict={X:X_val, Y:Y_val, states:states_val})
#            sys.stdout.write('\r# Val Epoch={:d} Loss_val={:.7f}'.format(ep, loss_epoch))
#            sys.stdout.flush()
            
#            if (loss_epoch <= v):
#                v = loss_epoch
#                k = 0
#                saver.save(sess, './Network/lstm'+parameters)
#                print('# Save the graph beta_={:}'.format(beta_))
#            else:
#                k += 1
    
#            resX.append(beta_)
#            resLB.append(loss_batch)
#            resLV.append(loss_epoch)
    
            ep += 1
            saver.save(sess, './Network/ex42_lstm'+parameters)
            print('# Save the graph beta_={:}'.format(beta_))
#        resX, resLB, resLV = np.array(resX), np.array(resLB), np.array(resLV)
#        LOSS = {'X':resX, 'LB':resLB, 'LV':resLV}
#        with open('./Data/lstm' + parameters + '_loss.pickle', 'wb') as file_:
#            pickle.dump(LOSS, file_)            

else:
    beta_ = 1*pow(10, -9)
    h = 0.01
    parameters = '_LB=' + str(lookback_time) + '_HS=' + str(hidden_size) + '_beta_=' + str(beta_)
    
    stand = np.sqrt(np.mean(pow(np.linalg.norm(data, axis=1), 2)))
    tf.reset_default_graph()
    sess = tf.Session()
    new_saver = tf.train.import_meta_graph('./Network/ex42_lstm'+parameters+'.meta')
    new_saver.restore(sess, './Network/ex42_lstm'+parameters)
    prediction = tf.get_collection('prediction')[0]
    
    graph = tf.get_default_graph()
    input_x = graph.get_operation_by_name('input_x').outputs[0]
    lstm_states = graph.get_operation_by_name('lstm_states').outputs[0]
    
    for k in range(227000, 228000, 1600):
        TEST_EXAMPLES = 2000
        X_test = data[k:k+lookback_time,:]
        X_test_scaled = scaler_input.transform(X_test)
        Y_test = data[k+lookback_time:k+lookback_time+TEST_EXAMPLES,:]
        
        batch_size = 1
        sys.stdout.write('\r'+'#'*10+' Test '+str(k)+' '+'#'*10)
        sys.stdout.flush()
        result, real, err, dif = [], [], [], []
        x_test_batch = X_test_scaled.reshape((batch_size, lookback_time, input_size))
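        # Closed-loop rollout: the LSTM state is reset to zero at every step, each prediction is
        # rescaled and appended to the input window, and the oldest step is dropped, so the
        # network runs on its own outputs for TEST_EXAMPLES steps.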
        for i in range(TEST_EXAMPLES):
            states_test = np.zeros((number_of_layers, 2, batch_size, hidden_size))
            y_test_batch = sess.run(prediction, feed_dict={input_x:x_test_batch, lstm_states:states_test})
            y_test_batch_reshaped = np.reshape(y_test_batch, [-1, output_size])
            y_test_batch_scaled = scaler_output.inverse_transform(y_test_batch_reshaped)
            
#            x_test_batch_last = np.reshape(x_test_batch[:,-1,:], (batch_size, input_size))
#            x_test_batch_last_scaled = scaler_input.inverse_transform(x_test_batch_last)
#            x_pred = x_test_batch_last_scaled + y_test_batch_scaled
            x_pred = y_test_batch_scaled
            
            result.append(x_pred)
            real.append(Y_test[i])
            error = np.linalg.norm(x_pred - Y_test[i])/stand
            err.append(error)
            dif.append(x_pred - Y_test[i])

            x_feed_scaled = scaler_input.transform(x_pred)
            x_feed_scaled = np.reshape(x_feed_scaled, (1, pred_time, input_size))
            x_test_batch = np.concatenate((x_test_batch, x_feed_scaled), axis=1)
            x_test_batch = x_test_batch[:, pred_time:lookback_time+pred_time,:]
        
        result = np.reshape(np.array(result), (-1, output_size))
        real = np.reshape(np.array(real), (-1, output_size))
        err = np.array(err).reshape(-1, 1)
        dif = np.reshape(np.array(dif), (-1, output_size))
        
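        # Valid prediction horizon tv: the first time (in units of h) at which the normalized
        # error exceeds 0.1; if it never does, use the full prediction window.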
        err_s = [j for j, err_e in enumerate(err) if err_e > 0.1]
        if len(err_s) != 0:
            tv = err_s[0]*h
        else:
            tv = len(real)*h
            
    T_pre = np.linspace(0, h*len(real), len(real))
    figsize = 10, 10
    figure, ax = pl.subplots(figsize=figsize)
    pl.rc("text", usetex=True)
    
    font1 = {'family': 'Times New Roman', 'weight': 'normal', 'size': 18}
    font2 = {'family': 'Times New Roman', 'weight': 'normal', 'size': 30}
    ax1 = pl.subplot(411)
    pl.plot(T_pre, result[:,0], '-r', label=r'$Predicted$', linewidth=2)
    pl.plot(T_pre, real[:,0], '--b', label=r'$Actual$', linewidth=2)
    pl.legend(loc=1, ncol=2, prop=font1)
    pl.ylabel(r'$u(t)$', font2)
    pl.xticks([])
    pl.xlim(0, h*len(real))
    pl.tick_params(labelsize=20)  
    pl.title('$(a)$', fontsize=30, loc='left')
    labels = ax1.get_xticklabels() + ax1.get_yticklabels()  
    [label.set_fontname('Times New Roman') for label in labels]  
    ax1.set_xticks([])
    
    ax1 = pl.subplot(412)
    pl.plot(T_pre, result[:,1], '-r', label=r'$Predicted$', linewidth=2)
    pl.plot(T_pre, real[:,1], '--b', label=r'$Actual$', linewidth=2)
    pl.ylabel(r'$v(t)$', font2)
    pl.xticks([])
    pl.xlim(0, h*len(real))
    pl.tick_params(labelsize=20)  
    labels = ax1.get_xticklabels() + ax1.get_yticklabels()  
    [label.set_fontname('Times New Roman') for label in labels]  
    ax1.set_xticks([])
    
    ax1 = pl.subplot(413)
    pl.plot(T_pre, result[:,2], '-r', label=r'$Predicted$', linewidth=2)
    pl.plot(T_pre, real[:,2], '--b', label=r'$Actual$', linewidth=2)
    pl.ylabel(r'$\theta(t)$', font2)
    pl.xticks([])
    pl.xlim(0, h*len(real))
    pl.tick_params(labelsize=20)  
    labels = ax1.get_xticklabels() + ax1.get_yticklabels()  
    [label.set_fontname('Times New Roman') for label in labels]  
    ax1.set_xticks([])
    
    ax4 = pl.subplot(414)
    pl.plot(T_pre, err, '-r', linewidth=2)
    pl.axvline(tv, color='k', linewidth=2)
    pl.axhline(0.1, color='k', linewidth=2)
    pl.ylabel(r'$E(t)$', font2)
    pl.xlabel(r'$t$', font2)
    pl.ylim(0, 2)
    pl.xlim(0, h*len(real))
    pl.tick_params(labelsize=20)  
    labels = ax4.get_xticklabels() + ax4.get_yticklabels()  
    [label.set_fontname('Times New Roman') for label in labels]  
    #ax3.set_xticks([])
    
    pl.savefig('./Figures/ex42_lstm'+parameters+'.png', dpi=600)
    pl.savefig('./Figures/ex42_lstm'+parameters+'.pdf', dpi=600)

cpu_time2 = time.time()
print("Time Used :{:.2f}s".format(cpu_time2 - cpu_time1))

Note: the above is for reference only. Please credit the source when reposting.
