# 双向LSTM

## 概观

1. 双向LSTM
2. 序列分类问题
3. LSTM序列分类
4. 双向LSTM序列分类
5. 将LSTM和双向LSTM作比较
6. 比较双向LSTM不同的合并模式

## 双向LSTM

– Mike Schuster和Kuldip K. Paliwal，Bidirectional Recurrent Neural Networks，1997

– Alex Graves和Jurgen Schmidhuber，Framewise Phoneme Classification with Bidirectional LSTM and Other Neural Network Architectures，2005

– Alex Graves和Jurgen Schmidhuber，Framewise Phoneme Classification with Bidirectional LSTM and Other Neural Network Architectures，2005

### Keras中的双向LSTM

Keras通过双向层包装器（Bidirectional wrapper）支持双向LSTM。

• ‘ sum’：输出相加。
• ‘ mul’：输出相乘。
• ‘ concat’：输出连接在一起（默认），将输出数量提高到下一层的两倍。
• ‘ ave’：输出的平均值。

## 序列分类问题

 0.63144003 0.29414551 0.91587952 0.95189228 0.32195638 0.60742236 0.83895793 0.18023048 0.84762691 0.29165514

 0 0 0 1 1 1 1 1 1 1

 1 # create a sequence of random numbers in [0,1]
 2
 3 X = array([random() for _ in range(10)])

 1 # calculate cut-off value to change class values
 2
 3 limit = 10/4.0

 1 pos1, pos1+pos2, pos1+pos2+pos3, …

 1 # determine the class outcome for each item in cumulative sequence
 2
 3 y = array([0 if x < limit else 1 for x in cumsum(X)])

 01 from random import random
 02
 03 from numpy import array
 04
 05 from numpy import cumsum
 06
 07
 08
 09 # create a sequence classification instance
 10
 11 def get_sequence(n_timesteps):
 12
 13 # create a sequence of random numbers in [0,1]
 14
 15 X = array([random() for _ in range(n_timesteps)])
 16
 17 # calculate cut-off value to change class values
 18
 19 limit = n_timesteps/4.0
 20
 21 # determine the class outcome for each item in cumulative sequence
 22
 23 y = array([0 if x < limit else 1 for x in cumsum(X)])
 24
 25 return X, y

 1 X, y = get_sequence(10)
 2
 3 print(X)
 4
 5 print(y)

 [ 0.22228819 0.26882207 0.069623 0.91477783 0.02095862 0.71322527 0.90159654 0.65000306 0.88845226 0.4037031 ] [0 0 0 0 0 0 1 1 1 1]

## LSTM用于序列分类

 1 # reshape input and output data to be suitable for LSTMs
 2
 3 X = X.reshape(1, n_timesteps, 1)
 4
 5 y = y.reshape(1, n_timesteps, 1)

 01 # create a sequence classification instance
 02 def get_sequence(n_timesteps):
 03 # create a sequence of random numbers in [0,1]
 04 X = array([random() for _ in range(n_timesteps)])
 05 # calculate cut-off value to change class values
 06 limit = n_timesteps/4.0
 07 # determine the class outcome for each item in cumulative sequence
 08 y = array([0 if x < limit else 1 for x in cumsum(X)])
 09 # reshape input and output data to be suitable for LSTMs
 10 X = X.reshape(1, n_timesteps, 1)
 11 y = y.reshape(1, n_timesteps, 1)
 12 return X, y

 1 # define LSTM
 2
 3 model = Sequential()
 4
 5 model.add(LSTM(20, input_shape=(10, 1), return_sequences=True))
 6
 7 model.add(TimeDistributed(Dense(1, activation='sigmoid')))
 8
 9 model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

LSTM将训练1000个周期（epoch）。每个周期都会生成一个新的随机输入序列来拟合网络。这样可以确保模型不会记住某个单一序列，而是学习到一个能够泛化到所有可能随机输入序列的解决方案。

 01 # train LSTM
 02
 03 for epoch in range(1000):
 04
 05 # generate new random sequence
 06
 07 X,y = get_sequence(n_timesteps)
 08
 09 # fit model for one epoch on this sequence
 10
 11 model.fit(X, y, epochs=1, batch_size=1, verbose=2)

 1 # evaluate LSTM
 2
 3 X,y = get_sequence(n_timesteps)
 4
 5 yhat = model.predict_classes(X, verbose=0)
 6
 7 for i in range(n_timesteps):
 8
 9 print('Expected:', y[0, i], 'Predicted', yhat[0, i])

 01 from random import random
 02
 03 from numpy import array
 04
 05 from numpy import cumsum
 06
 07 from keras.models import Sequential
 08
 09 from keras.layers import LSTM
 10
 11 from keras.layers import Dense
 12
 13 from keras.layers import TimeDistributed
 14
 15
 16
 17 # create a sequence classification instance
 18
 19 def get_sequence(n_timesteps):
 20
 21 # create a sequence of random numbers in [0,1]
 22
 23 X = array([random() for _ in range(n_timesteps)])
 24
 25 # calculate cut-off value to change class values
 26
 27 limit = n_timesteps/4.0
 28
 29 # determine the class outcome for each item in cumulative sequence
 30
 31 y = array([0 if x < limit else 1 for x in cumsum(X)])
 32
 33 # reshape input and output data to be suitable for LSTMs
 34
 35 X = X.reshape(1, n_timesteps, 1)
 36
 37 y = y.reshape(1, n_timesteps, 1)
 38
 39 return X, y
 40
 41
 42
 43 # define problem properties
 44
 45 n_timesteps = 10
 46
 47 # define LSTM
 48
 49 model = Sequential()
 50
 51 model.add(LSTM(20, input_shape=(n_timesteps, 1), return_sequences=True))
 52
 53 model.add(TimeDistributed(Dense(1, activation='sigmoid')))
 54
 55 model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
 56
 57 # train LSTM
 58
 59 for epoch in range(1000):
 60
 61 # generate new random sequence
 62
 63 X,y = get_sequence(n_timesteps)
 64
 65 # fit model for one epoch on this sequence
 66
 67 model.fit(X, y, epochs=1, batch_size=1, verbose=2)
 68
 69 # evaluate LSTM
 70
 71 X,y = get_sequence(n_timesteps)
 72
 73 yhat = model.predict_classes(X, verbose=0)
 74
 75 for i in range(n_timesteps):
 76
 77 print('Expected:', y[0, i], 'Predicted', yhat[0, i])

 01 ...
 02
 03 Epoch1/1
 04
 05 0s - loss:0.2039 - acc:0.9000
 06
 07 Epoch1/1
 08
 09 0s - loss:0.2985 - acc:0.9000
 10
 11 Epoch1/1
 12
 13 0s - loss:0.1219 - acc:1.0000
 14
 15 Epoch1/1
 16
 17 0s - loss:0.2031 - acc:0.9000
 18
 19 Epoch1/1
 20
 21 0s - loss:0.1698 - acc:0.9000
 22
 23 Expected: [0] Predicted [0]
 24
 25 Expected: [0] Predicted [0]
 26
 27 Expected: [0] Predicted [0]
 28
 29 Expected: [0] Predicted [0]
 30
 31 Expected: [0] Predicted [0]
 32
 33 Expected: [0] Predicted [1]
 34
 35 Expected: [1] Predicted [1]
 36
 37 Expected: [1] Predicted [1]
 38
 39 Expected: [1] Predicted [1]
 40
 41 Expected: [1] Predicted [1]

## 双向LSTM用于序列分类

 1 model.add(Bidirectional(LSTM(20, return_sequences=True), input_shape=(n_timesteps, 1)))

 01 from random import random
 02 from numpy import array
 03 from numpy import cumsum
 04 from keras.models import Sequential
 05 from keras.layers import LSTM
 06 from keras.layers import Dense
 07 from keras.layers import TimeDistributed
 08 from keras.layers import Bidirectional
 09 
 10 # create a sequence classification instance
 11 def get_sequence(n_timesteps):
 12 # create a sequence of random numbers in [0,1]
 13 X = array([random() for _ in range(n_timesteps)])
 14 # calculate cut-off value to change class values
 15 limit = n_timesteps/4.0
 16 # determine the class outcome for each item in cumulative sequence
 17 y = array([0 if x < limit else 1 for x in cumsum(X)])
 18 # reshape input and output data to be suitable for LSTMs
 19 X = X.reshape(1, n_timesteps, 1)
 20 y = y.reshape(1, n_timesteps, 1)
 21 return X, y
 22 
 23 # define problem properties
 24 n_timesteps = 10
 25 # define LSTM
 26 model = Sequential()
 27 model.add(Bidirectional(LSTM(20, return_sequences=True), input_shape=(n_timesteps, 1)))
 28 model.add(TimeDistributed(Dense(1, activation='sigmoid')))
 29 model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
 30 # train LSTM
 31 for epoch in range(1000):
 32 # generate new random sequence
 33 X, y = get_sequence(n_timesteps)
 34 # fit model for one epoch on this sequence
 35 model.fit(X, y, epochs=1, batch_size=1, verbose=2)
 36 # evaluate LSTM
 37 X, y = get_sequence(n_timesteps)
 38 yhat = model.predict_classes(X, verbose=0)
 39 for i in range(n_timesteps):
 40 print('Expected:', y[0, i], 'Predicted', yhat[0, i])

 01 ...
 02
 03 Epoch1/1
 04
 05 0s - loss:0.0967 - acc:0.9000
 06
 07 Epoch1/1
 08
 09 0s - loss:0.0865 - acc:1.0000
 10
 11 Epoch1/1
 12
 13 0s - loss:0.0905 - acc:0.9000
 14
 15 Epoch1/1
 16
 17 0s - loss:0.2460 - acc:0.9000
 18
 19 Epoch1/1
 20
 21 0s - loss:0.1458 - acc:0.9000
 22
 23 Expected: [0] Predicted [0]
 24
 25 Expected: [0] Predicted [0]
 26
 27 Expected: [0] Predicted [0]
 28
 29 Expected: [0] Predicted [0]
 30
 31 Expected: [0] Predicted [0]
 32
 33 Expected: [1] Predicted [1]
 34
 35 Expected: [1] Predicted [1]
 36
 37 Expected: [1] Predicted [1]
 38
 39 Expected: [1] Predicted [1]
 40
 41 Expected: [1] Predicted [1]

## 将LSTM与双向LSTM进行比较

1. LSTM（按原样）
2. 带有反向输入序列的LSTM（例如，您可以通过将LSTM层的“go_backwards”参数设置为“True”来实现）
3. 双向LSTM

 01 def get_lstm_model(n_timesteps, backwards):
 02
 03 model = Sequential()
 04
 05 model.add(LSTM(20, input_shape=(n_timesteps, 1), return_sequences=True, go_backwards=backwards))
 06
 07 model.add(TimeDistributed(Dense(1, activation='sigmoid')))
 08
 09 model.compile(loss='binary_crossentropy', optimizer='adam')
 10
 11 return model

 01 def get_bi_lstm_model(n_timesteps, mode):
 02
 03 model = Sequential()
 04
 05 model.add(Bidirectional(LSTM(20, return_sequences=True), input_shape=(n_timesteps, 1), merge_mode=mode))
 06
 07 model.add(TimeDistributed(Dense(1, activation='sigmoid')))
 08
 09 model.compile(loss='binary_crossentropy', optimizer='adam')
 10
 11 return model

 01 def train_model(model, n_timesteps):
 02
 03 loss = list()
 04
 05 for _ in range(250):
 06
 07 # generate new random sequence
 08
 09 X,y = get_sequence(n_timesteps)
 10
 11 # fit model for one epoch on this sequence
 12
 13 hist = model.fit(X, y, epochs=1, batch_size=1, verbose=0)
 14
 15 loss.append(hist.history['loss'][0])
 16
 17 return loss

 001 from random import random
 002
 003 from numpy import array
 004
 005 from numpy import cumsum
 006
 007 from matplotlib import pyplot
 008
 009 from pandas import DataFrame
 010
 011 from keras.models import Sequential
 012
 013 from keras.layers import LSTM
 014
 015 from keras.layers import Dense
 016
 017 from keras.layers import TimeDistributed
 018
 019 from keras.layers import Bidirectional
 020
 021
 022
 023 # create a sequence classification instance
 024
 025 def get_sequence(n_timesteps):
 026
 027 # create a sequence of random numbers in [0,1]
 028
 029 X = array([random() for _ in range(n_timesteps)])
 030
 031 # calculate cut-off value to change class values
 032
 033 limit = n_timesteps/4.0