2020-09-23

A sequence prediction example implementing seq2seq (Encoder-Decoder) with Keras

  • Problem description:
  • The input sequence is a randomly generated sequence of integers; the target sequence is the first three elements of the input sequence, reversed. This is of course just one mapping we define ourselves, and more complex scenarios can be substituted (see the sketch after the table below).
Input sequence				Target sequence
[13, 28, 18, 7, 9, 5]		[18, 28, 13]
[29, 44, 38, 15, 26, 22]	[38, 44, 29]
[27, 40, 31, 29, 32, 1]		[31, 40, 27]
...
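Before any vectorization, the mapping itself is easy to pin down. Here is a minimal pure-Python sketch of the rule (my own illustration, using the first row of the table above):

source = [13, 28, 18, 7, 9, 5]
target = source[:3]    # take the first three elements...
target.reverse()       # ...and reverse them in place
print(target)          # [18, 28, 13]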
  • Constructing and vectorizing the input and target sequences
from random import randint
from numpy import array
from numpy import argmax
from keras.utils import to_categorical
 
# Generate a random integer sequence of the given length, with values in [1, n_unique-1]
def generate_sequence(length, n_unique):
	return [randint(1, n_unique-1) for _ in range(length)]
 
# Build the training data required by the seq2seq LSTM model
def get_dataset(n_in, n_out, cardinality, n_samples):
	X1, X2, y = list(), list(), list()
	for _ in range(n_samples):
		# Generate the input sequence
		source = generate_sequence(n_in, cardinality)
		# Target sequence: the first n_out elements of the input, reversed
		target = source[:n_out]
		target.reverse()
		# Decoder input: the target shifted right one time step, with 0 as the start token (teacher forcing)
		target_in = [0] + target[:-1]
		# One-hot encode everything with to_categorical
		src_encoded = to_categorical(source, num_classes=cardinality)
		tar_encoded = to_categorical(target, num_classes=cardinality)
		tar2_encoded = to_categorical(target_in, num_classes=cardinality)
		
		X1.append(src_encoded)
		X2.append(tar2_encoded)
		y.append(tar_encoded)
	return array(X1), array(X2), array(y)
 
# One-hot decoding (argmax over each vector)
def one_hot_decode(encoded_seq):
	return [argmax(vector) for vector in encoded_seq]
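 
# Illustrative round-trip check (my own addition, not in the original post):
# to_categorical followed by one_hot_decode recovers the original integers
assert one_hot_decode(to_categorical([3, 1, 4], num_classes=5)) == [3, 1, 4]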
 
# Parameters
n_features = 50 + 1
n_steps_in = 6
n_steps_out = 3
# Generate one processed input/target sample to test the pipeline
X1, X2, y = get_dataset(n_steps_in, n_steps_out, n_features, 1)
print(X1.shape, X2.shape, y.shape)
print('X1=%s, X2=%s, y=%s' % (one_hot_decode(X1[0]), one_hot_decode(X2[0]), one_hot_decode(y[0])))

# Output:
(1, 6, 51) (1, 3, 51) (1, 3, 51)
X1=[32, 16, 12, 34, 25, 24], X2=[0, 12, 16], y=[12, 16, 32]
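Note the relationship visible in this sample: X2, the decoder input, is y shifted right by one step with 0 as the start token; this is what the decoder sees during teacher-forced training. A quick check of that invariant (my own addition):

assert one_hot_decode(X2[0]) == [0] + one_hot_decode(y[0])[:-1]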
  • Building the Seq2Seq model, following the official Keras example on github
from keras.models import Model
from keras.layers import Input
from keras.layers import LSTM
from keras.layers import Dense
 
# Build the Seq2Seq training model (model), plus the encoder model (encoder_model)
# and decoder model (decoder_model) needed for predicting new sequences
def define_models(n_input, n_output, n_units):
	# Encoder of the training model
	encoder_inputs = Input(shape=(None, n_input))
	encoder = LSTM(n_units, return_state=True)
	encoder_outputs, state_h, state_c = encoder(encoder_inputs)
	encoder_states = [state_h, state_c]   # keep only the encoder state vectors
	# Decoder of the training model
	decoder_inputs = Input(shape=(None, n_output))
	decoder_lstm = LSTM(n_units, return_sequences=True, return_state=True)
	decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
	decoder_dense = Dense(n_output, activation='softmax')
	decoder_outputs = decoder_dense(decoder_outputs)
	model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
	# Encoder used at inference time
	encoder_model = Model(encoder_inputs, encoder_states)
	# Decoder used at inference time
	decoder_state_input_h = Input(shape=(n_units,))
	decoder_state_input_c = Input(shape=(n_units,))
	decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
	decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
	decoder_states = [state_h, state_c]
	decoder_outputs = decoder_dense(decoder_outputs)
	decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)
	# Return the three models
	return model, encoder_model, decoder_model
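Note that encoder_model and decoder_model are built from the same layer objects as the training model, so fitting model also trains the inference models; no weight copying is needed. A quick illustrative check (my own addition, not from the original post):

# Every weight tensor of the inference models is shared with the training model
m, enc, dec = define_models(51, 51, 128)
shared = [w for w in m.weights if any(w is v for v in enc.weights + dec.weights)]
print(len(shared), 'of', len(m.weights), 'weight tensors are shared')  # expect: 8 of 8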
  • Sequence prediction (inference)
def predict_sequence(infenc, infdec, source, n_steps, cardinality):
	# Encode the input sequence into the encoder state vectors
	state = infenc.predict(source)
	# Initial decoder input: the start token (index 0), used to predict the first output character
	target_seq = array([0.0 for _ in range(cardinality)]).reshape(1, 1, cardinality)
	# Collected output steps
	output = list()
	for t in range(n_steps):
		# Predict the next character
		yhat, h, c = infdec.predict([target_seq] + state)
		# Store the predicted distribution for this time step
		output.append(yhat[0,0,:])
		# Update the decoder states
		state = [h, c]
		# Feed the prediction back as the next decoder input
		target_seq = yhat
	return array(output)
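One design choice worth flagging: the loop feeds the raw softmax distribution yhat straight back in as the next decoder input rather than a hard one-hot vector. That works here because the decoder input is just a vector of length cardinality, but a common alternative is to feed back the one-hot of the argmax. A sketch of that variant (my own addition; a drop-in replacement for the target_seq = yhat line above):

# Feed back a hard one-hot of the argmax instead of the soft distribution
next_id = argmax(yhat[0, 0, :])
target_seq = to_categorical([next_id], num_classes=cardinality).reshape(1, 1, cardinality)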
  • Evaluating the model
# Evaluate accuracy on freshly generated samples (exact match of the whole output sequence)
total, correct = 100, 0
for _ in range(total):
	X1, X2, y = get_dataset(n_steps_in, n_steps_out, n_features, 1)
	target = predict_sequence(infenc, infdec, X1, n_steps_out, n_features)
	if array_equal(one_hot_decode(y[0]), one_hot_decode(target)):
		correct += 1
print('Accuracy: %.2f%%' % (float(correct)/float(total)*100.0))

  • Complete code
from random import randint
from numpy import array
from numpy import argmax
from numpy import array_equal
from keras.utils import to_categorical
from keras.models import Model
from keras.layers import Input
from keras.layers import LSTM
from keras.layers import Dense
 
# Generate a random integer sequence of the given length, with values in [1, n_unique-1]
def generate_sequence(length, n_unique):
	return [randint(1, n_unique-1) for _ in range(length)]
 
# Build the training data required by the seq2seq LSTM model
def get_dataset(n_in, n_out, cardinality, n_samples):
	X1, X2, y = list(), list(), list()
	for _ in range(n_samples):
		# Generate the input sequence
		source = generate_sequence(n_in, cardinality)
		# Target sequence: the first n_out elements of the input, reversed
		target = source[:n_out]
		target.reverse()
		# Decoder input: the target shifted right one time step, with 0 as the start token (teacher forcing)
		target_in = [0] + target[:-1]
		# One-hot encode everything with to_categorical
		src_encoded = to_categorical(source, num_classes=cardinality)
		tar_encoded = to_categorical(target, num_classes=cardinality)
		tar2_encoded = to_categorical(target_in, num_classes=cardinality)
		
		X1.append(src_encoded)
		X2.append(tar2_encoded)
		y.append(tar_encoded)
	return array(X1), array(X2), array(y)
 
# Build the Seq2Seq training model (model), plus the encoder model (encoder_model)
# and decoder model (decoder_model) needed for predicting new sequences
def define_models(n_input, n_output, n_units):
	# Encoder of the training model
	encoder_inputs = Input(shape=(None, n_input))
	encoder = LSTM(n_units, return_state=True)
	encoder_outputs, state_h, state_c = encoder(encoder_inputs)
	encoder_states = [state_h, state_c]   # keep only the encoder state vectors
	# Decoder of the training model
	decoder_inputs = Input(shape=(None, n_output))
	decoder_lstm = LSTM(n_units, return_sequences=True, return_state=True)
	decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
	decoder_dense = Dense(n_output, activation='softmax')
	decoder_outputs = decoder_dense(decoder_outputs)
	model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
	# Encoder used at inference time
	encoder_model = Model(encoder_inputs, encoder_states)
	# Decoder used at inference time
	decoder_state_input_h = Input(shape=(n_units,))
	decoder_state_input_c = Input(shape=(n_units,))
	decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
	decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
	decoder_states = [state_h, state_c]
	decoder_outputs = decoder_dense(decoder_outputs)
	decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)
	# Return the three models
	return model, encoder_model, decoder_model
 
def predict_sequence(infenc, infdec, source, n_steps, cardinality):
	# Encode the input sequence into the encoder state vectors
	state = infenc.predict(source)
	# Initial decoder input: the start token (index 0), used to predict the first output character
	target_seq = array([0.0 for _ in range(cardinality)]).reshape(1, 1, cardinality)
	# Collected output steps
	output = list()
	for t in range(n_steps):
		# Predict the next character
		yhat, h, c = infdec.predict([target_seq] + state)
		# Store the predicted distribution for this time step
		output.append(yhat[0,0,:])
		# Update the decoder states
		state = [h, c]
		# Feed the prediction back as the next decoder input
		target_seq = yhat
	return array(output)
 
# One-hot decoding (argmax over each vector)
def one_hot_decode(encoded_seq):
	return [argmax(vector) for vector in encoded_seq]
 
# Parameters
n_features = 50 + 1
n_steps_in = 6
n_steps_out = 3
# Define the models
train, infenc, infdec = define_models(n_features, n_features, 128)
train.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
# Generate the training data
X1, X2, y = get_dataset(n_steps_in, n_steps_out, n_features, 100000)
print(X1.shape, X2.shape, y.shape)
# Train the model (a single epoch is enough for this toy task)
train.fit([X1, X2], y, epochs=1)
# Evaluate model accuracy
total, correct = 100, 0
for _ in range(total):
	X1, X2, y = get_dataset(n_steps_in, n_steps_out, n_features, 1)
	target = predict_sequence(infenc, infdec, X1, n_steps_out, n_features)
	if array_equal(one_hot_decode(y[0]), one_hot_decode(target)):
		correct += 1
print('Accuracy: %.2f%%' % (float(correct)/float(total)*100.0))
# Inspect a few predictions
for _ in range(10):
	X1, X2, y = get_dataset(n_steps_in, n_steps_out, n_features, 1)
	target = predict_sequence(infenc, infdec, X1, n_steps_out, n_features)
	print('X=%s y=%s, yhat=%s' % (one_hot_decode(X1[0]), one_hot_decode(y[0]), one_hot_decode(target)))
  • Results. After a single epoch the model reaches 99% exact-match accuracy; the last of the ten printed samples shows a typical error.
100000/100000 [==============================] - 28s 284us/step - loss: 0.6441 - acc: 0.7941
Accuracy: 99.00%
X=[16, 14, 28, 22, 9, 17] y=[28, 14, 16], yhat=[28, 14, 16]
X=[48, 36, 32, 49, 13, 5] y=[32, 36, 48], yhat=[32, 36, 48]
X=[27, 37, 6, 50, 30, 16] y=[6, 37, 27], yhat=[6, 37, 27]
X=[7, 7, 4, 45, 6, 16] y=[4, 7, 7], yhat=[4, 7, 7]
X=[4, 25, 33, 34, 21, 12] y=[33, 25, 4], yhat=[33, 25, 4]
X=[44, 44, 9, 35, 5, 16] y=[9, 44, 44], yhat=[9, 44, 44]
X=[20, 29, 3, 7, 17, 29] y=[3, 29, 20], yhat=[3, 29, 20]
X=[38, 45, 5, 35, 46, 21] y=[5, 45, 38], yhat=[5, 45, 38]
X=[16, 10, 48, 27, 39, 3] y=[48, 10, 16], yhat=[48, 10, 16]
X=[16, 45, 7, 45, 29, 25] y=[7, 45, 16], yhat=[45, 45, 16]
