一个简单的LSTM示例:训练模型预测一句话中的下一个字词。
固定长度的句子,一个句子有3个词。
使用one-hot编码
导入依赖库
import keras from keras.models import Sequential from keras.layers import LSTM, Dense, Dropout import numpy as np
数据预处理
# Data preprocessing: build the character vocabulary and the two lookup
# tables that convert between characters and integer class indices.
data = 'abcdefghijklmnopqrstuvwxyz'
data_set = set(data)

# Iterating a set of strings follows hash order, and string hashes are
# randomised per interpreter run. Enumerating the set directly would give
# each character a different index from one run to the next. Sorting the
# vocabulary first pins every character to a reproducible index.
_vocab = sorted(data_set)

# character -> index
word_2_int = {ch: idx for idx, ch in enumerate(_vocab)}
# index -> character (exact inverse of word_2_int)
int_2_word = dict(enumerate(_vocab))
# Number of distinct characters; used as the one-hot width.
word_len = len(data_set)

print(word_2_int)
print(int_2_word)
一些辅助函数
def words_2_ints(words): ints = [] for itmp in words: ints.append(word_2_int[itmp]) return ints print(words_2_ints('ab')) def words_2_one_hot(words, num_classes=word_len): return keras.utils.to_categorical(words_2_ints(words), num_classes=num_classes) print(words_2_one_hot('a')) def get_one_hot_max_idx(one_hot): idx_ = 0 max_ = 0 for i in range(len(on