import codecs
def read_glove_vecs(glove_file):
    """Load word vectors from a whitespace-separated text file.

    Each non-blank line is split on whitespace: the first token is taken as
    the word and every remaining token is parsed as a float64 component.
    If a word occurs on more than one line, the last occurrence wins.

    Args:
        glove_file: Path of the UTF-8 encoded vector file to read.

    Returns:
        A ``(words, word_to_vec_map)`` tuple, where ``words`` is the set of
        words seen and ``word_to_vec_map`` maps each word to a 1-D
        ``np.float64`` array of its components.

    Raises:
        ValueError: If a component token cannot be parsed as a float.
    """
    words = set()
    word_to_vec_map = {}
    with open(glove_file, 'r', encoding='utf-8') as f:
        for line in f:
            tokens = line.split()
            if not tokens:
                # Blank lines (such as a trailing newline at the end of the
                # file) carry no entry; indexing them would raise IndexError.
                continue
            curr_word, *components = tokens
            words.add(curr_word)
            word_to_vec_map[curr_word] = np.array(components, dtype=np.float64)
    return words, word_to_vec_map
def build_matrix(word_index, path, embedding_dim=50):
    """Build an embedding matrix for a vocabulary from a vector file.

    Row ``i`` of the result holds the vector of the word whose index in
    ``word_index`` is ``i``. Words that are missing from the vector file keep
    an all-zero row. The matrix has ``len(word_index) + 1`` rows, so indices
    ``0 .. len(word_index)`` are all addressable.

    Args:
        word_index: Mapping from word to integer row index.
        path: Path of the vector file, loaded with ``read_glove_vecs``.
        embedding_dim: Number of components per vector in the file. Defaults
            to 50, the width this function previously hard-coded.

    Returns:
        A float ``np.ndarray`` of shape
        ``(len(word_index) + 1, embedding_dim)``.

    Raises:
        ValueError: If a loaded vector's length does not match
            ``embedding_dim``.
        IndexError: If an index in ``word_index`` exceeds
            ``len(word_index)``.
    """
    _, embedding_index = read_glove_vecs(path)
    embedding_matrix = np.zeros((len(word_index) + 1, embedding_dim))
    for word, i in word_index.items():
        vector = embedding_index.get(word)
        if vector is not None:
            # Out-of-vocabulary words are skipped on purpose and stay zero.
            embedding_matrix[i] = vector
    return embedding_matrix
# The path below is the pre-trained GloVe/fastText vector file you downloaded.
# NOTE(review): `tokenizer` is not defined in this snippet — presumably a
# fitted tokenizer exposing `word_index`; confirm against the caller.
embedding_matrix = build_matrix(tokenizer.word_index, r'D:/Python/data/glove.6B.50d.txt')