首先下载 GloVe 预训练词向量文件。
该文件为 txt 格式:每一行以一个单词开头,后面跟着 100 个 float 类型的数值,各项之间以空格分隔。因此我们载入这个文件,并逐行取出单词及其对应的向量。
def get_numpy_word_embed(word2ix):
row = 0
file = 'zhs_wiki_glove.vectors.100d.txt'
path = '/home/socialbird/platform/aion-autonlp/Downloads'
whole = os.path.join(path, file)
words_embed = {}
with open(whole, mode='r')as f:
lines = f.readlines()
for line in lines:
# print(line)
# print(len(line.split()))
line_list = line.split()
word = line_list[0]
embed = line_list[1:]
embed = [float(num) for num in embed]
words_embed[word] = embed
if row > 20000:
break
row += 1
# word2ix = {}
ix2word = {ix: w for w, ix in word2ix.items()}
id2emb = {}
for ix in range(l