代码参考:
github:
2018 EMNLP JMEE:https://github.com/lx865712528/EMNLP2018-JMEE
2019 EMNLP MOGANED:https://github.com/ll0iecas/MOGANED
一 加载词向量文件
# Load the word-embedding file; the file already contains the PAD and NONE embeddings.
def load_embedding(wordemb_path):
    """Read a whitespace-separated text embedding file.

    Every non-blank line must consist of a token followed by exactly
    ``WORD_DIM`` float components. Per the original author's note, the file
    is expected to already include rows for the special PAD and NONE tokens;
    this function adds none of its own.

    Args:
        wordemb_path: Path to the UTF-8 encoded embedding file.

    Returns:
        A ``(word2idx, wordemb)`` tuple. ``word2idx`` maps each token to the
        index of its row, and ``wordemb`` is a ``torch.DoubleTensor`` built
        from the rows in file order.

    Raises:
        AssertionError: If a non-blank line does not have ``WORD_DIM + 1``
            whitespace-separated fields.
    """
    word2idx = {}
    wordemb = []
    with open(wordemb_path, 'r', encoding='utf-8') as f:
        for lineno, line in enumerate(f, start=1):
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing empty line at EOF).
                continue
            assert len(fields) == WORD_DIM + 1, (
                f"line {lineno}: expected {WORD_DIM + 1} fields, "
                f"got {len(fields)}"
            )
            # Index by row position rather than len(word2idx): a duplicated
            # token does not grow the dict but still appends a row, which
            # would otherwise shift every later token onto the wrong row.
            word2idx[fields[0]] = len(wordemb)
            wordemb.append(list(map(float, fields[-WORD_DIM:])))
    return word2idx, torch.DoubleTensor(wordemb)
# 加载词向量文件,文件里面不包含PAD、NONE的嵌入
# 返回:word2idx,idx2word,wordemb
# idx2word这个其实没什么用,但是个人觉得方便检查数据,就加上了
def load_embedding(word