'''
sent = [['I','am', 'a', 'student', '.'],['who', 'are', 'you','?'],['my', 'name', 'is', 'student']]
Build the vocabulary set from tokenized sentences such as `sent` above.
'''
def build_vocab(sentences):
    """Build a frequency-ranked vocabulary from tokenized sentences.

    Every token across all sentences is counted, and the distinct tokens are
    ordered from most to least frequent.

    Args:
        sentences: Iterable of sentences, each an iterable of hashable
            tokens, e.g. ``[['I', 'am'], ['who', 'are', 'you']]``.

    Returns:
        A two-element list ``[vocabulary, vocabulary_inv]`` where
        ``vocabulary_inv`` is the list of words ordered by descending count
        (index -> word) and ``vocabulary`` is the dict mapping each word to
        its position in that list (word -> index).
    """
    # NOTE(review): the visible snippet stopped after building
    # ``vocabulary_inv`` and returned nothing; the word -> index mapping and
    # the return value below were added so the function yields both the words
    # and their ids -- confirm the expected return shape against callers.

    # Count every token; from_iterable avoids unpacking all sentences into
    # positional arguments.
    word_counts = Counter(itertools.chain.from_iterable(sentences))

    # Mapping from index to word. most_common(n) returns the top-n
    # (word, count) pairs; with n omitted it returns all elements.
    vocabulary_inv = [word for word, _ in word_counts.most_common()]

    # Mapping from word to index.
    vocabulary = {word: index for index, word in enumerate(vocabulary_inv)}
    return [vocabulary, vocabulary_inv]
# Python snippet: generate the words in a corpus and the id corresponding to each word.
# Latest recommended article published on 2024-06-03 15:41:12.