from gensim.models import word2vec
import logging
from gensim import corpora,models,similarities
# logging.basicConfig(format="%(asctime)s : %(levelname)s : %(message)s",level=logging.INFO)
# raw_sentences = [" the quick brown fox jumps over the lazy dogs","yoyoyo you go home now to sleep"]
# sentences = [s.split() for s in raw_sentences]
# print(sentences)
# model = word2vec.Word2Vec(sentences , min_count= 1)
# print(model.similarity("dogs","you"))
# Toy corpus: two already-tokenized documents.
a = [["一", "一", "二"], ["一", "二", "三"]]
# A further tokenized document; it is not used within this section.
b = ["一", "一", "三", "四", "四"]

# Build the token <-> integer-id mapping from the toy corpus.
dictionary = corpora.Dictionary(a)
print(dictionary)  # e.g. Dictionary(3 unique tokens: ['一', '三', '二'])
print(dictionary.dfs)  # {token id: number of documents containing it}, e.g. {0: 2, 1: 2, 2: 1}
print(dictionary.num_docs)  # number of documents processed, e.g. 2

# Materialize the (id, token) pairs before printing: on Python 3 the bare
# ``items()`` view prints only the view wrapper, not the pairs themselves.
print(list(dictionary.items()))
# gensim builds ``id2token`` lazily on the first id -> token lookup; the
# iteration above performs those lookups, so the mapping is populated here
# instead of printing as an empty dict.
print(dictionary.id2token)
print(dictionary.token
# Basic usage of gensim
# Latest recommended article published on 2022-10-30 14:34:45