参考链接: https://www.codenong.com/60246570/
报错信息:D:\software\Anaconda\envs\LDA\lib\site-packages\gensim\topic_coherence\direct_confirmation_measure.py:204: RuntimeWarning: divide by zero encountered in double_scalars
m_lr_i = np.log(numerator / denominator)
D:\software\Anaconda\envs\LDA\lib\site-packages\gensim\topic_coherence\indirect_confirmation_measure.py:323: RuntimeWarning: invalid value encountered in double_scalars
return cv1.T.dot(cv2)[0, 0] / (_magnitude(cv1) * _magnitude(cv2))
nan
数据准备
# Load the dataset. NOTE(review): `content_cutted` is presumably one string per
# document holding space-separated, already-segmented words -- confirm against
# data_dispose.loaddata().
dataAll, data = data_dispose.loaddata()

# Tokenized documents (list of list of str). Empty strings produced by
# repeated, leading or trailing spaces are dropped so that '' never ends up
# as a dictionary entry.
train = [
    [word for word in (piece.strip() for piece in line.split(' ')) if word]
    for line in dataAll.content_cutted
]
print(type(train))

# Map every token to an integer id, then encode each document as a
# bag-of-words vector. `corpus` is what the LDA model is trained on; `train`
# (the raw tokens) is what the c_v coherence computation needs as `texts`.
dictionary = corpora.Dictionary(train)
corpus = [dictionary.doc2bow(doc) for doc in train]
修改前
def coherence(num_topics):
    """Train an LDA model with ``num_topics`` topics and return its c_v score.

    NOTE: this is the failing "before" variant, kept unchanged in behavior for
    comparison. It hands the bag-of-words ``corpus`` to ``CoherenceModel`` as
    ``texts``, which is what triggers the divide-by-zero / invalid-value
    RuntimeWarnings and a ``nan`` coherence.

    Relies on the module-level ``corpus`` and ``dictionary``.
    """
    lda_model = models.LdaModel(
        corpus=corpus,
        id2word=dictionary,
        num_topics=num_topics,
        passes=60,
        alpha=5,
        eta=0.01,
        random_state=1,
    )
    topic_summary = lda_model.print_topics(num_topics=num_topics, num_words=10)
    print(topic_summary)

    coherence_model = models.CoherenceModel(
        model=lda_model,
        texts=corpus,  # deliberately wrong: bag-of-words vectors, not token lists
        dictionary=dictionary,
        coherence='c_v',
    )
    print(coherence_model.get_coherence())
    return coherence_model.get_coherence()
修改后(将 CoherenceModel 的 texts 参数值从 corpus 修改为 train)
def coherence(num_topics):
    """Train an LDA model with ``num_topics`` topics and return its c_v score.

    The coherence model is given ``train`` (the tokenized documents) as
    ``texts``; passing the bag-of-words ``corpus`` there instead results in
    RuntimeWarnings and a ``nan`` score.

    Relies on the module-level ``corpus``, ``dictionary`` and ``train``.

    Args:
        num_topics: Number of topics to fit.

    Returns:
        The c_v coherence value of the fitted model.
    """
    lda_model = models.LdaModel(
        corpus=corpus,
        id2word=dictionary,
        num_topics=num_topics,
        passes=60,
        alpha=5,
        eta=0.01,
        random_state=1,
    )
    print(lda_model.print_topics(num_topics=num_topics, num_words=10))

    coherence_model = models.CoherenceModel(
        model=lda_model,
        texts=train,
        dictionary=dictionary,
        coherence='c_v',
    )
    # Evaluate once and reuse the value: every get_coherence() call repeats
    # the coherence evaluation.
    score = coherence_model.get_coherence()
    print(score)
    return score
错误原因:CoherenceModel 的 texts 参数需要分词后的原始文本(由词语列表组成的列表,即上面的 train),而不是输入到 LdaModel 的词袋训练语料库(由 doc2bow 生成的 corpus)