import jieba
import jieba.analyse as analyse
import gensim
from gensim import corpora, models, similarities
# 停用词表加载方法
def get_stopword_list():
# 停用词表存储路径,每一行为一个词,按行读取进行加载
# 进行编码转换确保匹配准确率
stop_word_path = './stopword.txt'
stopword_list = [sw.replace('\n', '') for sw in open(stop_word_path, encoding='utf-8').readlines()]
return stopword_list
import jieba
import jieba.analyse as analyse
import gensim
from gensim import corpora, models, similarities
# 停用词表加载方法
def get_stopword_list():
# 停用词表存储路径,每一行为一个词,按行读取进行加载
# 进行编码转换确保匹配准确率
stop_word_path = './stopword.txt'
stopword_list = [sw.replace('\n', '') for sw in open(stop_word_path, encoding='utf-8').readlines()]
return stopword_list
# 停用词
stop_word = get_stopword_list()
text = input()
# 分词
sentences = []
segs = jieba.lcut(text)
segs = list(filter(lambda x: x not in stop_word, segs))
sentences.append(segs)
# 构建词袋模型
dictionary = corpora.Dictionary(sentences)
corpus = [dictionary.doc2bow(sentence) for sentence in sentences]
result = ""
# 任务:使用gensim模块中的函数构造LDA模型,得出最佳主题词的分析结果保存到result变量中。
# ********** Begin *********#
lda = gensim.models.ldamodel.LdaModel(corpus=corpus,id2word=dictionary, num_topics=8)
result=lda.print_topic(1, topn=1)
# ********** End **********#
print(result.split('*')[1],end="")
一定要评测两遍