"""Mine weighted LDA topic keywords from combined news text.

Reads the 'Combined Text' column of the sentiment-analysis spreadsheet,
splits every document on whitespace, builds a bag-of-words corpus, trains
a gensim LDA model and prints each topic together with its top weighted
words.
"""
import pandas as pd

# Input spreadsheet and the column holding the text of each document.
INPUT_PATH = '新闻情感分析结果.xlsx'
TEXT_COLUMN = 'Combined Text'

# LDA hyper-parameters.
NUM_TOPICS = 8
NUM_WORDS = 10  # words shown per topic
PASSES = 20
ITERATIONS = 100
# Fixed seed so repeated runs on the same data print the same topics.
RANDOM_SEED = 42


def tokenize_documents(documents):
    """Split each document into whitespace-separated tokens.

    Missing values (``None`` / NaN, which is what pandas produces for empty
    spreadsheet cells) are skipped instead of crashing on ``.split()``;
    other non-string cells (e.g. numbers) are converted with ``str`` first.

    Args:
        documents: Iterable of scalar cell values, one per document.

    Returns:
        A list of token lists, one per non-missing document, in input order.
    """
    # NOTE(review): plain whitespace splitting assumes the text is already
    # word-segmented (e.g. by a Chinese tokenizer upstream) -- confirm.
    return [
        str(document).split()
        for document in documents
        if not pd.isna(document)
    ]


def main():
    """Run the pipeline: load text, train the LDA model, print the topics."""
    # gensim is only needed when the pipeline actually runs; importing it
    # here keeps `tokenize_documents` importable without gensim installed.
    from gensim import corpora
    from gensim.models import LdaModel

    # Load the text data.
    df = pd.read_excel(INPUT_PATH)
    documents = df[TEXT_COLUMN].tolist()

    # Build the bag-of-words representation.
    texts = tokenize_documents(documents)
    dictionary = corpora.Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]

    # Train the LDA model.
    lda_model = LdaModel(
        corpus,
        num_topics=NUM_TOPICS,
        id2word=dictionary,
        passes=PASSES,
        iterations=ITERATIONS,
        random_state=RANDOM_SEED,
    )

    # Show every topic with its weighted top words.
    for topic in lda_model.print_topics(num_words=NUM_WORDS):
        print(topic)

    # Informational message only; it does not set the process exit status.
    print("进程已结束,退出代码为 0")


if __name__ == "__main__":
    main()
LDA主题关键词挖掘,关键词带权重版
最新推荐文章于 2024-02-25 15:28:09 发布