# LDA topic-model visualization code in Python
import jieba
import pyLDAvis
import pyLDAvis.sklearn
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import pandas as pd
import warnings
# Script: fit an LDA topic model on the Chinese review column of a CSV file
# and build an interactive pyLDAvis visualization of the fitted topics.

# Silence all warnings emitted by the libraries used below.
warnings.filterwarnings("ignore")

# Load the raw data; the '评论' column is the text that gets modeled.
df = pd.read_csv(r'C:/Users/HP/Desktop/数据/2.csv')

# Keep only characters in the range U+4E00..U+9FA5.
# - fillna/astype guard against empty or non-string cells, which would
#   otherwise reach jieba as NaN and raise.
# - regex=True is required: pandas >= 2.0 treats the pattern as a literal
#   string by default, which would leave the text unchanged.
df['评论'] = (
    df['评论']
    .fillna("")
    .astype(str)
    .str.replace("[^\u4e00-\u9fa5]", "", regex=True)
)

# Segment each comment with jieba and join the tokens with spaces so that
# CountVectorizer can split them again. Joining with an empty string would
# rebuild the unsegmented text and make every comment a single "word".
newdf = df['评论'].apply(lambda text: ' '.join(jieba.lcut(text)))

# Build the document-term count matrix.
# NOTE(review): CountVectorizer's default token_pattern keeps only tokens of
# two or more characters, so single-character words are dropped — confirm
# this is intended.
vectorizer = CountVectorizer()
doc_term_matrix = vectorizer.fit_transform(newdf)

# Fit LDA with scikit-learn's default hyper-parameters.
lda_model = LatentDirichletAllocation()
lda_model.fit(doc_term_matrix)

# Prepare the visualization data from the fitted model.
# NOTE(review): pyLDAvis >= 3.4.0 renamed `pyLDAvis.sklearn` to
# `pyLDAvis.lda_model`; the import at the top of the file and this call
# presumably need updating on newer installs — verify the installed version.
data = pyLDAvis.sklearn.prepare(lda_model, doc_term_matrix, vectorizer)

# Render the visualization. NOTE(review): display() is aimed at notebook
# environments; when run as a plain script nothing is shown — consider
# pyLDAvis.save_html(data, ...) there.
pyLDAvis.display(data)