from nltk.tokenize import RegexpTokenizer
from stop_words import get_stop_words
from nltk.stem.porter import PorterStemmer
from gensim import corpora, models
import gensim
import csv
import jieba
import codecs
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import pyLDAvis.gensim
from gensim import corpora
from gensim.models import LdaModel
def is_number(s):
    """Return True if *s* can be interpreted as a number, else False.

    Two checks are tried in order:

    1. ``float(s)`` succeeds, e.g. ``"3.14"``, ``"-2"`` or ``"1e5"``.
       Anything ``float`` accepts counts, which includes ``"nan"`` and
       ``"inf"``.
    2. ``unicodedata.numeric(s)`` succeeds, i.e. *s* is a single Unicode
       character that carries a numeric value (for example the CJK
       numeral U+4E94 or the vulgar fraction U+00BD).

    Args:
        s: The value to test, normally a string token.

    Returns:
        bool: True when either check accepts *s*; False otherwise. Values
        that are neither strings nor numbers (``None``, lists, ...) yield
        False instead of raising ``TypeError``.
    """
    import unicodedata

    try:
        float(s)
        return True
    except (TypeError, ValueError):
        # ValueError: a string that is not a float literal.
        # TypeError: an object float() cannot convert at all (e.g. None).
        pass

    try:
        unicodedata.numeric(s)
        return True
    except (TypeError, ValueError):
        # TypeError: s is not a single-character string.
        # ValueError: the character has no numeric value.
        pass

    return False
# Module-level accumulator: data_g() appends every collected cell here.
info = []


def data_g(filename, column=8):
    """Append the non-empty cells of one CSV column to the global ``info``.

    The first row of the file is treated as a header and skipped. For each
    remaining row, the cell at index *column* is appended to ``info`` when
    it is a non-empty string. Rows that are too short to contain that
    column (including blank lines) are skipped instead of raising
    ``IndexError``.

    The file is opened with the platform's default text encoding and is
    closed as soon as reading finishes.

    Args:
        filename: Path of the CSV file to read.
        column: Zero-based, non-negative index of the column to collect.
            Defaults to 8 (the ninth column).

    Returns:
        None. Results are accumulated in the module-level ``info`` list.
    """
    # newline="" lets the csv module handle line endings itself, so quoted
    # fields containing embedded newlines are parsed correctly.
    with open(filename, newline="") as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # skip the header row; tolerate an empty file
        for row in reader:
            if len(row) > column and row[column]:
                info.append(row[column])
data_g('C:\\Users\\imac\\Desktop\\2018\\bigdata\\py\\data.csv&#
LDA+可视化
最新推荐文章于 2020-08-31 20:17:27 发布
这段代码演示了如何利用jieba分词和gensim库进行LDA主题建模及可视化。首先，从CSV文件中读取数据，去除停用词和数字，然后创建词典和文档-词项矩阵。接着，训练LDA模型并打印主题。最后，使用pyLDAvis展示主题模型的可视化结果。
摘要由CSDN通过智能技术生成