# Import jieba for Chinese word segmentation
from wordcloud import WordCloud
import jieba
# 词频计算
import jieba.analyse as analyse
from scipy.misc import imread
import os
from os import path
import matplotlib.pyplot as plt
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
# Place the font file simsun.ttf in the working directory
# (otherwise WordCloud raises: OSError: cannot open resource)
# Switch to the directory holding the corpus text and the simsun.ttf font.
os.chdir('/Users/jacquelin/Documents/文件/中大/')

# Read the raw corpus; the file must be saved as UTF-8 (see note below).
with open('中大维修描述.txt', 'r', encoding='utf-8') as f:
    comment_text = f.read()

# Segment the Chinese text with jieba, joining tokens with spaces so
# WordCloud can treat them as separate words.
cut_text = " ".join(jieba.cut(comment_text))

# TextRank keyword extraction: top 1000 keywords with their weights.
result = jieba.analyse.textrank(cut_text, topK=1000, withWeight=True)
keywords = {word: weight for word, weight in result}

wc = WordCloud(
    font_path="simsun.ttf",        # CJK-capable font (required, or OSError)
    background_color="white",      # background colour
    max_words=1000,                # maximum number of words shown
    max_font_size=500,             # largest font size
    min_font_size=20,              # smallest font size
    random_state=42,               # fixed seed for a reproducible layout
    collocations=False,            # avoid duplicated bigrams
    width=1600, height=1200, margin=10,  # canvas size and word spacing
).generate(cut_text)

# Scaling is controlled via the figure dpi (plt.figure(dpi=...)).
plt.figure(dpi=100)  # raise/lower dpi to enlarge or shrink the image
plt.imshow(wc, interpolation='catrom')
plt.axis("off")  # hide the axes
plt.show()
# P.S.:
# Beginners only need the jieba-import section and to replace the file path.
# When copying text from a database or your own crawl into the .txt file,
# save it in UTF-8 encoding.