import jieba
import collections
from zhon.hanzi import punctuation
from wordcloud import WordCloud
from imageio import imread
# Single-character function words that add nothing to a frequency count.
# A frozenset is built once and gives O(1) membership tests.
_STOP_WORDS = frozenset({'的', '了', '呢', '时', '且', '也', '是'})


def filter_nouseful(x):
    """Return True if token *x* should be kept, False if it is noise.

    Intended as the predicate for ``filter()`` over a list of segmented
    tokens. A token is rejected when it is:

    * empty or whitespace-only (segmenters such as jieba emit spaces and
      newlines as tokens, which would otherwise show up in the counts),
    * one of the stop words in ``_STOP_WORDS``, or
    * found in ``punctuation`` (imported from ``zhon.hanzi``).

    Args:
        x: A single token string.

    Returns:
        bool: True to keep the token, False to drop it.
    """
    # Whitespace tokens carry no meaning; drop them before the other checks.
    if not x.strip():
        return False
    # NOTE: ``punctuation`` is a string, so this is a substring test, exactly
    # as in the original code.
    return x not in _STOP_WORDS and x not in punctuation
# Read the background image used as the word-cloud mask. Per the original
# author's note, every non-white part of the image can be filled with words.
mask = imread('bg.jpg')
w = WordCloud(
    font_path='msyh.ttc',
    width=1000,
    height=700,
    background_color='white',
    mask=mask,
    collocations=False,
)

# read() loads the entire file into memory at once; the context manager
# guarantees the file handle is closed even if the read raises.
with open('word.txt', 'r', encoding='utf-8') as f:
    t = f.read()

# Segment the text with jieba (precise mode, per the original comment), then
# drop tokens rejected by filter_nouseful.
ans = jieba.lcut(t)
ans = list(filter(filter_nouseful, ans))

# WordCloud.generate() takes a single string, so join the tokens with spaces.
txt = ' '.join(ans)

# Report the 20 most frequent tokens.
word_counts = collections.Counter(ans)
word_counts_top20 = word_counts.most_common(20)
print(word_counts_top20)

# Render the cloud and save it to disk.
w.generate(txt)
w.to_file('wordcloud.png')
print('完成')
# Word cloud (词云) with the wordcloud library
# First published 2019-02-10 16:13:42