词云 wordcloud库

镇长1998

已于 2022-06-22 21:04:14 修改

阅读量299

点赞数

分类专栏： python 文章标签：算法

于 2019-02-10 16:13:42 首次发布

本文链接：https://blog.csdn.net/weixin_41514525/article/details/86910652

版权

python 专栏收录该内容

32 篇文章 0 订阅

订阅专栏

import collections
import string

import jieba
from imageio import imread
from wordcloud import WordCloud
from zhon.hanzi import punctuation

# Common Chinese function words that carry no meaning for a word cloud.
_STOPWORDS = frozenset(['的', '了', '呢', '时', '且', '也', '是'])


def filter_nouseful(x):
    """Return True when token *x* is worth keeping for the word cloud.

    A token is rejected when it is:
      * empty or made only of whitespace (the segmenter keeps spaces and
        newlines as tokens, which would otherwise dominate the counts),
      * one of the stopwords in ``_STOPWORDS``,
      * made only of ASCII punctuation characters, or
      * found in the CJK ``punctuation`` string imported from ``zhon.hanzi``.

    Args:
        x: A single token (string) produced by word segmentation.

    Returns:
        bool: False for tokens to drop, True otherwise.
    """
    # Whitespace-only tokens (including the empty string) are never words.
    if not x.strip():
        return False
    if x in _STOPWORDS:
        return False
    # zhon's punctuation covers CJK marks only; ASCII marks such as ',' or
    # '...' need their own check.
    if all(ch in string.punctuation for ch in x):
        return False
    return x not in punctuation


# Load the background picture used as the cloud's mask; per the original
# author's note, every non-white area of the picture can be filled with words.
mask = imread('bg.jpg')

# collocations=False is passed so that two-word phrases are not added.
# NOTE(review): the wordcloud docs say width/height are ignored when a mask
# is supplied -- confirm before relying on the 1000x700 size.
w = WordCloud(font_path='msyh.ttc', width=1000, height=700, background_color='white', mask=mask, collocations=False)

# Read the whole corpus into memory in one go. The context manager closes
# the file even if read() raises (e.g. on a decoding error).
with open('word.txt', 'r', encoding='utf-8') as f:
    t = f.read()

ans = jieba.lcut(t)    # segment the text (jieba's precise mode)

# Drop stopwords / punctuation before counting and rendering.
ans = [token for token in ans if filter_nouseful(token)]

# WordCloud.generate() takes one string, so re-join the kept tokens with spaces.
txt = ' '.join(ans)

word_counts = collections.Counter(ans)
word_counts_top20 = word_counts.most_common(20)  # the 20 most frequent tokens
print(word_counts_top20)

w.generate(txt)
w.to_file('wordcloud.png')

print('完成')