1.得到文本
可以使用爬虫从税法网站爬取文字;由于爬虫不是本文重点,这里不展示,我采用的方法是直接下载一个文本文件。
2.相关代码
# -*- coding: utf-8 -*-
import jieba
from collections import Counter
from wordcloud import WordCloud
def get_words(txt):
    """Segment *txt* with jieba, print the 10 most common words, and
    return them as a ``{word: count}`` dict (most frequent first).

    Tokens of length 1 and literal ``'\r\n'`` tokens are skipped so that
    single characters/punctuation and line breaks don't pollute the count.
    """
    counts = Counter()
    for word in jieba.cut(txt):
        # Keep multi-character tokens only; drop stray line-break tokens.
        if len(word) > 1 and word != '\r\n':
            counts[word] += 1
    print('常用词频度统计结果')
    # dict() preserves most_common()'s descending-frequency order (3.7+).
    top_words = dict(counts.most_common(10))
    for k, v in top_words.items():
        # Pad the word toward 5 columns, then a rough '*' bar chart
        # (one star per ~3 occurrences).
        print('%s%s %s %d' % (' ' * (5 - len(k)), k, '*' * int(v / 3), v))
    return top_words
def get_cloud(xxx):
    """Render a word cloud from the ``{word: count}`` mapping *xxx*,
    display it on screen, and save it to ``img/shuifa.PNG``."""
    # The font file must be copied next to this script beforehand.
    cloud = WordCloud(
        background_color='pink',
        width=600,
        height=300,
        repeat=True,
        font_path="bS/simkai.ttf",
    )
    cloud.fit_words(xxx)
    picture = cloud.to_image()
    picture.show()
    # Persist the rendered cloud to disk.
    picture.save('img/shuifa.PNG')
def drop_cloud_word():
    """Hint jieba about the segmentation frequency of the stop-word '的'.

    NOTE(review): this function is never called from the script below, and
    jieba's ``suggest_freq`` only adjusts the dictionary when ``tune=True``
    is passed — as written this call appears to have no lasting effect;
    confirm the intent.
    """
    jieba.suggest_freq('的')
if __name__ == '__main__':
    # Load the corpus — supply your own text file at this path.
    with open('Txt/shuifa.txt', 'r', encoding='utf-8') as f:
        txt = f.read()
    # Count word frequencies, echo the top-10 dict, then render the cloud.
    word_freq = get_words(txt)
    print(word_freq)
    get_cloud(word_freq)