Python 词云图——政府工作报告的重点词汇展示

最新推荐文章于 2024-01-22 13:37:05 发布

just_begain

最新推荐文章于 2024-01-22 13:37:05 发布

阅读量621

点赞数 2

文章标签： python 数据可视化

本文链接：https://blog.csdn.net/just_begain/article/details/108024549

版权

效果图展示：
［图片：词云效果图］

［图片：词云效果图］

from wordcloud import WordCloud
import matplotlib.pyplot as plt
from imageio import imread
import jieba.analyse

def read_text(file_path):
    """Read the raw report text.

    Opens ``file_path`` as UTF-8 text and returns its entire contents
    as a single string.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        contents = source.read()
    return contents

# Load the Chinese stop-word list for jieba.analyse. This runs at import time,
# before any keyword extraction below. The path is relative, so it is resolved
# against the current working directory.
jieba.analyse.set_stop_words('./chineseStopWords.txt')

def key_word(text_word, top_k=50):
    """Extract the top weighted keywords of a text with jieba.

    Args:
        text_word: The text to analyse.
        top_k: Value passed to ``jieba.analyse.extract_tags`` as ``topK``.
            Defaults to 50, the value that used to be hard-coded here.

    Returns:
        dict: Maps each extracted keyword to its weight.
    """
    # withWeight=True makes extract_tags return (keyword, weight) pairs,
    # which dict() can consume directly.
    weighted_tags = jieba.analyse.extract_tags(
        text_word, topK=top_k, withWeight=True)
    return dict(weighted_tags)

def paint_picture(dic, name, font_path=r'C:\Windows\Fonts\msyh.ttf',
                  mask_path="./中国地图.jpg"):
    """Render a word cloud from word weights and save it as ``<name>.jpg``.

    Args:
        dic: Mapping of word -> weight, passed to
            ``WordCloud.generate_from_frequencies``.
        name: Converted with ``str`` and used both as the plot title and
            as the stem of the output file name.
        font_path: Font file given to ``WordCloud``. Defaults to the
            Windows path that used to be hard-coded; override it on
            systems where that file does not exist.
        mask_path: Image file read with ``imread`` and given to
            ``WordCloud`` as its ``mask``. Defaults to the path that used
            to be hard-coded.
    """
    # pyplot reuses the current figure between calls. Clear it first so
    # repeated calls do not stack a new image on top of the previous ones.
    plt.clf()

    # Read the external image that defines the word-cloud shape.
    mask_image = imread(mask_path)

    # NOTE(review): per the WordCloud docs, width/height are ignored when a
    # mask is supplied; they are kept to match the original call — confirm.
    cloud = WordCloud(mask=mask_image, width=800, height=400,
                      font_path=font_path, colormap='rainbow',
                      random_state=20, background_color='white')
    picture = cloud.generate_from_frequencies(dic)

    plt.imshow(picture)
    plt.axis("off")
    plt.title(str(name))
    plt.savefig(str(name) + '.jpg', dpi=800)

# Use the SimHei font so Chinese labels (the plot titles) display properly.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Input report files, one per year.
file_path_2018, file_path_2019, file_path_2020 = (
    r'./2018政府工作报告.txt',
    r'./2019年政府工作报告.txt',
    r'./2020年政府工作报告.txt',
)

# Phase 1: read every report in full.
report_text_2018, report_text_2019, report_text_2020 = [
    read_text(_path)
    for _path in (file_path_2018, file_path_2019, file_path_2020)
]

# Phase 2: extract the weighted keywords of every report.
(report_text_2018_keyword,
 report_text_2019_keyword,
 report_text_2020_keyword) = [
    key_word(_text)
    for _text in (report_text_2018, report_text_2019, report_text_2020)
]

# Phase 3: draw and save one word cloud per report.
for _keywords, _title in (
    (report_text_2018_keyword, '2018政府工作报告'),
    (report_text_2019_keyword, '2019政府工作报告'),
    (report_text_2020_keyword, '2020政府工作报告'),
):
    paint_picture(_keywords, name=_title)