# 效果图展示:
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from imageio import imread
import jieba.analyse
def read_text(file_path):
    """Read the raw report text from a UTF-8 encoded file.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The complete file contents as a single ``str``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' decodes plain UTF-8 exactly like 'utf-8' but also drops a
    # leading byte-order mark, so a BOM-prefixed file does not leak '\ufeff'
    # into the first word of the text.
    with open(file_path, mode='r', encoding='utf-8-sig') as report_text:
        return report_text.read()
# Load the Chinese stop-word list into jieba's keyword extractor.
# This runs at import time, before any key_word() call below.
jieba.analyse.set_stop_words('./chineseStopWords.txt')
def key_word(text_word, top_k=50):
    """Extract the highest-weighted keywords from a piece of text.

    Args:
        text_word: The text to analyse.
        top_k: Maximum number of keywords to extract. Defaults to 50, the
            value that was previously hard-coded.

    Returns:
        A dict mapping each extracted keyword to the weight reported by
        ``jieba.analyse.extract_tags``.
    """
    # withWeight=True makes extract_tags return (keyword, weight) pairs,
    # which dict() turns into the keyword -> weight mapping directly.
    weighted_tags = jieba.analyse.extract_tags(
        text_word, topK=top_k, withWeight=True
    )
    return dict(weighted_tags)
def paint_picture(dic, name, font_path=r'C:\Windows\Fonts\msyh.ttf',
                  mask_path="./中国地图.jpg"):
    """Render a word -> weight mapping as a masked word cloud and save it.

    The image is written to ``str(name) + '.jpg'`` relative to the current
    working directory, with ``str(name)`` as the figure title.

    Args:
        dic: Mapping of word -> weight, passed to
            ``WordCloud.generate_from_frequencies``.
        name: Used (via ``str()``) both as the figure title and as the stem
            of the output file name.
        font_path: Font file handed to ``WordCloud``. Defaults to the Windows
            system font path that was previously hard-coded; pass a different
            path on machines where that file does not exist.
        mask_path: Image file read with ``imread`` and used as the word-cloud
            mask. Defaults to the previously hard-coded path.
    """
    # Read the external image that defines the shape of the word cloud.
    mask_image = imread(mask_path)

    # NOTE(review): per the wordcloud docs, width/height are ignored when a
    # mask is supplied; they are kept here for parity with the original call.
    cloud = WordCloud(
        mask=mask_image,
        width=800,
        height=400,
        font_path=font_path,
        colormap='rainbow',
        random_state=20,
        background_color='white',
    )
    picture = cloud.generate_from_frequencies(dic)

    # Draw on a fresh figure and always close it afterwards. Reusing the
    # implicit current figure made successive calls stack their images on the
    # same axes and kept every rendered image alive in memory.
    fig = plt.figure()
    try:
        plt.imshow(picture)
        plt.axis("off")
        plt.title(str(name))
        plt.savefig(str(name) + '.jpg', dpi=800)
    finally:
        plt.close(fig)
# Driver script: build one word cloud per yearly government work report.
plt.rcParams['font.sans-serif']=['SimHei'] # so that Chinese labels display correctly
# Input text files, one per report year.
# NOTE(review): the 2018 file name has no "年" while 2019/2020 do — confirm
# this matches the actual files on disk.
file_path_2018 = r'./2018政府工作报告.txt'
file_path_2019 = r'./2019年政府工作报告.txt'
file_path_2020 = r'./2020年政府工作报告.txt'
# Read the full text of every report first.
report_text_2018 = read_text(file_path_2018)
report_text_2019 = read_text(file_path_2019)
report_text_2020 = read_text(file_path_2020)
# Extract the keyword -> weight mapping for each report.
report_text_2018_keyword = key_word(report_text_2018)
report_text_2019_keyword = key_word(report_text_2019)
report_text_2020_keyword = key_word(report_text_2020)
# Render each mapping; paint_picture saves the image as '<name>.jpg'.
paint_picture(report_text_2018_keyword, name='2018政府工作报告')
paint_picture(report_text_2019_keyword, name='2019政府工作报告')
paint_picture(report_text_2020_keyword, name='2020政府工作报告')