# Approach: read the source document to build a token list, load the stop-word
# list and strip unwanted tokens, count word frequencies, configure the word
# cloud, and draw the word-cloud image.
"""导入相关库"""
import jieba
import pandas as pd
from imageio import imread
from wordcloud import WordCloud
from matplotlib import pyplot as plt
"""读取源文件,然后形成分词列表"""
with open('E:/yuanwenjian.txt','r',encoding='utf-8') as f:
txt = f.read()
txt = txt.split()
data_cut = [jieba.lcut(x) for x in txt] #分词后结果,形式为二维列表(里面是列表)
all_words = [] #转化成一维列表(里面是字符串)
for i in data_cut:
all_words.extend(i)
# all_words.count('词语') #统计词频
"""读取停用词文档"""
with open("E:\\stopwords.txt",'r',encoding='utf-8') as f:
stop=f.read()
stop = stop.split()
stop = [' ']+stop
data_after = [[j for j in i if j not in stop] for i in data_cut] #判断是否为停用词
"""统计词频"""
all_words = []
for i in data_after:
all_words.extend(i)
num = pd.Series(all_words).value_counts()
"""读取背景图片"""
pic = imread('E:/素材/Logo/python.png')
"""词云参数"""
wc = WordCloud(background_color = 'white',font_path='C:\\Windows\\Fonts\\simkai.ttf',mask=pic)
'''
wc = WordCloud(background_color = 'white',
font_path='C:\\Windows\\Fonts\\simkai.ttf',
max_words=200,
max_font_size=10,
mask=pic)
'''
wc2 = wc.fit_words(num) #词频传入
"""词云展示"""
plt.figure(figsize=(9,9)) #图片的大小
plt.imshow(wc2)
plt.axis('off') #关闭坐标
plt.show()
wc.to_file("ciyun.png") #保存图片