# 图片加载使用
from PIL import Image
# Converts the loaded image into a NumPy array
import numpy as np
# jieba分词
import jieba
# 绘图
import matplotlib.pyplot as plt
# Word-cloud renderer and image-based color generator
from wordcloud import WordCloud, ImageColorGenerator
class WordCountUtil(object):
    """Render a word cloud from a text file, using an image as the mask.

    All path attributes start out as empty strings and must be assigned
    before ``execute()`` is called. Every full path is produced by plain
    string concatenation of a directory attribute and a file-name
    attribute, so the directory values need to end with a path separator.
    """

    def __init__(self) -> None:
        """Initialise every path attribute to an empty string."""
        # Directory prefix used for both the mask image and the saved figure.
        self.imagePath = ""
        # File name of the mask image, appended to imagePath.
        self.readImageFile = ""
        # File name of the saved figure, appended to imagePath.
        self.saveImageFile = ""
        # Font file handed to WordCloud as ``font_path``.
        self.ttfPath = ""
        # Directory prefix of the input text file.
        self.readTxtPath = ""
        # File name of the input text, appended to readTxtPath.
        self.readTxtFile = ""

    def readImage(self) -> "Image.Image":
        """
        Open the mask image with PIL.

        :return: the image opened from ``imagePath + readImageFile``;
            the caller is responsible for closing it.
        """
        return Image.open(self.imagePath + self.readImageFile)

    def readTxt(self) -> str:
        """
        Read the whole input text file, decoded as UTF-8.

        :return: the full content of ``readTxtPath + readTxtFile``.
        """
        with open(self.readTxtPath + self.readTxtFile, "r", encoding="UTF-8") as file:
            return file.read()

    def execute(self) -> None:
        """
        Build the word cloud, save the figure and display it.

        The text is segmented with jieba, the segments are joined with
        ``"/"`` and passed to ``WordCloud.generate``. The resulting figure
        is written to ``imagePath + saveImageFile`` and then shown.

        :return: None
        """
        # The text is read before the image, so a missing text file is
        # reported first.
        words = jieba.cut(self.readTxt())

        # Close the image file as soon as its pixels have been copied into
        # the array; PIL opens files lazily and may otherwise keep the
        # handle open.
        with self.readImage() as image:
            mask = np.array(image)

        wc = WordCloud(
            # Path of the font file
            font_path=self.ttfPath,
            # Background color of the word cloud
            background_color="white",
            # Image array used as the mask for the word cloud
            mask=mask,
            # Maximum number of words shown in the cloud
            max_words=2000,
            # Largest font size used in the cloud
            max_font_size=50,
            random_state=100,
            margin=5,
            scale=8
        )
        wc.generate("/".join(words))

        plt.figure("词云图")
        plt.imshow(wc)
        plt.axis("off")
        # Save before show(): the figure is written to disk first, then
        # displayed.
        plt.savefig(self.imagePath + self.saveImageFile)
        plt.show()
if __name__ == "__main__":
    # Script entry point: build a utility instance and run the pipeline.
    # NOTE(review): no path attribute is assigned here, so every path is
    # still the empty string when execute() runs — confirm whether the
    # paths are meant to be configured before this call.
    util = WordCountUtil()
    result = util.execute()