最近整理了学习Python时写过的一些代码,为了方便以后查看,将其作为笔记记录下来。
模块安装
安装wordcloud包。
pip install wordcloud
安装Jieba分词工具。
pip install jieba
生成词云
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import pickle
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import jieba
import codecs
import os
# Sanity check: print how many lines the source text contains. The context
# manager closes the handle as soon as the count has been taken.
with codecs.open('诛仙.txt', 'r', encoding='utf-8') as fin:
    print(len(fin.readlines()))
def wordCloud(stopwords):
    """Segment the novel, cache the result, and render it as a word cloud.

    Every line of ``诛仙.txt`` is segmented with ``segTool`` and the
    space-separated result is pickled to the file ``text``. The text is then
    loaded back from that file, rendered as a word cloud that uses ``11.jpg``
    as both mask and colour source, displayed with matplotlib, and finally
    saved as ``cy1.jpg`` in the directory of this script.

    :param stopwords: collection of words that ``segTool`` removes
    :return: None
    """
    # Segment line by line and join once, instead of growing a string with +=.
    with codecs.open('诛仙.txt', "r", encoding='utf-8') as fin:
        segments = [segTool(line.strip(), stopwords) for line in fin.readlines()]
    text = ''.join(segment + ' ' for segment in segments)

    # Cache the segmented text; ``with`` closes the file even if dumping fails.
    with open('text', 'wb') as fout:
        pickle.dump(text, fout)

    # Load the segmented text straight back from the cache file.
    # NOTE: pickle is only safe here because ``text`` is written by this
    # function itself; never point this at a file from an untrusted source.
    with open('text', 'rb') as fr:
        text = pickle.load(fr)

    background_image = plt.imread('11.jpg')
    wc = WordCloud(
        background_color='white',  # canvas background colour
        mask=background_image,  # image used as the drawing mask
        max_words=100,  # maximum number of words to display
        stopwords=STOPWORDS,  # wordcloud's built-in stop-word set
        font_path='qihei55.ttf',  # font file; needed for Chinese glyphs to render
        max_font_size=100,  # largest font size used
        random_state=30,  # fixed seed so the result is reproducible
    )
    wc.generate(text)

    # Recolour the words using the colours of the background image.
    image_colors = ImageColorGenerator(background_image)
    wc.recolor(color_func=image_colors)

    # Draw the cloud without axes and show it.
    plt.imshow(wc)
    plt.axis('off')
    plt.show()

    # Save the image next to this script.
    d = os.path.dirname(__file__)
    wc.to_file(os.path.join(d, "cy1.jpg"))
def segTool(line, stopwords):
    """Segment *line* with jieba and filter the resulting tokens.

    Tokens that appear in *stopwords* or are at most one character long are
    discarded. The remaining tokens are returned as one string, separated by
    single spaces and stripped of surrounding whitespace.
    """
    kept = [
        token
        for token in jieba.cut(line, cut_all=False)
        if len(token) > 1 and token not in stopwords
    ]
    return " ".join(kept).strip()
def stopwordslist(filepath):
    """Read a UTF-8 stop-word file and return its lines as a list.

    Each line is stripped of surrounding whitespace; blank lines are kept as
    empty strings so the result has one entry per line of the file.

    :param filepath: path of the stop-word file, one word per line
    :return: list of stripped lines in file order
    """
    # The context manager closes the file once the list has been built.
    with open(filepath, 'r', encoding='utf-8') as stop_file:
        stopwords = [line.strip() for line in stop_file]
    return stopwords
if __name__ == "__main__":
    # Load the stop-word list from disk and pass it to the word-cloud builder.
    wordCloud(stopwordslist('stopwords.txt'))
效果图