main.py
#coding: utf-8
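r'''
Before running this program, replace the wordcloud.py found at the path below
with the wordcloud.py shipped in the word-cloud (词云图) folder:
Anaconda3\Lib\site-packages\wordcloud
That is where `pip install wordcloud` places the package. If a virtual environment is used, go to this path instead:
Anaconda3\envs\***\Lib\site-packages\wordcloud
Why the replacement is needed: the original wordcloud.py cannot return the generated words and their frequencies.
'''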
import jieba.analyse
import jieba
import wordcloud
# import pandas as pd
def cut(text):
    """Segment *text* with jieba and return the pieces as one string.

    The text is passed to ``jieba.cut`` with ``cut_all=True`` and the
    resulting tokens are joined with a single space between them.
    """
    return " ".join(jieba.cut(text, cut_all=True))
if __name__ == '__main__':
    # Load the custom user dictionary before any segmentation happens.
    jieba.load_userdict('dict.txt')
    src = 'text.txt'
    # Read the corpus inside a context manager so the file handle is closed
    # even if reading fails (the previous code never closed it).
    with open(src, 'r', encoding='utf-8') as f:
        text = f.read()
    # Segment the raw text into a space-separated token string.
    text = cut(text)
    w = wordcloud.WordCloud(font_path='msyh.ttc', width=1000, height=700, background_color='white')
    w.generate(text)
    # `return_words` (word -> frequency dict) exists only on the patched
    # wordcloud.py described in the module docstring.
    # NOTE(review): the stock WordCloud documents a `words_` attribute --
    # confirm whether it could replace the patched attribute.
    words_dict = w.return_words
    # Write the rendered word cloud image to disk.
    w.to_file('grwordcloud.png')
wordcloud.py
class WordCloud(object):
# Excerpt of the patched wordcloud.py; each "..." stands for code that is not
# shown here (presumably the unchanged upstream implementation -- TODO confirm).
def __init__(self,...)
...
# Dictionary that generate_from_frequencies fills with word -> freq entries.
self.return_words = {}
...
def generate_from_frequencies(self, frequencies, max_font_size=None):
...
for word, freq in frequencies:
...
# Record the word together with the frequency value seen in this loop.
# NOTE(review): nothing in the visible lines resets return_words between
# calls -- confirm whether entries from an earlier call can persist.
self.return_words[word] = freq
...
...
return self