wordcloud 词云创建

最新推荐文章于 2024-06-16 18:30:53 发布

hello~bye~

最新推荐文章于 2024-06-16 18:30:53 发布

阅读量246

点赞数

分类专栏： python 文章标签： wordcloud

原文链接：https://blog.csdn.net/lili_wuwu/article/details/82632162

版权

python 专栏收录该内容

20 篇文章 1 订阅

订阅专栏

安装wordcloud

1、下载安装文件：https://www.lfd.uci.edu/~gohlke/pythonlibs/#wordcloud

注意：先运行 python 命令确认自己的 Python 版本，以及系统是 64 位还是 32 位；文件名中的 cp36 表示对应 Python 3.6，其他版本同理。

Clipboard Image.png

2、在 Anaconda Prompt 下进入 wordcloud 安装文件（.whl）所在的目录，运行 pip install 文件名

Clipboard Image.png

第一个作业啦啦啦啦，感谢101python

Clipboard Image.png


 
 
   
   
    
    
   
   
   
   
    
    
     
     # coding:utf8
    
    
   
   

   
   
    
    
   
   
   
   
    
    
     
     import sys
    
    
   
   

   
   
    
    
   
   
   
   
    
    
     
     # Python 2 only: make UTF-8 the interpreter-wide default string encoding.
     # `reload(sys)` is required there because `setdefaultencoding` is hidden
     # after start-up. On Python 3 `reload` is not a builtin (calling it raises
     # NameError) and the default encoding is already UTF-8, so skip the hack.
     if sys.version_info[0] < 3:
         reload(sys)  # noqa: F821 - builtin on Python 2 only
         sys.setdefaultencoding("utf8")
    
    
   
   

   
   
    
    
   
   
   
   
    
     
    
    
   
   

   
   
    
    
   
   
   
   
    
    
     
     from PIL 
     
     import Image, ImageSequence
    
    
   
   

   
   
    
    
   
   
   
   
    
    
     
     import numpy 
     
     as np
    
    
   
   

   
   
    
    
   
   
   
   
    
    
     
     import matplotlib.pyplot 
     
     as plt
    
    
   
   

   
   
    
    
   
   
   
   
    
    
     
     from wordcloud 
     
     import WordCloud, ImageColorGenerator
    
    
   
   

   
   
    
    
   
   
   
   
    
    
     
     import os
    
    
   
   

   
   
    
    
   
   
   
   
    
    
     
     def read_content(content_path):
         """Read every file directly inside a directory and return the merged text.

         Only regular files are read; sub-directories are skipped and not
         descended into. Each file's content is followed by a newline so that
         consecutive files (one song's lyrics each) stay separated.

         Files are opened in text mode without an explicit encoding.
         NOTE(review): confirm the data files match the platform default.

         Args:
             content_path: Path of the directory to scan.

         Returns:
             The concatenated content of all files, in sorted file-name order.
             An empty string when the directory holds no regular files.
         """
         pieces = []
         # os.listdir() returns names in arbitrary order; sort them so the
         # merged text is reproducible from run to run.
         for f in sorted(os.listdir(content_path)):
             # Build the full path of the directory entry.
             file_fullpath = os.path.join(content_path, f)
             # Skip anything that is not a regular file.
             if not os.path.isfile(file_fullpath):
                 continue
             print('loading{}'.format(file_fullpath))
             # The context manager closes the handle as soon as the file is read.
             with open(file_fullpath, 'r') as fh:
                 pieces.append(fh.read())
             # Separate consecutive files with a newline.
             pieces.append('\n')
         print('done loading')
         # Join once at the end instead of growing a string with += in the loop.
         return ''.join(pieces)
    
    
   
   

   
   
    
    
   
   
   
   
    
     
    
    
   
   

   
   
    
    
   
   
   
   
    
    
     
     # Merge every file found in the data folder into one text block.
     # NOTE(review): the mask image opened later in this script
     # ('E:/song/data/12.png') sits in this same folder, so it is read here as
     # text too - confirm that is intended or move the image elsewhere.
     content = read_content('E:/song/data')

     # Print a header and then the first 99 characters as a quick check.
     print('\n显示内容的前面部分...\n')
     print(content[:99])
    
    
   
   

   
   
    
    
   
   
   
   
    
     
    
    
   
   

   
   
    
    
   
   
   
   
    
    
     
     import jieba.analyse
    
    
   
   

   
   
    
    
   
   
   
   
    
     
    
    
   
   

   
   
    
    
   
   
   
   
    
    
     
     # Use jieba's TextRank to extract up to 1000 keywords together with their
     # weights (withWeight=True).
     result = jieba.analyse.textrank(content, topK=1000, withWeight=True)

     # Build the keyword -> weight dictionary. Each item of `result` is used as
     # a (keyword, weight) pair, so dict() can consume it directly.
     keywords = dict(result)
     print(keywords)
    
    
   
   

   
   
    
    
   
   
   
   
    
     
    
    
   
   

   
   
    
    
   
   
   
   
    
     
    
    
   
   

   
   
    
    
   
   
   
   
    
     
    
    
   
   

   
   
    
    
   
   
   
   
    
    
     
     # Load the picture and convert it to an array; it is passed to WordCloud
     # as the mask and to ImageColorGenerator as the colour source.
     image = Image.open('E:/song/data/12.png')
     graph = np.array(image)

     # Build the cloud. Per the original author's note, WordCloud does not
     # support Chinese by default, so a Chinese font file has to be supplied.

     # Alternative kept from the original: a project-local copy of the font;
     # the system font path below is used when that path does not exist.
     # wc = WordCloud(font_path='E:/song/fonts/simkai.ttf',
     #                background_color='white', max_words=1000, mask=graph)

     # Raw string: the Windows path contains backslashes (\W, \F, \s) that are
     # invalid escape sequences in a normal literal. The runtime value is the same.
     wc = WordCloud(font_path=r'C:\Windows\Fonts\simkai.ttf',
                    background_color='white',
                    max_words=1000,
                    mask=graph)
     wc.generate_from_frequencies(keywords)

     # Colour function built from the same picture; used when recolouring.
     image_color = ImageColorGenerator(graph)
    
    
   
   

   
   
    
    
   
   
   
   
    
     
    
    
   
   

   
   
    
    
   
   
   
   
    
    
     
     # Show the picture. Only the recoloured cloud is drawn: the original also
     # called plt.imshow(wc) first, but that image was drawn over by this one
     # on the same axes straight away.
     plt.imshow(wc.recolor(color_func=image_color))
     plt.axis("off")  # hide the axes around the image
     plt.show()