WordCloud(词云图)
词云图,也叫文字云,是对文本中出现频率较高的“关键词”予以视觉化的展现,词云图过滤掉大量的低频低质的文本信息,使得浏览者只要一眼扫过文本就可领略文本的主旨。
from pyecharts import WordCloud
# Sample words and their weights, kept side by side so each pair is easy to
# audit; the chart API wants two parallel lists, which are derived below.
_word_weights = [
    ('Sam S Club', 10000),
    ('Macys', 6181),
    ('Amy Schumer', 4386),
    ('Jurassic World', 4055),
    ('Charter Communications', 2467),
    ('Chick Fil A', 2244),
    ('Planet Fitness', 1898),
    ('Pitch Perfect', 1484),
    ('Express', 1112),
    ('Home', 965),
    ('Johnny Depp', 847),
    ('Lena Dunham', 582),
    ('Lewis Hamilton', 555),
    ('KXAN', 550),
    ('Mary Ellen Mark', 462),
    ('Farrah Abraham', 366),
    ('Rita Ora', 360),
    ('Serena Williams', 282),
    ('NCAA baseball tournament', 273),
    ('Point Break', 265),
]
name = [word for word, _ in _word_weights]
value = [weight for _, weight in _word_weights]
# Draw the sample words on a 1000x300 canvas; font sizes are scaled into the
# 30-70 range according to each word's weight.
wordcloud = WordCloud(width=1000, height=300)
wordcloud.add(
    "",
    name,
    value,
    word_size_range=[30, 70],
)
# Bare expression: lets the notebook display the chart inline.
wordcloud
<div id="80912fc2768f47cd8f5446104f750146" style="width:1000px;height:300px;"></div>
# NOTE(review): `names` is defined here but never passed to the chart; the
# call below still uses the earlier `name`/`value` lists. Confirm which data
# set was intended.
names = [
    '无可奉告',
    '身经百战',
    '闷声发大财',
    'jia',
]
# Same data as the first chart, drawn inside a star-shaped outline.
wordcloud = WordCloud(width=1000, height=400)
wordcloud.add(
    "",
    name,
    value,
    word_size_range=[20, 50],
    shape='star',
)
wordcloud
- shape -> str
- 词云图轮廓,有 'circle', 'cardioid', 'diamond', 'triangle-forward', 'triangle', 'pentagon', 'star' 可选
- word_gap -> int
- 单词间隔,默认为 20。
- word_size_range -> list
- 单词字体大小范围,默认为 [12, 60]。
- rotate_step -> int
- 旋转单词角度,默认为 45
词云图轮廓,有 'circle', 'cardioid', 'diamond', 'triangle-forward', 'triangle', 'pentagon', 'star' 可选
# Star-shaped word cloud on a 1000x400 canvas, font sizes limited to 20-50.
wordcloud = WordCloud(width=1000, height=400)
wordcloud.add(
    "",
    name,
    value,
    word_size_range=[20, 50],
    shape='star',
)
# Bare expression: lets the notebook display the chart inline.
wordcloud
<div id="3373f3b4ff7b4c34a0dc0a4334d6e9da" style="width:1000px;height:400px;"></div>
import os
def read_content(content_path):
    """Read every regular file directly under ``content_path`` as UTF-8 text.

    Entries are visited in sorted name order so the result is deterministic
    (``os.listdir`` itself makes no ordering guarantee). Sub-directories are
    skipped; the directory is not walked recursively.

    Args:
        content_path: Directory that holds the text files.

    Returns:
        The contents of all files concatenated, each one followed by a
        newline. An empty string if the directory has no regular files.

    Raises:
        OSError: If the directory or one of its files cannot be read.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    pieces = []
    for entry in sorted(os.listdir(content_path)):
        file_fullpath = os.path.join(content_path, entry)
        if not os.path.isfile(file_fullpath):
            continue
        print('loading {}'.format(file_fullpath))
        # The context manager closes the handle as soon as the file is read,
        # instead of leaving it open until garbage collection.
        with open(file_fullpath, 'r', encoding='utf-8') as handle:
            pieces.append(handle.read())
        pieces.append('\n')
    print('loading is done!')
    # Join once at the end rather than growing a string with repeated +=.
    return ''.join(pieces)
# Load all text files from the '词云文本' folder next to the notebook.
# os.path.join picks the separator for the current OS; on Windows it yields
# the same '.\词云文本' string as the previous hard-coded literal.
content = read_content(os.path.join('.', '词云文本'))
loading .\词云文本\不为谁而作的歌.txt
loading .\词云文本\可惜没如果.txt
loading .\词云文本\她说.txt
loading .\词云文本\当你.txt
loading .\词云文本\江南.txt
loading .\词云文本\醉赤壁.txt
loading .\词云文本\飞云之下.txt
loading is done!
import jieba.analyse
# Run TextRank keyword extraction over the combined text, keeping at most 100
# keywords; withWeight=True makes each entry a (keyword, weight) tuple.
result = jieba.analyse.textrank(content, topK=100, withWeight=True)
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
<ipython-input-4-04574144ff8c> in <module>()
----> 1 import jieba.analyse
2
3 result = jieba.analyse.textrank(content,topK=100,
4 withWeight=True
5 )
ModuleNotFoundError: No module named 'jieba'
# Peek at the first five (keyword, weight) pairs of the extraction result.
result[:5]
[('不到', 1.0),
('回忆', 0.7642400408081645),
('记忆', 0.6971421533457647),
('天黑', 0.6960071351347239),
('眼神', 0.6913764191971373)]
from pyecharts import WordCloud
# Word cloud of the extracted keywords on a 1000x500 canvas.
wordcloud = WordCloud(width=1000, height=500)
# cast() unpacks the (keyword, weight) pairs into two parallel sequences.
# NOTE: this rebinds the module-level `name` and `value` used by earlier cells.
name, value = wordcloud.cast(result)
wordcloud.add(
    "",
    name,
    value,
    word_size_range=[20, 70],
    word_gap=7,
    shape='diamond',
)
# Bare expression: lets the notebook display the chart inline.
wordcloud
<div id="937a07632045475481deda9a9378d41a" style="width:1000px;height:500px;"></div>
from pyecharts import Page
# Render the same keyword data once per outline shape and stack the charts on
# a single page so the shapes can be compared side by side.
page = Page()
for outline in [
    'circle',
    'cardioid',
    'diamond',
    'triangle-forward',
    'triangle',
    'pentagon',
    'star',
]:
    # The shape name doubles as the chart title.
    wordcloud = WordCloud(title=outline, width=1000, height=500)
    wordcloud.add(
        "",
        name,
        value,
        word_size_range=[10, 70],
        word_gap=10,
        shape=outline,
    )
    page.add(wordcloud)
# Write all charts into one standalone HTML file.
page.render(path='词云图不同图形展示.html')
# page