1,使用到的第三方库
requests
BeautifulSoup 美味汤
worldcloud 词云
jieba 中文分词
matplotlib 绘图
2,代码实现部分
import requests
import wordcloud
import jieba
from bs4 import BeautifulSoup
from matplotlib import pyplot as plt
from pylab import mpl
#设置字体
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
url = 'https://s.weibo.com/top/summary?Refer=top_hot&topnav=1&wvr=6'
try:
#获取数据
r = requests.get(url)
r.raise_for_status()
r.encoding = r.apparent_encoding
soup = BeautifulSoup(r.text,'html.parser')
data = soup.find_all('a')
d_list = []
for item in data:
d_list.append(item.text)
words = d_list[4:-11:]
#中文分词
result = list(jieba.cut(words[0]))
for word in words[1::]:
result.extend(jieba.cut(word))
redata = []
for it in result:
if len(it) <= 1:
continue
else:
redata.append(it)
result_str = ' '.join(redata)
#输出词云图
font = r'C:\Windows\Fonts\simhei.ttf'
w = wordcloud.WordCloud(font_path=font,width=600,height=400)
w.generate(result_str)
w.to_file('微博热搜关键词词云.png')
key = list(set(redata))
x,y = [],[]
#筛选数据
for st in key:
count = redata.count(st)
if count <= 1:
continue
else:
x.append(st)
y.append(count)
x.sort()
y.sort()
#绘制结果图
plt.plot(x,y)
plt.show()
except Exception as e:
print(e)
3,运行结果