新浪微博的热搜榜网址是 http://s.weibo.com/top/summary ,总共有 50 条,如图所示。
使用 BeautifulSoup 包,直接上代码:
import requests
import json
from lxml import html
from bs4 import BeautifulSoup
etree = html.etree
# Fetch the Weibo hot-search page, print each entry (title, count, rank) and
# save the entries as tab-separated lines "rank<TAB>title<TAB>count" in
# weibohotnews.txt.

# Request settings: a desktop-browser User-Agent and the query parameter sent
# along with the summary URL.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}
data = {
    'cate': 'realtimehot'
}

# The page text lives in its own name so it never shadows the `html` module
# imported from lxml.  It stays empty unless the request returns HTTP 200.
page_html = ""
try:
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get('http://s.weibo.com/top/summary?', params=data,
                     headers=headers, timeout=10)
except requests.RequestException as exc:
    # Only network/HTTP-layer failures are handled here; anything else
    # (e.g. KeyboardInterrupt) propagates instead of being silently swallowed.
    print('request failed:', exc)
else:
    print(r.url)
    if r.status_code == 200:
        page_html = r.text

soup = BeautifulSoup(page_html, 'lxml')

# The table container is missing when the download failed or the page layout
# differs; in that case there are simply no rows to process.
container = soup.find(id='pl_top_realtimehot')
tr = container.find_all('tr', class_="") if container is not None else []

# `with` guarantees the output file is flushed and closed even if a row fails.
with open("weibohotnews.txt", "w", encoding='utf-8') as f:
    # The first matched row is skipped, exactly as the original `i > 0` check
    # did (presumably a pinned, unranked entry -- TODO confirm against the page).
    for item in tr[1:]:
        link = item.find('a')
        count = item.find('span')
        rank_cell = item.find('td', class_="td-01 ranktop")
        # Skip rows that lack any of the three expected cells instead of
        # crashing with AttributeError on None.
        if link is None or count is None or rank_cell is None:
            continue

        title = link.get_text()
        num = count.get_text()
        rank = rank_cell.get_text()  # named `rank` to avoid shadowing builtin id()

        print(title)
        print(num)
        print(rank)
        f.write(rank + '\t' + title + "\t" + num + '\n')