获取百度热搜数据30

import requests
from bs4 import BeautifulSoup
import re
from sendEmail2 import SendQQEmail



# Baidu real-time hot-search board.
url = 'https://top.baidu.com/board?tab=realtime'
# Headers copied from a desktop Chrome session (presumably so the site
# serves the regular HTML page -- not verified here).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9'
}

# Without a timeout requests waits forever on a stalled connection.
r = requests.get(url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of scraping an error page into empty lists.
r.raise_for_status()

# For offline debugging, write r.text to a local HTML file and build the
# soup from that file instead of the live response.
soup = BeautifulSoup(r.text, "html.parser")

#print(soup.prettify())


 #排行:
# Rank number of every hot-search card, kept as the string found in the HTML.
rank_div = soup.select('.category-wrap_iQLoo')
print(len(rank_div))
ranking = []
# Raw string: `\d` must reach the regex engine, not be treated as a
# (deprecated) string escape.
patten = re.compile(r'> (\d+) <')
for card in rank_div:
    # Serialize the index element(s) back to HTML so the number can be
    # pulled out of the surrounding markup.
    index_html = ''.join(str(tag) for tag in card.select('.index_1Ew5p'))
    found = patten.search(index_html)
    # A card whose index holds no number would otherwise crash on
    # `.group`; store a blank so `ranking` stays aligned with the cards.
    ranking.append(found.group(1) if found else " ")
print(ranking)

# ----------------- Titles ------------------------
title = []
title_div = soup.select('.c-single-text-ellipsis')
for node in title_div:
    # `.string` is None when the element has more than one child, which made
    # `.replace` raise AttributeError; get_text() always returns a str.
    title.append(node.get_text().replace(' ', ''))
print(title)

#  ----------------- Summary text and link ------------------------

summary = []
href = []
# Captures the text sitting between a closing '>' and the next '<a' tag.
patten = re.compile(r'> (.+?) <a')
summary_div = soup.select('.large_nSuFU ')
for block in summary_div:
    # A block without an <a> (or without an href) used to raise; store a
    # blank so `href` stays index-aligned with `summary`.
    link = block.a
    href.append(link.get('href', " ") if link is not None else " ")

    # Search once and reuse the match instead of running the regex twice.
    found = patten.search(str(block))
    summary.append(found.group(1) if found else " ")
print(summary)
print(href)

#  ----------------- Hot-search index ------------------------

hot = []
hot_div = soup.select('.hot-index_1Bl1a')
for node in hot_div:
    # `.string` is None for elements with several children; get_text()
    # always yields a str, so stripping spaces cannot fail.
    hot.append(node.get_text().replace(' ', ''))

print(hot)



  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值