# Target URL: http://top.baidu.com/buzz?b=341&c=513&fr=topbuzz_b1
#爬取百度今日热点事件排行榜
#今天时间短,爬个小玩意
import requests #自动爬去html页面,自动请求网络提交
from bs4 import BeautifulSoup #解析HTML/XMl页面,提取数据或信息
# Scrape the Baidu hot-events ranking page and print each entry's title next
# to its search index.
url = 'http://top.baidu.com/buzz?b=341&c=513&fr=topbuzz_b1'
# Send a desktop Chrome User-Agent string with the request.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'}

# Bound the wait so a stalled connection cannot hang the script forever, and
# fail loudly on an HTTP error status instead of parsing an error page.
urls = requests.get(url, headers=headers, timeout=10)
urls.raise_for_status()

# Decode the body with the encoding guessed from its content rather than the
# one declared in the response headers.
urls.encoding = urls.apparent_encoding
text = urls.text
soup = BeautifulSoup(text, 'lxml')  # parse the HTML with the lxml parser

# Titles: the text of every element carrying the "list-title" class.
a = soup.find_all(class_="list-title")
aa = [i.get_text() for i in a]

# Search index: the whitespace-stripped text of every <td class="last"> cell.
q = soup.find_all('td', class_="last")
qq = [i.get_text().strip() for i in q]

# Combine the two lists: print a header row, then one title/index pair per
# entry. zip() stops at the shorter list, so unmatched items are not printed.
print('{:25}\t{}'.format('标题', '搜索指数'))
for i, y in zip(aa, qq):
    print('{:20}\t{}\n'.format(i, y))
# Run output: