#首先准备requests库和lxml库
import requests
from lxml import etree
# Scrape the Douban movie chart page and print the title and link of up to
# ten entries found under the page's third top-level <div>.

# Douban movie chart URL.
url = "https://movie.douban.com/chart"
# Send a browser-like User-Agent so the request is less likely to be rejected
# by anti-scraping checks.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36"
}
# Send the request. The timeout keeps the script from hanging forever on a
# stalled connection (requests applies no timeout by default).
res = requests.get(url=url, headers=headers, timeout=10)
# Force UTF-8 decoding of the response body.
res.encoding = 'utf-8'
s = res.text
selector = etree.HTML(s)
# Show the response object; a status of 200 means the request succeeded.
print(res)
for item in selector.xpath('/html/body/div[3]'):
    # Text and href of every <div class="name"><a> link inside this container.
    titles = item.xpath('.//div[@class="name"]/a/text()')
    links = item.xpath('.//div[@class="name"]/a/@href')
    print("豆瓣一周口碑榜单:")
    # Take at most ten entries; slicing avoids an IndexError when the page
    # yields fewer than ten matches.
    for title, link in zip(titles[:10], links[:10]):
        # Strip surrounding whitespace instead of slicing the fixed offsets
        # [21:25], which printed at most four characters of each title.
        # NOTE(review): assumes the skipped prefix was only whitespace
        # padding in the page markup -- confirm against the live page.
        print(title.strip())
        print(link)
# See also this HTTP status code reference table:
# https://blog.csdn.net/t_332741160/article/details/81408597
# python爬虫爬取豆瓣一周榜单
# 最新推荐文章于 2022-09-28 21:40:46 发布