import requests
from lxml import etree
# Fetch the Douban Movie home page and print, for every entry in the
# "screening" carousel, its detail-page link, poster image URL, title and
# rating. Each value is printed as the raw list returned by XPath.
url = 'https://movie.douban.com/'
# NOTE(review): browser-like headers are presumably required for the site to
# serve the normal page to a script -- confirm before removing.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36',
    'referer': 'https://www.douban.com/',
}
# Without a timeout requests waits forever on a stalled connection.
response = requests.get(url, headers=headers, timeout=10)
# Stop on 4xx/5xx instead of silently parsing an error page into empty lists.
response.raise_for_status()
text = response.text
html = etree.HTML(text)
# Carousel items carry either the plain class or the variant with a trailing
# " s"; both are matched by exact class-attribute value.
movie_all = html.xpath("//div[@class='screening-bd']//li[@class='ui-slide-item' or @class='ui-slide-item s']")
# Use a distinct loop variable so the result list is not overwritten by its
# own elements while iterating.
for movie in movie_all:
    # Link and poster live in the nested <li class="poster">; title and rating
    # are data-* attributes on the carousel item itself.
    movie_link = movie.xpath('.//li[@class="poster"]/a/@href')
    movie_picture = movie.xpath('.//li[@class="poster"]//img/@src')
    movie_title = movie.xpath('./@data-title')
    movie_rating = movie.xpath('./@data-rate')
    print(movie_link, movie_picture, movie_title, movie_rating)
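# Locate the now-showing movies by inspecting the page structure.
# Use XPath expressions to extract each movie's detail-page link and poster image.
# Print the scraped results.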