import requests
from lxml import etree
# Fetch the Douban Top 250 page and print the title text of the first movie
# entry found on it.

# Request headers carrying a desktop-browser User-Agent string.
Headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
}
Url = 'https://movie.douban.com/top250'

# Bound the wait so a stalled connection cannot hang the script, and fail
# loudly on an HTTP error status instead of parsing an error page.
web_data = requests.get(Url, headers=Headers, timeout=10)
web_data.raise_for_status()
web_html = etree.HTML(web_data.text)

# The XPath for a single entry is
#   //*[@id="content"]/div/div[1]/ol/li[1]/div/div[2]/div[1]/a/span[1]
# Dropping the [1] index on `li` matches the same <span> under every list
# item, so the result is a list with one element per entry.
movie_name = web_html.xpath(
    '//*[@id="content"]/div/div[1]/ol/li/div/div[2]/div[1]/a/span[1]'
)

# An empty result means the page layout differs from what the XPath expects
# (or a different page was served); report that instead of an IndexError.
if not movie_name:
    raise RuntimeError(f'No movie title elements matched the XPath on {Url}')

print(movie_name[0].text)
1. 用 XPath 获取关键数据:
//*[@id="content"]/div/div[1]/ol/li[1]/div/div[2]/div[1]/a/span[1]
2. 最终得到结果:
肖申克的救赎