# //div[@class='billboard-bd']//a/text()
import requests
from lxml import etree
# Fetch the Douban Movie front page and print the link texts found inside
# the <div class="billboard-bd"> element.
url = 'https://movie.douban.com/'

# Send a desktop-browser User-Agent instead of the default python-requests
# one.
headers = {
    'User-agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'
    ),
}

# The context manager releases the connection once the body has been read.
# The timeout keeps the script from hanging forever on a stalled connection.
with requests.get(url, headers=headers, timeout=10) as response:
    # Fail loudly on 4xx/5xx instead of silently parsing an error page and
    # reporting an empty result.
    response.raise_for_status()
    html = response.text

# Parse the markup with lxml's HTML parser; `root` is the document's root
# element.
root = etree.HTML(html)
print(root.tag)

# Text nodes of every <a> nested anywhere under div.billboard-bd.
titles = root.xpath('//div[@class="billboard-bd"]//a/text()')
print(titles)
print(len(titles))
# lxml / etree: simple data scraping
# (Blog page metadata: latest recommended article published 2023-08-04 10:51:06)