import requests
from lxml import etree
# Fetch the page source of the zbj.com service search for the keyword "Python".
url = "https://www.zbj.com/fw/?k=Python"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
}

# Download the page. The context manager releases the connection even if
# reading the body raises, and the timeout keeps an unresponsive server from
# blocking the script indefinitely.
with requests.get(url, headers=headers, timeout=10) as resp:
    page_source = resp.text

# Parse the HTML text into an element tree that can be queried with XPath.
html = etree.HTML(page_source)

# Select the <div> of every service provider in the result list.
# NOTE(review): this absolute path is tied to the current page layout and
# yields an empty list if the site markup changes.
# The None check guards against the parser producing no tree for an empty body.
if html is not None:
    dives = html.xpath("//*[@id='__layout']/div/div[3]/div[1]/div[4]/div/div[2]/div/div[2]/div")
else:
    dives = []

for div in dives:  # one iteration per service-provider <div>
    # A listing without a price node yields an empty string instead of
    # raising IndexError on the [0] lookup.
    price_texts = div.xpath(".//div[@class='price']//span/text()")
    price = price_texts[0].strip("¥") if price_texts else ""
    # The title's text fragments are glued back together with 'python';
    # presumably the search keyword lives in its own highlighted element and
    # is therefore missing from these text nodes - TODO confirm.
    title = 'python'.join(div.xpath(".//div[@class='name-pic-box']//span/text()"))
    # Text nodes found under the shop-detail container (kept as a list).
    con_name = div.xpath(".//div[@class='shop-detail']//div/text()")
    # Text of the second <span> under the 'tabstwo' container (kept as a list).
    address = div.xpath(".//div[@class='tabstwo']//span[2]/text()")
    print(address)
# Python爬虫 学习Xpath 爬取zbj网站 部分信息
# 于 2024-09-02 16:01:40 首次发布