1.确定目标url:‘https://jms.zu.anjuke.com/?kw=’ get请求
2.发起请求,获得响应
3.使用xpath
import requests
from lxml import etree
# Scrape the rental listings on the Anjuke search page and print one line per
# listing: title, details, address, tags and price.

# Browser-like User-Agent header sent with the request.
headers1 = {
    'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0'
}
url1 = 'https://jms.zu.anjuke.com/?kw='

# requests has no default timeout; without one a stalled server would hang the
# script forever.
response = requests.get(url1, headers=headers1, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page and printing nothing.
response.raise_for_status()
tree = etree.HTML(response.text)

# Every listing lives under a div with class "zu-itemmod clearfix"; locating
# those divs yields one element per listing.
div_list = tree.xpath('//div[@class="zu-itemmod clearfix"]')

# Notes on the XPath expressions used below:
#   * positional indexes inside an XPath expression start at 1;
#   * a leading "." makes the path relative to the current div.
for data in div_list:
    title = data.xpath('.//div[@class="zu-info"]/h3/a/b/text()')  # title
    info = data.xpath('.//div[@class="zu-info"]/p[@class="details-item tag"]//text()')  # details
    address = data.xpath('.//div[@class="zu-info"]/address[@class="details-item tag"]//text()')  # address
    label = data.xpath('.//div[@class="zu-info"]/p[@class="details-item bot-tag"]//text()')  # tags

    # The price is the second text node of the side column. Guard the lookup so
    # one listing with unexpected markup does not abort the whole run.
    price_nodes = data.xpath('.//div[@class="zu-side"]//text()')
    price = price_nodes[1] if len(price_nodes) > 1 else ''

    # Join the text nodes and strip newlines and spaces.
    title1, info1, address1, label1 = (
        ''.join(nodes).replace('\n', '').replace(' ', '')
        for nodes in (title, info, address, label)
    )
    # Only the address field also drops non-breaking spaces.
    address1 = address1.replace('\xa0', '')

    print(title1, info1, address1, label1, price)
解析
5038

被折叠的 条评论
为什么被折叠?



