from lxml import html
import requests
def first_or_none(matches):
    """Return the first item of an XPath result list, or None if it is empty."""
    return matches[0] if matches else None


def extract_listings(tree):
    """Collect the title, image source and link of each listing card in *tree*.

    *tree* is a parsed lxml document. Every ``<div class="property">`` element
    is treated as one listing card. A card that lacks its link, detail
    container, title or image is skipped, so one malformed card cannot abort
    the whole run with an IndexError and every returned record is complete.

    Returns a list of dicts with the keys ``'title'``, ``'src'`` and ``'href'``.
    """
    title_box = './/div[@class="property-content-title"]'
    records = []
    for card in tree.xpath('//div[@class="property"]'):
        href = first_or_none(card.xpath('./a/@href'))
        detail = first_or_none(
            card.xpath('.//div[@class="property-content-detail"]')
        )
        if href is None or detail is None:
            continue
        title = first_or_none(detail.xpath(title_box + '/h3/@title'))
        src = first_or_none(detail.xpath(title_box + '/img/@src'))
        if title is None or src is None:
            continue
        records.append({
            'title': title,
            'src': src,
            'href': href
        })
    return records


if __name__ == '__main__':
    headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36',
    }
    # Fetch the page.
    url = 'https://sh.58.com/ershoufang/'
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of parsing an error page.
    resp.raise_for_status()
    resp.encoding = 'utf8'
    # Parse the page.
    etree = html.etree
    tree = etree.HTML(resp.text)
    # Records intended for an Elasticsearch bulk save (per the original note).
    action = extract_listings(tree)
    print(action)
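# Source article title: "python爬虫入门学习5-xpath-52ershoufang"
# (Python web-scraping primer 5: XPath; presumably 58.com second-hand housing.)
# Page residue: latest recommended article published 2024-05-06 12:53:06.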