Python爬虫入门学习5-xpath-58ershoufang

最新推荐文章于 2024-05-06 12:53:06 发布

Evol_ve

最新推荐文章于 2024-05-06 12:53:06 发布

阅读量130

点赞数

分类专栏： python

本文链接：https://blog.csdn.net/qq_39223083/article/details/118251965

版权

python 专栏收录该内容

14 篇文章 0 订阅

订阅专栏

from lxml import html
import requests

def first_or_none(values):
    """Return the first element of ``values``, or ``None`` if it is empty.

    An XPath query that matches nothing returns an empty list, so indexing
    the result with ``[0]`` raises ``IndexError``. Going through this helper
    lets one incomplete listing card be handled without aborting the run.
    """
    return values[0] if values else None


def parse_listings(page_source):
    """Extract the title, image URL and link of every listing card in a page.

    Args:
        page_source: HTML text of the listing page.

    Returns:
        A list of dicts with the keys ``'title'``, ``'src'`` and ``'href'``.
        A value is ``None`` when the matching node is absent from the card.
        Cards without a ``property-content-detail`` container are skipped.
    """
    tree = html.etree.HTML(page_source)
    if tree is None:
        # Defensive guard: there is nothing to query without a root element.
        return []

    # Records collected for a later bulk save (the original comment mentions
    # "es", presumably Elasticsearch -- confirm against the consumer).
    records = []
    for card in tree.xpath('//div[@class="property"]'):
        detail = first_or_none(
            card.xpath('.//div[@class="property-content-detail"]')
        )
        if detail is None:
            continue
        records.append({
            'title': first_or_none(
                detail.xpath('.//div[@class="property-content-title"]/h3/@title')
            ),
            'src': first_or_none(
                detail.xpath('.//div[@class="property-content-title"]/img/@src')
            ),
            'href': first_or_none(card.xpath('./a/@href')),
        })
    return records


if __name__ == '__main__':
    headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36',
    }
    # Fetch the page. The timeout keeps the script from hanging forever on a
    # stalled connection.
    url = 'https://sh.58.com/ershoufang/'
    resp = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of parsing an error page.
    resp.raise_for_status()
    resp.encoding = 'utf8'
    # Parse the page and show what was extracted.
    action = parse_listings(resp.text)
    print(action)