Python爬虫入门学习5-xpath-58ershoufang

from lxml import html
import requests

if __name__ == '__main__':
    # Scrape the second-hand housing listing page and print, for every
    # listing card found, a dict with its title, thumbnail URL and link.

    # Browser-style User-Agent header sent with the request.
    headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36',
    }

    # Fetch the page. A timeout keeps the script from hanging forever on a
    # stalled connection, and raise_for_status() stops us from parsing an
    # HTTP error page as if it were the listing page.
    url = 'https://sh.58.com/ershoufang/'
    resp = requests.get(url=url, headers=headers, timeout=10)
    resp.raise_for_status()
    resp.encoding = 'utf8'

    # Parse the HTML into an element tree that supports XPath queries.
    etree = html.etree
    tree = etree.HTML(resp.text)

    # Each listing is rendered inside a <div class="property"> element.
    cards = tree.xpath('//div[@class="property"]')

    action = []  # records collected for a later Elasticsearch bulk save
    for card in cards:
        # xpath() returns a list; check each result before indexing so that
        # one card with missing markup is skipped instead of raising
        # IndexError and aborting the whole run.
        hrefs = card.xpath('./a/@href')
        details = card.xpath('.//div[@class="property-content-detail"]')
        if not hrefs or not details:
            continue
        detail = details[0]

        titles = detail.xpath('.//div[@class="property-content-title"]/h3/@title')
        srcs = detail.xpath('.//div[@class="property-content-title"]/img/@src')
        if not titles or not srcs:
            continue

        action.append({
            'title': titles[0],
            'src': srcs[0],
            'href': hrefs[0],
        })

    print(action)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值