需求
爬取58同城(北京)二手房列表页中的房源标题信息
网站:https://bj.58.com/ershoufang/
代码
import requests
from lxml import etree
# Scrape the listing titles from the 58.com Beijing second-hand housing page
# and print one title per line.

# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.56'
}

# Fetch the HTML source of the listing page.
url = 'https://bj.58.com/ershoufang/'
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url=url, headers=headers, timeout=10)
# Fail loudly on an HTTP error status instead of silently parsing an error page.
response.raise_for_status()
page_text = response.text

# Build an lxml element tree from the page source.
html = etree.HTML(page_text)

# Select every <div> element whose class attribute equals "property".
div_list = html.xpath('//div[@class="property"]')
for div in div_list:
    # Relative XPath, evaluated against the current <div> only.
    titles = div.xpath('./a/div[@class="property-content"]/div/div[@class="property-content-title"]/h3/text()')
    # Skip cards that carry no title node rather than raising IndexError.
    if not titles:
        continue
    print(titles[0])