本次主要使用 XPath 方法来确定所需标签的准确位置。
源代码如下：
from lxml import etree
import time
import requests
import random
# Fetch the first page of the 58.com Beijing second-hand housing listings,
# extract the title text of each entry with XPath, print every title and
# save them to 58.txt, one title per line.

# Chrome User-Agent string sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36'
}
url = 'https://bj.58.com/ershoufang/'

# Without a timeout, requests can wait indefinitely on an unresponsive server.
page_text = requests.get(url=url, headers=headers, timeout=10).text
tree = etree.HTML(page_text)

# Child <div> elements of the first "list" section in the left column;
# presumably one <div> per listing -- verify against the live page markup.
section_list = tree.xpath('//section[@class="list-main"]/section[@class="list-left"]/section[@class="list"][1]/div')

# The context manager guarantees the output file is flushed and closed,
# even if an exception is raised while processing the entries.
with open('58.txt', 'w', encoding='utf-8') as fp:
    for section in section_list:
        titles = section.xpath('.//div[@class="property-content-title"]/h3/text()')
        if not titles:
            # This <div> has no title text; skip it instead of aborting
            # the whole run with an IndexError.
            continue
        title = titles[0]
        print(title)
        fp.write(title + '\n')
        # Random pause between 0 and 2 seconds, rounded to two decimals.
        # NOTE(review): the original indentation was lost; the pause is
        # assumed to belong to the loop body -- confirm.
        time.sleep(round(random.uniform(0, 2), 2))