from selenium import webdriver
from time import sleep
from selenium.webdriver.common.by import By
# 实现反监测(三不)
from selenium.webdriver import ChromeOptions
# Placeholder written when a listing card lacks a field.
NO_DATA = '暂无数据'
# Placeholder written when a listing card shows no per-square-metre price.
PRICE_PENDING = '价格待定'


def parse_listing(card_text):
    """Convert the visible text of one listing card into a CSV row.

    The first line of the card text is discarded and the remaining lines
    are interpreted positionally, e.g.::

        ['鹏瑞尚府 在售 住宅',
         '盐田区/沙头角/广东省深圳市盐田区田心东路35',
         '户型: 3室 / 4室 建面 98-138㎡',
         '新房顾问:张学君', '大型社区',
         '57500 元/㎡(均价)', '总价502-759(万/套)']

    Args:
        card_text: Newline-separated text of a single listing element.

    Returns:
        A list ``[title, region, street, address, house_type, area, price]``
        of strings, or ``None`` when the card has fewer than the three
        lines (title, location, layout) needed to build a row.
    """
    info = card_text.split('\n')[1:]
    if len(info) < 3:
        return None

    # Title line looks like "<name> <sale status> <property kind>".
    title = info[0].split(' ')[0]

    # Location is "region/street/address"; maxsplit keeps an address that
    # itself contains "/" in one piece, and padding avoids IndexError when
    # fewer than three parts are present.
    location = info[1].split('/', 2)
    location += [NO_DATA] * (3 - len(location))
    region, street, address = location

    # Layout line: pick the room-type tokens by content instead of by fixed
    # position, so one, two or many room types all work and the area is
    # never mixed into the house type.
    layout_line = info[2]
    room_types = [token for token in layout_line.split() if '室' in token]
    house_type = '/'.join(room_types) if room_types else NO_DATA

    # Floor area is the last token following the "建面" label.
    area_tokens = layout_line.partition('建面')[2].split()
    house_area = area_tokens[-1] if area_tokens else NO_DATA

    # Unit price: first line that carries a "元/" unit, searched over every
    # line (including the last one); a later non-matching line must not
    # overwrite a price that was already found.
    price_line = next((line for line in info if '元/' in line), None)
    if price_line is None:
        house_sal = PRICE_PENDING
    else:
        house_sal = price_line.replace('元/㎡(均价)', '').strip()

    return [title, region, street, address, house_type, house_area, house_sal]


def main():
    """Scrape pages 1-5 of the new-home listings and append rows to a CSV."""
    import csv  # local import: only the script entry point needs it

    # Drop the "enable-automation" switch so the browser does not advertise
    # that it is being driven by automation.
    option = ChromeOptions()
    option.add_experimental_option('excludeSwitches', ['enable-automation'])

    # Selenium 4 takes the options object through ``options=``; the old
    # ``chrome_options=`` keyword is no longer accepted.
    bro = webdriver.Chrome(options=option)
    try:
        bro.maximize_window()
        sleep(3)

        # Open the output once for the whole run. newline='' is what the csv
        # module expects; csv.writer quotes fields that contain commas.
        with open('贝壳找房.csv', 'a', encoding='gb18030', newline='') as fp:
            writer = csv.writer(fp)
            for page in range(1, 6):
                # Target listing page for this iteration.
                url = f'https://sz.fang.ke.com/loupan/pg{page}/'
                bro.get(url)
                sleep(3)
                li_list = bro.find_elements(By.XPATH, '/html/body/div[6]/ul[2]/li')
                sleep(3)

                for li in li_list:
                    row = parse_listing(li.text)
                    if row is None:
                        # Not enough text to form a record; skip this card.
                        continue
                    writer.writerow(row)
    finally:
        # Always shut the browser down, even if scraping fails part-way.
        bro.quit()


if __name__ == '__main__':
    main()
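# NOTE: the two lines below are web-page residue from where this script was
# copied (article tag and publish-date footer); they are not part of the code.
# house_贝壳
# 最新推荐文章于 2024-07-29 14:37:54 发布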