# house_贝壳: Selenium script that scrapes listing pages from sz.fang.ke.com/loupan into a CSV file

import csv
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By

# Anti-detection: exclude the 'enable-automation' switch from the Chrome
# launch options.
from selenium.webdriver import ChromeOptions
option = ChromeOptions()
option.add_experimental_option('excludeSwitches', ['enable-automation'])

# Instantiate the browser object.
# The options are passed via ``options=``: the older ``chrome_options=``
# keyword is deprecated and is rejected by recent Selenium 4 releases.
bro = webdriver.Chrome(options=option)
bro.maximize_window()
# Fixed pause after opening the browser window before the first page load.
sleep(3)

# Placeholder written to the CSV when a field cannot be read from a listing.
_NO_DATA = '暂无数据'
# Placeholder written when no unit-price line is present on a listing.
_NO_PRICE = '价格待定'


def _parse_listing(info):
    """Turn the text lines of one listing card into a CSV row.

    ``info`` is the card's text split on newlines with the first line
    dropped, for example::

        ['鹏瑞尚府 在售 住宅',
         '盐田区/沙头角/广东省深圳市盐田区田心东路35',
         '户型: 3室 / 4室 建面 98-138㎡',
         '新房顾问:张学君',
         '大型社区',
         '57500  元/㎡(均价)',
         '总价502-759(万/套)']

    Returns ``[title, region, street, address, house_type, area, price]``,
    or ``None`` when the card has fewer than the three leading lines
    (title, location, layout) that are read by position.
    """
    if len(info) < 3:
        return None

    # Title line looks like '<name> <status> <kind>'; keep only the name.
    house_title = info[0].split(' ')[0]

    # Location line looks like '<region>/<street>/<full address>'.
    house_ad_info = info[1].split('/')
    house_region = house_ad_info[0]
    house_street = house_ad_info[1] if len(house_ad_info) > 1 else _NO_DATA
    house_address = house_ad_info[-1]

    # Layout line looks like '户型: 3室 / 4室 建面 98-138㎡'.
    house_square_info0 = info[2]
    house_square_info = house_square_info0.split(' ')

    # Collect every room-type token instead of reading fixed positions:
    # fixed indices pick up the '建面' label or the area value whenever the
    # number of room types is not what the indices assume.
    rooms = [token for token in house_square_info if '室' in token]
    house_type = '/'.join(rooms) if rooms else _NO_DATA

    # When the floor-area label is present the value is the last token.
    if '建面' in house_square_info0:
        house_area = house_square_info[-1]
    else:
        house_area = _NO_DATA

    # Unit price: take the first line containing '元/' and stop there, so a
    # later non-matching line cannot overwrite the value that was found.
    # Every line is examined, including the last one.
    house_sal = _NO_PRICE
    for line in info:
        if '元/' in line:
            house_sal = line.replace('元/㎡(均价)', '').strip()
            break

    return [house_title, house_region, house_street, house_address,
            house_type, house_area, house_sal]


try:
    # Open the output once for the whole run. Text-mode newline handling is
    # left at its default and the writer emits '\n', so rows end the same
    # way as a plain ``write('\n')`` would; csv.writer quotes any field
    # that itself contains a comma so columns stay aligned.
    with open('贝壳找房.csv', 'a', encoding='gb18030') as fp:
        writer = csv.writer(fp, lineterminator='\n')

        for page in range(1, 6):
            # Target URL of the listing page to scrape.
            url = f'https://sz.fang.ke.com/loupan/pg{page}/'
            bro.get(url)
            sleep(3)

            li_list = bro.find_elements(By.XPATH, '/html/body/div[6]/ul[2]/li')
            sleep(3)

            for li in li_list:
                # Drop the first text line of the card; the remaining lines
                # are parsed by position and by keyword.
                row = _parse_listing(li.text.split('\n')[1:])
                if row is None:
                    # Card without the expected leading lines: skip it
                    # rather than abort the whole run.
                    continue
                writer.writerow(row)
finally:
    # Always close the browser, even if a page load or parse step fails.
    bro.quit()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值