房天下租房

import requests
from selenium import webdriver
from lxml import etree

class Fantaixia(object):
    """Crawl rental listings from zu.fang.com and print one dict per listing.

    Constructing an instance immediately starts the crawl: the start page is
    fetched, every district link found on it is followed, and every result
    page of each district is parsed.
    """

    # Headers sent with every request.
    # NOTE(review): the cookie is a captured browser session hard-coded in
    # source; it will presumably expire and should be supplied externally.
    _HEADERS = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
        'referer': 'https://zu.fang.com/house/c20-d21000/',
        'upgrade-insecure-requests': '1',
        'cookie': 'city=www; global_cookie=t9mmpqtin4j8nf5tnbx87fwnw18k4i834na; Integrateactivity=notincludemc; integratecover=1; g_sourcepage=zf_fy%5Elb_pc; __utma=147393320.1830412335.1577092390.1577092390.1577953883.2; __utmc=147393320; __utmz=147393320.1577953883.2.2.utmcsr=fang.com|utmccn=(referral)|utmcmd=referral|utmcct=/; ASP.NET_SessionId=fzfvqfwnuyrsw5fjsjgubbdx; keyWord_recenthousebj=%5b%7b%22name%22%3a%22%e6%b5%b7%e6%b7%80%22%2c%22detailName%22%3a%22%22%2c%22url%22%3a%22%2fhouse-a00%2f%22%2c%22sort%22%3a1%7d%2c%7b%22name%22%3a%22%e4%b8%9c%e5%9f%8e%22%2c%22detailName%22%3a%22%22%2c%22url%22%3a%22%2fhouse-a02%2f%22%2c%22sort%22%3a1%7d%2c%7b%22name%22%3a%22%e4%b8%b0%e5%8f%b0%22%2c%22detailName%22%3a%22%22%2c%22url%22%3a%22%2fhouse-a06%2f%22%2c%22sort%22%3a1%7d%5d; __utmt_t0=1; __utmt_t1=1; __utmt_t2=1; unique_cookie=U_bsgj421nek8sgaaqr8a9x0yov1ok4wgzv2m*16; Captcha=2B696F72454245454F626D6D4539526B55736254666E4F4B756A3379594F2B2B7868346A3564557866772B797376306D346D4C45677A6142347A38304C506C7A61354530794E6F336939383D; __utmb=147393320.39.10.1577953883',
    }

    # Seconds to wait for the server before giving up on a request.
    _TIMEOUT = 10

    def __init__(self, url):
        """Store the start *url* and run the whole crawl."""
        self.url = url
        self.parse()

    def get_xpath_by_request(self, url):
        """Fetch *url* and return its parsed lxml tree.

        Returns the empty string when the request fails, the status code is
        not 200, or the body cannot be parsed into a tree. The empty string
        (rather than None) is kept as the failure value for compatibility
        with existing callers.
        """
        try:
            response = requests.get(
                url, headers=self._HEADERS, timeout=self._TIMEOUT
            )
        except requests.RequestException as exc:
            # One unreachable page should not abort the rest of the crawl.
            print(exc)
            return ''
        if response.status_code != 200:
            print(response.status_code)
            return ''
        tree = etree.HTML(response.text)
        return '' if tree is None else tree

    def get_text(self, text):
        """Return the first element of *text*, or '' when it is empty."""
        return text[0] if text else ''

    @staticmethod
    def _page_url(area_url, page):
        """Return the URL of result page number *page* under *area_url*."""
        return '{}i3{}/'.format(area_url, page)

    @staticmethod
    def _parse_max_page(text):
        """Return the page count embedded in *text*, or 0 if it has no digits."""
        digits = ''.join(ch for ch in text if ch in '0123456789')
        return int(digits) if digits else 0

    def parse_page(self, url):
        """Print a dict for every listing found on the result page at *url*."""
        html = self.get_xpath_by_request(url)
        if isinstance(html, str):
            # Fetch failed; get_xpath_by_request already reported why.
            return
        for dl in html.xpath('//div[@class="houseList"]/dl'):
            infos = dl.xpath('.//p[@class="font15 mt12 bold"]/text()')
            # Indices 1..3 are read below, so shorter rows are skipped.
            if len(infos) < 4:
                continue
            item = {
                'title': self.get_text(dl.xpath('.//p[@class="title"]/a/@title')),
                'price': self.get_text(dl.xpath('.//span[@class="price"]/text()')),
                'area_size': infos[2],
                'house_scal': infos[1],
                'location': infos[3],
            }
            print(item)

    def parse_area(self, url):
        """Parse every result page of the district listing at *url*."""
        html = self.get_xpath_by_request(url)
        if isinstance(html, str):
            return
        max_page = self.get_text(
            html.xpath('//div[@id="rentid_D10_01"]/span/text()')
        )
        print(max_page)
        max_page_num = self._parse_max_page(max_page)
        if not max_page_num:
            return
        print(max_page_num)
        for i in range(1, max_page_num + 1):
            # The page number must be part of the URL; a fixed 'i31/' suffix
            # would fetch the first page on every iteration.
            page_url = self._page_url(url, i)
            print(page_url)
            self.parse_page(page_url)

    def parse(self):
        """Collect district links from the start page and crawl each one."""
        html = self.get_xpath_by_request(self.url)
        if isinstance(html, str):
            return
        # position()>1 skips the first link in the district list.
        area_list = html.xpath(
            '//dl[@id="rentid_D04_01"]/dd/a[position()>1]/@href'
        )
        print(area_list)
        for area in area_list:
            self.parse_area('https://zu.fang.com' + area)

if __name__ == '__main__':
    # Constructing the scraper runs the crawl, starting from the site root.
    Fantaixia('https://zu.fang.com/')

运行结果的一部分
在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值