代码成功运行但结果为空

本人编程新手,由于需要爬取租房数据,就照着一些文章写了以下代码,但是代码能成功运行最后结果为空,此外,房天下里边房屋的经纬度我不知道怎么爬取,求各位大佬救我狗命!
需要爬取的是房源的经纬度坐标(页面数据中的 coordx 和 coordy 字段)

import requests
import io
import sys
from lxml import etree

# Re-wrap stdout with gb18030 so Chinese text prints correctly on a
# GBK-encoded Windows console (otherwise print() may raise UnicodeEncodeError).
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='gb18030')

# Shared request headers: a desktop Chrome user-agent plus a fang.com session
# cookie.  NOTE(review): the cookie is session-specific and will expire — if
# the site starts returning captcha pages, refresh it from the browser.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
    'cookie': 'rp1sgpymjjg22qyksi4w2ndn51yk0qobbdl; integratecover=1; city=wuhan; keyWord_recenthousewuhan=%5b%7b%22name%22%3a%22%e6%b4%aa%e5%b1%b1%22%2c%22detailName%22%3a%22%22%2c%22url%22%3a%22%2fhouse-a016756%2f%22%2c%22sort%22%3a1%7d%2c%7b%22name%22%3a%22%e6%b1%9f%e5%b2%b8%22%2c%22detailName%22%3a%22%22%2c%22url%22%3a%22%2fhouse-a016752%2f%22%2c%22sort%22%3a1%7d%2c%7b%22name%22%3a%22%e4%b8%9c%e8%a5%bf%e6%b9%96%22%2c%22detailName%22%3a%22%22%2c%22url%22%3a%22%2fhouse-a016758%2f%22%2c%22sort%22%3a1%7d%5d; ASP.NET_SessionId=3qykqj1ahsh2giuf4jcxxycd; unique_cookie=U_yiiict5fikmpste4n1czmn0pn1vk67x7ckr*5; g_sourcepage=zf_fy%5Elb_pc; Captcha=496A6D443072576C2F622B54584A38656A754D4F4F7A4C476F7468396971773478453644526B6E4B364B537037654F624E577474567958486E6D704B6646594F3379354769734A71576E413D; __utma=147393320.1458657227.1568896244.1580817609.1580823059.14; __utmc=147393320; __utmz=147393320.1580823059.14.13.utmcsr=wuhan.fang.com|utmccn=(referral)|utmcmd=referral|utmcct=/; __utmt_t0=1; __utmt_t1=1; __utmt_t2=1; __utmb=147393320.3.10.1580823059'
}
#获取详情页面url

def get_detail_urls(url):
    """Fetch one listing page and return the absolute detail-page URLs on it.

    Args:
        url: A listing-page URL, e.g. 'https://wuhan.zu.fang.com/house/i31/'.

    Returns:
        list[str]: Absolute detail-page URLs found on the page (empty if the
        page structure changed or the site served an anti-crawler page).
    """
    resp = requests.get(url, headers=headers)
    text = resp.text

    html = etree.HTML(text)
    # Each rental is a <dl> under div.houseList; the link lives in its <dt>.
    lis = html.xpath('//div[@class="houseList"]/dl/dt')
    detail_urls = []

    for dt in lis:
        href = dt.xpath('./a/@href')
        if not href:
            # Some <dt> entries (ads/placeholders) carry no link — skip them.
            continue
        detail_url = 'https://wuhan.zu.fang.com' + href[0]
        # BUG FIX: the original appended the listing-page `url` here instead
        # of `detail_url`, so every "detail" request re-fetched the listing
        # page and the parsed result came back empty.
        detail_urls.append(detail_url)

    return detail_urls



# Extract the detail-page data
def parse_detail_pages(detail_url):
    """Fetch a rental detail page and extract its fields into a dict.

    Args:
        detail_url: Absolute URL of one rental's detail page.

    Returns:
        dict[str, str]: Field name (Chinese key) -> extracted text,
        '' when a field is missing from the page.
    """
    def _first(values):
        # xpath() always returns a list; take the first hit stripped of
        # surrounding whitespace, or '' when the selector matched nothing.
        return values[0].strip() if values else ''

    # BUG FIX: the original request sent no headers here (unlike
    # get_detail_urls), so fang.com could serve an anti-crawler/captcha page
    # and every xpath below came back empty.
    resp = requests.get(detail_url, headers=headers)
    text = resp.text
    html = etree.HTML(text)

    title = _first(html.xpath('//div[@class="title"]/text()'))
    price = _first(html.xpath('//div[@class="trl-item sty1"]/i/text()'))
    info1 = _first(html.xpath('//div[@class="trl-item1 w130"]/div[@class="tt"]/text()'))
    info2 = _first(html.xpath('//div[@class="trl-item1 w162"]/div[@class="tt"]/text()'))
    area = _first(html.xpath('//div[@class="trl-item1 w100"]/div[@class="tt"]/text()'))
    fix = _first(html.xpath('//div[@class="trl-item1 w00"]/div[@class="tt"]/text()'))
    address = _first(html.xpath('//div[@class="trl-item2 clearfix"]/div[@class="rcont"]/a/text()'))

    infos = {}
    infos['标题'] = title
    infos['价格'] = price
    infos['出租方式和朝向'] = info1
    infos['户型和高度'] = info2
    infos['面积'] = area
    infos['装修情况'] = fix
    infos['地址'] = address

    return infos


def save_data(infos, f):
    """Append one listing's fields to *f* as a single comma-separated line.

    Args:
        infos: Dict produced by parse_detail_pages (keyed by Chinese field names).
        f: A writable text file object.
    """
    field_order = ('标题', '价格', '出租方式和朝向', '户型和高度', '面积', '装修情况', '地址')
    row = ','.join('{}'.format(infos[key]) for key in field_order)
    f.write(row + '\n')

def main():
    """Crawl listing pages 1-99 and append every rental's fields to a CSV."""
    base_url = 'https://wuhan.zu.fang.com/house/i3{}/'
    with open('fangtianxia.csv', 'a', encoding='utf-8') as f:
        for page in range(1, 100):
            # BUG FIX: the original did `url = url.format(x)`, rebinding the
            # template — after page 1 the '{}' placeholder was gone, so every
            # later .format() returned the page-1 URL and the same page was
            # crawled 99 times.
            page_url = base_url.format(page)
            detail_urls = get_detail_urls(page_url)

            for detail_url in detail_urls:
                infos = parse_detail_pages(detail_url)
                print(infos)
                save_data(infos, f)


if __name__ == '__main__':
    main()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值