房多多scrapy爬虫实例

# -*- coding: utf-8 -*-
import scrapy
import os
import sys
sys.path.append("C:/Users/***/scrapy/fhdodo")
from  fhdodo.items import FhdodoItem


class FhdoSpider(scrapy.Spider):
    """Spider for second-hand house listings on suzhou.fangdd.com.

    Crawls result pages 1-30 of a fixed filtered search and yields one
    FhdodoItem per listing found on each page.
    """

    name = 'fhdo'
    # BUG FIX: allowed_domains must hold bare domain names, not full URLs.
    # With 'https://suzhou.fangdd.com/' scrapy's OffsiteMiddleware would
    # treat every request to the site as off-site and drop it.
    allowed_domains = ['suzhou.fangdd.com']
    # URL template for one search-result page; {} is the 1-based page number.
    host = 'https://suzhou.fangdd.com/esf-a0-a150_s1-s2_l70_x0/?pageNo={}'
    # Pages 1..30.  map(host.format, ...) is used instead of a list
    # comprehension because a comprehension body executes in its own scope
    # and cannot see class-level names such as `host`.
    start_urls = list(map(host.format, range(1, 31)))

    def parse(self, response):
        """Extract one item per listing <li> on a result page.

        Args:
            response: scrapy Response for one paginated search-result page.

        Yields:
            FhdodoItem: link, title, type info, address parts and price of
            each listing.  Every field currently holds a list of
            whitespace-split tokens (see NOTE below).
        """
        for listing in response.xpath("//li[@class='LpList-item']"):
            item = FhdodoItem()
            whvi = listing.xpath("./a/@href").extract()
            title = listing.xpath("./div[@class='LpList-cont']/h4/a/span/text()").extract()
            info = listing.xpath("./div[@class='LpList-cont']/p[@class='LpList-type']/text()").extract()
            addr = listing.xpath("./div[@class='LpList-cont']/p[@class='LpList-address ellipsis']/a/text()").extract()
            price = listing.xpath("./div[@class='LpList-cont']/div[@class='LpList-pricebox']/p/strong/text()").extract()
            up = listing.xpath("./div[@class='LpList-cont']/div[@class='LpList-pricebox']/p/text()").extract()

            # NOTE(review): .split() stores each field as a LIST of tokens;
            # .strip() was probably intended here — confirm against the item
            # pipeline before changing, since downstream code may rely on
            # the list shape.
            # NOTE(review): the fixed [1]/[2] indexing raises IndexError on
            # listings with fewer address/info fragments — verify the page
            # structure guarantees these counts.
            item['whvi'] = whvi[0].split()
            item['title'] = title[0].split()
            item['info0'] = info[0].split()
            item['info1'] = info[1].split()
            item['addr0'] = addr[0].split()
            item['addr1'] = addr[1].split()
            item['addr2'] = addr[2].split()
            item['price'] = price[0].split()
            item['up'] = up[1].split()
            yield item

主代码,别的都参考前一个实例

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值