Scrapy实例__链家租房爬虫
创建爬虫项目
scrapy startproject lianjia_zf
定义爬取的数据项
import scrapy
class LianjiaZfItem(scrapy.Item):
    """Container for the fields scraped from one Lianjia rental listing."""

    title = scrapy.Field()  # listing title
    update_time = scrapy.Field()  # time the listing was last updated
    price = scrapy.Field()  # monthly rent
    tags = scrapy.Field()  # tags
    rent_method = scrapy.Field()  # rental method
    house_type = scrapy.Field()  # house type
    towards_and_floor = scrapy.Field()  # orientation and floor
    basic_info = scrapy.Field()  # basic house information
    supporting_facilities = scrapy.Field()  # supporting facilities
    description = scrapy.Field()  # listing description
    url = scrapy.Field()  # link to the detail page
编写 Spider 爬取数据
# -*- coding: utf-8 -*-
import scrapy
from items import LianjiaZfItem
class LianjiaSpider(scrapy.Spider):
name = 'lianjia'
allowed_domains = ['lianjia.com']
start_urls = ['https://sz.lianjia.com/zufang/']
def parse(self, response):
house_items = response.xpath('//*[@id="content"]/div[1]/div[1]/child::div')
for house_item in house_items:
href_xpath = './a/@href'
house_url = house_item.xpath(href_xpath