爬取链家租房频道的房源信息,支持翻页,并抓取每个房源详情页中的内容。
items.py
import scrapy
class ScrapytestItem(scrapy.Item):
    """Fields collected for a single housing listing."""

    title = scrapy.Field()  # listing name
    price = scrapy.Field()  # price
    url = scrapy.Field()  # detail-page URL
    introduce_item = scrapy.Field()  # listing description
pipelines.py
import json
class ScrapytestPipeline:
    """Item pipeline that writes every item as one JSON object per line.

    The output file is opened in ``open_spider``, appended to in
    ``process_item`` and closed in ``close_spider``.
    """

    # Destination file, relative to the current working directory.
    # Override on a subclass or instance to write somewhere else.
    # NOTE(review): the name mentions "58" while the accompanying description
    # talks about Lianjia; kept unchanged so existing output paths still work.
    output_path = '58_chuzu.txt'

    def open_spider(self, spider):
        """Open the output file for writing, truncating any previous content.

        Args:
            spider: Passed in by the caller; not used here.
        """
        self.file = open(self.output_path, 'w', encoding='utf-8')
        print('文件被打开了')

    def process_item(self, item, spider):
        """Serialize ``item`` to JSON and write it as a single line.

        Args:
            item: Any object accepted by ``dict()`` (e.g. a mapping).
            spider: Passed in by the caller; not used here.

        Returns:
            The same ``item`` object, unchanged.
        """
        # ensure_ascii=False keeps non-ASCII text readable in the UTF-8 file
        # instead of escaping it as \uXXXX sequences.
        serialized = json.dumps(dict(item), ensure_ascii=False)
        self.file.write(serialized + '\n')
        return item

    def close_spider(self, spider):
        """Close the output file opened by ``open_spider``.

        Args:
            spider: Passed in by the caller; not used here.
        """
        self.file.close()
        print('文件被关闭了')
spider