# -*- coding: utf-8 -*-
import pymongo
from scrapy.exporters import JsonLinesItemExporter  # only used by the commented-out JSON export

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html


class GxrcspiderPipeline(object):
    def open_spider(self, spider):
        # Alternative: write items to a local JSON Lines file instead of MongoDB.
        # (The original passed indent=4 here; it was dropped because indentation
        # would break the one-object-per-line JSON Lines format.)
        # self.f = open('gxrc.json', 'wb')
        # self.exporter = JsonLinesItemExporter(self.f, ensure_ascii=False, encoding='utf-8')
        # self.exporter.start_exporting()

        # Open the MongoDB connection once when the spider starts.
        self.client = pymongo.MongoClient('mongodb://192.168.100.201:27017/')
        self.db = self.client['spider']

    def process_item(self, item, spider):
        # self.exporter.export_item(item)

        # Insert one document per scraped item into the `gxrc` collection
        # of the `spider` database.
        self.db.gxrc.insert_one({
            'position': item['position'],
            'company': item['company'],
            'salary': item['salary'],
            'address': item['address'],
            'publish_time': item['publish_time'],
            'company_type': item['company_type'],
            'position_detail': item['position_detail'],
        })
        return item

    def close_spider(self, spider):
        # self.f.close()
        # Close the MongoDB connection when the spider finishes.
        self.client.close()
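The pipeline reads seven fields from each item, so the spider's item class must define them. A minimal sketch of the matching item definition; the file name items.py and the class name GxrcItem are assumptions, only the field names come from the pipeline code above:

# items.py -- hypothetical item definition matching the fields the pipeline reads
import scrapy

class GxrcItem(scrapy.Item):  # class name is an assumption, not from the original post
    position = scrapy.Field()
    company = scrapy.Field()
    salary = scrapy.Field()
    address = scrapy.Field()
    publish_time = scrapy.Field()
    company_type = scrapy.Field()
    position_detail = scrapy.Field()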
Exporting Scrapy data to MongoDB also requires enabling the pipeline, as the header comment notes: the class must be registered under ITEM_PIPELINES in the project settings.
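A minimal settings.py sketch; the project module name GxrcSpider and the priority value 300 are assumptions:

# settings.py -- enable the pipeline (module path and priority are assumptions)
ITEM_PIPELINES = {
    'GxrcSpider.pipelines.GxrcspiderPipeline': 300,
}

With the pipeline enabled, each run of the spider writes one MongoDB document per scraped item. Since Scrapy items support dict conversion, the seven-field mapping in process_item could also be written as self.db.gxrc.insert_one(dict(item)).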