网上有很多在 Scrapy 中向 MongoDB 存储数据的例子，但大多没有说明如何使用用户名和密码进行认证。这里自己总结一下，并根据公司项目优化了代码：从 settings 中导入 host、port、password、user、db 等配置信息。
把 MongoDB 配置相关信息放在 Scrapy 项目中的 settings.py 里，如下所示：
# mongoDB配置
# MONGO_HOST = "127.0.0.1" # 主机IP
# MONGO_PORT = 27017 # 端口号
# MONGO_DB = "spider" # 库名
# MONGO_USER = "spider" #用户
# MONGO_PSW = "spider" #密码
pipelines.py文件存储管道如下所示
from urllib.parse import quote_plus

import pymongo
from pymongo import MongoClient
class mongodbPipeline(object):
    """Scrapy item pipeline that stores scraped items in MongoDB.

    Connection settings (host, port, user, password, database) are read
    from the project's settings.py via ``from_crawler``. Each item is
    inserted into the collection named by ``item.table``.
    """

    def __init__(self, MONGO_HOST, MONGO_PORT, MONGO_PSW, MONGO_USER, MONGO_DB):
        # Build an authenticated connection URI. User and password are
        # percent-escaped (quote_plus) as required by the MongoDB URI
        # spec, so special characters like '@' or ':' cannot corrupt it.
        # NOTE: do not print/log this URI — it contains the password.
        mongo_url = 'mongodb://{0}:{1}@{2}:{3}/?authSource={4}&authMechanism=SCRAM-SHA-1'.format(
            quote_plus(str(MONGO_USER)), quote_plus(str(MONGO_PSW)),
            MONGO_HOST, MONGO_PORT, MONGO_DB)
        self.client = MongoClient(mongo_url)
        self.db = self.client[MONGO_DB]  # handle to the target database

    @classmethod
    def from_crawler(cls, crawler):
        """Alternate constructor: pull MongoDB settings from the crawler."""
        return cls(MONGO_HOST=crawler.settings.get('MONGO_HOST'),
                   MONGO_PORT=crawler.settings.get('MONGO_PORT'),
                   MONGO_PSW=crawler.settings.get('MONGO_PSW'),
                   MONGO_USER=crawler.settings.get('MONGO_USER'),
                   MONGO_DB=crawler.settings.get('MONGO_DB'),
                   )

    def process_item(self, item, spider):
        """Insert one item (as a dict) into the collection named by item.table."""
        postItem = dict(item)  # convert the item to a plain dict
        coll = self.db[item.table]
        # insert_one replaces the deprecated Collection.insert
        # (removed in pymongo 4.x).
        coll.insert_one(postItem)
        return item

    def close_spider(self, spider=None):
        # Scrapy calls close_spider(spider); the original signature took
        # no spider argument and would raise TypeError here. Default of
        # None keeps any existing no-arg callers working.
        self.client.close()
最后记得在 settings 中打开 MongoDB 对应的 ITEM_PIPELINES，就可以愉快地把数据保存到 MongoDB 了。