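# Convert each item to a dict and store it in MongoDB.
# Only the MongoDB URI (MONGO_URI) and database name (MONGO_DATABASE) need to be set in settings.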
class WeibMongoPipline(object):
    """Scrapy item pipeline that upserts Weibo items into MongoDB.

    The connection URI and database name are read from the crawler settings
    ``MONGO_URI`` and ``MONGO_DATABASE``. ``WeiboItem`` instances are written
    to the ``user`` collection keyed by ``id``; ``WeiboContentItem`` instances
    are written to the ``content`` collection keyed by ``cont_id``.
    """

    def __init__(self, mongo_uri, mongo_db):
        """Store the connection parameters; no connection is opened here.

        Args:
            mongo_uri: URI passed to ``pymongo.MongoClient``.
            mongo_db: Name of the database to select on the client.
        """
        self.mongo_uri = mongo_uri
        self.mongo_db = mongo_db
        # Populated by open_spider(); kept as None until then so that
        # close_spider() is safe even if the spider never opened.
        self.client = None
        self.db = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from ``crawler.settings``."""
        return cls(
            mongo_uri=crawler.settings.get('MONGO_URI'),
            mongo_db=crawler.settings.get('MONGO_DATABASE'),
        )

    def open_spider(self, spider):
        """Open the MongoDB client and select the configured database."""
        self.client = pymongo.MongoClient(self.mongo_uri)
        self.db = self.client[self.mongo_db]

    def close_spider(self, spider):
        """Close the MongoDB client opened in ``open_spider``, if any."""
        if self.client is not None:
            self.client.close()
            self.client = None
            self.db = None

    def process_item(self, item, spider):
        """Route the item to the matching collection and pass it on.

        Items of any other type are returned untouched.
        """
        if isinstance(item, WeiboItem):
            self._process_use_item(item)
        elif isinstance(item, WeiboContentItem):
            self._process_cont_item(item)
        return item

    @staticmethod
    def _upsert(collection, key, item):
        """Insert or update the document whose ``key`` equals ``item[key]``."""
        # update_one(..., upsert=True) replaces the legacy
        # Collection.update(filter, doc, True), which PyMongo 4 removed.
        collection.update_one(
            {key: item[key]}, {'$set': dict(item)}, upsert=True
        )

    def _process_use_item(self, item):
        """Upsert a user item into the ``user`` collection, keyed by ``id``."""
        # Deduplicate by id; the attribute after ``db`` is the collection name.
        self._upsert(self.db.user, 'id', item)
        print(u'微博用户插入成功')

    def _process_cont_item(self, item):
        """Upsert a content item into ``content``, keyed by ``cont_id``."""
        self._upsert(self.db.content, 'cont_id', item)
        print(u'微博内容插入成功')