a65420321a
203 天前
@zdnyp
from .settings import ITEM_KEY
import json, redis
class RedisPipeline:
    """Scrapy item pipeline that appends every scraped item to a Redis list.

    Connection parameters are read from the crawler settings
    (``REDIS_HOST``, ``REDIS_PORT``, ``REDIS_DB``). The connection pool is
    created when a spider opens and released when it closes. Each item is
    serialized to JSON and pushed with RPUSH onto the list named by
    ``ITEM_KEY``.
    """

    def __init__(self, redis_host, redis_port, redis_db):
        """Store the connection parameters; no connection is made here.

        Args:
            redis_host: Host name or address of the Redis server.
            redis_port: TCP port of the Redis server.
            redis_db: Index of the Redis database to use.
        """
        self.redis_host = redis_host
        self.redis_port = redis_port
        self.redis_db = redis_db
        # Both are created in open_spider(); they stay None until then so
        # that close_spider() is safe even if the spider never opened.
        self.pool = None
        self.conn = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from ``crawler.settings``.

        Missing settings fall back to the usual Redis defaults
        (``localhost``, ``6379``, database ``0``) instead of passing
        ``None`` through to the client.
        """
        settings = crawler.settings
        return cls(
            redis_host=settings.get('REDIS_HOST', 'localhost'),
            redis_port=settings.get('REDIS_PORT', 6379),
            redis_db=settings.get('REDIS_DB', 0),
        )

    def open_spider(self, spider):
        """Create the connection pool and the client bound to it."""
        self.pool = redis.ConnectionPool(
            host=self.redis_host,
            port=self.redis_port,
            db=self.redis_db,
        )
        self.conn = redis.StrictRedis(connection_pool=self.pool)
        print('#### pipelines.open_spider')

    def close_spider(self, spider):
        """Release the pooled connections opened for this spider."""
        if self.pool is not None:
            self.pool.disconnect()
        self.pool = None
        self.conn = None

    def process_item(self, item, spider):
        """Push ``item`` as JSON onto the ``ITEM_KEY`` list and return it.

        Returns:
            The unmodified ``item`` so later pipelines still receive it.
        """
        # scrapy.Item objects are mapping-like but not dict subclasses, so
        # json.dumps() rejects them directly; dict() accepts both Item
        # objects and plain dict items.
        self.conn.rpush(ITEM_KEY, json.dumps(dict(item)))
        return item
这样写没错吧?
我没搞懂的是:在 middleware 和 spider 里面,要怎么访问这个 pipeline 里的 self.conn 呢?