from twisted.enterprise import adbapi
class MySQLAsyncPipeline:
    """Scrapy pipeline that writes items to MySQL asynchronously.

    Uses a Twisted ``adbapi.ConnectionPool`` so INSERTs run in a thread
    pool and do not block the crawl.
    """

    def open_spider(self, spider):
        """Create the MySQL connection pool from the spider's settings."""
        settings = spider.settings
        db = settings.get('MYSQL_DB_NAME', 'scrapy_default')
        host = settings.get('MYSQL_HOST', 'localhost')
        # getint: a port set via env/CLI arrives as a string; the DB
        # driver needs an int (plain .get() returned it unconverted).
        port = settings.getint('MYSQL_PORT', 3306)
        user = settings.get('MYSQL_USER', 'root')
        passwd = settings.get('MYSQL_PASSWORD', 'root')
        self.dbpool = adbapi.ConnectionPool(
            'MySQLdb', host=host, db=db, user=user, passwd=passwd,
            port=port, charset='utf8',
        )

    def close_spider(self, spider):
        """Shut down the connection pool when the spider finishes."""
        self.dbpool.close()

    def process_item(self, item, spider):
        """Queue an asynchronous insert for *item* and pass it on.

        The Deferred returned by ``runInteraction`` previously had no
        errback attached, so database failures were silently dropped;
        attach one that logs the failure.
        """
        d = self.dbpool.runInteraction(self.insert_db, item)
        d.addErrback(self._log_insert_error, item, spider)
        return item

    def _log_insert_error(self, failure, item, spider):
        # Surface DB errors in the spider log instead of losing them.
        spider.logger.error('MySQL insert failed for %r: %s', item, failure)

    def insert_db(self, tx, item):
        """Run the parameterized INSERT inside a pool transaction."""
        values = (
            item['f1'],
            item['f2'],
        )
        # Parameterized query: values are escaped by the DB driver.
        sql = 'INSERT INTO books VALUES (%s,%s)'
        tx.execute(sql, values)
# Redis storage pipeline
import redis
from scrapy import Item
class RedisPipeline:
    """Scrapy pipeline that stores each item as a Redis hash.

    Keys are ``book:<n>`` with *n* a per-run counter starting at 1.
    """

    def open_spider(self, spider):
        """Open the Redis connection and reset the item counter."""
        settings = spider.settings
        db_host = settings.get('REDIS_HOST', 'localhost')
        # getint: settings supplied via env/CLI arrive as strings;
        # redis expects integer port / db index.
        db_port = settings.getint('REDIS_PORT', 6379)
        db_index = settings.getint('REDIS_DB_INDEX', 0)
        self.db_conn = redis.StrictRedis(host=db_host, port=db_port, db=db_index)
        self.item_i = 0

    def close_spider(self, spider):
        """Release every pooled Redis connection."""
        self.db_conn.connection_pool.disconnect()

    def process_item(self, item, spider):
        """Persist *item* to Redis and pass it along unchanged."""
        self.insert_db(item)
        return item

    def insert_db(self, item):
        """Store *item* as a hash under a fresh ``book:<n>`` key.

        ``hmset`` is deprecated since redis-py 3.5 and removed in 4.x;
        ``hset(name, mapping=...)`` is the supported equivalent.
        """
        if isinstance(item, Item):
            item = dict(item)
        self.item_i += 1
        self.db_conn.hset('book:%s' % self.item_i, mapping=item)