为什么要用异步存储?
因为 Scrapy 的下载(抓取)速度通常高于数据库的写入速度,如果在 pipeline 中同步写库,就会阻塞爬虫;改用异步存储可以避免这种阻塞。(个人理解,仍在完善中。)
import logging

import pymysql
from twisted.enterprise import adbapi
class TwistedMysqlPipeline:
    """Scrapy item pipeline that stores items in MySQL via a Twisted pool.

    Inserts are dispatched through ``pool.runInteraction`` so that
    ``process_item`` does not wait for the database write to finish.
    """

    def __init__(self, pool):
        """Keep a reference to the connection pool used for all inserts.

        Args:
            pool: Object exposing ``runInteraction(callable, *args)`` that
                returns a deferred-like object with ``addErrback``.
        """
        self.pool = pool

    # @classmethod declares a class method (as opposed to the usual instance
    # method): ``cls`` is the class itself, whereas ``self`` is an instance.
    @classmethod
    def from_settings(cls, settings):
        """Build the pipeline from the ``HOST``/``USER``/``PASSWORD``/``DB`` settings.

        Args:
            settings: Mapping-like object holding the connection settings.

        Returns:
            A new pipeline instance wrapping an ``adbapi.ConnectionPool``.
        """
        parm = dict(
            host=settings['HOST'],
            user=settings['USER'],
            password=settings['PASSWORD'],
            db=settings['DB'],
            charset='utf8',
            cursorclass=pymysql.cursors.DictCursor,
        )
        pool = adbapi.ConnectionPool('pymysql', **parm)
        return cls(pool)

    def process_item(self, item, spider):
        """Schedule the insert for ``item`` and pass the item along.

        Args:
            item: The scraped item; must provide ``id``, ``title``, ``genre``.
            spider: The spider that produced the item.

        Returns:
            The same ``item``, so later pipeline stages still receive it.
        """
        # Hand the insert to the pool; errors surface through the errback.
        query = self.pool.runInteraction(self.do_insert, item)
        query.addErrback(self.handle_error, item, spider)
        # The original fell through and returned None, which would hand
        # None to every subsequent pipeline stage.
        return item

    def do_insert(self, cursors, item):
        """Insert one item into ``tb_info`` using the supplied cursor.

        Args:
            cursors: DB cursor passed in by ``runInteraction``.
            item: The item providing ``id``, ``title`` and ``genre``.
        """
        sql = """
            insert into tb_info (id, title, genre)
            values (%s, %s, %s)
        """
        # Parameterized query: values are bound by the driver, not formatted
        # into the SQL string.
        cursors.execute(sql, (item['id'], item['title'], item['genre']))

    # Backward-compatible alias for the original (misspelled) method name.
    do_insect = do_insert

    def handle_error(self, failure, item, spider):
        """Log a failed insert.

        Returns ``None`` (like the original), so the failure is treated as
        handled and is not propagated further down the errback chain.

        Args:
            failure: The failure object delivered to the errback.
            item: The item whose insert failed.
            spider: The spider that produced the item.
        """
        logging.getLogger(__name__).error(
            "Failed to store item %r: %s", item, failure
        )