from twisted.enterprise import adbapi
from MySQLdb.cursors import DictCursor
class MysqlTwistedPipeline:
    """Scrapy item pipeline that stores items in MySQL asynchronously.

    Every insert is handed to a Twisted ``adbapi`` connection pool via
    ``runInteraction``, so ``process_item`` returns immediately instead of
    waiting for the database round trip.
    """

    def __init__(self, adbpool):
        """Keep a reference to the connection pool used for all inserts.

        Args:
            adbpool: Object exposing ``runInteraction(callable, *args)``;
                ``from_settings`` passes an ``adbapi.ConnectionPool``.
        """
        self.adbpool = adbpool

    @classmethod
    def from_settings(cls, settings):
        """Create the pipeline from the project settings.

        Reads ``MYSQL_HOST``, ``MYSQL_DB``, ``MYSQL_USER`` and
        ``MYSQL_PASSWORD`` and opens a MySQLdb-backed connection pool.

        Args:
            settings: Mapping-like settings object supporting ``get`` and
                item access.

        Returns:
            A new pipeline instance bound to the created pool.
        """
        # Prefer the correctly spelled key; fall back to the historical
        # misspelling ("MSQL_PASSWORD") so existing settings keep working.
        password = settings.get("MYSQL_PASSWORD")
        if password is None:
            password = settings["MSQL_PASSWORD"]

        dbparams = dict(
            host=settings["MYSQL_HOST"],
            db=settings["MYSQL_DB"],
            user=settings["MYSQL_USER"],
            password=password,
            charset="utf8",
            cursorclass=DictCursor,
            use_unicode=True,
        )
        adbpool = adbapi.ConnectionPool("MySQLdb", **dbparams)
        return cls(adbpool)

    def process_item(self, item, spider):
        """Schedule the insert for ``item`` and pass the item on unchanged.

        Args:
            item: Scraped item; must support ``get(key, default)``.
            spider: Spider that produced the item (forwarded to the errback).

        Returns:
            The same ``item`` object, so later pipelines still receive it.
        """
        query = self.adbpool.runInteraction(self.do_insert, item)
        # Failures surface later on the returned deferred, not here.
        query.addErrback(self.handle_error, item, spider)
        return item

    def handle_error(self, failure, item, spider):
        """Errback for failed inserts: report the failure on stdout.

        Args:
            failure: Failure object describing what went wrong.
            item: Item whose insert failed (currently unused).
            spider: Spider that produced the item (currently unused).
        """
        print(failure)

    def do_insert(self, cursor, item):
        """Insert or update one ``jobbole_article`` row for ``item``.

        Called by the pool with a cursor-like object. Missing ``title`` or
        ``url`` values are stored as empty strings.

        Args:
            cursor: Object exposing ``execute(sql, params)``.
            item: Scraped item; must support ``get(key, default)``.
        """
        # On a key conflict, refresh exactly the columns being inserted.
        # The previous clause referenced fav_nums, which is not part of
        # this insert, so VALUES(fav_nums) did not carry any scraped data.
        insert_sql = """
            insert into jobbole_article(title, url) values(%s, %s)
            ON DUPLICATE KEY UPDATE title=VALUES(title), url=VALUES(url)
        """
        params = (item.get("title", ""), item.get("url", ""))
        cursor.execute(insert_sql, params)
注意："ON DUPLICATE KEY UPDATE url=VALUES(url)" 表示当主键或唯一索引发生冲突时，不再插入新行，而是用本次要插入的值更新已存在的那条记录。
Python 爬虫框架 Scrapy 学习（MySQL 异步存储）
最新推荐文章于 2021-03-27 17:21:00 发布