python3 scrapy + adbapi 修改和添加数据到MySQL

 说明:主要使用 twisted.enterprise.adbapi 与 pymysql 在 scrapy pipelines 中的应用。包含查询,修改以及新增。

1、pipelines.py


import pymysql
from twisted.enterprise import adbapi
from NCSpider.items import NCItem


class MysqlPipeline(object):

    def __init__(self, dbpool):
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        """
        连接MySQL数据库
        :param settings: 
        :return: 
        """
        params = dict(
            host=settings["MYSQL_HOST"],
            port=settings['MYSQL_PORT'],
            db=settings["MYSQL_DB"],
            user=settings["MYSQL_USER"],
            password=settings["MYSQL_PASSWORD"],
            charset='utf8mb4',
            cursorclass=pymysql.cursors.DictCursor,
            use_unicode=True,
        )
        dbpool = adbapi.ConnectionPool("pymysql", **params)
        return cls(dbpool=dbpool)

    @staticmethod
    def get_update_sql(item, table_name, _id):
        """
        获取修改数据库sql
        :param item: 
        :param table_name: 
        :param _id: 数据id
        :return: 
        """
        v = ','.join(["{}=%s".format(k) for k in item.keys()])
        sql = """update `{0}` set {1} where id={2}""".format(table_name, v, _id)
        return sql

    def _do_novel_update(self, cursor, table_name, item, spider):
        """
        修改novel表
        :param cursor: 
        :param table_name: 
        :param item: 
        :param spider: 
        :return: 
        """
        sql = """select id from `{0}` where novel_id='{1}' limit 1;""".format(table_name, item['novel_id'])
        cursor.execute(sql)
        # result 是 字典类型
        result = cursor.fetchone()
        if result:
            update_sql = self.get_update_sql(item, table_name, result['id'])
            cursor.execute(update_sql, tuple(item.values()))
            spider.logger.info('update novel Item2Sql over -- ' + item['novel_en_name'])

    def _do_comment_update_or_insert(self, cursor, table_name, item, spider):
        """
        修改comments 表
        :param cursor: 
        :param table_name: 
        :param item: 
        :param spider: 
        :return: 
        """
        sql = """select id, vote_num from `{0}` where comment_id='{1}' limit 1;""".format(table_name, item['comment_id'])
        cursor.execute(sql)
        # result 是 字典类型
        result = cursor.fetchone()
        if result:
            if str(result['vote_num']) != str(item['vote_num']):
                update_sql = self.get_update_sql(item, table_name, result['id'])
                cursor.execute(update_sql, tuple(item.values()))
                spider.logger.info('update comments Item2Sql over -- ' + item['comment_id'])
        else:

            insert_sql = self.get_insert_sql(table_name, item)
            cursor.execute(insert_sql)
            spider.logger.info('save comments Item2Sql over -- ' + item['comment_id'])

    @staticmethod
    def get_insert_sql(table_name, item):
        """
        获取插入数据库sql
        :param table_name: 
        :param item: 
        :return: 
        """
        keys = ""
        values = ""
        for key, value in item.items():
            keys += "`" + key + "`,"
            if type(value) in [int, float]:
                values += str(value) + ","
            else:
                if value is None:
                    values += " null,"
                else:
                    values += "'" + pymysql.escape_string(value) + "',"
        sql = "INSERT INTO `" + table_name + "` (" + keys[0:-1] + ") VALUES (" + values[0:-1] + ");"
        return sql

    def process_item(self, item, spider):
        try:
            if isinstance(item, NCItem):
                comment_table = 'comments'
                d = self.dbpool.runInteraction(self._do_comment_update_or_insert, comment_table, item, spider)
                d.addErrback(self._handle_error, item, spider)
            else:
                table = 'novel'
                d = self.dbpool.runInteraction(self._do_novel_update, table, item, spider)
                d.addErrback(self._handle_error, item, spider)

        except Exception as e:
            spider.logger.error('save Item2Sql Error -- ' + str(e))
        return item

    @staticmethod
    def _handle_error(failure, item, spider):
        """Handle occurred on db interaction."""
        spider.logger.error(failure)

注意事项:在使用查询语句时,fetchone() 返回的是字典类型,查询字段作为key。

2、 settings.py 主要配置

# mysql 配置
MYSQL_HOST = 'localhost'
MYSQL_PORT = 3306
MYSQL_USER = 'root'
MYSQL_PASSWORD = ''
MYSQL_DB = ''

ITEM_PIPELINES = {
    'NCSpider.pipelines.MysqlPipeline': 300,    
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值