# How to automatically encapsulate duplicated Python code:
# Refactor > Extract > Method
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
import pymysql
from scrapy.crawler import Crawler
from demo.items import MovieItem
class MovieItemPipeline:
    """Scrapy item pipeline that batches movie items and bulk-inserts them
    into the MySQL table ``tb_movie``.

    Items are buffered as ``(title, score, motto)`` tuples in ``self.params``
    and flushed with ``executemany`` once ``BATCH_SIZE`` rows accumulate;
    any remainder is flushed when the spider closes.
    """

    # Flush the buffer to the database once this many rows have accumulated.
    BATCH_SIZE = 100

    def __init__(self, host, port, username, password, database, charset):
        # Buffered (title, score, motto) tuples awaiting insertion.
        self.params = []
        # autocommit=True: each executemany batch is persisted immediately,
        # so no explicit commit is needed anywhere in this pipeline.
        self.conn = pymysql.connect(host=host,
                                    port=port,
                                    user=username,
                                    password=password,
                                    database=database,
                                    charset=charset,
                                    autocommit=True)

    @classmethod
    def from_crawler(cls, crawler: Crawler):
        """Alternate constructor: build the pipeline from crawler settings,
        falling back to local-development defaults."""
        settings = crawler.settings
        return cls(
            settings.get('DB_HOST', 'localhost'),
            settings.get('DB_PORT', 3306),
            settings.get('DB_USER', 'root'),
            settings.get('DB_PASS', ''),
            settings.get('DB_NAME', 'demo'),
            settings.get('CHARSET', 'utf8mb4')
        )

    def close_spider(self, spider):
        """Flush any remaining buffered rows, then close the connection."""
        try:
            if self.params:
                self.write_to_db()
        finally:
            # Close even if the final flush raises unexpectedly,
            # so the connection is never leaked.
            self.conn.close()

    def process_item(self, item: MovieItem, spider):
        """Buffer one item; flush when the batch threshold is reached.

        Returns the item unchanged so later pipelines can process it.
        """
        self.params.append((item['title'], item['score'], item['motto']))
        # `>=`, not `==`: if a flush failed, write_to_db leaves the rows
        # buffered, the buffer grows past BATCH_SIZE, and an equality
        # check would never trigger another flush.
        if len(self.params) >= self.BATCH_SIZE:
            self.write_to_db()
        return item

    def write_to_db(self):
        """Bulk-insert the buffered rows into tb_movie.

        On success the buffer is cleared; on pymysql.MySQLError the rows
        stay buffered (best-effort: they will be retried on the next flush).
        """
        try:
            with self.conn.cursor() as cursor:
                cursor.executemany(
                    'insert into tb_movie (movie_title, movie_score, movie_motto) '
                    'values (%s, %s, %s)',
                    self.params
                )
                self.params.clear()
        except pymysql.MySQLError as err:
            # NOTE(review): plain print kept from the original; consider
            # spider/logging integration instead of stdout.
            print(err)