# How to automatically encapsulate duplicated Python code:
# Refactor > Extract > Method
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
import pymysql
from scrapy.crawler import Crawler
from demo.items import MovieItem
class MovieItemPipeline:
    """Scrapy item pipeline that batches movie items and bulk-inserts them
    into the MySQL table ``tb_movie``.

    Items are buffered as ``(title, score, motto)`` tuples in ``self.params``
    and flushed with ``executemany`` once ``BATCH_SIZE`` rows accumulate;
    any remainder is flushed when the spider closes.
    """

    # Flush the buffer to the database once this many rows have accumulated.
    BATCH_SIZE = 100

    def __init__(self, host, port, username, password, database, charset):
        # Buffered (title, score, motto) tuples awaiting insertion.
        self.params = []
        # autocommit=True: each executemany batch is persisted immediately,
        # so no explicit commit is needed anywhere in this pipeline.
        self.conn = pymysql.connect(host=host,
                                    port=port,
                                    user=username,
                                    password=password,
                                    database=database,
                                    charset=charset,
                                    autocommit=True)

    @classmethod
    def from_crawler(cls, crawler: Crawler):
        """Alternate constructor: build the pipeline from crawler settings,
        falling back to local-development defaults."""
        settings = crawler.settings
        return cls(
            settings.get('DB_HOST', 'localhost'),
            settings.get('DB_PORT', 3306),
            settings.get('DB_USER', 'root'),
            settings.get('DB_PASS', ''),
            settings.get('DB_NAME', 'demo'),
            settings.get('CHARSET', 'utf8mb4')
        )

    def close_spider(self, spider):
        """Flush any remaining buffered rows, then close the connection."""
        try:
            if self.params:
                self.write_to_db()
        finally:
            # Close even if the final flush raises unexpectedly,
            # so the connection is never leaked.
            self.conn.close()

    def process_item(self, item: MovieItem, spider):
        """Buffer one item; flush when the batch threshold is reached.

        Returns the item unchanged so later pipelines can process it.
        """
        self.params.append((item['title'], item['score'], item['motto']))
        # `>=`, not `==`: if a flush failed, write_to_db leaves the rows
        # buffered, the buffer grows past BATCH_SIZE, and an equality
        # check would never trigger another flush.
        if len(self.params) >= self.BATCH_SIZE:
            self.write_to_db()
        return item

    def write_to_db(self):
        """Bulk-insert the buffered rows into tb_movie.

        On success the buffer is cleared; on pymysql.MySQLError the rows
        stay buffered (best-effort: they will be retried on the next flush).
        """
        try:
            with self.conn.cursor() as cursor:
                cursor.executemany(
                    'insert into tb_movie (movie_title, movie_score, movie_motto) '
                    'values (%s, %s, %s)',
                    self.params
                )
                self.params.clear()
        except pymysql.MySQLError as err:
            # NOTE(review): plain print kept from the original; consider
            # spider/logging integration instead of stdout.
            print(err)