# 操作数据库存储数据是 IO 操作,IO操作比较消耗时间,所以可以异步操作
import pymysql
from pymysql.cursors import DictCursor
from twisted.enterprise import adbapi
class JianshuTwistedSpiderPipeline(object):
    """Item pipeline that stores scraped items in MySQL asynchronously.

    Every item is handed to a Twisted ``adbapi.ConnectionPool`` via
    ``runInteraction``, so the insert is scheduled instead of being executed
    inline in ``process_item``.
    """

    def __init__(self):
        """Create the connection pool used for all inserts."""
        # NOTE(review): connection parameters are hard-coded; consider
        # loading them from the project settings instead.
        dbdata = {
            'host': 'localhost',
            'user': 'root',
            'password': 'root',
            'database': 'jianshu',
            'port': 3306,
            'charset': 'utf8',
            'cursorclass': DictCursor,
        }
        self.dbpool = adbapi.ConnectionPool('pymysql', **dbdata)
        # Cache for the INSERT statement; filled on first access of ``sql``.
        self._sql = None

    @property
    def sql(self):
        """Return the parameterized INSERT statement, building it once."""
        if not self._sql:
            # NOTE(review): the column names ``conllection``,
            # ``conmments_count`` and ``detai_url`` look misspelled; they are
            # kept verbatim because they must match the table definition,
            # which is not visible here -- confirm against the schema.
            self._sql = (
                "\n"
                "insert into detail(id,title,conllection,pub_time,wordage,"
                "views_count,conmments_count,likes_count,detai_url) "
                "values(null,%s,%s,%s,%s,%s,%s,%s,%s)"
                "\n"
            )
        return self._sql

    def process_item(self, item, spider):
        """Schedule the insert for ``item`` and pass the item along.

        Args:
            item: The scraped item to persist.
            spider: The spider that produced the item.

        Returns:
            The same ``item``, so later pipeline stages still receive it.
        """
        defer = self.dbpool.runInteraction(self.insert_item, item)
        # Route failures of the scheduled insert to ``handle_error``.
        defer.addErrback(self.handle_error, item, spider)
        # Without this return, subsequent pipelines would be given ``None``.
        return item

    def handle_error(self, error, item, spider):
        """Print the failure reported for an insert.

        Args:
            error: The failure object passed to the errback.
            item: The item whose insert failed (unused).
            spider: The spider that produced the item (unused).
        """
        print('*'*10, 'error', '*'*10)
        print(error)
        print('*'*10, 'error', '*'*10)

    def insert_item(self, cursor, item):
        """Execute the INSERT for one item on the given cursor.

        Args:
            cursor: Database cursor supplied by ``runInteraction``.
            item: Mapping-like item holding exactly eight values.

        Raises:
            ValueError: If ``item`` does not yield exactly eight values.
        """
        # NOTE(review): this relies on the iteration order of
        # ``item.values()`` matching the column order of the statement --
        # confirm that the spider populates the fields in this order.
        (
            title,
            collection,
            pub_time,
            wordage,
            views_count,
            comments_count,
            likes_count,
            detail_url,
        ) = item.values()
        cursor.execute(
            self.sql,
            (
                title,
                collection,
                pub_time,
                wordage,
                views_count,
                comments_count,
                likes_count,
                detail_url,
            ),
        )