使用 Scrapy 爬取数据,并通过 Twisted 的 adbapi 以异步方式将数据保存到 MySQL 数据库时,运行报错如下:
2020-06-03 17:28:37 [scrapy.core.engine] INFO: Spider closed (finished)
[Failure instance: Traceback: <class 'AttributeError'>: 'NoneType' object has no attribute 'encoding'
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/threading.py:916:_bootstrap_inner
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/threading.py:864:run
/Users/gengli/code/venv/lib/python3.6/site-packages/twisted/_threads/_threadworker.py:46:work
/Users/gengli/code/venv/lib/python3.6/site-packages/twisted/_threads/_team.py:190:doWork
--- <exception caught here> ---
/Users/gengli/code/venv/lib/python3.6/site-packages/twisted/python/threadpool.py:250:inContext
/Users/gengli/code/venv/lib/python3.6/site-packages/twisted/python/threadpool.py:266:<lambda>
/Users/gengli/code/venv/lib/python3.6/site-packages/twisted/python/context.py:122:callWithContext
/Users/gengli/code/venv/lib/python3.6/site-packages/twisted/python/context.py:85:callWithContext
/Users/gengli/code/venv/lib/python3.6/site-packages/twisted/enterprise/adbapi.py:460:_runInteraction
/Users/gengli/code/venv/lib/python3.6/site-packages/twisted/enterprise/adbapi.py:36:__init__
/Users/gengli/code/venv/lib/python3.6/site-packages/twisted/enterprise/adbapi.py:76:reconnect
/Users/gengli/code/venv/lib/python3.6/site-packages/twisted/enterprise/adbapi.py:427:connect
/Users/gengli/code/venv/lib/python3.6/site-packages/pymysql/__init__.py:94:Connect
/Users/gengli/code/venv/lib/python3.6/site-packages/pymysql/connections.py:283:__init__
相关代码如下:
class MysqlTwistedPipeline(object):
    """Scrapy item pipeline that stores items in MySQL via a connection pool.

    Each insert is submitted through ``dbpool.runInteraction`` so the
    database work is handed to the pool instead of being executed inline
    in ``process_item``.
    """

    def __init__(self, dbpool):
        """Keep the connection pool used for all inserts.

        :param dbpool: pool object exposing ``runInteraction``.
        """
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        """Build the pipeline from the ``MYSQL_*`` project settings.

        :param settings: mapping providing ``MYSQL_HOST``, ``MYSQL_DBNAME``,
            ``MYSQL_USER`` and ``MYSQL_PASSWORD``.
        :return: a pipeline instance bound to a new connection pool.
        """
        from pymysql.cursors import DictCursor

        dbparms = dict(
            host=settings['MYSQL_HOST'],
            db=settings['MYSQL_DBNAME'],
            user=settings['MYSQL_USER'],
            passwd=settings['MYSQL_PASSWORD'],
            # pymysql expects the MySQL charset name ('utf8'), not the Python
            # codec name ('utf-8'). The unknown name made connecting fail with
            # "'NoneType' object has no attribute 'encoding'".
            charset='utf8',
            cursorclass=DictCursor,
            use_unicode=True,
        )
        # NOTE(review): ``adbapi`` is assumed to be imported at module level
        # (twisted.enterprise, per the traceback); the import is not visible
        # in this excerpt.
        dbpool = adbapi.ConnectionPool('pymysql', **dbparms)
        return cls(dbpool)

    def process_item(self, item, spider):
        """Schedule the insert for ``item`` and pass the item on.

        :param item: the scraped item (mapping-like, read with ``.get``).
        :param spider: the spider that produced the item.
        :return: ``item`` unchanged, so later pipelines still receive it.
        """
        query = self.dbpool.runInteraction(self.do_insert, item)
        query.addErrback(self.hand_error, item, spider)
        # Returning the item keeps the pipeline chain intact; without it the
        # next pipeline stage would receive None.
        return item

    def hand_error(self, failure, item, spider):
        """Errback for a failed insert: print the failure.

        :param failure: the failure object describing the error.
        :param item: the item whose insert failed.
        :param spider: the spider that produced the item.
        :return: None
        """
        print(failure)

    def do_insert(self, cursor, item):
        """Insert one article row built from ``item``.

        Missing fields fall back to an empty string, ``0`` for the counters,
        or the fixed date ``'2020-06-03'`` for ``create_date``.

        :param cursor: database cursor exposing ``execute(sql, params)``.
        :param item: the scraped item (mapping-like, read with ``.get``).
        :return: None
        """
        insert_sql = '''insert into jobbole_article
        (title,url,url_object_id,front_image_url,front_image_path,parise_nums,comment_nums,fav_nums,tags,content,create_date)
        values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        '''

        # The column holds one comma-separated string. Normalise so that a
        # missing/empty value becomes '' (not a raw list) and a plain string
        # is stored as-is rather than split into characters by join().
        image_urls = item.get('front_image_url') or []
        if isinstance(image_urls, str):
            front_image_url = image_urls
        else:
            front_image_url = ','.join(image_urls)

        params = (
            item.get('title', ''),
            item.get('url', ''),
            item.get('url_object_id', ''),
            front_image_url,
            item.get('front_image_path', ''),
            item.get('parise_nums', 0),
            item.get('comment_nums', 0),
            item.get('fav_nums', 0),
            item.get('tags', ''),
            item.get('content', ''),
            item.get('create_date', '2020-06-03'),
        )
        cursor.execute(insert_sql, params)
解决办法是:把连接参数中的 charset 由 'utf-8' 改为 'utf8'(pymysql 需要的是 MySQL 的字符集名称,而不是 Python 的编码名称):
@classmethod
def from_settings(cls, settings):
    """Build the pymysql connection parameters from the ``MYSQL_*`` settings.

    :param settings: mapping providing ``MYSQL_HOST``, ``MYSQL_DBNAME``,
        ``MYSQL_USER`` and ``MYSQL_PASSWORD``.
    """
    from pymysql.cursors import DictCursor

    dbparms = dict(
        host=settings['MYSQL_HOST'],
        db=settings['MYSQL_DBNAME'],
        user=settings['MYSQL_USER'],
        passwd=settings['MYSQL_PASSWORD'],
        # Was 'utf-8': pymysql expects the MySQL charset name 'utf8', not the
        # Python codec name.
        charset='utf8',
        cursorclass=DictCursor,
        use_unicode=True,
    )
    # NOTE(review): the excerpt ends here; the remainder of the method is not
    # shown in this snippet.