我正在使用 Scrapy,并试图把 spider 抓取到的数据保存到 MySQL 数据库中。我想通过管道(pipeline)来实现这一点,但一直没有成功。下面是我的管道代码:
from scrapy import log
from scrapy.core.exceptions import DropItem
from twisted.enterprise import adbapi
import time
import MySQLdb.cursors
class FilterWordsPipeline(object):
    """Drop items whose description contains a forbidden word.

    Each entry of ``words_to_filter`` is tested as a substring of the
    lowercased text of ``item['description']``.
    """

    # Keep every entry lowercase: the description is lowercased before the
    # substring test, so an uppercase entry could never match.
    words_to_filter = ['politics', 'religion']

    def process_item(self, spider, item):
        """Return ``item`` unchanged, or raise ``DropItem`` if it is filtered.

        :param spider: the spider that produced the item (only printed).
        :param item: object supporting ``item['description']``.
        :raises DropItem: if any forbidden word occurs in the description.
        """
        # Debug output kept from the original; the parenthesised form prints
        # the same text under the Python 2 print statement.
        print(spider)
        # Lowercase once instead of once per forbidden word.
        description = unicode(item['description']).lower()
        for word in self.words_to_filter:
            if word in description:
                raise DropItem("Contains forbidden word: %s" % word)
        # Only reached after *every* word has been checked; returning from
        # inside the loop would let later forbidden words slip through.
        return item
class MySQLStorePipeline(object):
    """Item pipeline that stores items in the ``scrapytest`` MySQL table.

    Database work is handed to a Twisted ``adbapi`` connection pool, and an
    item is inserted only when no row with the same ``link`` exists yet.
    """

    def __init__(self):
        """Create the connection pool used for all database interactions."""
        # @@@ hardcoded db settings
        # TODO: make settings configurable through settings
        # The first positional argument of ConnectionPool is the name of the
        # DB-API module to use; the server address belongs in ``host``.
        self.dbpool = adbapi.ConnectionPool(
            'MySQLdb',
            host='adress_to_db',
            db='my_db',
            user='my_user',
            passwd='my_pw',
            cursorclass=MySQLdb.cursors.DictCursor,
            charset='utf8',
            use_unicode=True
        )

    def process_item(self, spider, item):
        """Schedule the database write for ``item`` and pass the item on.

        :param spider: the spider that produced the item (unused here).
        :param item: object whose ``title``, ``link`` and ``desc`` values
            are indexable; element ``[0]`` of each is stored.
        :returns: ``item`` itself, returned right after scheduling the write.
        """
        # run db query in thread pool
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        # Failures are only logged; they never propagate to the caller.
        query.addErrback(self.handle_error)
        return item

    def _conditional_insert(self, tx, item):
        """Insert ``item`` unless a row with the same link already exists.

        :param tx: cursor-like transaction object passed by ``runInteraction``.
        :param item: the item to store (see ``process_item``).
        """
        # create record if doesn't exist.
        # all this block runs on its own thread
        tx.execute("select * from scrapytest where link = %s",
                   (item['link'][0], ))
        result = tx.fetchone()
        if result:
            log.msg("Item already stored in db: %s" % item, level=log.DEBUG)
        else:
            # `desc` is a reserved word in MySQL, so the column name has to
            # be backtick-quoted or the statement fails with a syntax error.
            tx.execute(
                "insert into scrapytest (title, link, `desc`) "
                "values (%s, %s, %s)",
                (item['title'][0],
                 item['link'][0],
                 item['desc'][0])
            )
            log.msg("Item stored in db: %s" % item, level=log.DEBUG)

    def handle_error(self, e):
        """Errback for failed interactions: log the failure ``e``."""
        log.err(e)
我得到的错误信息是:
^{pr2}$
我不知道该从哪里入手,非常感谢你的帮助!