在setting中设置MySQL连接信息
HOST='101.201.70.139' MYSQL_DBNAME='anttest' MYSQL_PASSWORD='Myjr678!@#' MYSQL_USER='root' 2、在pipeline中添加mysql异步连接类
from twisted.enterprise import adbapi
import MySQLdb
from MySQLdb import cursors
class mysqlTwistedpipline(object):
def __init__(self,dbpool): self.dbpool=dbpool @classmethod def from_settings(cls,settings): dbparms = dict( host = settings['HOST'], db = settings['MYSQL_DBNAME'], user = settings['MYSQL_USER'], passwd = settings['MYSQL_PASSWORD'], charset='utf8', cursorclass=MySQLdb.cursors.DictCursor, use_unicode=True ) dbpool = adbapi.ConnectionPool('MySQLdb',**dbparms) return cls(dbpool) 3、重写 process_item方法
def process_item(self, item, spider): query = self.dbpool.runInteraction(self.do_insert,item) query.addErrback(self.handle_error) 4.定义错误处理
def handle_error(self,failure): print failure 5、添加向数据库添加数据的方法
def do_insert(self,cursor,item): SQL插入语句 无需输入conn.commit
6、在setting.py中添加item——pipeline路径 #完整文件
# -*- coding: utf-8 -*- # Define your item pipelines here # # Don't forget to add your pipeline to the ITEM_PIPELINES setting # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html from twisted.enterprise import adbapi import MySQLdb from MySQLdb import cursors class ZhongguosoudiPipeline(object): def process_item(self, item, spider): return item class mysqlTwistedpipline(object): def __init__(self,dbpool): self.dbpool=dbpool @classmethod def from_settings(cls,settings): dbparms = dict( host = settings['HOST'], db = settings['MYSQL_DBNAME'], user = settings['MYSQL_USER'], passwd = settings['MYSQL_PASSWORD'], charset='utf8', cursorclass=MySQLdb.cursors.DictCursor, use_unicode=True ) dbpool = adbapi.ConnectionPool('MySQLdb',**dbparms) return cls(dbpool) def process_item(self, item, spider): query = self.dbpool.runInteraction(self.do_insert,item) query.addErrback(self.handle_error) def handle_error(self,failure): print failure def do_insert(self,cursor,item): sql_in = 'insert into land_deals(title,publish_date,province_id,province_name,articles_link,content,create_time,transact_status)VALUES ("%s","%s","%d","%s","%s","%s",now(),"%d")'%(item['title'],item['publish_time'],0,"",item['url'],item['con'].replace('"','\''),1) cursor.execute(sql_in)