# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
import MySQLdb
# MySQL connection parameters used by TutorialPipeline below.
# NOTE(review): hard-coded credentials with an empty root password — fine for
# local testing only; move these into settings.py for a real deployment.
DBKWARGS={'db':'test','user':'root', 'passwd':'',
'host':'localhost','use_unicode':True, 'charset':'utf8'}
class TutorialPipeline(object):
    """Scrapy item pipeline that stores scraped dmoz book items in MySQL.

    Opens a single connection at startup (from the module-level DBKWARGS)
    and reuses it for every item.
    """

    def __init__(self):
        # Keep self.con defined even when the connect fails, so later use
        # raises a clear error instead of AttributeError.
        self.con = None
        try:
            self.con = MySQLdb.connect(**DBKWARGS)
        except Exception as e:
            print("Connect db error:", e)

    def process_item(self, item, spider):
        """Insert one item into the dmoz_book table and return the item.

        Rolls back on insert failure, commits on success; the item is
        returned either way so downstream pipelines still see it.
        """
        cur = self.con.cursor()
        # Parameterized query: the driver escapes the values, so scraped
        # content cannot inject SQL.
        sql = "insert into dmoz_book values(%s,%s,%s)"  # target table: dmoz_book
        # Item fields are lists of strings; join each into a single string
        # (title, link, description).
        lis = (''.join(item["title"]), ''.join(item["link"]), ''.join(item["desc"]))
        try:
            cur.execute(sql, lis)
        except Exception as e:
            print("Insert error:", e)
            self.con.rollback()
        else:
            self.con.commit()
        finally:
            # Always release the cursor, even if rollback/commit raised.
            cur.close()
        return item

    def __del__(self):
        # Best-effort cleanup; __del__ timing is not guaranteed — a Scrapy
        # close_spider() hook would be more reliable. TODO(review): confirm.
        try:
            if self.con is not None:
                self.con.close()
        except Exception as e:
            print("Close db error", e)
Of course, this can also be written more concisely:
import MySQLdb
# The DBKWARGS connection parameters below can instead be defined in settings.py
# database connection parameters
#DBKWARGS={'db':'ippool','user':'root', 'passwd':'toor','host':'localhost','use_unicode':True, 'charset':'utf8'}
class CollectipsPipeline(object):
    """Store scraped proxy records in the MySQL `proxy` table.

    Connection parameters are read from the DBKWARGS entry in the spider's
    settings, so nothing is hard-coded in the pipeline itself.
    """

    def process_item(self, item, spider):
        """Open a connection, insert one proxy row, and return the item.

        A fresh connection per item is simple but slow; fine for small
        crawls — consider a pooled connection for large ones.
        """
        DBKWARGS = spider.settings.get('DBKWARGS')
        con = MySQLdb.connect(**DBKWARGS)
        cur = con.cursor()
        # Parameterized insert: the driver escapes scraped values.
        sql = ("insert into proxy(IP,PORT,TYPE,POSITION,SPEED,LAST_CHECK_TIME) "
               "values(%s,%s,%s,%s,%s,%s)")
        lis = (item['IP'], item['PORT'], item['TYPE'], item['POSITION'],
               item['SPEED'], item['LAST_CHECK_TIME'])
        try:
            cur.execute(sql, lis)
        except Exception as e:
            print("Insert error:", e)
            con.rollback()
        else:
            con.commit()
        finally:
            # Close even when rollback/commit raised, so error paths do not
            # leak the cursor or the connection.
            cur.close()
            con.close()
        return item