本示例主要涉及两个文件:pipelines.py 和 settings.py。
pipelines.py 文件:
from twisted.enterprise import adbapi
import pymysql
class Www0577HomePipeline(object):
    """Scrapy item pipeline that writes scraped house-listing items to
    MySQL asynchronously through Twisted's adbapi connection pool, so
    database inserts do not block the crawler's event loop."""

    def __init__(self, mysql_config):
        # Build an asynchronous connection pool; each interaction runs in
        # a worker thread with its own DB-API connection/cursor.
        self.dbpool = adbapi.ConnectionPool(
            mysql_config['DRIVER'],
            host=mysql_config['HOST'],
            port=mysql_config['PORT'],
            user=mysql_config['USER'],
            password=mysql_config['PASSWORD'],
            db=mysql_config['DATABASE'],
            # NOTE(review): 'utf8' in MySQL is 3-byte; consider 'utf8mb4'
            # for full Unicode — confirm against the table's charset.
            charset='utf8'
        )

    @classmethod
    def from_crawler(cls, crawler):
        # When from_crawler is overridden, Scrapy calls it to construct the
        # pipeline instance, which gives us access to the project settings.
        mysql_config = crawler.settings['MYSQL_CONFIG']
        return cls(mysql_config)

    def process_item(self, item, spider):
        # runInteraction executes insert_item(cursor, item) in a worker
        # thread and returns a Deferred; attach an errback so insert
        # failures are reported instead of being silently dropped.
        result = self.dbpool.runInteraction(self.insert_item, item)
        result.addErrback(self.insert_error)
        return item

    def insert_item(self, cursor, item):
        # Parameterized INSERT (safe against SQL injection); the first
        # column (id) is null so MySQL assigns the auto-increment value.
        sql = "insert into data(id,name,address,sell_address,price,type,developer,telephone,years,time_dev,time_ok,link) values(null,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        args = (item['name'], item['address'], item['sell_address'],
                item['price'], item['type'], item['developer'],
                item['telephone'], item['years'], item['time_dev'],
                item['time_ok'], item['link'])
        cursor.execute(sql, args)

    def insert_error(self, failure):
        # Best-effort error reporting: print the Twisted Failure so a bad
        # row is visible but does not crash the crawl.
        print("=" * 30)
        print(failure)
        print("=" * 30)

    def close_spider(self, spider):
        # Release every pooled DB connection when the spider finishes.
        self.dbpool.close()
settings.py 文件中需要添加数据库配置信息:
# MySQL connection settings consumed by Www0577HomePipeline.from_crawler.
MYSQL_CONFIG = {
    "DRIVER": "pymysql",    # DB-API driver module name handed to adbapi.ConnectionPool
    "HOST": "127.0.0.1",
    "PORT": 3306,           # must be an integer, not a string
    "USER": "root",
    "PASSWORD": "root",
    "DATABASE": "XXXX",     # replace with the real database name
}
天勤 , 版权所有丨如未注明 , 均为原创丨本网站采用BY-NC-SA协议进行授权
转载请注明原文链接:Scrapy爬虫数据存入到MySQL数据库