# 一、MongoDB 交互:
import pymongo
class MongoPipeline(object):
    """Item pipeline that stores every processed item in a MongoDB collection.

    The connection URI and the database name are read from the ``MONGO_URI``
    and ``MONGO_DB`` crawler settings (see ``from_crawler``). Items are
    written to the collection named by ``collection_name``.
    """

    # Collection that receives the items. Override in a subclass to store
    # items somewhere else.
    collection_name = 'scrapy_items'

    def __init__(self, mongo_uri, mongo_db):
        """Remember the connection parameters; no connection is opened yet.

        Args:
            mongo_uri: MongoDB connection URI handed to ``MongoClient``.
            mongo_db: Name of the database the items are written to.
        """
        # The attribute names must match what open_spider() reads.
        self.mongo_uri = mongo_uri
        self.mongo_db = mongo_db
        self.client = None
        self.db = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler's settings.

        Args:
            crawler: Object exposing ``settings.get(name)``.

        Returns:
            A new pipeline configured from ``MONGO_URI`` and ``MONGO_DB``.
        """
        # Keyword names must match the __init__ signature exactly.
        return cls(
            mongo_uri=crawler.settings.get('MONGO_URI'),
            mongo_db=crawler.settings.get('MONGO_DB'),
        )

    def open_spider(self, spider):
        """Open the MongoDB client and select the target database."""
        self.client = pymongo.MongoClient(self.mongo_uri)
        self.db = self.client[self.mongo_db]

    def process_item(self, item, spider):
        """Insert ``item`` as one document and hand it on unchanged.

        Args:
            item: Mapping-like item; it is converted with ``dict(item)``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object, so later pipelines can process it.
        """
        # Collection.insert() was removed in PyMongo 4; insert_one() is the
        # supported single-document API.
        self.db[self.collection_name].insert_one(dict(item))
        return item

    def close_spider(self, spider):
        """Close the MongoDB client if open_spider() created one."""
        if self.client is not None:
            self.client.close()
# 二、MySQL 交互:
import pymysql
class MysqlPipeline(object):
    """Item pipeline that inserts every processed item as a row in MySQL.

    Connection parameters are read from the ``MYSQL_HOST``,
    ``MYSQL_DATABASE``, ``MYSQL_USER``, ``MYSQL_PASSWORD`` and ``MYSQL_PORT``
    crawler settings (see ``from_crawler``). The item's keys are used as
    column names and rows go into the table named by ``table_name``.
    """

    # Table that receives the rows. Override in a subclass to store items
    # somewhere else.
    table_name = 'scrapy_items'

    def __init__(self, host, database, user, password, port):
        """Remember the connection parameters; no connection is opened yet.

        Args:
            host: MySQL server host name.
            database: Name of the database to use.
            user: Login user name.
            password: Login password.
            port: Server port as an int or numeric string; ``None`` selects
                the MySQL default port 3306.
        """
        self.host = host
        self.database = database
        self.user = user
        self.password = password
        # pymysql only accepts an int port, but a setting may arrive as a
        # string, so normalise it here.
        self.port = int(port) if port is not None else 3306
        self.db = None
        self.cursor = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler's settings.

        Args:
            crawler: Object exposing ``settings.get(name)``.

        Returns:
            A new pipeline configured from the ``MYSQL_*`` settings.
        """
        settings = crawler.settings
        return cls(
            host=settings.get('MYSQL_HOST'),
            database=settings.get('MYSQL_DATABASE'),
            user=settings.get('MYSQL_USER'),
            password=settings.get('MYSQL_PASSWORD'),
            port=settings.get('MYSQL_PORT'),
        )

    @staticmethod
    def _quote_identifier(name):
        """Return ``name`` wrapped in backticks, doubling embedded backticks."""
        return '`{}`'.format(str(name).replace('`', '``'))

    def open_spider(self, spider):
        """Open the MySQL connection and create the cursor used for inserts."""
        # Keyword arguments: PyMySQL 1.0+ no longer accepts these
        # positionally. 'utf8mb4' is MySQL's full UTF-8 charset; 'utf-8' is
        # not a valid MySQL charset name.
        self.db = pymysql.connect(
            host=self.host,
            user=self.user,
            password=self.password,
            database=self.database,
            port=self.port,
            charset='utf8mb4',
        )
        self.cursor = self.db.cursor()

    def process_item(self, item, spider):
        """Insert ``item`` as one row and hand it on unchanged.

        Column names are taken from the item's keys and quoted as
        identifiers; the values are passed as query parameters.

        Args:
            item: Mapping-like item; it is converted with ``dict(item)``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object, so later pipelines can process it.

        Raises:
            Exception: Whatever the cursor or connection raised; the
                transaction is rolled back before the error propagates.
        """
        data = dict(item)
        # Quote identifiers so that field names which are reserved words
        # (e.g. "order") still produce valid SQL.
        columns = ', '.join(self._quote_identifier(key) for key in data)
        placeholders = ', '.join(['%s'] * len(data))
        sql = 'INSERT INTO {} ({}) VALUES ({})'.format(
            self._quote_identifier(self.table_name), columns, placeholders
        )
        try:
            self.cursor.execute(sql, tuple(data.values()))
            self.db.commit()
        except Exception:
            # Leave the connection usable for the next item, then let the
            # caller see the failure.
            self.db.rollback()
            raise
        return item

    def close_spider(self, spider):
        """Release the cursor and the connection if open_spider() made them."""
        if self.cursor is not None:
            self.cursor.close()
        if self.db is not None:
            self.db.close()