redis语法,python使用redis_郑*杰的博客-CSDN博客
python-pymongo模块_郑*杰的博客-CSDN博客
python操作mysql数据库_郑*杰的博客-CSDN博客
基本步骤:python—scrapy数据解析、存储_郑*杰的博客-CSDN博客
正文:
当前文件:D:\python_test\scrapyProject\scrapyProject\settings.py
ITEM_PIPELINES = {
# Lower number = higher priority; items flow through pipelines in this order.
# NOTE(review): the keys reference package 'xiaoshuoPro' but the file path shown
# is scrapyProject\settings.py — confirm the dotted paths match the real package.
'xiaoshuoPro.pipelines.MysqlPipeline': 300,
'xiaoshuoPro.pipelines.RedisPipeLine': 301,
'xiaoshuoPro.pipelines.MongoPipeline': 302,
}
当前文件:D:\python_test\scrapyProject\scrapyProject\pipelines.py
# Standard library
import json

# Third-party
import pymongo
import pymysql
import redis
from itemadapter import ItemAdapter
# 数据存储到mysql
class MysqlPipeline:
    """Persist scraped items into the MySQL table `test.xiaoshuo`."""

    def open_spider(self, spider):
        """Open one shared connection and cursor when the spider starts."""
        self.conn = pymysql.Connect(
            host='127.0.0.1',
            port=3306,
            user='root',
            password='root',
            db='test',
            charset='utf8'
        )
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        """Insert the item's title, then hand the item to the next pipeline.

        The original built the SQL with an f-string, which is vulnerable to
        SQL injection and breaks on any title containing a double quote.
        A parameterized query lets the driver escape the value safely.
        """
        sql = 'insert into xiaoshuo (title) values (%s)'
        try:
            self.cursor.execute(sql, (item['title'],))
            self.conn.commit()
            print('成功写入一条数据!')
        except Exception:
            # Don't leave a broken transaction open for the next item.
            self.conn.rollback()
            raise
        # 爬虫文件只会将item提交给优先级最高的管道类。优先级最高的管道类的process_item中需要写return item操作,该操作表示将item对象传递给下一个管道类
        # (Highest-priority pipeline must `return item` so the lower-priority
        # Redis and Mongo pipelines still receive it.)
        return item

    def close_spider(self, spider):
        """Release the cursor and connection when the spider closes."""
        self.cursor.close()
        self.conn.close()
# 数据存储到redis中
class RedisPipeLine:
    """Push scraped items onto the Redis list `xiaoshuo`."""

    def open_spider(self, spider):
        """Connect to the local Redis server when the spider starts."""
        self.conn = redis.Redis(
            host='127.0.0.1',
            port=6379
        )

    def process_item(self, item, spider):
        """LPUSH the item as a JSON string, then pass it on.

        redis-py only accepts bytes/str/int/float values; pushing a Scrapy
        Item (dict-like) directly raises redis.exceptions.DataError on
        current redis-py versions, so serialize to JSON first.
        """
        self.conn.lpush('xiaoshuo', json.dumps(dict(item), ensure_ascii=False))
        print('数据存储redis成功!')
        return item

    def close_spider(self, spider):
        """Close the Redis connection when the spider closes."""
        self.conn.close()
# 数据存储到Mongo中
class MongoPipeline:
    """Insert scraped items into MongoDB collection `test.xiaoshuo`."""

    def open_spider(self, spider):
        """Connect to local MongoDB and select the `test` database."""
        self.conn = pymongo.MongoClient(host='127.0.0.1', port=27017)
        self.db_test = self.conn['test']

    def process_item(self, item, spider):
        """Insert the item's title as one document, then pass the item on.

        The original read item['item_title'], but the sibling MysqlPipeline
        (which sees the same items first) reads item['title'] — so
        'item_title' would raise KeyError. Use the same field name.
        """
        self.db_test['xiaoshuo'].insert_one({'title': item['title']})
        print('插入成功!')
        return item

    def close_spider(self, spider):
        """Close the MongoDB client when the spider closes."""
        self.conn.close()