import json
import os

from pymongo import MongoClient


class MyspiderPipeline(object):
    def process_item(self, item, spider):
        if spider.name == "zfcg":
            # Build the output path from the current working directory.
            base_dir = os.getcwd()
            filename = os.path.join(base_dir, 'news.json')
            # Append each item to the JSON file via json.dumps.
            # ensure_ascii=False is required, otherwise non-ASCII text
            # is stored as \u-style escape sequences instead of UTF-8.
            with open(filename, 'a', encoding="utf-8") as f:
                line = json.dumps(dict(item), ensure_ascii=False) + '\n'
                f.write(line)
            print("Item saved!")
        return item
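
# Because the pipeline above appends one JSON object per line (JSON Lines),
# the file can be read back line by line. A minimal sketch, assuming the
# spider has already produced news.json in the working directory:
#
#     import json
#
#     with open('news.json', encoding='utf-8') as f:
#         items = [json.loads(line) for line in f]
#     print(len(items), "items loaded")
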
class MyspiderPipeline1(object):
    def process_item(self, item, spider):
        # Alternative filters, kept here for reference:
        # if isinstance(item, MyspiderItems):
        #     print(item)
        # if spider.name == "zfcgy":
        base_dir = os.getcwd()
        filenames = os.path.join(base_dir, 'news1.json')
        # Append each item to the JSON file via json.dumps.
        # ensure_ascii=False keeps non-ASCII text readable instead of
        # \u-style escape sequences.
        with open(filenames, 'a', encoding="utf-8") as f:
            line = json.dumps(dict(item), ensure_ascii=False) + '\n'
            f.write(line)
        return item
class SaveToMongoPipeline(object):  # save items to MongoDB
    def __init__(self, mongo_url, mongo_db):
        self.mongo_url = mongo_url
        self.mongo_db = mongo_db

    def process_item(self, item, spider):
        if spider.name == "zfcgy":
            # insert_one replaces the deprecated Collection.insert.
            self.db.zfcg.insert_one(dict(item))
            print(item)
            print("Item saved!")
        return item

    def open_spider(self, spider):
        self.client = MongoClient(self.mongo_url)
        self.db = self.client[self.mongo_db]

    def close_spider(self, spider):
        # Scrapy calls this with the spider argument; it was missing here.
        self.client.close()

    @classmethod
    def from_crawler(cls, crawler):
        return cls(
            mongo_url=crawler.settings.get('MONGO_URL'),
            mongo_db=crawler.settings.get('MONGO_DB')
        )
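
# For any of these pipelines to run, they must be enabled in the project's
# settings.py. A minimal sketch: the module path "myspider.pipelines" and
# the MongoDB connection values below are assumptions, not from the source.
#
#     ITEM_PIPELINES = {
#         'myspider.pipelines.MyspiderPipeline': 300,
#         'myspider.pipelines.MyspiderPipeline1': 400,
#         'myspider.pipelines.SaveToMongoPipeline': 500,
#     }
#
#     MONGO_URL = 'mongodb://localhost:27017'  # read by from_crawler()
#     MONGO_DB = 'zfcg'                        # database name is a placeholder
#
# Lower numbers run first; from_crawler() pulls MONGO_URL and MONGO_DB out of
# these settings when the SaveToMongoPipeline is instantiated.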