import json
import os

from pymongo import MongoClient


class MyspiderPipeline(object):
    def process_item(self, item, spider):
        if spider.name == "zfcg":
            # Build the output path from the current working directory.
            base_dir = os.getcwd()
            filename = os.path.join(base_dir, 'news.json')
            # Append each item to the JSON file via json.dumps.
            # ensure_ascii=False is required, otherwise non-ASCII text
            # is stored as \u-style escape sequences instead of UTF-8.
            with open(filename, 'a', encoding="utf-8") as f:
                line = json.dumps(dict(item), ensure_ascii=False) + '\n'
                f.write(line)
            print("Item saved!")
        return item
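
# Because the pipeline above appends one JSON object per line (JSON Lines),
# the file can be read back line by line. A minimal sketch, assuming the
# spider has already produced news.json in the working directory:
#
#     import json
#
#     with open('news.json', encoding='utf-8') as f:
#         items = [json.loads(line) for line in f]
#     print(len(items), "items loaded")
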
class MyspiderPipeline1(object):
    def process_item(self, item, spider):
        # Alternative filters, kept here for reference:
        # if isinstance(item, MyspiderItems):
        #     print(item)
        # if spider.name == "zfcgy":
        base_dir = os.getcwd()
        filenames = os.path.join(base_dir, 'news1.json')
        # Append each item to the JSON file via json.dumps.
        # ensure_ascii=False keeps non-ASCII text readable instead of
        # \u-style escape sequences.
        with open(filenames, 'a', encoding="utf-8") as f:
            line = json.dumps(dict(item), ensure_ascii=False) + '\n'
            f.write(line)
        return item
class SaveToMongoPipeline(object):  # save items to MongoDB
    def __init__(self, mongo_url, mongo_db):
        self.mongo_url = mongo_url
        self.mongo_db = mongo_db

    def process_item(self, item, spider):
        if spider.name == "zfcgy":
            # insert_one replaces the deprecated Collection.insert.
            self.db.zfcg.insert_one(dict(item))
            print(item)
            print("Item saved!")
        return item

    def open_spider(self, spider):
        self.client = MongoClient(self.mongo_url)
        self.db = self.client[self.mongo_db]

    def close_spider(self, spider):
        # Scrapy calls this with the spider argument; it was missing here.
        self.client.close()

    @classmethod
    def from_crawler(cls, crawler):
        return cls(
            mongo_url=crawler.settings.get('MONGO_URL'),
            mongo_db=crawler.settings.get('MONGO_DB')
        )
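
# For any of these pipelines to run, they must be enabled in the project's
# settings.py. A minimal sketch: the module path "myspider.pipelines" and
# the MongoDB connection values below are assumptions, not from the source.
#
#     ITEM_PIPELINES = {
#         'myspider.pipelines.MyspiderPipeline': 300,
#         'myspider.pipelines.MyspiderPipeline1': 400,
#         'myspider.pipelines.SaveToMongoPipeline': 500,
#     }
#
#     MONGO_URL = 'mongodb://localhost:27017'  # read by from_crawler()
#     MONGO_DB = 'zfcg'                        # database name is a placeholder
#
# Lower numbers run first; from_crawler() pulls MONGO_URL and MONGO_DB out of
# these settings when the SaveToMongoPipeline is instantiated.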