由于之前用scrapy写过爬虫,很喜欢用里面的callback与Pipeline,特别是callback,让方法调用路径看起来非常直观。于是突发奇想,模仿scrapy中的callback写一个demo。
# 模拟scrapy中的回调函数
# @File:模拟scrapy中的回调函数.py
# @Author:王星球
# @E-mail:973625535@qq.com
# Created_date:20200528
#
#
#
#
#
#
#
#
#
#
#
#
# Demo 1
class FormRequest(object):
    """Minimal stand-in for scrapy's Request: remember a callback and its
    argument, and fire the callback when the instance itself is called."""

    def __init__(self, callback, args):
        # Deferred invocation: nothing runs until __call__.
        self.callback = callback
        self.args = args

    def __call__(self, *_ignored, **_ignored_kw):
        # Arguments supplied at call time are deliberately discarded;
        # only the args captured at construction are forwarded.
        return self.callback(self.args)
class Pipeline(object):
    """Append pipeline items as text lines to a per-mode output file.

    `return_or_yield` is interpolated into the file name so the
    return-based and yield-based demos write to separate files.
    """

    def __init__(self, return_or_yield):
        # Initialize the attribute first: if open() raises, __del__ still
        # runs, and must not crash on a missing `file` attribute.
        self.file = None
        self.file = open("20200528回调函数_%s.txt" % return_or_yield,
                         encoding='utf-8', mode='a')

    def write_csv(self, obj):
        """Write str(obj) followed by a newline to the output file."""
        self.file.write(str(obj) + "\n")
        print("已经写入文件")

    def close(self):
        """Close the output file; safe to call more than once."""
        if self.file is not None:
            self.file.close()
            self.file = None

    def __del__(self):
        # Best-effort cleanup; guarded so a partially-constructed or
        # already-closed instance does not raise during interpreter teardown.
        self.close()
class Spider1(object):
    """Demo spider that chains callbacks through plain `return` values."""

    def __init__(self):
        # start_url() returns a FormRequest; calling it runs parse(), which
        # returns another FormRequest; calling that writes via the Pipeline.
        self.start_url()()()

    def start_url(self):
        print("return FormRequest 1")
        return FormRequest(callback=self.parse,
                           args=({
                               "url": "baidu.com",
                               "text": "回调函数测试",
                               "Author": "王星球",
                               "E-mail": "973625535@qq.com"
                           }))

    def parse(self, response):
        # Fixed copy-paste label: this is the second hop, mirroring
        # Spider2.parse which prints "yield FormRequest 2".
        print("return FormRequest 2")
        return FormRequest(callback=Pipeline("return").write_csv, args=(response,))
class Spider2(object):
    """Demo spider that chains callbacks through generators (`yield`)."""

    def __init__(self):
        # Each item from start_url() is a FormRequest; calling it runs
        # parse(), which yields the follow-up FormRequest whose call
        # performs the Pipeline write. The for-loop absorbs the outer
        # generator's StopIteration that the original handled by hand.
        for request in self.start_url():
            followup = next(request())
            followup()

    def start_url(self):
        print("yield FormRequest 1")
        yield FormRequest(callback=self.parse,
                          args=({
                              "url": "baidu.com",
                              "text": "回调函数测试",
                              "Author": "王星球",
                              "E-mail": "973625535@qq.com"
                          }))

    def parse(self, response):
        print("yield FormRequest 2")
        yield FormRequest(callback=Pipeline("yield").write_csv, args=(response,))
if __name__ == '__main__':
    # Run both demos in order: return-based chaining, then generator-based.
    for demo in (Spider1, Spider2):
        demo()
说明
1.demo版本的代码
2.核心代码:__call__方法
3.生成器方式或者直接return都可以
运行结果截图
生成的文件截图
文件1内容截图
文件2内容截图