转载: https://blog.csdn.net/mouday/article/details/81514569
原理:
主进程 -> 多个子进程(每个爬虫配置一个) -> 子进程内再启动 scrapy 进程
代码示例:
将以下代码文件放入 scrapy 项目中任意位置即可运行。
# -*- coding: utf-8 -*-
from multiprocessing import Process
from scrapy import cmdline
import time
import logging
# Spider run configuration: one entry per spider.
# "spider_name": the scrapy spider to crawl; "frequency": seconds to wait
# between consecutive runs of that spider.
confs = [
    {
        "spider_name": "hexun_pdf",
        "frequency": 2,
    },
]
def start_spider(spider_name, frequency):
    """Run the named scrapy spider forever, once every *frequency* seconds.

    Each crawl runs in a fresh child process (scrapy's reactor cannot be
    restarted within one process), and we block until it finishes before
    sleeping and launching the next run.

    :param spider_name: name of the scrapy spider to crawl.
    :param frequency: seconds to sleep between consecutive runs.
    """
    # Argument vector handed to scrapy's command-line entry point.
    args = ["scrapy", "crawl", spider_name]
    while True:
        # monotonic() is immune to wall-clock adjustments, unlike time(),
        # so the measured duration cannot go negative or jump.
        start = time.monotonic()
        p = Process(target=cmdline.execute, args=(args,))
        p.start()
        p.join()  # wait for this crawl to finish before scheduling the next
        # Lazy %-args: the message is only formatted if DEBUG is enabled.
        logging.debug("### use time: %s", time.monotonic() - start)
        time.sleep(frequency)
if __name__ == '__main__':
    # Spawn one long-lived supervisor process per configured spider.
    for cfg in confs:
        worker = Process(
            target=start_spider,
            args=(cfg["spider_name"], cfg["frequency"]),
        )
        worker.start()
        # Stagger launches so the spiders do not all start simultaneously.
        time.sleep(10)