最开始是使用commands方法启动的多个爬虫,但是这种方法在使用定时任务的时候会出现只能跑一次的情况
from scrapy.commands import ScrapyCommand
from scrapy.utils.project import get_project_settings
class Command(ScrapyCommand):
    """Custom ``scrapy crawlall`` command: queue and run every spider in the project."""

    requires_project = True

    def syntax(self):
        return '[options]'

    def short_desc(self):
        return 'Runs all of the spiders'

    def run(self, args, opts):
        # ``spider_loader`` is the supported accessor; the ``spiders``
        # attribute on CrawlerProcess has been deprecated since Scrapy 1.0.
        spider_list = self.crawler_process.spider_loader.list()
        for name in spider_list:
            self.crawler_process.crawl(name, **opts.__dict__)
        # Blocking call: starts the Twisted reactor and returns when all
        # queued crawls have finished.
        self.crawler_process.start()
cmdline.execute("scrapy crawlall".split())
后来采用schedule+CrawlerProcess方法,发现跑到第二次还是会报错
def job1():
    """Scheduled job: crawl three spiders through a fresh CrawlerProcess.

    NOTE(review): this approach only works once per process.
    ``process.start()`` runs the Twisted reactor, and a reactor cannot be
    restarted after it has stopped — the second invocation raises
    ``twisted.internet.error.ReactorNotRestartable`` (see the traceback
    pasted below this snippet).
    """
    print('Job1:每隔10秒执行一次的任务')
    print('Job1-startTime:%s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    process = CrawlerProcess(get_project_settings())
    # Spider names are redacted placeholders.
    process.crawl('****')
    process.crawl('***')
    process.crawl('***')
    # Blocking call: starts the reactor; fails on the second run (see above).
    process.start()
    print('Job1-endTime:%s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    print('------------------------------------------------------------------------')
if __name__ == '__main__':
    # Run job1 every 10 seconds via the third-party ``schedule`` library.
    schedule.every(10).seconds.do(job1)
    while True:
        schedule.run_pending()
        # Sleep between polls; without this the loop busy-waits and
        # pins a CPU core at 100%.
        time.sleep(1)
报错:
schedule.run_pending()
File "C:\python3.6\lib\site-packages\schedule\__init__.py", line 563, in run_pending
default_scheduler.run_pending()
File "C:\python3.6\lib\site-packages\schedule\__init__.py", line 94, in run_pending
self._run_job(job)
File "C:\python3.6\lib\site-packages\schedule\__init__.py", line 147, in _run_job
ret = job.run()
File "C:\python3.6\lib\site-packages\schedule\__init__.py", line 466, in run
ret = self.job_func()
File "G:/demo/12.27(盘口直播源整合)/Live/Live/start.py", line 24, in job1
process.start()
File "C:\python3.6\lib\site-packages\scrapy\crawler.py", line 309, in start
reactor.run(installSignalHandlers=False) # blocking call
File "C:\python3.6\lib\site-packages\twisted\internet\base.py", line 1282, in run
self.startRunning(installSignalHandlers=installSignalHandlers)
File "C:\python3.6\lib\site-packages\twisted\internet\base.py", line 1262, in startRunning
ReactorBase.startRunning(self)
File "C:\python3.6\lib\site-packages\twisted\internet\base.py", line 765, in startRunning
raise error.ReactorNotRestartable()
twisted.internet.error.ReactorNotRestartable
后面还是使用了os模块+标准库sched模块的方法完美解决了问题(注意:下面代码中的 schedule 变量其实是 sched.scheduler 实例,并非前面用到的第三方 schedule 库)
schedule = sched.scheduler(time.time, time.sleep)
# Function triggered periodically by the scheduler: each spider runs in a
# fresh child process, which sidesteps Twisted's ReactorNotRestartable error.
def func():
    """Run each spider sequentially via the ``scrapy crawl`` CLI."""
    import subprocess  # local import keeps this snippet self-contained

    # Spider names are redacted placeholders; replace '***' as needed.
    # subprocess.run with an argument list (no shell) is safer than
    # os.system, which passes the command through the shell.
    for spider in ('***', '***', '***'):
        subprocess.run(['scrapy', 'crawl', spider])
def perform1(inc):
    """Run the crawl job, re-scheduling itself every ``inc`` seconds.

    :param inc: delay in seconds between consecutive runs (180 as called
        from ``mymain`` below).
    """
    # Re-arm first so the next run is queued before func() starts working;
    # sched executes events sequentially, so runs never overlap.
    schedule.enter(inc, 0, perform1, (inc,))
    func()
def mymain(inc=180):
    """Queue the first run of :func:`perform1`.

    :param inc: seconds between consecutive runs; defaults to 180, the
        value that was previously hard-coded.
    """
    schedule.enter(0, 0, perform1, (inc,))
if __name__ == "__main__":
    # Queue the first event, then block forever processing the scheduler
    # queue (each run of perform1 re-queues the next one).
    mymain()
    schedule.run()