最开始是通过自定义 Scrapy commands 命令来启动多个爬虫,但这种方法在配合定时任务使用时,会出现只能运行一次的问题。
from scrapy.commands import ScrapyCommand
from scrapy.utils.project import get_project_settings
class Command(ScrapyCommand):
    """Custom ``crawlall`` command.

    Discovers every spider registered in the current Scrapy project and
    schedules all of them on the shared :class:`CrawlerProcess`, then starts
    the reactor so they run concurrently in one process.
    """

    # Spiders are discovered from the project, so a project context is required.
    requires_project = True

    def syntax(self):
        """Return the usage string shown in ``scrapy crawlall -h``."""
        return '[options]'

    def short_desc(self):
        """One-line description shown in the ``scrapy`` command list."""
        return 'Runs all of the spiders'

    def run(self, args, opts):
        """Schedule every project spider and block until all crawls finish.

        ``args``/``opts`` are the parsed command-line arguments; the option
        namespace is forwarded to each spider as keyword arguments.
        """
        # Fix: ``crawler_process.spiders`` was deprecated and later removed;
        # in Scrapy >= 1.0 the list of spider names lives on ``spider_loader``.
        spider_list = self.crawler_process.spider_loader.list()
        for name in spider_list:
            # Forward the parsed CLI options to each spider's constructor.
            self.crawler_process.crawl(name, **opts.__dict__)
        # Starts the Twisted reactor; blocks until every crawl has finished.
        self.crawler_process.start()
# Invoke the custom "crawlall" command as if typed on the shell.
# Fix: ``cmdline`` was used without being imported in this snippet; import it
# locally since the snippet is presented as a standalone entry point.
from scrapy import cmdline
cmdline.execute("scrapy crawlall".split())
后来改用 schedule + CrawlerProcess 的方法,但发现运行到第二次时仍然会报错(Twisted reactor 不能重复启动,疑似 ReactorNotRestartable——待确认)。
def job1():
print('Job1:每隔10秒执行一次的任务')
print('Job1-startTime:%s' % (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
process = CrawlerProcess(get_project_settings())
process.crawl('*