一开始用的是下面这种方法，后来发现不行，也不知道是什么原因：测试的时候程序要么只运行一次，要么运行两次，之后就不再执行了。
import os
import sched
import time
from scrapy.commands import ScrapyCommand
from scrapy.utils.project import get_project_settings
# Module-level scheduler: time.time supplies the clock, time.sleep the delay
# function, so schedule.run() blocks between events.
schedule = sched.scheduler(time.time, time.sleep)
# Function triggered by the periodic scheduler.
def func():
    """Run both spiders by shelling out to the scrapy CLI.

    Each os.system call blocks until that crawl finishes, so the two
    spiders run sequentially.
    """
    os.system("scrapy crawl bai")
    os.system("scrapy crawl sync_handel")
    # Add further spiders the same way, e.g.:
    # os.system("scrapy crawl ***")
def perform1(inc=60 * 10):
    """Run the crawls, re-scheduling this function every ``inc`` seconds.

    The next event is enqueued *before* func() runs so the interval is
    measured from the scheduled start, not from when the crawls finish.

    Args:
        inc: delay in seconds between runs (default 600).
    """
    schedule.enter(inc, 0, perform1, (inc,))
    func()
def mymain():
    """Enqueue the first perform1 run immediately (delay 0, interval 600 s)."""
    schedule.enter(0, 0, perform1, (600,))
if __name__ == "__main__":
    mymain()
    # Blocks forever, firing the queued events as their times come due.
    schedule.run()
经过一番艰辛的搜索和尝试，最后改用下面这种方法就可以了。
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from twisted.internet import reactor
from twisted.internet.task import deferLater
def sleep(result, *args, seconds):
    """Non-blocking sleep: return a Deferred that fires after ``seconds``.

    Designed to be chained with ``Deferred.addCallback(sleep, seconds=N)``:
    the previous callback's result arrives as the first positional argument
    (``result``) and is ignored; ``seconds`` is keyword-only.
    """
    return deferLater(reactor, seconds, lambda: None)
# Single CrawlerProcess built from the project's settings; reused for every
# crawl scheduled by _crawl below.
process = CrawlerProcess(get_project_settings())
def _crawl(result, spider):
    """Crawl ``spider``, wait 100 s, then schedule the same spider again.

    Builds a self-perpetuating Deferred chain: crawl -> log -> sleep ->
    _crawl, so the spider restarts forever without stopping the reactor.

    Args:
        result: previous callback's result (unused; present so this function
            itself can sit in an addCallback chain).
        spider: spider name to pass to process.crawl().

    Returns:
        The Deferred heading the chain.
    """
    deferred = process.crawl(spider)
    deferred.addCallback(lambda results: print('waiting 100 seconds before restart...'))
    deferred.addCallback(sleep, seconds=100)
    deferred.addCallback(_crawl, spider)
    return deferred
# Kick off the endless crawl loop for each spider, then start the reactor.
_crawl(None, 'bai')  # second argument is the spider name
_crawl(None, 'sync_handel')
# Blocks here; the Twisted reactor keeps both Deferred chains running.
process.start()