在scrapy.telnet(新版本中已改为scrapy.extensions.telnet)里有一个TelnetConsole类,源码如下:
class TelnetConsole(protocol.ServerFactory):
    """Server factory that exposes a telnet console while the engine runs.

    The console starts listening when ``engine_started`` fires and stops
    when ``engine_stopped`` fires. This excerpt does not show
    ``_get_telnet_vars``; it is expected to be defined elsewhere on the
    class.
    """

    def __init__(self, crawler):
        """Read the telnet settings and subscribe to the engine signals.

        Raises:
            NotConfigured: if ``TELNETCONSOLE_ENABLED`` is false or
                ``TWISTED_CONCH_AVAILABLE`` is falsy.
        """
        settings = crawler.settings
        telnet_enabled = settings.getbool('TELNETCONSOLE_ENABLED')
        if not (telnet_enabled and TWISTED_CONCH_AVAILABLE):
            raise NotConfigured

        self.crawler = crawler
        # NOTE(review): presumably silences the factory's own start/stop
        # log lines -- confirm against Twisted's Factory.noisy.
        self.noisy = False
        self.portrange = list(map(int, settings.getlist('TELNETCONSOLE_PORT')))
        self.host = settings['TELNETCONSOLE_HOST']

        # Tie the listening socket's lifetime to the engine's lifetime.
        subscriptions = (
            (self.start_listening, signals.engine_started),
            (self.stop_listening, signals.engine_stopped),
        )
        for handler, signal in subscriptions:
            self.crawler.signals.connect(handler, signal)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the console from a crawler (alternate constructor)."""
        return cls(crawler)

    def start_listening(self):
        """Open the listening port and log the address it is bound to."""
        self.port = listen_tcp(self.portrange, self.host, self)
        address = self.port.getHost()
        logger.debug(
            "Telnet console listening on %(host)s:%(port)d",
            {'host': address.host, 'port': address.port},
            extra={'crawler': self.crawler},
        )

    def stop_listening(self):
        """Close the port opened by ``start_listening``."""
        self.port.stopListening()

    def protocol(self):
        """Return a telnet transport wrapping a manhole session.

        The value of ``self._get_telnet_vars()`` is passed through as the
        last argument.
        """
        return telnet.TelnetTransport(
            telnet.TelnetBootstrapProtocol,
            insults.ServerProtocol,
            manhole.Manhole,
            self._get_telnet_vars(),
        )
我们可以自己编写扩展,规定爬虫在触发某个信号时执行什么操作。
scrapy的signals模块里定义了以下信号:
# Signal sentinels: each name is bound to its own distinct object() instance.
engine_started = object()
engine_stopped = object()
spider_opened = object()
spider_idle = object()
spider_closed = object()
spider_error = object()
request_scheduled = object()  # request put into the scheduler
request_dropped = object()  # request dropped
response_received = object()  # response received
response_downloaded = object()  # response downloaded
item_scraped = object()  # item obtained
item_dropped = object()  # item dropped
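例如我在根目录下创建一个extend.py文件,里面创建一个类MyExtend。注意扩展要在settings中通过EXTENSIONS(而不是ITEM_PIPELINES)注册,例如EXTENSIONS = { 'shan.extend.MyExtend': 300, }(模块路径以extend.py的实际位置为准);原有的ITEM_PIPELINES = { 'shan.pipelines.ShanPipeline': 300, }只负责注册pipeline,保持不变即可。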
from scrapy import signals
class MyExtend:
    """Extension that prints a line when the engine starts and stops."""

    def __init__(self, crawler):
        """Keep the crawler and subscribe to the two engine signals."""
        self.crawler = crawler
        hooks = (
            (self.start, signals.engine_started),
            (self.stop, signals.engine_stopped),
        )
        for callback, signal in hooks:
            self.crawler.signals.connect(callback, signal)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the extension from a crawler (alternate constructor)."""
        return cls(crawler)

    def start(self):
        """Handler connected to ``signals.engine_started``."""
        print("signals.engine_started")

    def stop(self):
        """Handler connected to ``signals.engine_stopped``."""
        print("signals.engine_stop")
我规定在引擎开启时打印signals.engine_started,引擎关闭时打印signals.engine_stop。
(venv) D:\shan>scrapy crawl chouti --nolog
D:\shan\shan\spiders\chouti.py:9: ScrapyDeprecationWarning: Module `scrapy.dupefilter` is deprecated, use `scrapy.dupefilters` instead
from scrapy.dupefilter import RFPDupeFilter
D:\shan\shan\spiders\chouti.py:11: ScrapyDeprecationWarning: Module `scrapy.telnet` is deprecated, use `scrapy.extensions.telnet` instead
from scrapy.telnet import TelnetConsole
signals.engine_started
{"result":{"code":"9999", "message":"", "data":{"complateReg":"0","destJid":"cdu_53923279913"}}}
{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671818766000","lvCount":"13","nick":"Danbro","uvCount":"30","voteTime":"小于1分钟前"}
}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819521000","lvCount":"7","nick":"Danbro","uvCount":"31","voteTime":"小于1分钟前"}}
}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"30010", "message":"你已经推荐过了", "data":""}}
{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819621000","lvCount":"9","nick":"Danbro","uvCount":"35","voteTime":"小于1分钟前"}}
}
{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819634000","lvCount":"20","nick":"Danbro","uvCount":"35","voteTime":"小于1分钟前"}
}}
{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819614000","lvCount":"124","nick":"Danbro","uvCount":"35","voteTime":"小于1分钟前"
}}}
{"result":{"code":"9999", "message":"推荐成功", "data":{"jid":"cdu_53923279913","likedTime":"1539671819663000","lvCount":"32","nick":"Danbro","uvCount":"35","voteTime":"小于1分钟前"}
}}
signals.engine_stop
运行爬虫时,可以在输出的开头和结尾分别看见这两句话。