def parse(self, response):
    """Parse a blog listing page and schedule one request per post.

    Every element whose class is ``day`` is treated as one post entry. Its
    link, title and date text are packed into a ``CnblogspiderItem``, and a
    request for the post URL is yielded with ``self.parse_body`` as the
    callback; the item travels with the request under ``meta['item']``.
    After all posts, the "next page" link (anchor text 下一页) is followed,
    if present, with this method as the callback.

    Args:
        response: The downloaded listing page.

    Yields:
        scrapy.Request: One request per post, then at most one request for
        the next listing page.

    Raises:
        IndexError: If a post entry lacks one of the expected nodes, since
            each field is read with ``extract()[0]``.
    """
    papers = response.xpath(".//*[@class='day']")

    # Debugging breakpoint: execution pauses here so the response can be
    # examined interactively. Remove these two lines for unattended crawls.
    from scrapy.shell import inspect_response
    inspect_response(response, self)

    for paper in papers:
        url = paper.xpath(".//*[@class='postTitle']/a/@href").extract()[0]
        title = paper.xpath(".//*[@class='postTitle']/a/text()").extract()[0]
        time = paper.xpath(".//*[@class='dayTitle']/a/text()").extract()[0]
        # NOTE(review): this is the same XPath as `title`, so `content` ends
        # up holding the title text. Presumably a different node was
        # intended -- confirm against the page markup.
        content = paper.xpath(".//*[@class='postTitle']/a/text()").extract()[0]
        item = CnblogspiderItem(url=url, title=title, time=time, content=content)

        # Hand the partially filled item to the detail-page callback.
        request = scrapy.Request(url=url, callback=self.parse_body)
        request.meta['item'] = item
        yield request

    # The backslash is escaped so the literal is valid without relying on an
    # unrecognised escape sequence; the resulting pattern is still `\S*`.
    next_page = Selector(response).re(u'<a href="(\\S*)">下一页</a>')
    if next_page:
        yield scrapy.Request(url=next_page[0], callback=self.parse)
scrapy crawl spidername 运行时，程序会停在 inspect_response() 这一行，此时可以进行调试。
REDIRECT_ENABLED =False ???