刚入门 Scrapy 不久，尝试着写一段代码来爬取诗词网。
Spider 代码如下：
class ShiciSpider(scrapy.Spider):
name = 'shici'
url = 'http://www.shicimingju.com/chaxun/zuozhe/44_'
page = 1
start_urls = [url + str(page) +'.html']
def parse(self, response):
#判断是否是主页
judge = response.xpath('//head/title/text()').extract_first()
if judge ==