import scrapy,time
# scrapy shell "https://3RRvhw2" (single quotes also work on other systems)
# scrapy runspider quotes_spider.py -o quotes.json
# To run this spider, just enter the command above on the command line in the current directory.
# This will generate a json file in the current directory.
class QuotesSpider(scrapy.Spider):
    """Crawl Qidian chapter pages, yielding each chapter's update time and name.

    Starting from ``start_urls``, follows the "next chapter" link until
    20 pages have been scraped.  Run with:

        scrapy runspider quotes_spider.py -o quotes.json
    """

    name = 'quotes'
    start_urls = [
        'https://read.qidian.com/chapter/Hx0Qvc7tb9hrZK4x-CuJuw2/8jkDJBJJ6CrM5j8_3RRvhw2',
    ]

    # BUG FIX: the original called time.sleep(1) inside parse(), which blocks
    # the Twisted reactor and stalls every concurrent request in the crawler.
    # Scrapy's DOWNLOAD_DELAY setting throttles politely without blocking.
    custom_settings = {'DOWNLOAD_DELAY': 1}

    # Pages scraped so far; used to cap the crawl at 20 chapters.
    count = 0

    def parse(self, response):
        """Extract one chapter's fields, then follow the next-chapter link.

        :param response: the downloaded chapter page (scrapy Response).
        :yields: a dict with 'updateTime' and 'chapterName', then optionally
            a follow-up Request for the next chapter.
        """
        self.count += 1
        yield {
            # .get() is the modern equivalent of extract_first()
            'updateTime': response.css('.j_updateTime::text').get(),
            'chapterName': response.css('.j_chapterName::text').get(),
        }
        if self.count < 20:
            next_page = response.css('#j_chapterNext::attr(href)').get()
            if next_page is not None:
                # response.follow conveniently resolves relative URLs and
                # builds the follow-up Request.
                yield response.follow(next_page, callback=self.parse)