每次在线看小说,总会遇到弹窗广告,既烦人又费流量。
于是用 Scrapy 把小说爬取下来,保存到本地文本文件里阅读。
# coding=utf-8
import io

import scrapy
class UuxsSpider(scrapy.Spider):
    """Download a novel page by page into a local text file.

    Crawling starts at the single URL in ``start_urls``. For every page the
    spider appends the heading and the body text to ``小说.txt`` and then
    follows the ``a#book-next`` link (presumably the "next chapter" link --
    confirm against the site markup) until no such link is found.
    """

    name = "xiaoshuo"
    start_urls = [
        'http://www.xiaoshuo.net/book/0/34/19322.html',
    ]

    def parse(self, response):
        """Append one page's title and text to the output file.

        Args:
            response: The Scrapy response for the page being processed.

        Yields:
            A follow-up request for the page referenced by ``a#book-next``,
            handled by this same callback, when that link is present.
        """
        # Fall back to an empty title instead of None so a page without the
        # expected heading does not abort the crawl with an AttributeError.
        title = response.css('h1#BookTitle::text').extract_first(default=u'')

        # Collect every text node directly under the container, not only the
        # first one; otherwise text split by inline markup (e.g. <br>) would
        # be truncated to its first fragment.
        fragments = response.css('div#BookText::text').extract()
        content = u'\n'.join(fragments)

        # Keep everything as unicode text and let the file object do the
        # encoding. Mixing encoded bytes with str literals raises TypeError
        # on Python 3.
        self.log(u'开始下载 %s' % title)

        # io.open takes an explicit encoding on both Python 2 and 3, so the
        # output is always UTF-8 regardless of the platform locale.
        with io.open('小说.txt', 'a', encoding='utf-8') as f:
            f.write(title + u"\n")
            f.write(content + u"\n")

        next_page = response.css('a#book-next::attr(href)').extract_first()
        if next_page is not None:
            yield response.follow(next_page, callback=self.parse)