# 使用前请先安装如下gem
# gem install http
# gem install nokogiri
require 'http'
require 'nokogiri'
# Crawls a serialized novel page by page, starting from the first page, and
# appends each page's title and body text to a local text file. The crawl
# follows each page's "next page" link and stops when that link points back
# to the first page, or when a page lacks the expected structure.

# Site root; the "next page" links read from each page are site-relative paths.
base_url = 'https://www.85xs.cc'
# Path of the first page; also used as the loop's termination sentinel.
first_page_path = '/book/douluodalu1/1.html'
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36'
# Output file in the current working directory. Single-quoted and using a
# forward slash so no character is interpreted as an escape sequence.
filename = './斗罗大陆.txt'

url = "#{base_url}#{first_page_path}"
page_index = 1

loop do
  # max_hops: maximum number of redirects to follow automatically.
  resp = HTTP.follow(max_hops: 3).get(url, headers: { 'User-Agent' => user_agent })
  doc = Nokogiri::HTML(resp.body.to_s)

  # at_xpath returns nil (instead of raising) when nothing matches.
  title_node = doc.at_xpath('//h1/text()')
  next_link = doc.at_xpath('//tr/td[2]/a/@href')
  unless title_node && next_link
    # Without a title or a next-page link the crawl cannot continue; stop
    # cleanly rather than crash with NoMethodError on nil.
    warn "页面结构异常，停止抓取：#{url}"
    break
  end

  # Page title.
  title = title_node.content
  # Body text: one entry per text node, blank fragments dropped. Joined with
  # newlines below so consecutive text nodes do not run together.
  lines = doc.xpath('//div[@class="m-post"]/p/text()')
             .map { |node| node.content.strip }
             .reject(&:empty?)

  puts "正在保存#{title}第#{page_index}页"

  # Append to the output file; reopening per page means pages already
  # fetched are on disk even if a later request fails.
  File.open(filename, 'a') do |f|
    f.write "#{title}\n\n#{lines.join("\n")}\n\n"
  end

  # Link to the next page.
  next_page = next_link.content
  url = "#{base_url}#{next_page}"
  page_index += 1

  # Exit once the "next page" link wraps around to the first page.
  break if next_page == first_page_path
end
# Published in sync by the WeChat official account 【产品经理不是经理】 (gzh) — follow it for updates.