// 1. Starting URL: the first chapter of the novel to crawl.
//    indexUrl is reassigned to the next chapter's URL at the end of every loop iteration.
String indexUrl = "https://read.qidian.com/chapter/nKVO7k6YUptmzDX0o03xsg2/6Ko8rgNY4EBp4rPq4Fd4KQ2";
// Writer that saves the crawled novel to disk as a .txt file.
// The charset is given explicitly so the file encoding matches the UTF-8 decoding of the
// pages below instead of depending on the platform default.
BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("D:\\重生柯南当侦探"+".txt"), "UTF-8"));
// 2. One HTTP client is shared by all chapter requests; try-with-resources closes it on
//    every exit path (normal end of the novel as well as an exception).
try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
    while (true) {
        // 2.1 Build the GET request for the current chapter, with a browser-like User-Agent header.
        HttpGet httpGet = new HttpGet(indexUrl);
        httpGet.setHeader("User-Agent",
                "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36");

        // 2.2 Execute the request and read the whole body as UTF-8 text.
        //     The response is closed as soon as the body has been read.
        String html;
        try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
            html = EntityUtils.toString(response.getEntity(), "UTF-8");
        }

        // 3. Parse the page.
        Document document = Jsoup.parse(html);

        // 3.1 Chapter title: echo to stdout and write it as its own line in the file.
        String chapterTitle = document.select(".j_chapterName").text();
        System.out.println(chapterTitle);
        bw.write(chapterTitle);
        bw.newLine();

        // 3.2 Chapter body: one output line per <p> inside the reading-content container.
        for (Element p : document.select("[class=read-content j_readContent] p")) {
            String paragraph = p.text();
            bw.write(paragraph);
            System.out.println(paragraph);
            bw.newLine();
        }
        // Flush once per chapter so completed chapters reach the disk even if a later request fails.
        bw.flush();

        // 3.3 Locate the "next chapter" link; when no matching link exists, stop crawling.
        Elements aEl = document.select("#j_chapterNext[href*=chapter]");
        System.err.println("aEl:"+aEl);
        if (aEl.isEmpty()) {
            break;
        }
        // NOTE(review): the href is presumably protocol-relative ("//host/path"), which is why
        // a scheme is prepended here - confirm against the live page markup.
        String nextUrl = aEl.attr("href");
        indexUrl = "http:" + nextUrl;
        System.out.println(indexUrl);
    }
} finally {
    // 4. Always release the output file, flushing any buffered text.
    bw.close();
}