// Crawls a novel one chapter page at a time, starting from the URL below, and
// appends each chapter's heading and body text to a text file on disk.
String indexUrl = "https://www.qu.la/book/116/92229.html";
// Writer that stores the crawled novel as a .txt file. The charset is named
// explicitly so the output does not depend on the platform default encoding.
BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("D://诛仙"+".txt"), "UTF-8"));
// A single HTTP client is shared by all chapter requests; try-with-resources
// closes it on every exit path (normal end of the crawl, or an exception).
try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
    while (true) {
        // Build the GET request for the current chapter with a browser-like User-Agent.
        HttpGet httpGet = new HttpGet(indexUrl);
        httpGet.setHeader("User-Agent",
                "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36");

        // Send the request and read the whole body; the response is closed as
        // soon as the body has been consumed so its connection is released.
        String html;
        try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
            html = EntityUtils.toString(response.getEntity(), "UTF-8");
        }

        // Parse the page.
        Document document2 = Jsoup.parse(html);

        // Chapter title: text of the <h1> element(s), written as one line.
        String chapterTitle = document2.select("h1").text();
        System.err.println(chapterTitle);
        bw.write(chapterTitle);
        bw.newLine();
        bw.flush();

        // Chapter body: text of every element matching #content, one line each.
        // Flushing after each write keeps already-crawled text on disk if a
        // later request fails.
        for (Element p : document2.select("#content")) {
            System.err.println("p.text():"+p.text());
            bw.write(p.text());
            bw.newLine();
            bw.flush();
        }

        // Navigation links whose href contains "html"; the last one is used as
        // the link to the next chapter.
        Elements chapterLinks = document2.select(".bottem2>a[href*=html]");
        Element aEl = chapterLinks.last();
        System.err.println("aEl:"+aEl);
        // Stop when fewer than two such links are present. This also covers the
        // case where there is no link at all (last() returns null for an empty list).
        // NOTE(review): presumably the final chapter page has fewer than two
        // matching links — confirm against the site's markup.
        if (chapterLinks.size() < 2) {
            break;
        }

        // The href is appended to the fixed book base URL to form the next page URL.
        String nextUrl = aEl.attr("href");
        System.err.println(nextUrl);
        indexUrl = "https://www.qu.la/book/116/" + nextUrl;
        System.out.println(indexUrl);
    }
} finally {
    // Always release the output file. BufferedWriter.close() has no effect on
    // an already-closed stream, so a later close() call elsewhere stays harmless.
    bw.close();
}