前言:
案例中用到的 Httpclientutil 工具类,请参考上一篇文章:https://blog.csdn.net/qq_15076569/article/details/83015044
import com.xucj.jsoup.Httpclientutil;
import java.io.*;
import java.nio.charset.StandardCharsets;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Command-line crawler that downloads a novel from read.qidian.com chapter by chapter
 * and writes all chapters into one UTF-8 text file named after the book.
 *
 * <p>Usage: {@code java QiDianSpider [startUrl [outputDir]]}. Both arguments are optional;
 * when omitted the built-in start URL and output directory are used.
 */
public class QiDianSpider {

    /** Chapter page the crawl starts from when no URL is passed on the command line. */
    private static final String DEFAULT_START_URL =
            "https://read.qidian.com/chapter/R-_rpa5go0s1/eSlFKP1Chzg1";

    /** Directory the book file is written to when none is passed on the command line. */
    private static final String DEFAULT_OUTPUT_DIR = "C:\\Users\\DELL\\Desktop\\";

    /** Line terminator used inside the generated text file. */
    private static final String LINE_BREAK = "\r\n";

    /** Blank line written between two consecutive chapters. */
    private static final String CHAPTER_SEPARATOR = LINE_BREAK + LINE_BREAK;

    /** File name used when the book title is empty after sanitizing. */
    private static final String FALLBACK_BOOK_NAME = "book";

    /**
     * Follows the "next chapter" link from the start page until no such link is found,
     * printing each chapter to standard output and appending it to the book file.
     *
     * @param args optional: {@code args[0]} start chapter URL, {@code args[1]} output directory
     * @throws IOException if the output directory or file cannot be created or written
     */
    public static void main(String[] args) throws IOException {
        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_START_URL;
        File outputDir =
                new File((args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_DIR);

        // Opened lazily: the file name comes from the book title on the first page.
        Writer writer = null;
        try {
            while (true) {
                String html = Httpclientutil.getHtmlByUrl(url);
                // NOTE(review): Httpclientutil is not visible here; this guards against a
                // failed request yielding no body — confirm what getHtmlByUrl returns on error.
                if (html == null || html.isEmpty()) {
                    System.err.println("No content received from " + url);
                    break;
                }
                Document document = Jsoup.parse(html);

                if (writer == null) {
                    // Book metadata is only read from the first fetched page.
                    String bookName = printBookInfo(document);
                    writer = openBookWriter(outputDir, bookName);
                } else {
                    // Keep the next chapter title off the previous chapter's last line.
                    writer.write(CHAPTER_SEPARATOR);
                }

                String chapter = extractChapter(document);
                System.out.println(chapter);
                writer.write(chapter);
                // Persist each finished chapter so a later failure does not lose it.
                writer.flush();

                // Link to the next chapter; stop when the page does not offer one.
                Elements next = document.select("#j_chapterNext[href*=read.qidian.com]");
                if (next.isEmpty()) {
                    break;
                }
                // The original code prefixes the href with the scheme, i.e. it treats the
                // link as protocol-relative ("//read.qidian.com/...").
                url = "https:" + next.attr("href");
                System.out.println(url);
            }
        } finally {
            // Close on every exit path, including exceptions thrown while crawling.
            if (writer != null) {
                writer.close();
            }
        }
    }

    /**
     * Prints title, author, genre, word count and release date found on the page.
     *
     * @param document parsed chapter page that carries the book cover section
     * @return the book title text (possibly empty when the selector matches nothing)
     */
    private static String printBookInfo(Document document) {
        // Book title
        String bookName = document.select("#j_textWrap .book-cover-wrap h1").text();
        System.out.println(bookName);
        // Author
        System.out.println(document.select("#j_textWrap .book-cover-wrap h2").text());
        // Genre and serialized word count share one space-separated text; either may be absent.
        String[] typeAndTextCount =
                document.select("[class=info-list cf] ul li p").text().split(" ");
        System.out.println(typeAndTextCount.length > 0 ? typeAndTextCount[0] : "");
        System.out.println(typeAndTextCount.length > 1 ? typeAndTextCount[1] : "");
        // Release date
        System.out.println(document.select("[class=info-list cf] ul li em").text());
        return bookName;
    }

    /**
     * Creates the output directory if needed and opens {@code <bookName>.txt} inside it.
     *
     * @param outputDir directory that will contain the book file
     * @param bookName  raw book title; characters illegal in file names are replaced
     * @return a buffered UTF-8 writer for the (truncated or newly created) book file
     * @throws IOException if the directory cannot be created or the file cannot be opened
     */
    private static Writer openBookWriter(File outputDir, String bookName) throws IOException {
        // The directory must exist before the stream is opened; FileOutputStream does not
        // create missing parent directories.
        if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
            throw new IOException("Cannot create output directory: " + outputDir);
        }
        // Replace characters that are not allowed in Windows file names.
        String safeName = bookName.replaceAll("[\\\\/:*?\"<>|]", "_").trim();
        if (safeName.isEmpty()) {
            safeName = FALLBACK_BOOK_NAME;
        }
        File bookFile = new File(outputDir, safeName + ".txt");
        // Explicit UTF-8 so the text does not depend on the platform default charset.
        return new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(bookFile), StandardCharsets.UTF_8));
    }

    /**
     * Builds the text of one chapter: the chapter title followed by one line per paragraph.
     *
     * @param document parsed chapter page
     * @return chapter title and paragraphs joined with CRLF, without a trailing line break
     */
    private static String extractChapter(Document document) {
        StringBuilder sb = new StringBuilder();
        // 1. Chapter title
        sb.append(document.select(".main-text-wrap .j_chapterName").text());
        // 2. Chapter paragraphs
        for (Element paragraph : document.select("[class=read-content j_readContent] p")) {
            sb.append(LINE_BREAK).append(paragraph.text());
        }
        return sb.toString();
    }
}