/**
 * Small jsoup-based crawler: loads a book's chapter index page from qidian.com,
 * follows every chapter link found on it and prints each chapter's text to stdout.
 *
 * <p>A link is treated as a chapter link when its {@code title} attribute starts
 * with {@code "字数:"}. Requests are spaced out by a fixed delay.
 */
public class JsoupQidian {

    /** Index page listing the chapters of the book to crawl. */
    private static final String INDEX_URL = "http://www.qidian.com/BookReader/2372415.aspx";

    /** Prefix of the {@code title} attribute that identifies a chapter link. */
    private static final String CHAPTER_TITLE_PREFIX = "字数:";

    /** Pause before each chapter request, in milliseconds. */
    private static final long REQUEST_DELAY_MILLIS = 5000L;

    /**
     * Fetches the index page, then fetches and prints every chapter it links to.
     *
     * <p>A failure to load the index ends the run; a failure to load a single
     * chapter is reported on stderr and the crawl continues with the next one.
     * If the thread is interrupted while waiting between requests, the interrupt
     * status is restored and the crawl stops.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final Document index;
        try {
            // Load the index page directly from its URL.
            index = Jsoup.connect(INDEX_URL).get();
        } catch (IOException e) {
            System.err.println("Failed to load chapter index " + INDEX_URL + ": " + e);
            return;
        }

        // Walk every anchor in the page body.
        for (Element link : index.body().getElementsByTag("a")) {
            // attr() yields "" (never null) for a missing attribute, so no null check is needed.
            if (!link.attr("title").startsWith(CHAPTER_TITLE_PREFIX)) {
                continue;
            }

            // Resolve the href against the page's base URI instead of gluing it onto a
            // hard-coded host, which breaks for root-relative ("/x") and absolute hrefs.
            final String chapterUrl = link.absUrl("href");
            if (chapterUrl.isEmpty()) {
                // absUrl() returns "" when the href is missing or cannot be made absolute.
                continue;
            }
            System.out.println(chapterUrl);

            try {
                Thread.sleep(REQUEST_DELAY_MILLIS);
            } catch (InterruptedException e) {
                // Preserve the interrupt for whoever owns this thread and stop crawling;
                // carrying on would make every later sleep() fail immediately.
                Thread.currentThread().interrupt();
                return;
            }

            try {
                Document chapter = Jsoup.connect(chapterUrl).get();
                System.out.println(chapter.body().text());
            } catch (IOException e) {
                // One unreachable chapter should not abort the remaining ones.
                System.err.println("Failed to load chapter " + chapterUrl + ": " + e);
            }
        }
    }
}
程序本身没有问题,但起点网做了防抓取检测;明天我会尝试结合 HttpClient 来抓取起点的内容。
/**
 * Loads a qidian.com chapter index page, follows every chapter link on it and
 * prints each chapter's text to stdout.
 *
 * <p>A link counts as a chapter link when its {@code title} attribute starts with
 * {@code "字数:"}. A failure to load the index ends the run; a failure on a single
 * chapter is reported on stderr and skipped. If the thread is interrupted during
 * the pause between requests, the interrupt status is restored and the crawl stops.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    final String indexUrl = "http://www.qidian.com/BookReader/2372415.aspx";
    final long pauseMillis = 5000L;

    final Elements anchors;
    try {
        // Fetch the index page and collect every anchor inside its body.
        anchors = Jsoup.connect(indexUrl).get().body().getElementsByTag("a");
    } catch (IOException e) {
        System.err.println("Failed to load chapter index " + indexUrl + ": " + e);
        return;
    }

    for (Element anchor : anchors) {
        // attr() yields "" (never null) when the attribute is absent.
        final String title = anchor.attr("title");
        if (!title.startsWith("字数:")) {
            continue;
        }

        // Resolve the link against the page's base URI; prefixing a hard-coded host
        // mangles root-relative ("/x") and already-absolute hrefs.
        final String target = anchor.absUrl("href");
        if (target.isEmpty()) {
            // absUrl() returns "" when no absolute URL can be formed.
            continue;
        }
        System.out.println(target);

        try {
            Thread.sleep(pauseMillis);
        } catch (InterruptedException e) {
            // Restore the flag for the caller and stop: later sleeps would fail at once.
            Thread.currentThread().interrupt();
            return;
        }

        try {
            Document page = Jsoup.connect(target).get();
            System.out.println(page.body().text());
        } catch (IOException e) {
            // Keep going with the remaining chapters when one request fails.
            System.err.println("Failed to load chapter " + target + ": " + e);
        }
    }
}
}
程序本身没有问题,但起点网做了防抓取检测;明天我会尝试结合 HttpClient 来抓取起点的内容。