Java中使用jsoup爬取网页数据简单示例
package com.jsoup;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Minimal jsoup example: downloads the page at https://www.cnblogs.com/ and
 * prints the title, link and author of every element matched by
 * {@code div#post_list>div.post_item}.
 */
public class FirstJsoup {
    /** Parsed page; assigned by {@link #main} when the download succeeds, {@code null} otherwise. */
    private static Document doc;

    /**
     * Fetches the page and prints the post list.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            doc = Jsoup.connect("https://www.cnblogs.com/").get();
        } catch (IOException e) {
            e.printStackTrace();
            // Without a document there is nothing to print; continuing would
            // only add a NullPointerException on top of the real error.
            return;
        }
        Blogs();
    }

    /**
     * Prints title, link and author for each post item found in the loaded
     * document. Does nothing except report to stderr when no document has
     * been loaded.
     */
    public static void Blogs() {
        if (doc == null) {
            System.err.println("No document loaded; nothing to print.");
            return;
        }
        Elements posts = doc.select("div#post_list>div.post_item");
        for (Element post : posts) {
            // Evaluate the title selector once and reuse it for text and href.
            Elements titleLink = post.select("div.post_item_body>h3>a.titlelnk");
            String txt = titleLink.text();
            System.out.println("标题:" + txt);
            String href = titleLink.attr("href");
            System.out.println("链接:" + href);
            String author = post.select("div.post_item_foot > a.lightblue").text();
            System.out.println("作者:" + author);
            System.out.println("---------------------------");
        }
    }
}
运行结果