引入依赖包
<!-- jsoup解析html -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.12.1</version>
</dependency>
/**
 * Crawls the cnblogs home page and, for every {@code div.post_item} entry,
 * prints its title, summary, author and link, then downloads the linked
 * article and prints the article's {@code div#cnblogs_post_body} element.
 *
 * <p>A failure while downloading a single article is reported on standard
 * error and the crawl continues with the next entry. A failure while
 * downloading the home page itself ends the crawl.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    // Site to crawl.
    String url = "https://www.cnblogs.com/";
    try {
        // Download the home page and parse it into a DOM.
        Document doc = Jsoup.connect(url).get();
        // Each listed post is selected by its container class.
        Elements postItems = doc.select("div.post_item");
        for (Element element : postItems) {
            Elements postItemBody = element.select("div.post_item_body");
            // Resolve the link once; it is both printed and followed below.
            String link = postItemBody.select("a").attr("href");

            System.out.println("标题:" + postItemBody.select("h3").select("a").text());
            System.out.println("介绍:" + postItemBody.select("p").text());
            System.out.println("作者:" + postItemBody.select("div.post_item_foot").select("a").text());
            System.out.println("连接" + link);

            // An entry without a link cannot be followed; connecting to an
            // empty URL would throw instead of returning a page.
            if (!link.isEmpty()) {
                try {
                    // Query the article page that was just fetched, not the
                    // home page document.
                    Document articleDoc = Jsoup.connect(link).get();
                    Elements postBody = articleDoc.select("div#cnblogs_post_body");
                    System.out.println(postBody);
                } catch (IOException e) {
                    // Keep crawling the remaining posts when one article fails.
                    System.err.println("Failed to fetch article " + link + ": " + e);
                }
            }

            // Blank lines separate consecutive posts in the output.
            System.out.println("");
            System.out.println("");
            System.out.println("");
        }
    } catch (IOException e) {
        // The home page could not be downloaded, so there is nothing to crawl.
        e.printStackTrace();
    }
}