String html = EntityUtils.toString(entity, "utf-8");
//解析网页 Document document = Jsoup.parse(html); //获取网页中的title //通过tag获取 Element title = document.getElementsByTag("title").first(); String text = title.text(); System.out.println(text);
//通过id获取 Element site_nav_top = document.getElementById("site_nav_top"); String text1 = site_nav_top.text(); System.out.println(text1);
//通过class获取 Elements post_item = document.getElementsByClass("post_item"); for (Element e : post_item) { System.out.println(e.html()); }
// 根据属性名来查询DOM Elements widthElements = document.getElementsByAttribute("width"); for (Element e : widthElements) { System.out.println(e.toString()); }
// 根据属性名和属性值来查询DOM Elements targetElements=document.getElementsByAttributeValue("target", "_blank"); for(Element e:targetElements){ System.out.println(e.toString()); }
// Read the response entity as text; "utf-8" is the charset handed to EntityUtils.
String html = EntityUtils.toString(entity, "utf-8");

// Parse the page into a jsoup Document.
Document document = Jsoup.parse(html);

// Find every post link: anchors inside an h3 under .post_item_body under .post_item.
Elements postLinks = document.select(".post_item .post_item_body h3 a");
for (Element e : postLinks) {
    System.out.println("博客标题:" + e.text());
    System.out.println("-------------");
}

// <a> elements that carry an href attribute.
Elements hrefElements = document.select("a[href]");
for (Element e : hrefElements) {
    System.out.println(e.toString());
    System.out.println("-------------");
}

// <img> nodes whose src attribute ends with ".png".
Elements imgElements = document.select("img[src$=.png]");
for (Element e : imgElements) {
    System.out.println(e.toString());
    System.out.println("-------------");
}

// Take the first <title> element and print its text.
// Elements.first() returns null when nothing matched, so guard before dereferencing.
Element element = document.getElementsByTag("title").first();
if (element != null) {
    String title = element.text();
    System.out.println("网页标题是:" + title);
}
// 带有href属性的a元素 Elements hrefElements = document.select("a[href]"); for (Element e : hrefElements) { System.out.println(e.toString()); System.out.println("-------------");