// selector 选择器的方法爬取 (scraping a page with jsoup's element-lookup methods)
package org.xdemo.example.jsoupdemo.extracter;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
public class Selector {
/**
 * Demonstrates jsoup's DOM lookup methods against a page fetched from
 * {@code http://xxx.com/}.
 *
 * <p>The page is downloaded, the element with id {@code content} is looked up,
 * and a series of {@code getElementsBy...} queries is issued against the
 * document and that element. The query results are intentionally discarded:
 * the calls only illustrate the available API.
 *
 * <p>If the page has no element with id {@code content}, a message is written
 * to standard error and the method returns. Any exception raised while fetching
 * or querying is caught and its stack trace is printed; nothing is rethrown.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    try {
        Document doc = Jsoup.connect("http://xxx.com/").get();

        /***** Fetch a single element *****/
        Element content = doc.getElementById("content");
        if (content == null) {
            // getElementById yields null when no element carries the id; without
            // this guard every use of content below would throw a NullPointerException.
            System.err.println("No element with id \"content\" found in the document");
            return;
        }

        /***** The following methods all return an Elements collection *****/
        org.jsoup.select.Elements links = content.getElementsByTag("a");
        doc.getElementsByClass("divClass");
        doc.getAllElements();
        doc.getElementsByAttribute("href");
        doc.getElementsByAttributeStarting("data-");
        doc.getElementsByAttributeValue("href", "http://xdemo.org");
        doc.getElementsByAttributeValueNot("href", "http://xdemo.org");
        doc.getElementsByAttributeValueContaining("href", "xdemo");
        doc.getElementsByAttributeValueEnding("href", "org");
        doc.getElementsByAttributeValueStarting("href", "http://xdemo");
        // Both overloads take the same regex: one as a compiled Pattern, one as a String.
        // The character class covers the range U+4E00 to U+9FA5.
        doc.getElementsByAttributeValueMatching("href", Pattern.compile("[\u4e00-\u9fa5]"));
        doc.getElementsByAttributeValueMatching("href", "[\u4e00-\u9fa5]");
        doc.getElementsByIndexEquals(0);
        doc.getElementsByIndexGreaterThan(0);
        doc.getElementsByIndexLessThan(10);

        // Walk every <a> inside the content element and read its target and label.
        // The values are unused here; they only show how to extract them.
        for (Element link : links) {
            String linkHref = link.attr("href");
            String linkText = link.text();
        }

        /************** Some other commonly used methods **************/
        doc.title();
        doc.text();
        content.addClass("newClass");
        content.attr("id");
        content.children();
        content.text();
        content.siblingElements();
    } catch (Exception e) {
        // Best-effort demo: report the failure and fall through to exit.
        e.printStackTrace();
    }
}