Java Code Examples for org.jsoup.select.Elements

http://www.programcreek.com/java-api-examples/index.php?api=org.jsoup.select.Elements

Code Example 1:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/integration/.

Source ParseTest.java

/**
 * Parses the smh-biz-article-1.html fixture and checks the document title,
 * the xml:lang attribute on the html element, and the number of direct
 * children of the ".articleBody" element.
 */
@Test
public void testSmhBizArticle() throws IOException {
    final String baseUri =
        "http://www.smh.com.au/business/the-boards-next-fear-the-female-quota-20100106-lteq.html";
    File in = getFile("/htmltests/smh-biz-article-1.html");
    Document doc = Jsoup.parse(in, "UTF-8", baseUri);

    // note that the apos in the source is a literal ’ (8217), not escaped or '
    String title = doc.title();
    assertEquals("The board’s next fear: the female quota", title);

    String lang = doc.select("html").attr("xml:lang");
    assertEquals("en", lang);

    Elements bodyChildren = doc.select(".articleBody > *");
    assertEquals(17, bodyChildren.size());
    // todo: more tests!
}
 

Code Example 2:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/integration/.

Source ParseTest.java

/**
 * Parses the google-ipod.html fixture and checks the page title, the number
 * of "h3.r > a" links, and the href values of the first two of them.
 */
@Test
public void testGoogleSearchIpod() throws IOException {
    final String baseUri = "http://www.google.com/search?hl=en&q=ipod&aq=f&oq=&aqi=g10";
    final String firstHref =
        "http://news.google.com/news?hl=en&q=ipod&um=1&ie=UTF-8&ei=uYlKS4SbBoGg6gPf-5XXCw&sa=X&oi=news_group&ct=title&resnum=1&ved=0CCIQsQQwAA";
    final String secondHref = "http://www.apple.com/itunes/";

    Document doc = Jsoup.parse(getFile("/htmltests/google-ipod.html"), "UTF-8", baseUri);
    assertEquals("ipod - Google Search", doc.title());

    Elements hits = doc.select("h3.r > a");
    assertEquals(12, hits.size());
    assertEquals(firstHref, hits.get(0).attr("href"));
    assertEquals(secondHref, hits.get(1).attr("href"));
}
 

Code Example 3:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/parser/.

Source AttributeParseTest.java

/** Checks which entity-like sequences inside href attribute values get unescaped. */
@Test public void strictAttributeUnescapes() {
    String html = "<a id=1 href='?foo=bar&mid&lt=true'>One</a> <a id=2 href='?foo=bar&lt;qux&lg=1'>Two</a>";
    Elements anchors = Jsoup.parse(html).select("a");

    // first link: the value is expected to come back exactly as written
    String firstHref = anchors.first().attr("href");
    assertEquals("?foo=bar&mid&lt=true", firstHref);

    // second link: "&lt;" is expected to become "<", the rest is unchanged
    String lastHref = anchors.last().attr("href");
    assertEquals("?foo=bar<qux&lg=1", lastHref);
}
 

Code Example 4:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/parser/.

Source HtmlParserTest.java

/** Checks how unterminated option elements inside an unclosed select are parsed. */
@Test public void parsesUnterminatedOption() {
    // bit weird this -- browsers and spec get stuck in select until there's a </select>
    String html = "<body><p><select><option>One<option>Two</p><p>Three</p>";
    Elements opts = Jsoup.parse(html).select("option");

    assertEquals(2, opts.size());

    String firstText = opts.first().text();
    assertEquals("One", firstText);

    // the trailing paragraph text ends up inside the second option
    String lastText = opts.last().text();
    assertEquals("TwoThree", lastText);
}
 

Code Example 5:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/parser/.

Source HtmlParserTest.java

/** Checks that a textarea's content is reported by both text() and val(). */
@Test public void handlesTextArea() {
    Elements areas = Jsoup.parse("<textarea>Hello</textarea>").select("textarea");

    String asText = areas.text();
    assertEquals("Hello", asText);

    String asValue = areas.val();
    assertEquals("Hello", asValue);
}
 

Code Example 6:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/parser/.

Source HtmlParserTest.java

/**
 * Checks that bare li elements are not wrapped in an implicit ul, and that
 * an explicit ol with unclosed li/p children keeps its structure.
 */
@Test public void doesNotCreateImplicitLists() {
    // old jsoup used to wrap this in <ul>, but that's not to spec
    Document bare = Jsoup.parse("<li>Point one<li>Point two");

    Elements uls = bare.select("ul"); // should NOT have created a default ul.
    assertEquals(0, uls.size());

    Elements items = bare.select("li");
    assertEquals(2, items.size());
    String parentTag = items.first().parent().tagName();
    assertEquals("body", parentTag);

    // no fiddling with non-implicit lists
    Document ordered = Jsoup.parse("<ol><li><p>Point the first<li><p>Point the second");

    int ulCount = ordered.select("ul").size();
    assertEquals(0, ulCount);
    int olCount = ordered.select("ol").size();
    assertEquals(1, olCount);
    int liCount = ordered.select("ol li").size();
    assertEquals(2, liCount);
    int pCount = ordered.select("ol li p").size();
    assertEquals(2, pCount);
    int firstLiChildren = ordered.select("ol li").get(0).children().size();
    assertEquals(1, firstLiChildren); // one p in first li
}
 

Code Example 7:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/parser/.

Source HtmlParserTest.java

/**
 * Checks that only the first base href is applied: the document and all three
 * anchors report the same base URI, and relative hrefs resolve against it.
 */
@Test public void handlesBaseTags() {
    // only listen to the first base href
    String html = "<a href=1>#</a><base href='/2/'><a href='3'>#</a><base href='http://bar'><a href=/4>#</a>";
    Document doc = Jsoup.parse(html, "http://foo/");

    final String expectedBase = "http://foo/2/";
    assertEquals(expectedBase, doc.baseUri()); // gets set once, so doc and descendants have first only

    Elements anchors = doc.getElementsByTag("a");
    assertEquals(3, anchors.size());

    for (int i = 0; i < 3; i++) {
        assertEquals(expectedBase, anchors.get(i).baseUri());
    }

    String[] expectedAbs = {"http://foo/2/1", "http://foo/2/3", "http://foo/4"};
    for (int i = 0; i < expectedAbs.length; i++) {
        assertEquals(expectedAbs[i], anchors.get(i).absUrl("href"));
    }
}
 

Code Example 8:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/parser/.

Source HtmlParserTest.java

/** Checks that unknown (non-HTML) tags are parsed as elements with attributes and text. */
@Test public void handlesUnknownTags() {
    Document doc = Jsoup.parse("<div><foo title=bar>Hello<foo title=qux>there</foo></div>");
    Elements unknown = doc.select("foo");

    assertEquals(2, unknown.size());

    String firstTitle = unknown.first().attr("title");
    assertEquals("bar", firstTitle);

    String lastTitle = unknown.last().attr("title");
    assertEquals("qux", lastTitle);

    String lastText = unknown.last().text();
    assertEquals("there", lastText);
}
 

Code Example 9:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/parser/.

Source HtmlParserTest.java

/** Checks that bare dt/dd elements are kept as siblings without an implicit dl wrapper. */
@Test public void handlesUnclosedDefinitionLists() {
    // jsoup used to create a <dl>, but that's not to spec
    Document doc = Jsoup.parse("<dt>Foo<dd>Bar<dt>Qux<dd>Zug");

    int dlCount = doc.select("dl").size();
    assertEquals(0, dlCount); // no auto dl

    int termAndDefCount = doc.select("dt, dd").size();
    assertEquals(4, termAndDefCount);

    Elements terms = doc.select("dt");
    assertEquals(2, terms.size());

    String afterSecondTerm = terms.get(1).nextElementSibling().text();
    assertEquals("Zug", afterSecondTerm);
}
 

Code Example 10:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/nodes/.

Source ElementTest.java

/** Checks that a namespaced tag name ("abc:def") can be looked up by tag. */
@Test public void getNamespacedElementsByTag() {
    final String tag = "abc:def";
    Document doc = Jsoup.parse("<div><abc:def id=1>Hello</abc:def></div>");

    Elements found = doc.getElementsByTag(tag);
    assertEquals(1, found.size());

    String id = found.first().id();
    assertEquals("1", id);

    String name = found.first().tagName();
    assertEquals(tag, name);
}
 

Code Example 11:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/nodes/.

Source ElementTest.java

/** Checks that parents() lists ancestors from the nearest one outwards. */
@Test public void testGetParents() {
    Document doc = Jsoup.parse("<div><p>Hello <span>there</span></div>");
    Element span = doc.select("span").first();
    Elements ancestors = span.parents();

    // expected order: closest ancestor first
    String[] expectedTags = {"p", "div", "body", "html"};
    assertEquals(4, ancestors.size());
    for (int i = 0; i < expectedTags.length; i++) {
        assertEquals(expectedTags[i], ancestors.get(i).tagName());
    }
}
 

Code Example 12:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/nodes/.

Source ElementTest.java

/** Checks that elementSiblingIndex() counts element siblings only, ignoring the text between them. */
@Test public void testElementSiblingIndex() {
    Document doc = Jsoup.parse("<div><p>One</p>...<p>Two</p>...<p>Three</p>");
    Elements paragraphs = doc.select("p");

    // compared with == inside assertTrue, exactly as the assertion was originally written
    for (int i = 0; i < 3; i++) {
        assertTrue(i == paragraphs.get(i).elementSiblingIndex());
    }
}
 

Code Example 13:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/nodes/.

Source ElementTest.java

/** Checks attribute selectors whose attribute name contains a dash ("http-equiv"). */
@Test public void testGetElementsWithAttributeDash() {
    String html = "<meta http-equiv=content-type value=utf8 id=1> <meta name=foo content=bar id=2> <div http-equiv=content-type value=utf8 id=3>";
    String query = "meta[http-equiv=content-type], meta[charset]";

    Elements matched = Jsoup.parse(html).select(query);

    // only the first meta matches; the div carries the attribute but is not a meta
    assertEquals(1, matched.size());
    String matchedId = matched.first().id();
    assertEquals("1", matchedId);
}
 

Code Example 14:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/nodes/.

Source ElementTest.java

/**
 * Appends two new p elements to a div via appendElement and checks both the
 * resulting document HTML and the sibling index of every p.
 */
@Test public void testAddNewElement() {
    Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>");
    Element div = doc.getElementById("1");
    div.appendElement("p").text("there");
    div.appendElement("P").attr("class", "second").text("now");

    String expectedHtml = "<html><head></head><body><div id=\"1\"><p>Hello</p><p>there</p><p class=\"second\">now</p></div></body></html>";
    String actualHtml = TextUtil.stripNewlines(doc.html());
    assertEquals(expectedHtml, actualHtml);

    // check sibling index (with short circuit on reindexChildren):
    int expectedIndex = 0;
    for (Element p : doc.select("p")) {
        assertEquals(expectedIndex, p.siblingIndex);
        expectedIndex++;
    }
}
 

Code Example 15:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/nodes/.

Source ElementTest.java

/**
 * Prepends a row to a table's tbody and checks both the resulting body HTML
 * and the sibling index of every tr.
 */
@Test public void testPrependRowToTable() {
    Document doc = Jsoup.parse("<table><tr><td>1</td></tr></table>");
    Element tbody = doc.select("tbody").first();
    tbody.prepend("<tr><td>2</td></tr>");

    String expectedHtml = "<table><tbody><tr><td>2</td></tr><tr><td>1</td></tr></tbody></table>";
    String actualHtml = TextUtil.stripNewlines(doc.body().html());
    assertEquals(expectedHtml, actualHtml);

    // check sibling index (reindexChildren):
    int expectedIndex = 0;
    for (Element row : doc.select("tr")) {
        assertEquals(expectedIndex, row.siblingIndex);
        expectedIndex++;
    }
}
 

Code Example 16:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/nodes/.

Source ElementTest.java

/**
 * Appends an HTML fragment to a div and checks both the div's inner HTML
 * and the sibling index of every p.
 */
@Test public void testAddNewHtml() {
    Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>");
    Element div = doc.getElementById("1");
    div.append("<p>there</p><p>now</p>");

    String innerHtml = TextUtil.stripNewlines(div.html());
    assertEquals("<p>Hello</p><p>there</p><p>now</p>", innerHtml);

    // check sibling index (no reindexChildren):
    int expectedIndex = 0;
    for (Element p : doc.select("p")) {
        assertEquals(expectedIndex, p.siblingIndex);
        expectedIndex++;
    }
}
 

Code Example 17:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/nodes/.

Source ElementTest.java

/**
 * Prepends an HTML fragment to a div and checks both the div's inner HTML
 * and the sibling index of every p.
 */
@Test public void testPrependNewHtml() {
    Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>");
    Element div = doc.getElementById("1");
    div.prepend("<p>there</p><p>now</p>");

    String innerHtml = TextUtil.stripNewlines(div.html());
    assertEquals("<p>there</p><p>now</p><p>Hello</p>", innerHtml);

    // check sibling index (reindexChildren):
    int expectedIndex = 0;
    for (Element p : doc.select("p")) {
        assertEquals(expectedIndex, p.siblingIndex);
        expectedIndex++;
    }
}
 

Code Example 18:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/nodes/.

Source ElementTest.java

/** Checks hasText() on a container, on a paragraph with text, and on an empty paragraph. */
@Test public void testHasText() {
    Document doc = Jsoup.parse("<div><p>Hello</p><p></p></div>");
    Element container = doc.select("div").first();
    Elements paragraphs = doc.select("p");

    boolean containerHasText = container.hasText();
    assertTrue(containerHasText);

    boolean filledHasText = paragraphs.first().hasText();
    assertTrue(filledHasText);

    boolean emptyHasText = paragraphs.last().hasText();
    assertFalse(emptyHasText);
}
 

Code Example 19:

  1 
vote

From project jsoup, under directory /src/test/java/org/jsoup/nodes/.

Source ElementTest.java

/** Checks that siblingElements() returns the other element children and excludes the element itself. */
@Test public void elementIsNotASiblingOfItself() {
    Document doc = Jsoup.parse("<div><p>One<p>Two<p>Three</div>");
    Element middle = doc.select("p").get(1);
    assertEquals("Two", middle.text());

    Elements others = middle.siblingElements();
    assertEquals(2, others.size());

    String[] expectedHtml = {"<p>One</p>", "<p>Three</p>"};
    for (int i = 0; i < expectedHtml.length; i++) {
        assertEquals(expectedHtml[i], others.get(i).outerHtml());
    }
}
 

Code Example 20:

  1 
vote

From project jsoup, under directory /src/main/java/org/jsoup/examples/.

Source ListLinks.java

/**
 * Fetches the URL given as the single command-line argument and prints three
 * listings: elements with a src attribute, link elements with an href, and
 * anchors with an href.
 *
 * @param args exactly one element: the URL to fetch
 * @throws IOException declared for the fetch performed via Jsoup.connect(url).get()
 */
public static void main(String[] args) throws IOException {
    Validate.isTrue(args.length == 1, "usage: supply url to fetch");
    final String url = args[0];
    print("Fetching %s...", url);

    final Document doc = Jsoup.connect(url).get();
    final Elements media = doc.select("[src]");
    final Elements imports = doc.select("link[href]");
    final Elements links = doc.select("a[href]");

    print("\nMedia: (%d)", media.size());
    for (Element src : media) {
        if (!src.tagName().equals("img")) {
            print(" * %s: <%s>", src.tagName(), src.attr("abs:src"));
            continue;
        }
        // images additionally report their dimensions and a shortened alt text
        print(" * %s: <%s> %sx%s (%s)",
                src.tagName(), src.attr("abs:src"), src.attr("width"), src.attr("height"),
                trim(src.attr("alt"), 20));
    }

    print("\nImports: (%d)", imports.size());
    for (Element imported : imports) {
        print(" * %s <%s> (%s)", imported.tagName(), imported.attr("abs:href"), imported.attr("rel"));
    }

    print("\nLinks: (%d)", links.size());
    for (Element anchor : links) {
        print(" * a: <%s>  (%s)", anchor.attr("abs:href"), trim(anchor.text(), 35));
    }
}
 

Code Example 21:

  1 
vote

From project jsoup, under directory /src/main/java/org/jsoup/nodes/.

Source Document.java

/**
 * Collapses all elements with the given tag into the first one found: child
 * nodes of every later duplicate are moved to the first element and the
 * duplicates are removed. Afterwards the first element is re-parented under
 * {@code htmlEl} if that is not already its parent.
 *
 * @param tag    tag name of the elements to merge
 * @param htmlEl element that must end up as the parent of the merged element
 */
private void normaliseStructure(String tag, Element htmlEl) {
    Elements matches = this.getElementsByTag(tag);
    Element master = matches.first(); // will always be available as created above if not existent
    final int matchCount = matches.size();

    if (matchCount > 1) { // dupes, move contents to master
        List<Node> orphans = new ArrayList<Node>();
        for (int i = 1; i < matchCount; i++) {
            Node dupe = matches.get(i);
            orphans.addAll(dupe.childNodes);
            dupe.remove();
        }

        for (Node orphan : orphans) {
            master.appendChild(orphan);
        }
    }

    // ensure parented by <html>
    Element currentParent = master.parent();
    if (!currentParent.equals(htmlEl)) {
        htmlEl.appendChild(master); // includes remove()
    }
}
 

Code Example 22:

  1 
vote

From project jsoup, under directory /src/main/java/org/jsoup/nodes/.

Source Element.java

/**
 * Collects this element's ancestors by delegating to
 * {@code accumulateParents}.
 *
 * @return a new Elements holding the collected ancestors
 */
public Elements parents() {
    final Elements ancestors = new Elements();
    accumulateParents(this, ancestors);
    return ancestors;
}
 

Code Example 23:

  1 
vote

From project jsoup, under directory /src/main/java/org/jsoup/nodes/.

Source Element.java

/**
 * Walks up from {@code el}, appending each ancestor to {@code parents} in
 * nearest-first order. The walk stops when there is no parent or when the
 * parent's tag name is "#root" (that node is not added).
 *
 * @param el      element whose ancestors are collected
 * @param parents list that receives the ancestors
 */
private static void accumulateParents(Element el, Elements parents) {
    Element ancestor = el.parent();
    while (ancestor != null && !ancestor.tagName().equals("#root")) {
        parents.add(ancestor);
        ancestor = ancestor.parent();
    }
}
 

Code Example 24:

  1 
vote

From project jsoup, under directory /src/main/java/org/jsoup/nodes/.

Source Element.java

/**
 * Builds a list of this element's child nodes that are themselves elements,
 * in child order. Non-element child nodes are skipped.
 *
 * @return a new Elements wrapping the element children
 */
public Elements children() {
    // create on the fly rather than maintaining two lists. if gets slow, memoize, and mark dirty on change
    final List<Element> elementChildren = new ArrayList<Element>();
    for (Node child : childNodes) {
        if (!(child instanceof Element)) {
            continue;
        }
        elementChildren.add((Element) child);
    }
    return new Elements(elementChildren);
}
 

Code Example 25:

  1 
vote

From project jsoup, under directory /src/main/java/org/jsoup/nodes/.

Source Element.java

/**
 * Returns the element children of this element's parent, excluding this
 * element itself (compared by identity). Without a parent the result is empty.
 *
 * @return the sibling elements, in the parent's child order
 */
public Elements siblingElements() {
    if (parentNode == null) {
        return new Elements(0);
    }

    final List<Element> allChildren = parent().children();
    final Elements others = new Elements(allChildren.size() - 1);
    for (Element candidate : allChildren) {
        if (candidate == this) {
            continue;
        }
        others.add(candidate);
    }
    return others;
}
 

Code Example 26:

  1 
vote

From project jsoup, under directory /src/main/java/org/jsoup/nodes/.

Source Element.java

/**
 * Collects the elements matched by a tag evaluator, starting from this element.
 * The tag name is lower-cased and trimmed before matching.
 *
 * @param tagName tag name to look for; validated as non-empty
 * @return the elements matched by the tag evaluator
 */
public Elements getElementsByTag(String tagName) {
    Validate.notEmpty(tagName);
    // NOTE(review): toLowerCase() uses the default locale — confirm this matches how tag names are normalised elsewhere
    final String normalised = tagName.toLowerCase().trim();
    final Evaluator.Tag matcher = new Evaluator.Tag(normalised);
    return Collector.collect(matcher, this);
}
 

Code Example 27:

  1 
vote

From project jsoup, under directory /src/main/java/org/jsoup/nodes/.

Source Element.java

/**
 * Finds the first element matched by an id evaluator, starting from this element.
 *
 * @param id id to look for; validated as non-empty
 * @return the first matching element, or {@code null} when nothing matches
 */
public Element getElementById(String id) {
    Validate.notEmpty(id);

    final Elements found = Collector.collect(new Evaluator.Id(id), this);
    return found.size() > 0 ? found.get(0) : null;
}
 

Code Example 28:

  1 
vote

From project jsoup, under directory /src/main/java/org/jsoup/nodes/.

Source Element.java

/**
 * Collects the elements matched by a class evaluator, starting from this element.
 *
 * @param className class name to look for; validated as non-empty
 * @return the elements matched by the class evaluator
 */
public Elements getElementsByClass(String className) {
    Validate.notEmpty(className);
    final Evaluator.Class matcher = new Evaluator.Class(className);
    return Collector.collect(matcher, this);
}
 

Code Example 29:

  1 
vote

From project jsoup, under directory /src/main/java/org/jsoup/nodes/.

Source Element.java

/**
 * Collects the elements matched by an attribute evaluator, starting from this
 * element. The key is trimmed and lower-cased before matching.
 *
 * @param key attribute key to look for; validated as non-empty
 * @return the elements matched by the attribute evaluator
 */
public Elements getElementsByAttribute(String key) {
    Validate.notEmpty(key);
    // NOTE(review): toLowerCase() uses the default locale — confirm this matches how attribute keys are normalised elsewhere
    final String normalised = key.trim().toLowerCase();
    final Evaluator.Attribute matcher = new Evaluator.Attribute(normalised);
    return Collector.collect(matcher, this);
}
 

Code Example 30:

  1 
vote

From project jsoup, under directory /src/main/java/org/jsoup/nodes/.

Source Element.java

/**
 * Collects the elements matched by an attribute-prefix evaluator, starting
 * from this element. The prefix is trimmed and lower-cased before matching.
 *
 * @param keyPrefix attribute key prefix to look for; validated as non-empty
 * @return the elements matched by the attribute-prefix evaluator
 */
public Elements getElementsByAttributeStarting(String keyPrefix) {
    Validate.notEmpty(keyPrefix);
    // NOTE(review): toLowerCase() uses the default locale — confirm this matches how attribute keys are normalised elsewhere
    final String normalised = keyPrefix.trim().toLowerCase();
    final Evaluator.AttributeStarting matcher = new Evaluator.AttributeStarting(normalised);
    return Collector.collect(matcher, this);
}
 

Code Example 31:

  1 
vote

From project jsoup, under directory /src/main/java/org/jsoup/nodes/.

Source Element.java

/**
 * Compiles {@code regex} and delegates to the Pattern-based overload.
 *
 * @param key   attribute key, passed through to the overload unchanged
 * @param regex regular expression source to compile
 * @return whatever the Pattern-based overload returns
 * @throws IllegalArgumentException if {@code regex} is not a valid pattern;
 *         the original PatternSyntaxException is attached as the cause
 */
public Elements getElementsByAttributeValueMatching(String key, String regex) {
    final Pattern compiled;
    try {
        compiled = Pattern.compile(regex);
    } catch (PatternSyntaxException syntaxError) {
        throw new IllegalArgumentException("Pattern syntax error: " + regex, syntaxError);
    }
    return getElementsByAttributeValueMatching(key, compiled);
}


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值