import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * A collection of small, self-contained jsoup demonstrations: parsing a full
 * HTML document, parsing a body fragment, fetching a page over HTTP, walking
 * links inside an element, and extracting attributes/text/HTML from a link.
 * Every demo writes its results to standard output.
 */
public class FirstExtract {

    /** Rule appended to each section heading printed by the demos. */
    private static final String RULE = " --------------------";

    /**
     * Program entry point. Only the link-navigation demo is executed; the
     * other demos ({@link #parseHtml()}, {@link #parseBody()},
     * {@link #parseUrl()}, {@link #extractElement()}) are available to be
     * called from here instead.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        navigation();
    }

    /**
     * Parses a complete HTML string and prints, in order, the whole document,
     * its head element, its body element and its title, each preceded by a
     * heading line.
     */
    public static void parseHtml() {
        heading("html");
        String markup = "<html><head><title>First parse</title></head>"
                + "<body><p>Parsed HTML into a doc.</p></body></html>";
        Document page = Jsoup.parse(markup);
        System.out.println(page);

        heading("html head");
        System.out.println(page.head());

        heading("html body");
        System.out.println(page.body());

        heading("html title");
        System.out.println(page.title());
    }

    /**
     * Parses an HTML fragment (note the unclosed {@code <div>}) with
     * {@code Jsoup.parseBodyFragment} and prints the resulting body element.
     */
    public static void parseBody() {
        Document page = Jsoup.parseBodyFragment("<div><p>Lorem ipsum.</p>");
        Element bodyElement = page.body();
        heading("body");
        System.out.println(bodyElement);
    }

    /**
     * Fetches a fixed URL with an HTTP GET and prints the parsed document.
     * If the fetch fails with an {@link IOException}, nothing is printed to
     * standard output and the stack trace is written instead.
     */
    public static void parseUrl() {
        String target = "http://book.sina.cn/prog/wapsite/books/h5/vipc.php?bid=232872&cid=343466&cp=1&sort=asc&PHPSESSID=bd8c678e4c42e771dbaa3ac125aaf009&vt=4";
        try {
            Document page = Jsoup.connect(target).get();
            heading("Url");
            System.out.println(page);
        } catch (IOException fetchFailure) {
            fetchFailure.printStackTrace();
        }
    }

    /**
     * Parses a document against a base URI, looks up the element with id
     * {@code content}, and for every {@code <a>} inside it prints three
     * lines: the raw {@code href} attribute, the {@code href} resolved to an
     * absolute URL via {@code absUrl}, and the link text.
     */
    public static void navigation() {
        String markup = "<html><head><title>First parse</title></head>"
                + "<body><p>Parsed HTML into a doc.</p><div id=content>"
                + "<a href='/hahaha.html'>hahaha</a>"
                + "<a href='bababa.html'>bababa</a>"
                + "</div></body></html>";
        // The base URI is what absUrl() resolves relative hrefs against.
        Document page = Jsoup.parse(markup, "http://192.168.3.84/gamestore/index.html");
        Elements anchors = page.getElementById("content").getElementsByTag("a");
        for (Element anchor : anchors) {
            System.out.println(anchor.attr("href"));
            System.out.println(anchor.absUrl("href"));
            System.out.println(anchor.text());
        }
    }

    /**
     * Selects the first {@code <a>} of a small paragraph and prints, in
     * order: the body text, the link's {@code href}, the link text, the
     * link's outer HTML and the link's inner HTML.
     */
    public static void extractElement() {
        Document page = Jsoup.parse(
                "<p>An <a href='http://example.com/'><b>example</b></a> link.</p>");
        Element anchor = page.select("a").first();

        System.out.println(page.body().text());   // "An example link."
        System.out.println(anchor.attr("href"));  // "http://example.com/"
        System.out.println(anchor.text());        // "example"
        System.out.println(anchor.outerHtml());   // "<a href="http://example.com/"><b>example</b></a>"
        System.out.println(anchor.html());        // "<b>example</b>"
    }

    /** Prints a section heading of the form {@code "Print the <what> ----…"}. */
    private static void heading(String what) {
        System.out.println("Print the " + what + RULE);
    }
}
// A small jsoup example (original title: "jsoup小例子")
// Source page footer: latest recommended article published 2020-02-15 15:17:44