Jsoup 作为 HTML 解析库中的新兴之星，用起来也非常方便。
package com.dzy.jsoup;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Small command-line demo of the jsoup HTML parser.
 *
 * <p>{@link #first()} shows the ways of obtaining a {@link Document} (from a
 * string, from a body fragment, from a URL). {@code extractingdata()} shows how
 * to pull links, attributes and text out of a parsed document.
 */
public class TestJsoup {

    /** Page fetched by the network examples. */
    private static final String SITE_URL = "http://www.jsu.edu.cn";

    /** id of the element on {@link #SITE_URL} whose links are listed (just the id of one particular div). */
    private static final String CONTENT_ID = "126097369401686481";

    /**
     * Entry point; runs the data-extraction demo.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Call TestJsoup.first() here instead to run the parsing demo.
        TestJsoup.extractingdata();
    }

    /**
     * Parses a fixed HTML string in two ways and prints the results, then
     * fetches {@link #SITE_URL} and prints its title.
     *
     * <p>If the page cannot be fetched, the stack trace is printed and the
     * title line is skipped.
     */
    public static void first() {
        String html = "<html><head><title>First parse</title></head>"
                + "<body><p>Parsed HTML into a doc.</p></body></html>";

        Document doc = Jsoup.parse(html);
        System.out.println(doc.text());      // plain text of the document
        System.out.println(doc.outerHtml()); // the document as html
        System.out.println("----------------------------------------");

        doc = Jsoup.parseBodyFragment(html); // treat the html as if it were body content
        Element body = doc.body();           // take the body part
        System.out.println(body.outerHtml());
        System.out.println("---------------------------------------------");

        try {
            // .data, .userAgent, .cookie, .timeout etc. can also be set on the
            // connection, and the request can be sent as a POST.
            Document remote = Jsoup.connect(SITE_URL).get();
            // Printed inside the try so a failed fetch never reports the title
            // of the locally parsed document as if it came from the URL.
            System.out.println(remote.title());
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("---------------------------------------------");

        // A local html file can be parsed as well, e.g.:
        //   File input = new File("/file/index.html");
        //   doc = Jsoup.parse(input, "UTF-8", "http://localhost/index.html");
    }

    /**
     * Prints the links found on the remote page, then extracts and prints the
     * parts of a link taken from a fixed HTML snippet.
     *
     * <p>The second part needs no network access and runs even when the remote
     * page could not be fetched.
     */
    private static void extractingdata() {
        printContentLinks();

        System.out.println("------------------解压特定地方----------------------");
        String html = "<p>An <a href='http://example.com/'><b>example</b></a> link.</p>";
        Document doc = Jsoup.parse(html);
        Element link = doc.select("a").first();

        System.out.println(doc.body().text()); // "An example link."
        System.out.println(link.attr("href")); // "http://example.com/"
        System.out.println(link.text());       // "example"
        System.out.println(link.outerHtml());  // the whole <a ...>...</a> element
        System.out.println(link.html());       // "<b>example</b>"
        System.out.println("------------------解压特定地方----------------------");
    }

    /**
     * Fetches {@link #SITE_URL} and prints href followed by text for every
     * {@code <a>} inside the element with id {@link #CONTENT_ID}.
     *
     * <p>A failed fetch or a missing element is reported and the listing is
     * skipped, instead of failing with a {@link NullPointerException}.
     */
    private static void printContentLinks() {
        Document doc;
        try {
            doc = Jsoup.connect(SITE_URL).get();
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }

        // getElementById yields null when no element carries the id.
        Element content = doc.getElementById(CONTENT_ID);
        if (content == null) {
            System.err.println("No element with id " + CONTENT_ID + " found at " + SITE_URL);
            return;
        }

        Elements links = content.getElementsByTag("a");
        for (Element link : links) {
            String linkhref = link.attr("href");
            String linkText = link.text();
            System.out.println(linkhref + linkText);
        }
    }
}
官方示例（example）地址：https://jsoup.org/cookbook/