import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.Test;
/**
 * Manual smoke tests that fetch the Baidu home page with jsoup and print parts of it.
 *
 * <p>Both tests require network access. I/O failures are only printed via
 * {@code printStackTrace()} and never rethrown, so a test still passes when the
 * site cannot be reached.
 */
public class JsoupParseHtml {

    /**
     * Fetches the page with {@code Jsoup.parse(URL, int)} (jsoup 1.* style) and prints
     * the inner HTML of the element whose id is {@code form1}.
     *
     * <p>If the page contains no such element, a diagnostic line is written to
     * {@code System.err} instead of failing with a {@code NullPointerException}.
     */
    @Test
    public void JsoupParse() {
        try {
            URL url = new URL("http://www.baidu.com");
            // The second argument is the fetch timeout in milliseconds.
            Document document = Jsoup.parse(url, 5000);

            // getElementById returns null when no element carries the id, so guard
            // before dereferencing; the page markup is outside our control.
            Element form = document.getElementById("form1");
            if (form == null) {
                System.err.println("No element with id \"form1\" found at " + url);
                return;
            }
            System.out.println(form.html());
        } catch (IOException e) {
            // MalformedURLException is a subclass of IOException, so this single
            // handler covers both the URL construction and the fetch.
            e.printStackTrace();
        }
    }

    /**
     * Fetches the page through the {@code Jsoup.connect(...)} connection API with a
     * "Mozilla" user agent and prints {@code val()} of every element matching the
     * CSS selector {@code .bg}, one value per line.
     */
    @Test
    public void JsoupParseBy2_9_1() {
        try {
            String url = "http://www.baidu.com";
            // Fetch and parse the home page.
            Document document = Jsoup.connect(url).userAgent("Mozilla").get();
            Elements htmls = document.select(".bg");
            for (Element element : htmls) {
                System.out.println(element.val());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
// Note: the jar imported by this code is jsoup1.7.2.jar