import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.IOException;
/**
 * Small demo that builds a jsoup {@link Document} from three sources: an
 * in-memory HTML string, a remote URL and a local file. Each example prints
 * what it extracted to standard output.
 *
 * Created by Administrator on 2017/5/26.
 */
public class JsoupTest {

    /** Address fetched by the URL example. */
    private static final String REMOTE_URL = "http://www.baidu.com/";

    /** Local HTML file parsed by the file example. */
    private static final String LOCAL_HTML_PATH = "D:\\Test\\228.html";

    /** Charset name passed to jsoup when reading {@link #LOCAL_HTML_PATH}. */
    private static final String LOCAL_HTML_CHARSET = "UTF-8";

    /**
     * Runs the three examples in order. A failure in the URL or file example
     * is reported on standard error and does not stop the remaining examples.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        parseFromString();
        parseFromUrl();
        parseFromFile();
    }

    /** Example 1: parses an HTML string and prints its title and heading text. */
    private static void parseFromString() {
        // The heading is closed with </h1>; the earlier version opened a second
        // <h1> here instead, leaving the sample markup malformed.
        String htmlString = "<!DOCTYPE html>"
                + "<html>"
                + "<head>"
                + "<title>JSoup Example</title>"
                + "</head>"
                + "<body>"
                + "<h1>"
                + "HelloWorld"
                + "</h1>"
                + "</body>"
                + "</html>";

        Document html = Jsoup.parse(htmlString);
        String title = html.title();
        String h1 = html.body().getElementsByTag("h1").text();

        System.out.println("Input HTML String to JSoup :" + htmlString);
        System.out.println("After parsing, Title : " + title);
        System.out.println("Afte parsing, Heading : " + h1);
    }

    /** Example 2: fetches {@link #REMOTE_URL} and prints the page HTML and title. */
    private static void parseFromUrl() {
        Document doc;
        try {
            doc = Jsoup.connect(REMOTE_URL).get();
        } catch (IOException e) {
            // Without a document there is nothing to print; report and skip
            // rather than dereferencing null below.
            System.err.println("Could not fetch " + REMOTE_URL + ": " + e);
            return;
        }

        System.out.println("Input HTML String to JSoup :" + doc.html());
        System.out.println("Jsoup Can read HTML page from URL, title : " + doc.title());
    }

    /** Example 3: parses {@link #LOCAL_HTML_PATH} and prints its title and a selection. */
    private static void parseFromFile() {
        Document htmlFile;
        try {
            // The File overload is required here: Jsoup.parse(String, String)
            // treats its first argument as HTML text, not as a path.
            htmlFile = Jsoup.parse(new File(LOCAL_HTML_PATH), LOCAL_HTML_CHARSET);
        } catch (IOException e) {
            System.err.println("Could not read " + LOCAL_HTML_PATH + ": " + e);
            return;
        }

        System.out.println("Jsoup can also parse HTML file directly");
        System.out.println("title : " + htmlFile.title());

        // NOTE(review): this is an attribute selector (elements whose "span"
        // attribute equals the given value). If the intent was <span> elements
        // containing that text, the selector needs changing - confirm against
        // the target page before touching it.
        Elements spans = htmlFile.select("[span='注册号:']");
        System.out.println("\n" + spans.toString() + "\n");
    }
}