1. 所需依赖(Maven):
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.3</version>
</dependency>
2. Java 代码:
package sf.jsoup.microcreativity;
import java.io.File;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.safety.Whitelist;
import org.jsoup.Connection;
/**
 * Small jsoup demo showing three ways to obtain a {@link Document} and print its body.
 *
 * <p>Only example 3 (parsing a local file) is active. Examples 1 and 2 are kept below as
 * commented-out reference code.
 */
public class ParseHtml {

    /**
     * Runs the active example: parses a local HTML file as UTF-8 and prints its body
     * to standard output.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        // Example 1 (parse an HTML string and print its body).
        // Note: the return value of Jsoup.clean(...) is not used in this snippet.
        //
        // String html = "<html><head><title>First parse</title></head>"
        //         + "<body><p>Parsed HTML into a doc.</p></body></html>";
        // Document doc = Jsoup.parse(html);
        // Jsoup.clean(html, new Whitelist());
        // System.out.println(doc.body());

        // Example 2 (fetch a web URL with a POST request and print its body).
        //
        // String url = "https://www.hao123.com/";
        // Connection connection = Jsoup.connect(url);
        // connection.data("query", "java");
        // connection.userAgent("Mozilla");
        // connection.cookie("auth", "token");
        // connection.timeout(3000);
        // try {
        //     Document doc = connection.post();
        //     System.out.println(doc.body());
        // } catch (IOException e) {
        //     e.printStackTrace();
        // }

        // Example 3 (parse a local HTML file and print its body).
        printLocalBody(new File("d:/user/xxxxxx/桌面/in.html"));
    }

    /**
     * Parses the given file as UTF-8 HTML and prints the document body to standard output.
     * If reading the file fails, the stack trace is printed and nothing else is written.
     *
     * @param htmlFile the HTML file to parse
     */
    private static void printLocalBody(File htmlFile) {
        final Document parsed;
        try {
            parsed = Jsoup.parse(htmlFile, "UTF-8");
        } catch (IOException readFailure) {
            readFailure.printStackTrace();
            return;
        }
        System.out.println(parsed.body());
    }
}
3.Jsoup中文文档, http://download.csdn.net/detail/icoudsoft_saas/9481597