Jsoup的Maven坐标:
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.7.2</version>
</dependency>
Jsoup解析HTML得到Document的几种方式:
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.IOException;
/**
 * Demonstrates several ways of obtaining a jsoup {@code Document} from HTML:
 * from an in-memory string, from a URL, from a file (shown as a commented
 * example only), and from a body fragment.
 */
public class JsoupDom {

    /** Minimal, complete HTML page used for the string-parsing example. */
    private static final String SAMPLE_PAGE =
            "<!DOCTYPE html>\n"
                    + "<html lang=\"en\">\n"
                    + "<head>\n"
                    + " <meta charset=\"UTF-8\">\n"
                    + " <title>Title</title>\n"
                    + "</head>\n"
                    + "<body>\n"
                    + "\n"
                    + "</body>\n"
                    + "</html>";

    /**
     * Runs each parsing approach in turn and prints a small result for it.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if the HTTP fetch in approach 2 fails
     */
    public static void main(String[] args) throws IOException {
        // Approach 1: parse a full HTML document held in a string.
        Document fromString = Jsoup.parse(SAMPLE_PAGE);
        String pageTitle = fromString.title();
        System.out.println(pageTitle);

        // Approach 2: fetch a page over HTTP and parse the response.
        Document fromUrl = Jsoup.connect("http://www.bingosoft.net").get();
        Elements cityHeadings = fromUrl.select(".city h3");
        // Prints the matched elements' markup, a separator, then their combined text.
        System.out.println(cityHeadings + ",,," + cityHeadings.text());

        // Approach 3: parse a local file with an explicit charset.
        // Left disabled because "html_path" is only a placeholder path.
        // Document fromFile = Jsoup.parse(new File("html_path"), "UTF-8");

        // Approach 4: parse an HTML fragment rather than a whole document.
        Document fromFragment = Jsoup.parseBodyFragment("<a href='#'>连接</a>");
        String fragmentText = fromFragment.text();
        System.out.println(fragmentText);
    }
}