文章目录
Jsoup:工具类
可以解析HTML或者xml文档,返回Document
parse:解析html或xml文档,返回Document
三种解析方式
parse(File in, String charsetName) :
解析本地的 XML 或 HTML 文件(示例见 JsoupDemo1_StudentXml)。
parse(String html):
解析 HTML 或 XML 字符串(示例见 JsoupDemo2_ParseString)。
parse(URL url, int timeoutMillis):
通过网络路径(URL)获取并解析 HTML 或 XML 文档,timeoutMillis 为超时时间(毫秒)(示例见 JsoupDemo3_Jsoup_URL)。
student.xml
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Sample data for the Jsoup demos: a <students> root holding two <student>
  records. Each record carries a "number" attribute and <name>, <age> and
  <sex> children. The first record's <name> is split into nested <xing> and
  <ming> elements, while the second record's <name> is plain text.
-->
<students>
<student number="heima_0001">
<name id="itcast">
<xing>张</xing>
<ming>三</ming>
</name>
<age>100</age>
<sex>female</sex>
</student>
<student number="heima_0002">
<name>lisi</name>
<age id="sili">45</age>
<sex>female</sex>
</student>
</students>
1、Jsoup快速入门(解析XML文件)
/**
 * Jsoup quick start: loads {@code student.xml} from the classpath, parses it
 * into a {@code Document}, and prints the number of {@code <name>} elements
 * followed by the text of each one.
 */
public class JsoupDemo1_StudentXml {

    /** Classpath-relative name of the sample document. */
    private static final String RESOURCE_NAME = "student.xml";

    /**
     * Entry point of the demo.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the resource is missing from the classpath or cannot be read
     */
    public static void main(String[] args) throws IOException {
        // Read the resource as a stream rather than converting its URL to a file path:
        // URL paths are percent-encoded, so new File(url.getPath()) breaks when the
        // classpath contains spaces or non-ASCII characters, and it cannot work at all
        // when the resource is packaged inside a jar.
        try (java.io.InputStream in = JsoupDemo1_StudentXml.class.getClassLoader()
                .getResourceAsStream(RESOURCE_NAME)) {
            if (in == null) {
                // Fail with a descriptive error instead of a NullPointerException.
                throw new java.io.FileNotFoundException(
                        RESOURCE_NAME + " was not found on the classpath");
            }

            // Parse the document into an in-memory DOM tree.
            Document document = Jsoup.parse(in, "utf-8", "");

            // Collect every <name> element and print how many there are, then each text.
            Elements elements = document.getElementsByTag("name");
            System.out.println(elements.size());
            for (Element el : elements) {
                System.out.println(el.text());
            }
            // To read only the first match, use elements.get(0).text().
        }
    }
}
2、parse(String html):解析字符串
/**
 * Demonstrates {@code Jsoup.parse(String html)}: builds a markup string in
 * memory, parses it, and prints the resulting document.
 */
public class JsoupDemo2_ParseString {

    /**
     * Entry point of the demo.
     *
     * @param args command-line arguments (unused)
     * @throws IOException declared by the original signature
     */
    public static void main(String[] args) throws IOException {
        // The markup is assembled line by line; joining with "\n" puts a newline
        // between lines and none after the last one.
        String markup = String.join("\n",
                "<!--?xml version=\"1.0\" encoding=\"UTF-8\" ?-->",
                "<html><head></head><body>",
                " <students> ",
                " <student number=\"heima_0001\"> ",
                " <name>zhangsan</name><age>100</age><sex>female</sex> ",
                " </student> ",
                " <student number=\"heima_0002\"> ",
                " <name>lisi</name><age>45</age><sex>female</sex> ",
                " </student> ",
                " </students>",
                " </body></html>");

        // Parse the string and print the parsed document.
        Document parsed = Jsoup.parse(markup);
        System.out.println(parsed);
    }
}
3、解析URL parse(URL url, int timeoutMillis)
通过网络路径来解析html或xml文档对象
/**
 * Demonstrates {@code Jsoup.parse(URL url, int timeoutMillis)}: fetches a
 * document from a network location, parses it, and prints the result.
 */
public class JsoupDemo3_Jsoup_URL {

    /** Address of the page fetched by the demo. */
    private static final String PAGE_URL =
            "https://blog.csdn.net/weixin_41571334/article/details/84631904";

    /**
     * Request timeout in milliseconds. The previous value of 1000000 ms
     * (about 16.7 minutes) would leave the demo hanging on an unresponsive host.
     */
    private static final int TIMEOUT_MILLIS = 10_000;

    /**
     * Entry point of the demo. I/O failures are reported on standard error
     * rather than propagated.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            // A URL represents the location of a resource on the network.
            URL url = new URL(PAGE_URL);
            // Fetch the resource and parse it into a document.
            Document document = Jsoup.parse(url, TIMEOUT_MILLIS);
            System.out.println(document);
        } catch (IOException e) {
            // Report on stderr, with the URL for context, so failures are not
            // mixed into the normal output stream.
            System.err.println("Failed to fetch or parse " + PAGE_URL + ": " + e);
        }
    }
}