利用 JDK 自带的 Document + XPath 解析 XML,记录一下
准备工作
inventory.dtd 和 inventory.xml 文件
dtd 中的 PCDATA 的意思是被解析的字符数据(parsed character data)。可把字符数据想象为 XML 元素的开始标签与结束标签之间的文本。PCDATA 是会被解析器解析的文本。
CDATA 的意思是字符数据(character data)。CDATA 是不会被解析器解析的文本。
- inventory.dtd
<?xml version="1.0" encoding="UTF-8"?>
<!ELEMENT inventory (book+)>
<!ELEMENT book (title,author,dynasty,price)>
<!ATTLIST book year CDATA #REQUIRED>
<!ELEMENT title (#PCDATA)>
<!ELEMENT author (#PCDATA)>
<!ELEMENT dynasty (#PCDATA)>
<!ELEMENT price (#PCDATA)>
- inventory.xml
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE inventory SYSTEM "D:\\work-space\\springboot\\src\\main\\resources\\inventory.dtd">
<inventory>
<book year="2012">
<title>菜根谭</title>
<author>洪应明</author>
<dynasty>明朝</dynasty>
<price>38</price>
</book>
<book year="2013">
<title>曾国藩家书</title>
<author>曾国藩</author>
<dynasty>清朝</dynasty>
<price>70</price>
</book>
<book year="2014">
<title>高等代数</title>
<author>丘维声</author>
<dynasty>中华人民共和国</dynasty>
<price>86</price>
</book>
</inventory>
- 工具类
借鉴了博客【用DOM解析XML ,用xpath快速查询XML节点】的内容
package com.me.util;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import java.io.File;
/**
 * Demonstrates parsing an XML file with the JDK's DOM parser and querying it with XPath.
 *
 * <p>The document is parsed with DTD validation enabled. Parser diagnostics and the query
 * results are printed to standard output. Created 2020/6/10 16:37.
 *
 * @author yanyg
 */
public class ParseXmlUtil {

    /** Selects the title text of every book whose price is greater than 80. */
    private static final String XPATH_EXPRESSION = "//book[price>80]/title/text()";

    /** XML file parsed when no path is supplied on the command line. */
    private static final String DEFAULT_XML_PATH =
            "D:\\work-space\\springboot\\src\\main\\resources\\inventory.xml";

    /**
     * Parses the inventory XML and prints the nodes matched by {@link #XPATH_EXPRESSION}.
     *
     * @param args optional; {@code args[0]} is the path of the XML file to parse. When absent,
     *             {@link #DEFAULT_XML_PATH} is used.
     */
    public static void main(String[] args) {
        String filePath = (args != null && args.length > 0) ? args[0] : DEFAULT_XML_PATH;
        try {
            Document document = loadDocument(filePath);
            // Query the document with the configured expression.
            processParseXmlWithXpath(document, XPATH_EXPRESSION);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads the XML file at {@code filePath} into a DOM {@link Document} using a validating parser.
     *
     * @param filePath path of the XML file
     * @return the parsed document
     * @throws Exception if the parser cannot be configured or the file cannot be read or parsed
     */
    private static Document loadDocument(String filePath) throws Exception {
        DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
        // Enable DTD validation.
        documentBuilderFactory.setValidating(true);
        documentBuilderFactory.setNamespaceAware(false);
        documentBuilderFactory.setIgnoringComments(true);
        // Dropping element-content whitespace relies on the content model, hence on validation.
        documentBuilderFactory.setIgnoringElementContentWhitespace(true);
        documentBuilderFactory.setCoalescing(false);
        documentBuilderFactory.setExpandEntityReferences(true);

        DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
        // Report parser diagnostics instead of relying on the parser's default handling.
        documentBuilder.setErrorHandler(new ErrorHandler() {
            @Override
            public void warning(SAXParseException exception) throws SAXException {
                System.out.println("warn:" + exception.getMessage());
            }

            @Override
            public void error(SAXParseException exception) throws SAXException {
                System.out.println("error:" + exception.getMessage());
            }

            @Override
            public void fatalError(SAXParseException exception) throws SAXException {
                System.out.println("fatalError:" + exception.getMessage());
                // The document is unusable after a fatal error; rethrow so parsing stops
                // and the caller never queries a broken document.
                throw exception;
            }
        });

        return documentBuilder.parse(new File(filePath));
    }

    /**
     * Evaluates {@code xPathExpression} against {@code document} and prints each matching node
     * as {@code nodeName=textContent}.
     *
     * <p>XPath syntax reference: https://blog.csdn.net/zlj_blog/article/details/54092534
     *
     * @param document        the document to query
     * @param xPathExpression the XPath expression; it must evaluate to a node set
     * @throws Exception if the expression cannot be evaluated
     */
    private static void processParseXmlWithXpath(Document document, String xPathExpression) throws Exception {
        XPathFactory xPathFactory = XPathFactory.newInstance();
        XPath xPath = xPathFactory.newXPath();
        NodeList nodeList = (NodeList) xPath.evaluate(xPathExpression, document, XPathConstants.NODESET);
        for (int i = 0; i < nodeList.getLength(); i++) {
            Node item = nodeList.item(i);
            System.out.println(item.getNodeName() + "=" + item.getTextContent());
        }
    }

    /**
     * Variant of {@link #processParseXmlWithXpath(Document, String)} that compiles the expression
     * first and prints each matching node as {@code nodeName=nodeValue}.
     *
     * <p>Not called from {@code main}; kept as an alternative example.
     * NOTE(review): per the DOM specification the node value of an element node is {@code null},
     * so this variant is presumably meant for expressions selecting text or attribute nodes.
     *
     * @param document        the document to query
     * @param xPathExpression the XPath expression; it must evaluate to a node set
     * @throws Exception if the expression cannot be compiled or evaluated
     */
    private static void processParseXmlWithXpathExpress(Document document, String xPathExpression) throws Exception {
        XPathFactory xPathFactory = XPathFactory.newInstance();
        XPath xPath = xPathFactory.newXPath();
        XPathExpression expression = xPath.compile(xPathExpression);
        Object result = expression.evaluate(document, XPathConstants.NODESET);
        if (result instanceof NodeList) {
            NodeList nodes = (NodeList) result;
            for (int i = 0; i < nodes.getLength(); i++) {
                System.out.println(String.format("%s=%s", nodes.item(i).getNodeName(), nodes.item(i).getNodeValue()));
            }
        }
    }
}