Java 解析 XML
四种解析方式:
- DOM
- SAX
- JDOM
- DOM4J
DOM、SAX 是解析 XML 的基础方式, JDOM、DOM4J 是基于底层 API 的高级封装。DOM 是通用的,具有跨语言、跨平台性,而 JDOM 和 DOM4J 则是面向 Java 语言的。
DOM
DOM 在解析 XML 文档时,会把文档中的所有元素,按照其出现的层次关系,解析成一个个 Node 对象(节点)。
- DomUtils
import org.w3c.dom.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
/**
 * Reads and writes the sample "bookstore" XML document with the W3C DOM API.
 * <p>
 * Both entry points are best-effort: any failure is reported through
 * {@link Exception#printStackTrace()} instead of being propagated to the caller.
 *
 * @author LeifChen (2019-01-14)
 */
public class DomUtils {

    /**
     * Parses the XML document at {@code path} and prints, for every {@code <book>}
     * element, its attributes followed by the name and text of each child element.
     *
     * @param path location of the XML document, passed to {@link DocumentBuilder#parse(String)}
     */
    public static void parse(String path) {
        try {
            Document document = getBuilder().parse(path);
            NodeList bookList = document.getElementsByTagName("book");
            for (int i = 0; i < bookList.getLength(); i++) {
                System.out.println("=====开始解析第" + (i + 1) + "本书=====");
                Node book = bookList.item(i);

                NamedNodeMap attrs = book.getAttributes();
                for (int j = 0; j < attrs.getLength(); j++) {
                    Node attr = attrs.item(j);
                    System.out.println("属性: " + attr.getNodeName() + "--" + attr.getNodeValue());
                }

                NodeList childNodes = book.getChildNodes();
                for (int j = 0; j < childNodes.getLength(); j++) {
                    Node child = childNodes.item(j);
                    // Skip the whitespace-only text nodes that sit between child elements.
                    if (child.getNodeType() == Node.ELEMENT_NODE) {
                        System.out.println("节点: " + child.getNodeName() + "--" + child.getTextContent());
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes a sample document
     * {@code <bookstore><book id="1"><name><![CDATA[ ... ]]></name></book></bookstore>}
     * to {@code path}, indented.
     *
     * @param path file to create or overwrite
     */
    public static void create(String path) {
        try {
            Document document = getBuilder().newDocument();
            document.setXmlStandalone(true);

            Element bookStore = document.createElement("bookstore");
            Element book = document.createElement("book");
            book.setAttribute("id", "1");
            Element name = document.createElement("name");
            // A CDATA section must be a dedicated node: passing the "<![CDATA[...]]>"
            // markup to setTextContent() stores it as plain text, which the serializer
            // then escapes to "&lt;![CDATA[...".
            name.appendChild(document.createCDATASection(" DOM创建XML "));
            book.appendChild(name);
            bookStore.appendChild(book);
            document.appendChild(bookStore);

            Transformer tf = TransformerFactory.newInstance().newTransformer();
            tf.setOutputProperty(OutputKeys.INDENT, "yes");
            tf.transform(new DOMSource(document), new StreamResult(new File(path)));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates a new {@link DocumentBuilder} from a default-configured factory.
     *
     * @return a builder, never {@code null}
     * @throws IllegalStateException if the platform cannot supply a DOM parser
     */
    private static DocumentBuilder getBuilder() {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        try {
            return factory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            // Fail here with the real cause rather than returning null and
            // triggering a NullPointerException in the caller.
            throw new IllegalStateException("Unable to create a DOM DocumentBuilder", e);
        }
    }
}
SAX
处理方式类似流媒体,分析能够立即开始,而不用等待所有数据加载完毕。一般来说,SAX 比 DOM 快许多。
- SaxParserHandler
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX event handler that prints the contents of a "bookstore" document.
 * <p>
 * Attributes of each {@code <book>} are printed when the element opens. Every other
 * element except the {@code <bookstore>} root is printed as {@code 节点: name--text}
 * on a single line once the element closes.
 * <p>
 * The handler keeps per-element state, so one instance must not be shared between
 * parses that run at the same time.
 *
 * @author LeifChen (2019-01-14)
 */
public class SaxParserHandler extends DefaultHandler {
    private static final String BOOK_STORE = "bookstore";
    private static final String BOOK = "book";

    /**
     * Text gathered for the element whose "节点: name" prefix has been printed but
     * whose line is not finished yet; {@code null} when no such element is open.
     */
    private StringBuilder pendingText;

    /**
     * Announces the start of parsing.
     *
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void startDocument() throws SAXException {
        System.out.println("SAX解析开始");
    }

    /**
     * Announces the end of parsing.
     *
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void endDocument() throws SAXException {
        System.out.println("SAX解析结束");
    }

    /**
     * Handles an opening tag: prints the attributes of a {@code <book>}, or the
     * "节点: name" prefix for any element other than {@code <book>} and {@code <bookstore>}.
     *
     * @param uri        namespace URI of the element (unused)
     * @param localName  local name of the element (unused)
     * @param qName      qualified name, used to identify the element
     * @param attributes attributes attached to the element
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if (BOOK.equals(qName)) {
            for (int i = 0; i < attributes.getLength(); i++) {
                System.out.println("属性: " + attributes.getQName(i) + "--" + attributes.getValue(i));
            }
        } else if (!BOOK_STORE.equals(qName)) {
            // A nested element starts while its parent's line is still open: close that line first.
            finishPendingLine();
            System.out.print("节点: " + qName);
            pendingText = new StringBuilder();
        }
    }

    /**
     * Handles a closing tag: prints a separator after each {@code <book>}, or finishes
     * the output line of any other non-root element.
     *
     * @param uri       namespace URI of the element (unused)
     * @param localName local name of the element (unused)
     * @param qName     qualified name, used to identify the element
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (BOOK.equals(qName)) {
            System.out.println("==========");
        } else if (!BOOK_STORE.equals(qName)) {
            finishPendingLine();
        }
    }

    /**
     * Receives character data. A parser may deliver the text of one element in several
     * chunks (for example around entity references), so text belonging to an open
     * element is buffered and printed only when that element ends.
     *
     * @param ch     buffer holding the characters
     * @param start  index of the first character of this chunk
     * @param length number of characters in this chunk
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (pendingText != null) {
            pendingText.append(ch, start, length);
            return;
        }
        // Text outside a pending element: print non-blank content immediately.
        String value = new String(ch, start, length);
        if (!value.trim().isEmpty()) {
            System.out.println("--" + value);
        }
    }

    /**
     * Terminates the line started by {@link #startElement}: appends "--text" when the
     * element had non-blank text, otherwise just ends the line.
     */
    private void finishPendingLine() {
        if (pendingText == null) {
            return;
        }
        String value = pendingText.toString();
        pendingText = null;
        if (value.trim().isEmpty()) {
            System.out.println();
        } else {
            System.out.println("--" + value);
        }
    }
}
- SaxUtils
import org.xml.sax.helpers.AttributesImpl;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Transformer;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.FileOutputStream;
/**
* Sax解析XML
* <p>
* @Author LeifChen
* @Date 2019-01-14
*/
public class SaxUtils {
public static void parse(String path) {
SAXParserFactory factory = SAXParserFactory.newInstance();
try {
SAXParser parser = factory.newSAXParser();
SaxParserHandler handler = new SaxParserHandler();
parser.parse(path, handler);
} catch (Exception e) {
e.printStackTrace();
}
}
public static void create(String path) {
SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
try {
TransformerHandler handler = factory.newTransformerHandler();
Transformer tf = handler.getTransformer();
tf.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
tf.setOutputProperty(OutputKeys.INDENT, "yes");
Result result = new StreamResult(new FileOutputStream(new File(path)));
handler.setResult(result);
handler.startDocument();
AttributesImpl attr = new AttributesImpl();
handler.startElement("", "", "bookstore", attr);
attr.clear();
attr.addAttribute("", "", "id", "", "1");
handler.startElement("", "", "book", attr);
attr.clear();
handler.startElement("", "", "name", attr);
handler.startCDATA();
handler.characters("SAX创建XML".toCharArray(), 0, "SAX创建XML".length());
handler.endCDATA();
handler.endElement("", "", "name");
handler.endElement("", "", "book");
handler.endElement("", "", "bookstore");
handler.endDocument();
} catch (Exception e) {
e.printStackTrace();
}
}
}
JDOM
JDOM 的目的是成为 Java 特定文档模型,它简化与 XML 的交互并且比使用 DOM 实现更快。JDOM 仅使用具体类而不使用接口。这在某些方面简化了 API,但是也限制了其灵活性。此外,API 大量使用了 Collections 类,简化开发者的使用。
导入依赖:
dependencies {
implementation 'org.jdom:jdom2:2.0.6'
}
- JdomUtils
import org.jdom2.Attribute;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.input.SAXBuilder;
import org.jdom2.output.Format;
import org.jdom2.output.XMLOutputter;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.List;
/**
* JDOM解析XML
* <p>
* @Author LeifChen
* @Date 2019-01-14
*/
public class JdomUtils {
public static void parse(String path) {
SAXBuilder builder = new SAXBuilder();
InputStream is;
try {
is = new FileInputStream(path);
// 转码处理
InputStreamReader isr = new InputStreamReader(is, StandardCharsets.UTF_8);
Document document = builder.build(isr);
Element rootElement = document.getRootElement();
List<Element> bookList = rootElement.getChildren();
for (Element book : bookList) {
System.out.println("=====开始解析第" + (bookList.indexOf(book) + 1) + "本书=====");
List<Attribute> attrs = book.getAttributes();
for (Attribute attr : attrs) {
System.out.println("属性: " + attr.getName() + "--" + attr.getValue());
}
List<Element> bookChildren = book.getChildren();
for (Element bookChild : bookChildren) {
System.out.println("节点: " + bookChild.getName() + "--" + bookChild.getValue());
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
public static void create(String path) {
Element bookStore = new Element("bookstore");
bookStore.setAttribute("id", "1");
Document document = new Document(bookStore);
Element book = new Element("book");
bookStore.addContent(book);
Element name = new Element("name");
book.addContent(name);
name.setText("<![CDATA[ JDOM创建XML ]]>");
Format format = Format.getCompactFormat();
format.setIndent("");
format.setEncoding("UTF-8");
XMLOutputter outputter = new XMLOutputter(format);
try {
outputter.output(document, new FileOutputStream(new File(path)));
} catch (Exception e) {
e.printStackTrace();
}
}
}
DOM4J (推荐)
DOM4J 是 JDOM 的一种智能分支。它在基本的 XML 文档表示之外整合了许多功能,包括集成的 XPath 支持、XML Schema 支持,以及面向大文档或流式文档的基于事件的处理。它还提供了构建文档表示的选项,使文档可以同时通过 DOM4J API 和标准 DOM 接口访问。DOM4J 大量使用接口和抽象基类,因此比 JDOM 更灵活。
导入依赖:
dependencies {
implementation 'org.dom4j:dom4j:2.1.1'
}
- Dom4jUtils
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.util.Iterator;
import java.util.List;
/**
 * Reads and writes the sample "bookstore" XML document with dom4j.
 * <p>
 * Both entry points are best-effort: any failure is reported through
 * {@link Exception#printStackTrace()} instead of being propagated to the caller.
 *
 * @author LeifChen (2019-01-14)
 */
public class Dom4jUtils {

    /**
     * Parses the XML file at {@code path} and prints, for every child of the root
     * element, its attributes followed by the name and string value of each child element.
     *
     * @param path path of the XML file to read
     */
    public static void parse(String path) {
        SAXReader reader = new SAXReader();
        try {
            Document document = reader.read(new File(path));
            Element bookStore = document.getRootElement();
            Iterator<Element> books = bookStore.elementIterator();
            while (books.hasNext()) {
                System.out.println("=====开始遍历一本书=====");
                Element book = books.next();
                List<Attribute> attrs = book.attributes();
                for (Attribute attr : attrs) {
                    System.out.println("属性: " + attr.getName() + "--" + attr.getValue());
                }
                Iterator<Element> children = book.elementIterator();
                while (children.hasNext()) {
                    Element bookChild = children.next();
                    System.out.println("节点: " + bookChild.getName() + "--" + bookChild.getStringValue());
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes a sample document
     * {@code <bookstore><book id="1"><name><![CDATA[...]]></name></book></bookstore>}
     * to {@code path} as pretty-printed UTF-8.
     *
     * @param path file to create or overwrite
     */
    public static void create(String path) {
        Document document = DocumentHelper.createDocument();
        Element bookStore = document.addElement("bookstore");
        Element book = bookStore.addElement("book");
        // The id belongs on <book>: that is where the DOM and SAX writers put it and
        // where every parse() method in this tutorial looks for attributes.
        book.addAttribute("id", "1");
        Element name = book.addElement("name");
        // A CDATA section needs addCDATA(); setText("<![CDATA[...]]>") would be
        // written out as escaped text ("&lt;![CDATA[...").
        name.addCDATA(" DOM4J创建XML ");

        OutputFormat format = OutputFormat.createPrettyPrint();
        format.setEncoding("UTF-8");
        // try-with-resources guarantees the file is closed even when write() throws.
        try (FileOutputStream out = new FileOutputStream(new File(path))) {
            XMLWriter writer = new XMLWriter(out, format);
            writer.write(document);
            writer.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
综合比较
Property | DOM | SAX | JDOM | DOM4J |
---|---|---|---|---|
概念 | 基于DOM树 | 基于事件驱动的解析方式 | 基于底层API的高级封装,仅使用具体类而不使用接口 | JDOM的智能分支,使用接口和抽象基类 |
优点 | 形成树结构,直观好理解; 解析过程中树结构保留在内存中,方便修改 | 内存耗费比较小 | API 大量使用了 Collections 类,简化开发 | 性能优异、灵活性好、功能强大; 开源 |
缺点 | 内存消耗大,容易造成内存溢出 | 不易编码; 很难同时访问同一xml中的多处不同数据 | 不够灵活 | — |