Java 解析 XML

Java 解析 XML

四种解析方式:

  1. DOM
  2. SAX
  3. JDOM
  4. DOM4J

DOM、SAX 是解析 XML 的基础方式, JDOM、DOM4J 是基于底层 API 的高级封装。DOM 是通用的,具有跨语言、跨平台性,而 JDOM 和 DOM4J 则是面向 Java 语言的。

DOM

DOM 在解析 XML 文档时,会把文档中的所有元素,按照其出现的层次关系,解析成一个个 Node 对象(节点)。

  • DomUtils
import org.w3c.dom.*;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;

/**
 * Helpers that read and write a simple "bookstore" XML document with the JAXP DOM API.
 * <p>
 * Both entry points are best-effort: failures are reported on standard error via
 * {@link Throwable#printStackTrace()} and are not propagated to the caller.
 *
 * @Author LeifChen
 * @Date 2019-01-14
 */
public class DomUtils {

    /**
     * Parses the XML document at {@code path} and prints every {@code book} element:
     * first its attributes, then the name and text content of each child element.
     *
     * @param path location of the XML document, passed to {@link DocumentBuilder#parse(String)}
     */
    public static void parse(String path) {
        try {
            Document document = getBuilder().parse(path);
            NodeList bookList = document.getElementsByTagName("book");
            for (int i = 0; i < bookList.getLength(); i++) {
                System.out.println("=====开始解析第" + (i + 1) + "本书=====");
                Node book = bookList.item(i);

                NamedNodeMap attrs = book.getAttributes();
                for (int j = 0; j < attrs.getLength(); j++) {
                    Node attr = attrs.item(j);
                    System.out.println("属性: " + attr.getNodeName() + "--" + attr.getNodeValue());
                }

                NodeList childNodes = book.getChildNodes();
                for (int j = 0; j < childNodes.getLength(); j++) {
                    Node child = childNodes.item(j);
                    // Skip whitespace text nodes and comments; only child elements are reported.
                    if (child.getNodeType() == Node.ELEMENT_NODE) {
                        System.out.println("节点: " + child.getNodeName() + "--" + child.getTextContent());
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes a sample document
     * {@code <bookstore><book id="1"><name>...</name></book></bookstore>} to {@code path},
     * with the book name stored in a CDATA section.
     *
     * @param path file to create or overwrite
     */
    public static void create(String path) {
        try {
            Document document = getBuilder().newDocument();
            document.setXmlStandalone(true);

            Element bookStore = document.createElement("bookstore");
            Element book = document.createElement("book");
            book.setAttribute("id", "1");
            Element name = document.createElement("name");
            // A CDATA section must be created as its own node. Passing the markup to
            // setTextContent() stores it as plain text, which is serialized escaped
            // ("&lt;![CDATA[ ... ]]&gt;") instead of as a CDATA section.
            name.appendChild(document.createCDATASection(" DOM创建XML "));
            book.appendChild(name);
            bookStore.appendChild(book);
            document.appendChild(bookStore);

            Transformer tf = TransformerFactory.newInstance().newTransformer();
            tf.setOutputProperty(OutputKeys.INDENT, "yes");
            tf.transform(new DOMSource(document), new StreamResult(new File(path)));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates a new {@link DocumentBuilder} from a default-configured factory.
     *
     * @return a builder, never {@code null}
     * @throws IllegalStateException if the JAXP implementation cannot create a builder
     */
    private static DocumentBuilder getBuilder() {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        try {
            return factory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            // Fail here with the real cause rather than returning null and letting the
            // caller hit an unexplained NullPointerException.
            throw new IllegalStateException("Unable to create a DocumentBuilder", e);
        }
    }
}

SAX

SAX 采用基于事件的流式处理方式:解析可以在读取文档的同时立即开始,而不必等待整个文档加载完毕。一般来说,SAX 比 DOM 快许多,占用的内存也更少。

  • SaxParserHandler
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX event handler that prints the contents of a "bookstore" document.
 * <p>
 * Attributes of each {@code book} element are printed one per line. Every other element
 * except the {@code bookstore} root is printed as a label followed by its text.
 * <p>
 * A parser may deliver one run of character data in several {@link #characters} calls,
 * so text is buffered and only printed when the next element boundary is reached.
 * The handler keeps per-parse state and is reset in {@link #startDocument()}.
 *
 * @Author LeifChen
 * @Date 2019-01-14
 */
public class SaxParserHandler extends DefaultHandler {

    private static final String BOOK_STORE = "bookstore";
    private static final String BOOK = "book";

    /** Character data received since the last element boundary. */
    private final StringBuilder text = new StringBuilder();

    /** True while an element label has been printed without a line terminator. */
    private boolean labelOpen;

    /**
     * Marks the start of parsing and clears any state left from a previous parse.
     *
     * @throws SAXException declared by the SAX contract
     */
    @Override
    public void startDocument() throws SAXException {
        super.startDocument();
        text.setLength(0);
        labelOpen = false;
        System.out.println("SAX解析开始");
    }

    /**
     * Marks the end of parsing.
     *
     * @throws SAXException declared by the SAX contract
     */
    @Override
    public void endDocument() throws SAXException {
        super.endDocument();
        System.out.println("SAX解析结束");
    }

    /**
     * Handles an opening tag: prints the attributes of a {@code book} element, or the
     * label of any other element apart from the {@code bookstore} root.
     *
     * @param uri        namespace URI of the element
     * @param localName  local name of the element
     * @param qName      qualified name, used to identify the element
     * @param attributes attributes attached to the element
     * @throws SAXException declared by the SAX contract
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        super.startElement(uri, localName, qName, attributes);
        // Text seen before this tag belongs to the enclosing element; emit it first.
        flushText();
        if (BOOK.equals(qName)) {
            for (int i = 0; i < attributes.getLength(); i++) {
                System.out.println("属性: " + attributes.getQName(i) + "--" + attributes.getValue(i));
            }
        } else if (!BOOK_STORE.equals(qName)) {
            System.out.print("节点: " + qName);
            labelOpen = true;
        }
    }

    /**
     * Handles a closing tag: prints any buffered text and, for a {@code book} element,
     * a separator line.
     *
     * @param uri       namespace URI of the element
     * @param localName local name of the element
     * @param qName     qualified name, used to identify the element
     * @throws SAXException declared by the SAX contract
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        super.endElement(uri, localName, qName);
        flushText();
        if (BOOK.equals(qName)) {
            System.out.println("==========");
        }
    }

    /**
     * Buffers a chunk of character data; nothing is printed until the next element boundary.
     *
     * @param ch     buffer holding the characters
     * @param start  index of the first character of the chunk
     * @param length number of characters in the chunk
     * @throws SAXException declared by the SAX contract
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        super.characters(ch, start, length);
        text.append(ch, start, length);
    }

    /**
     * Prints the buffered text as {@code --text} when it is not blank. Otherwise
     * terminates a pending element label so the next output starts on a new line.
     */
    private void flushText() {
        String value = text.toString();
        text.setLength(0);
        if (!value.trim().isEmpty()) {
            System.out.println("--" + value);
        } else if (labelOpen) {
            System.out.println();
        }
        labelOpen = false;
    }
}
  • SaxUtils
import org.xml.sax.helpers.AttributesImpl;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Transformer;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.FileOutputStream;

/**
 * Helpers that read and write a simple "bookstore" XML document with the JAXP SAX API.
 * <p>
 * Both entry points are best-effort: failures are reported on standard error via
 * {@link Throwable#printStackTrace()} and are not propagated to the caller.
 *
 * @Author LeifChen
 * @Date 2019-01-14
 */
public class SaxUtils {

    /**
     * Parses the XML document at {@code path}, sending the SAX events to a new
     * {@link SaxParserHandler}.
     *
     * @param path location of the XML document, passed to the SAX parser as a URI string
     */
    public static void parse(String path) {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        try {
            SAXParser parser = factory.newSAXParser();
            SaxParserHandler handler = new SaxParserHandler();
            parser.parse(path, handler);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes a sample document
     * {@code <bookstore><book id="1"><name>...</name></book></bookstore>} to {@code path}
     * by emitting SAX events into a {@link TransformerHandler}. The book name is written
     * as a CDATA section.
     *
     * @param path file to create or overwrite
     */
    public static void create(String path) {
        SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
        // The transformer does not close a caller-supplied stream, so it is closed here
        // to avoid leaking the file descriptor.
        try (FileOutputStream out = new FileOutputStream(new File(path))) {
            TransformerHandler handler = factory.newTransformerHandler();
            Transformer tf = handler.getTransformer();
            tf.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            tf.setOutputProperty(OutputKeys.INDENT, "yes");
            Result result = new StreamResult(out);
            handler.setResult(result);

            handler.startDocument();
            AttributesImpl attr = new AttributesImpl();
            handler.startElement("", "", "bookstore", attr);
            attr.clear();
            attr.addAttribute("", "", "id", "", "1");
            handler.startElement("", "", "book", attr);
            attr.clear();
            handler.startElement("", "", "name", attr);
            char[] bookName = "SAX创建XML".toCharArray();
            handler.startCDATA();
            handler.characters(bookName, 0, bookName.length);
            handler.endCDATA();
            handler.endElement("", "", "name");
            handler.endElement("", "", "book");
            handler.endElement("", "", "bookstore");
            handler.endDocument();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

JDOM

JDOM 的目的是成为 Java 特定文档模型,它简化与 XML 的交互并且比使用 DOM 实现更快。JDOM 仅使用具体类而不使用接口。这在某些方面简化了 API,但是也限制了其灵活性。此外,API 大量使用了 Collections 类,简化开发者的使用。

导入依赖:

dependencies {
    implementation 'org.jdom:jdom2:2.0.6'
}
  • JdomUtils
import org.jdom2.Attribute;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.input.SAXBuilder;
import org.jdom2.output.Format;
import org.jdom2.output.XMLOutputter;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.List;

/**
* JDOM解析XML
* <p>
* @Author LeifChen
* @Date 2019-01-14
*/
public class JdomUtils {

    public static void parse(String path) {
        SAXBuilder builder = new SAXBuilder();
        InputStream is;
        try {
            is = new FileInputStream(path);
            // 转码处理
            InputStreamReader isr = new InputStreamReader(is, StandardCharsets.UTF_8);
            Document document = builder.build(isr);
            Element rootElement = document.getRootElement();
            List<Element> bookList = rootElement.getChildren();
            for (Element book : bookList) {
                System.out.println("=====开始解析第" + (bookList.indexOf(book) + 1) + "本书=====");
                List<Attribute> attrs = book.getAttributes();
                for (Attribute attr : attrs) {
                    System.out.println("属性: " + attr.getName() + "--" + attr.getValue());
                }

                List<Element> bookChildren = book.getChildren();
                for (Element bookChild : bookChildren) {
                    System.out.println("节点: " + bookChild.getName() + "--" + bookChild.getValue());
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static void create(String path) {
        Element bookStore = new Element("bookstore");
        bookStore.setAttribute("id", "1");
        Document document = new Document(bookStore);
        Element book = new Element("book");
        bookStore.addContent(book);
        Element name = new Element("name");
        book.addContent(name);
        name.setText("<![CDATA[ JDOM创建XML ]]>");

        Format format = Format.getCompactFormat();
        format.setIndent("");
        format.setEncoding("UTF-8");
        XMLOutputter outputter = new XMLOutputter(format);
        try {
            outputter.output(document, new FileOutputStream(new File(path)));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

DOM4J (推荐)

DOM4J 是 JDOM 的一种智能分支。它合并了许多超出基本 XML 文档表示的功能,包括集成的 XPath 支持、XML Schema 支持以及用于大文档或流化文档的基于事件的处理。它还提供了构建文档表示的选项,并支持通过 DOM4J API 和标准 DOM 接口并行访问文档。DOM4J 使用接口和抽象基类的方式设计,比 JDOM 更灵活。

导入依赖:

dependencies {
    implementation 'org.dom4j:dom4j:2.1.1'
}
  • Dom4jUtils
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;

import java.io.File;
import java.io.FileOutputStream;
import java.util.Iterator;
import java.util.List;

/**
 * Helpers that read and write a simple "bookstore" XML document with dom4j.
 * <p>
 * Both entry points are best-effort: failures are reported on standard error via
 * {@link Throwable#printStackTrace()} and are not propagated to the caller.
 *
 * @Author LeifChen
 * @Date 2019-01-14
 */
public class Dom4jUtils {

    /**
     * Parses the XML file at {@code path} and prints every child of the root element:
     * first its attributes, then the name and string value of each of its child elements.
     *
     * @param path file system path of the XML document
     */
    public static void parse(String path) {
        SAXReader reader = new SAXReader();
        try {
            Document document = reader.read(new File(path));
            Element bookStore = document.getRootElement();
            Iterator<Element> iterator = bookStore.elementIterator();
            while (iterator.hasNext()) {
                System.out.println("=====开始遍历一本书=====");
                Element book = iterator.next();
                List<Attribute> attrs = book.attributes();
                for (Attribute attr : attrs) {
                    System.out.println("属性: " + attr.getName() + "--" + attr.getValue());
                }

                Iterator<Element> it = book.elementIterator();
                while (it.hasNext()) {
                    Element bookChild = it.next();
                    System.out.println("节点: " + bookChild.getName() + "--" + bookChild.getStringValue());
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes a sample document
     * {@code <bookstore id="1"><book><name>...</name></book></bookstore>} to {@code path},
     * with the book name stored in a CDATA section.
     *
     * @param path file to create or overwrite
     */
    public static void create(String path) {
        Document document = DocumentHelper.createDocument();
        Element bookStore = document.addElement("bookstore");
        // NOTE(review): the id attribute is set on the root element, not on <book> - confirm intent.
        bookStore.addAttribute("id", "1");
        Element book = bookStore.addElement("book");
        Element name = book.addElement("name");
        // CDATA needs its own node. Passing the markup to setText() stores it as plain
        // text, which is written out escaped.
        name.addCDATA(" DOM4J创建XML ");
        OutputFormat format = OutputFormat.createPrettyPrint();
        format.setEncoding("UTF-8");
        // The stream is closed even if writing fails part-way through.
        try (FileOutputStream out = new FileOutputStream(new File(path))) {
            XMLWriter writer = new XMLWriter(out, format);
            writer.write(document);
            // Push anything still buffered in the writer to the stream before it closes.
            writer.flush();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

综合比较

| Property | DOM | SAX | JDOM | DOM4J |
| --- | --- | --- | --- | --- |
| 概念 | 基于DOM树 | 基于事件驱动的解析方式 | 基于底层API的高级封装,仅使用具体类而不使用接口 | JDOM的智能分支,使用接口和抽象 |
| 优点 | 形成树结构,直观好理解;解析过程中树结构保留在内存中,方便修改 | 内存耗费比较小 | API 大量使用了 Collections 类,简化开发 | 性能优异、灵活性好、功能强大;开源 |
| 缺点 | 内存消耗大,容易造成内存溢出 | 不易编码;很难同时访问同一xml中的多处不同数据 | 不够灵活 | — |

参考

  1. GitHub
  2. Java眼中的XML—文件读取
  3. Java眼中的XML 文件写入
  4. XML之四种解析dom,sax,jdom,dom4j原理及性能比较
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值