# XML 大文件解析

XML 大文件解析

  • 解析出 XML 文件中各节点的数据
样例 xml
<?xml version="1.0" encoding="utf-8" ?>
<!-- Sample input for the parsers below. The root tag is spelled DOCUMNET; the dom4j
     handler paths in the example code use this exact spelling. -->
<DOCUMNET>
    <!-- One class: its number and name, followed by its STUDENT entries. -->
    <CLASS>
        <CLASS_CNO>C1001</CLASS_CNO>
        <CLASS_NAME>初一一班</CLASS_NAME>
        <!-- Each STUDENT carries its own fields plus zero or more SUBJECT entries. -->
        <STUDENT>
            <STUDENT_SNO>S1001</STUDENT_SNO>
            <NAME>张三</NAME>
            <!-- GENDER is empty in this sample. -->
            <GENDER></GENDER>
            <!-- NAME appears both under STUDENT and under SUBJECT. -->
            <SUBJECT>
                <NAME>数学</NAME>
                <SUBJECT_NO>S10001</SUBJECT_NO>
                <SCORE>79</SCORE>
            </SUBJECT>
            <SUBJECT>
                <NAME>英语</NAME>
                <SUBJECT_NO>S10002</SUBJECT_NO>
                <SCORE>100</SCORE>
            </SUBJECT>
        </STUDENT>
        <STUDENT>
            <STUDENT_SNO>S1002</STUDENT_SNO>
            <NAME>李四</NAME>
            <GENDER></GENDER>
            <SUBJECT>
                <NAME>数学</NAME>
                <SUBJECT_NO>S10001</SUBJECT_NO>
                <SCORE>100</SCORE>
            </SUBJECT>
            <SUBJECT>
                <NAME>英语</NAME>
                <SUBJECT_NO>S10002</SUBJECT_NO>
                <SCORE>100</SCORE>
            </SUBJECT>
        </STUDENT>
    </CLASS>
</DOCUMNET>
Dom4j 和 Sax 区别
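  • Dom4j 默认会一次性把整个文档加载进内存并构建 DOM 树,无法按节点逐次加载(除非像下文那样配合 ElementHandler,在解析过程中 detach 已处理的节点)。
  • Sax 基于事件流式解析 xml:每遇到一个节点的开始、文本、结束就触发一次回调,不需要把整个文档保留在内存中,在内存有限的情况下可以更加高效地解析 Xml。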

Dom 4j

依赖
<!-- https://mvnrepository.com/artifact/org.dom4j/dom4j -->
<dependency>
    <groupId>org.dom4j</groupId>
    <artifactId>dom4j</artifactId>
    <version>2.1.3</version>
</dependency>
实例代码
public class ParseXmlByDom4jHandlerTest {

    private static final Logger logger = LoggerFactory.getLogger(ParseXmlByDom4jHandlerTest.class);


    public InputStream getInputStream() throws FileNotFoundException {
        File file = new File("K:\\JavaTest\\src\\main\\java\\parsexml\\ClassDemo.xml");
        return new FileInputStream(file);
    }

    /**
     * Dom 4j 方式解析
     * 这种方式还是会全部加载到内存中,还是很肯爹的
     */
    @Test
    public void parseXmlByDom4jHandler() {
        //1.创建Reader对象
        SAXReader reader = new SAXReader();
        XmlClass xmlClass = new XmlClass();
        reader.addHandler("/DOCUMNET/CLASS", new XmlClassHandler(xmlClass));
        reader.addHandler("/DOCUMNET/CLASS/STUDENT", new XmlStudentHandler(xmlClass));
        InputStream inputStream = null;
        try {
            inputStream = getInputStream();
            reader.read(inputStream);
        } catch (DocumentException | FileNotFoundException e) {
            logger.error("Error Occur:{}", e.getMessage(), e);
        } finally {
            try {
                if (inputStream != null) {
                    inputStream.close();
                }
            } catch (IOException e) {
                logger.error("Error Occur:{}", e.getMessage());
            }
        }
        logger.info("test");
    }

    /**
     * 监听器
     */
    private class XmlClassHandler implements ElementHandler {
        private XmlClass xmlClass;

        public XmlClassHandler(XmlClass xmlClass) {
            this.xmlClass = xmlClass;
        }

        @Override
        public void onStart(ElementPath elementPath) {
            // 释放节点
            elementPath.getCurrent().detach();
        }

        @Override
        public void onEnd(ElementPath elementPath) {
            Element element = elementPath.getCurrent();
            Iterator<Element> childElementIterator = element.elementIterator();
            while (childElementIterator.hasNext()) {
                Element childElement = childElementIterator.next();
                String name = childElement.getName();
                String stringValue = childElement.getStringValue();
                if (XmlNodeConstant.CLASS_NAME.equalsIgnoreCase(name)) {
                    xmlClass.setClassName(stringValue);
                }
                if (XmlNodeConstant.CLASS_CNO.equalsIgnoreCase(name)) {
                    xmlClass.setClassCno(stringValue);
                }
            }
            element.detach();
        }
    }

    /**
     * 监听器
     */
    private class XmlStudentHandler implements ElementHandler {

        private XmlClass xmlClass;

        public XmlStudentHandler(XmlClass xmlClass) {
            this.xmlClass = xmlClass;
        }

        @Override
        public void onStart(ElementPath elementPath) {
            elementPath.getCurrent().detach();
        }

        @Override
        public void onEnd(ElementPath elementPath) {
            Element element = elementPath.getCurrent();
            XmlStudent xmlStudent = new XmlStudent();
            packageXmlStudent(element.content(), xmlStudent);
            xmlClass.getXmlStudents().add(xmlStudent);
            element.detach();
        }
    }


    /**
     * 组装学生信息
     *
     * @param content
     * @param xmlStudent
     */
    private void packageXmlStudent(List<Node> content, XmlStudent xmlStudent) {
        for (Node studentNode : content) {
            String tableNodeName = studentNode.getName();
            String tableNodeStringValue = studentNode.getStringValue();
            if (XmlNodeConstant.STUDENT_SNO.equalsIgnoreCase(tableNodeName)) {
                xmlStudent.setSno(tableNodeStringValue);
            }
            if (XmlNodeConstant.NAME.equalsIgnoreCase(tableNodeName)) {
                xmlStudent.setName(tableNodeStringValue);
            }
            if (XmlNodeConstant.GENDER.equalsIgnoreCase(tableNodeName)) {
                xmlStudent.setGender(tableNodeStringValue);
            }
            if (XmlNodeConstant.SUBJECT.equalsIgnoreCase(tableNodeName)) {
                XmlSubject xmlSubject = new XmlSubject();
                packageXmlSubject(studentNode, xmlSubject);
                xmlStudent.getXmlSubjects().add(xmlSubject);
            }

        }
    }

    /**
     * 组装科目信息
     *
     * @param studentNode
     * @param xmlSubject
     */
    private void packageXmlSubject(Node studentNode, XmlSubject xmlSubject) {
        List<Node> subjectNodes = ((Branch) studentNode).content();
        for (Node subjectNode : subjectNodes) {
            String columnNodeName = subjectNode.getName();
            String columnNodeString = subjectNode.getStringValue();
            if (XmlNodeConstant.NAME.equalsIgnoreCase(columnNodeName)) {
                xmlSubject.setSubjectName(columnNodeString);
            }
            if (XmlNodeConstant.SUBJECT_NO.equalsIgnoreCase(columnNodeName)) {
                xmlSubject.setSubjectNo(columnNodeString);
            }
            if (XmlNodeConstant.SCORE.equalsIgnoreCase(columnNodeName)) {
                xmlSubject.setScore(Float.parseFloat(columnNodeString));
            }
        }
    }


}


Sax

依赖

  • 使用 Java 原生的 SAX API(javax.xml.parsers / org.xml.sax),无需额外依赖;它和 Dom4j 中的 SAXReader 是有区别的
实例代码
/**
 * Example of parsing an XML document with the JDK SAX parser. A {@code DefaultHandler}
 * subclass receives parse events and assembles an {@code XmlClass} with its students
 * and their subjects.
 */
public class ParseXmlBySaxTest {

    private static final Logger logger = LoggerFactory.getLogger(ParseXmlBySaxTest.class);


    /**
     * Opens the sample XML file from its hard-coded location.
     *
     * @return a new stream over the file; the caller is responsible for closing it
     * @throws FileNotFoundException if the file cannot be opened
     */
    public InputStream getInputStream() throws FileNotFoundException {
        File file = new File("K:\\JavaTest\\src\\main\\java\\parsexml\\ClassDemo.xml");
        return new FileInputStream(file);
    }

    /**
     * Parses the sample file with SAX, handling the document event by event.
     */
    @Test
    public void parseXmlBySax() {
        // try-with-resources closes the stream on every path; a failure while closing
        // is logged by the catch below instead of being printed to stderr.
        try (InputStream inputStream = getInputStream()) {
            SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
            SAXParser saxParser = saxParserFactory.newSAXParser();
            XmlClass xmlClass = new XmlClass();
            saxParser.parse(inputStream, new XmlClassHandler(xmlClass));
            logger.info("test");
        } catch (Exception e) {
            logger.error("Error Occur:{}", e.getMessage(), e);
        }
    }

    /**
     * SAX event listener that fills an {@code XmlClass} while the document is parsed.
     */
    private static class XmlClassHandler extends DefaultHandler {

        /**
         * The class being populated.
         */
        private final XmlClass xmlClass;

        /**
         * The student currently being read, created when a student element starts.
         */
        private XmlStudent xmlStudent;

        /**
         * The subject currently being read, created when a subject element starts.
         */
        private XmlSubject xmlSubject;

        /**
         * Whether the parser is currently inside a student element.
         */
        private boolean studentFlag;

        /**
         * Whether the parser is currently inside a subject element.
         */
        private boolean subjectFlag;

        /**
         * Text collected for the element currently open. The parser may deliver one
         * element's text through several {@code characters} calls, so it is accumulated
         * here and applied once the element ends.
         */
        private final StringBuilder text = new StringBuilder();

        /**
         * Creates a handler that writes into the given class object.
         *
         * @param xmlClass the object to populate during parsing
         */
        public XmlClassHandler(XmlClass xmlClass) {
            this.xmlClass = xmlClass;
        }

        @Override
        public void startDocument() {
            logger.info("开始处理 Xml ...");
        }

        @Override
        public void endDocument() {
            logger.info("结束处理 Xml ...");
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) {
            if (XmlNodeConstant.STUDENT.equals(qName)) {
                xmlStudent = new XmlStudent();
                studentFlag = true;
            }
            if (XmlNodeConstant.SUBJECT.equals(qName)) {
                xmlSubject = new XmlSubject();
                subjectFlag = true;
            }
            // Drop any text seen before this element opened (e.g. indentation).
            text.setLength(0);
        }

        @Override
        public void endElement(String uri, String localName, String qName) {
            String nodeValue = text.toString();
            text.setLength(0);
            // Whitespace-only content (empty elements, indentation) is ignored.
            if (!StringUtils.isEmpty(nodeValue.trim())) {
                applyValue(qName, nodeValue);
            }
            if (XmlNodeConstant.STUDENT.equals(qName)) {
                xmlClass.getXmlStudents().add(this.xmlStudent);
                studentFlag = false;
            }
            if (XmlNodeConstant.SUBJECT.equals(qName)) {
                xmlStudent.getXmlSubjects().add(xmlSubject);
                subjectFlag = false;
            }
        }

        @Override
        public void characters(char[] chars, int start, int length) {
            // Only buffer here; the value is assigned when the element closes.
            text.append(chars, start, length);
        }

        /**
         * Stores the text of a just-closed element on the student, subject or class,
         * depending on the element name and on which container is currently open.
         *
         * @param elementName qualified name of the element that just ended
         * @param nodeValue   the element's non-blank text
         * @throws NumberFormatException if a score value is not a valid float
         */
        private void applyValue(String elementName, String nodeValue) {
            if (studentFlag && !subjectFlag) {
                switch (elementName) {
                    case "NAME":
                        xmlStudent.setName(nodeValue);
                        break;
                    case "GENDER":
                        xmlStudent.setGender(nodeValue);
                        break;
                    case "STUDENT_SNO":
                        xmlStudent.setSno(nodeValue);
                        break;
                    default:
                        break;
                }
            }
            if (studentFlag && subjectFlag) {
                switch (elementName) {
                    case "NAME":
                        xmlSubject.setSubjectName(nodeValue);
                        break;
                    case "SUBJECT_NO":
                        xmlSubject.setSubjectNo(nodeValue);
                        break;
                    case "SCORE":
                        xmlSubject.setScore(Float.parseFloat(nodeValue));
                        break;
                    default:
                        break;
                }
            }
            if (XmlNodeConstant.CLASS_NAME.equals(elementName)) {
                xmlClass.setClassName(nodeValue);
            } else if (XmlNodeConstant.CLASS_CNO.equals(elementName)) {
                xmlClass.setClassCno(nodeValue);
            }
        }

        @Override
        public void error(SAXParseException e) {
            logger.error("Error Occur:{}", e.getMessage(), e);
        }

    }
}
完整代码见

https://gitee.com/Marlon_Brando/JavaTest/tree/master/src/main/java/parsexml

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

全栈程序员

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值