XML 大文件解析
- 解析出 XML 文件中节点的数据
样例 xml
<?xml version="1.0" encoding="utf-8" ?>
<DOCUMNET>
<CLASS>
<CLASS_CNO>C1001</CLASS_CNO>
<CLASS_NAME>初一一班</CLASS_NAME>
<STUDENT>
<STUDENT_SNO>S1001</STUDENT_SNO>
<NAME>张三</NAME>
<GENDER>男</GENDER>
<SUBJECT>
<NAME>数学</NAME>
<SUBJECT_NO>S10001</SUBJECT_NO>
<SCORE>79</SCORE>
</SUBJECT>
<SUBJECT>
<NAME>英语</NAME>
<SUBJECT_NO>S10002</SUBJECT_NO>
<SCORE>100</SCORE>
</SUBJECT>
</STUDENT>
<STUDENT>
<STUDENT_SNO>S1002</STUDENT_SNO>
<NAME>李四</NAME>
<GENDER>男</GENDER>
<SUBJECT>
<NAME>数学</NAME>
<SUBJECT_NO>S10001</SUBJECT_NO>
<SCORE>100</SCORE>
</SUBJECT>
<SUBJECT>
<NAME>英语</NAME>
<SUBJECT_NO>S10002</SUBJECT_NO>
<SCORE>100</SCORE>
</SUBJECT>
</STUDENT>
</CLASS>
</DOCUMNET>
Dom4j 和 Sax 区别
Dom4j 无法做到按节点逐次加载,而是一次性将整个文档全部加载进内存。
Sax 以事件驱动的方式顺序读取 xml(可以近似理解为“按行”加载进内存),在内存有限的情况下可以更加高效地解析 Xml。
Dom4j
依赖
<!-- https://mvnrepository.com/artifact/org.dom4j/dom4j -->
<dependency>
<groupId>org.dom4j</groupId>
<artifactId>dom4j</artifactId>
<version>2.1.3</version>
</dependency>
实例代码
/**
 * Parses the sample class/student/subject XML with dom4j, using {@link ElementHandler}
 * callbacks registered on a {@link SAXReader} to populate an {@link XmlClass}.
 */
public class ParseXmlByDom4jHandlerTest {
    private static final Logger logger = LoggerFactory.getLogger(ParseXmlByDom4jHandlerTest.class);

    /**
     * Opens the sample XML file from its hard-coded location.
     *
     * @return a stream over the sample file; the caller is responsible for closing it
     * @throws FileNotFoundException if the file does not exist at the hard-coded path
     */
    public InputStream getInputStream() throws FileNotFoundException {
        File file = new File("K:\\JavaTest\\src\\main\\java\\parsexml\\ClassDemo.xml");
        return new FileInputStream(file);
    }

    /**
     * Parses the sample file with dom4j element handlers.
     *
     * <p>Original author's note: this approach still ends up loading everything into memory.
     */
    @Test
    public void parseXmlByDom4jHandler() {
        // 1. Create the reader and register one handler per element path of interest.
        SAXReader reader = new SAXReader();
        XmlClass xmlClass = new XmlClass();
        reader.addHandler("/DOCUMNET/CLASS", new XmlClassHandler(xmlClass));
        reader.addHandler("/DOCUMNET/CLASS/STUDENT", new XmlStudentHandler(xmlClass));
        // try-with-resources closes the stream on every path; a failure while closing is
        // reported through the same catch block, with its stack trace.
        try (InputStream inputStream = getInputStream()) {
            reader.read(inputStream);
        } catch (DocumentException | IOException e) {
            logger.error("Error Occur:{}", e.getMessage(), e);
        }
        logger.info("test");
    }

    /**
     * Listener for {@code CLASS} elements: copies the class name and class number
     * from the element's direct children into the shared {@link XmlClass}.
     */
    private static class XmlClassHandler implements ElementHandler {
        private final XmlClass xmlClass;

        public XmlClassHandler(XmlClass xmlClass) {
            this.xmlClass = xmlClass;
        }

        @Override
        public void onStart(ElementPath elementPath) {
            // Release the node: detach it from its parent as soon as the start tag is seen.
            elementPath.getCurrent().detach();
        }

        @Override
        public void onEnd(ElementPath elementPath) {
            Element element = elementPath.getCurrent();
            Iterator<Element> childElementIterator = element.elementIterator();
            while (childElementIterator.hasNext()) {
                Element childElement = childElementIterator.next();
                String name = childElement.getName();
                String stringValue = childElement.getStringValue();
                if (XmlNodeConstant.CLASS_NAME.equalsIgnoreCase(name)) {
                    xmlClass.setClassName(stringValue);
                } else if (XmlNodeConstant.CLASS_CNO.equalsIgnoreCase(name)) {
                    xmlClass.setClassCno(stringValue);
                }
            }
            element.detach();
        }
    }

    /**
     * Listener for {@code STUDENT} elements: converts each completed element into an
     * {@link XmlStudent} and appends it to the shared {@link XmlClass}.
     */
    private static class XmlStudentHandler implements ElementHandler {
        private final XmlClass xmlClass;

        public XmlStudentHandler(XmlClass xmlClass) {
            this.xmlClass = xmlClass;
        }

        @Override
        public void onStart(ElementPath elementPath) {
            elementPath.getCurrent().detach();
        }

        @Override
        public void onEnd(ElementPath elementPath) {
            Element element = elementPath.getCurrent();
            XmlStudent xmlStudent = new XmlStudent();
            packageXmlStudent(element.content(), xmlStudent);
            xmlClass.getXmlStudents().add(xmlStudent);
            element.detach();
        }
    }

    /**
     * Assembles the student information.
     *
     * @param content    the child nodes of a {@code STUDENT} element; nodes whose name matches
     *                   none of the known constants are skipped
     * @param xmlStudent the student object to populate
     */
    private static void packageXmlStudent(List<Node> content, XmlStudent xmlStudent) {
        for (Node studentNode : content) {
            String tableNodeName = studentNode.getName();
            String tableNodeStringValue = studentNode.getStringValue();
            if (XmlNodeConstant.STUDENT_SNO.equalsIgnoreCase(tableNodeName)) {
                xmlStudent.setSno(tableNodeStringValue);
            } else if (XmlNodeConstant.NAME.equalsIgnoreCase(tableNodeName)) {
                xmlStudent.setName(tableNodeStringValue);
            } else if (XmlNodeConstant.GENDER.equalsIgnoreCase(tableNodeName)) {
                xmlStudent.setGender(tableNodeStringValue);
            } else if (XmlNodeConstant.SUBJECT.equalsIgnoreCase(tableNodeName)) {
                XmlSubject xmlSubject = new XmlSubject();
                packageXmlSubject(studentNode, xmlSubject);
                xmlStudent.getXmlSubjects().add(xmlSubject);
            }
        }
    }

    /**
     * Assembles the subject information.
     *
     * @param studentNode the {@code SUBJECT} node found under a student; it is cast to
     *                    {@link Branch} to read its children
     * @param xmlSubject  the subject object to populate
     * @throws NumberFormatException if the {@code SCORE} text is not a parsable float
     */
    private static void packageXmlSubject(Node studentNode, XmlSubject xmlSubject) {
        List<Node> subjectNodes = ((Branch) studentNode).content();
        for (Node subjectNode : subjectNodes) {
            String columnNodeName = subjectNode.getName();
            String columnNodeString = subjectNode.getStringValue();
            if (XmlNodeConstant.NAME.equalsIgnoreCase(columnNodeName)) {
                xmlSubject.setSubjectName(columnNodeString);
            } else if (XmlNodeConstant.SUBJECT_NO.equalsIgnoreCase(columnNodeName)) {
                xmlSubject.setSubjectNo(columnNodeString);
            } else if (XmlNodeConstant.SCORE.equalsIgnoreCase(columnNodeName)) {
                xmlSubject.setScore(Float.parseFloat(columnNodeString));
            }
        }
    }
}
Sax
依赖
- 使用 Java 原生的 API,和 Dom4j 中的 SAXReader 是有区别的
实例代码
/**
 * Parses the sample class/student/subject XML with the JDK's SAX parser, building an
 * {@link XmlClass} from the stream of start/end/character events.
 */
public class ParseXmlBySaxTest {
    private static final Logger logger = LoggerFactory.getLogger(ParseXmlBySaxTest.class);

    /**
     * Opens the sample XML file from its hard-coded location.
     *
     * @return a stream over the sample file; the caller is responsible for closing it
     * @throws FileNotFoundException if the file does not exist at the hard-coded path
     */
    public InputStream getInputStream() throws FileNotFoundException {
        File file = new File("K:\\JavaTest\\src\\main\\java\\parsexml\\ClassDemo.xml");
        return new FileInputStream(file);
    }

    /**
     * Parses the sample file event by event with SAX.
     */
    @Test
    public void parseXmlBySax() {
        // try-with-resources closes the stream on every path; a failure while closing is
        // logged through the same catch block instead of being printed to stderr.
        try (InputStream inputStream = getInputStream()) {
            SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
            SAXParser saxParser = saxParserFactory.newSAXParser();
            XmlClass xmlClass = new XmlClass();
            saxParser.parse(inputStream, new XmlClassHandler(xmlClass));
            logger.info("test");
        } catch (Exception e) {
            logger.error("Error Occur:{}", e.getMessage(), e);
        }
    }

    /**
     * SAX event listener that fills an {@link XmlClass} while the document is streamed.
     *
     * <p>Character data is buffered between an element's start and end events and applied
     * when the element closes, because the parser may deliver one text node through several
     * {@code characters} callbacks.
     */
    private static class XmlClassHandler extends DefaultHandler {
        /** The class object being populated. */
        private final XmlClass xmlClass;
        /** The student currently being read; replaced at each {@code STUDENT} start tag. */
        private XmlStudent xmlStudent;
        /** The subject currently being read; replaced at each {@code SUBJECT} start tag. */
        private XmlSubject xmlSubject;
        /** Whether the parser is currently inside a {@code STUDENT} element. */
        private boolean studentFlag;
        /** Whether the parser is currently inside a {@code SUBJECT} element. */
        private boolean subjectFlag;
        /** Text collected since the most recent start or end tag. */
        private final StringBuilder text = new StringBuilder();

        /**
         * Creates a handler that writes everything it reads into the given class object.
         *
         * @param xmlClass the object to populate
         */
        public XmlClassHandler(XmlClass xmlClass) {
            this.xmlClass = xmlClass;
        }

        @Override
        public void startDocument() {
            logger.info("开始处理 Xml ...");
        }

        @Override
        public void endDocument() {
            logger.info("结束处理 Xml ...");
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) {
            if (XmlNodeConstant.STUDENT.equals(qName)) {
                xmlStudent = new XmlStudent();
                studentFlag = true;
            }
            if (XmlNodeConstant.SUBJECT.equals(qName)) {
                xmlSubject = new XmlSubject();
                subjectFlag = true;
            }
            // Discard whatever text preceded this tag; it belongs to the parent element.
            text.setLength(0);
        }

        @Override
        public void endElement(String uri, String localName, String qName) {
            String nodeValue = text.toString();
            text.setLength(0);
            // Blank content (e.g. indentation between tags) never sets a field.
            if (!nodeValue.trim().isEmpty()) {
                applyValue(qName, nodeValue);
            }
            if (XmlNodeConstant.STUDENT.equals(qName)) {
                xmlClass.getXmlStudents().add(this.xmlStudent);
                studentFlag = false;
            }
            if (XmlNodeConstant.SUBJECT.equals(qName)) {
                xmlStudent.getXmlSubjects().add(xmlSubject);
                subjectFlag = false;
            }
        }

        @Override
        public void characters(char[] chars, int start, int length) {
            // Only accumulate here; the value is applied once the element is complete.
            text.append(chars, start, length);
        }

        /**
         * Stores the text of a just-closed element in the object that owns that field.
         *
         * @param elementName the qualified name of the element that just closed
         * @param nodeValue   the element's complete, non-blank text (not trimmed)
         * @throws NumberFormatException if a {@code SCORE} value is not a parsable float
         */
        private void applyValue(String elementName, String nodeValue) {
            if (studentFlag && !subjectFlag) {
                switch (elementName) {
                    case "NAME":
                        xmlStudent.setName(nodeValue);
                        break;
                    case "GENDER":
                        xmlStudent.setGender(nodeValue);
                        break;
                    case "STUDENT_SNO":
                        xmlStudent.setSno(nodeValue);
                        break;
                    default:
                        break;
                }
            }
            if (studentFlag && subjectFlag) {
                switch (elementName) {
                    case "NAME":
                        xmlSubject.setSubjectName(nodeValue);
                        break;
                    case "SUBJECT_NO":
                        xmlSubject.setSubjectNo(nodeValue);
                        break;
                    case "SCORE":
                        xmlSubject.setScore(Float.parseFloat(nodeValue));
                        break;
                    default:
                        break;
                }
            }
            if (XmlNodeConstant.CLASS_NAME.equals(elementName)) {
                xmlClass.setClassName(nodeValue);
            } else if (XmlNodeConstant.CLASS_CNO.equals(elementName)) {
                xmlClass.setClassCno(nodeValue);
            }
        }

        @Override
        public void error(SAXParseException e) {
            // Recoverable parser errors are logged (with stack trace) and parsing continues.
            logger.error("Error Occur:{}", e.getMessage(), e);
        }
    }
}
完整代码见
https://gitee.com/Marlon_Brando/JavaTest/tree/master/src/main/java/parsexml