参考文章:http://inotgaoshou.iteye.com/blog/1012188
一、xml解析常用的四种方式:
DOM(Document Object Model)文档对象模型;
SAX(Simple API for XML)XML简单应用程序接口;
JDOM(Java-based Document Object Model);
DOM4J(Document Object Model for Java);
================================================================================
推荐用dom4j(大文档,或xml文档较多时候) 和 dom (小文档,xml文档较少的时候)
================================================================================
参考文章:http://download.csdn.net/download/shizhending/4067649
二、四种方式的比较:
DOM:性能测试时表现不佳。它是W3C的官方标准,允许应用程序对数据和结构做出更改,允许获取和操作文档的任意部分,
但需要加载整个文档,性能差,占内存多;小文件可以使用,据说文件超过10M时才会出现内存溢出。
由于它的遍历能力,DOM解析器常用于XML文档需要频繁的改变的服务中。
SAX:性能表现较好.类似于流媒体特点,分析能够立即开始,而不是等待所有的数据被处理。
只在读取数据时检查数据,不需要保存在内存中。可以在某个条件得到满足时停止解析,
不必解析整个文档。效率和性能较高,能解析大于系统内存的文档。
很难同时访问同一个文档中的多处不同数据 ,对内存的要求通常会比较低,
适用于大型文档。
DOM4J:性能最好。DOM4J是一个非常优秀的Java XML API,具有性能优异、功能强大和极其易用的特点,
同时它也是一个开放源代码的软件。
JDOM:性能测试时表现不佳。JDOM的目的是成为Java特定文档模型,它简化与XML的交互并且比使用DOM实现更快。
三、使用举例
1.dom4j:
src/person.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Sample data for the ParseXml example.
  The root element "persons" holds two differently named children, "person" and
  "person2". Each carries an "id" attribute and the child elements name, age,
  address (country / province / city) and zipcode.
-->
<persons>
<person id="psn0001" >
<name>gavin</name>
<age>18</age>
<address>
<country>中国</country>
<province>北京</province>
<city>北京</city>
</address>
<zipcode>100000</zipcode>
</person>
<person2 id="psn0002" >
<name>sophia</name>
<age>18</age>
<address>
<country>中国</country>
<province>钓鱼岛</province>
<city>钓鱼岛</city>
</address>
<zipcode>100000</zipcode>
</person2>
</persons>
package com.gavin.xmlparse.dom4j;
import java.io.File;
import java.io.InputStream;
import java.io.Reader;
import java.net.URL;
import java.util.Iterator;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.DOMReader;
import org.dom4j.io.SAXReader;
import org.xml.sax.InputSource;
/**
 * Reads XML content with dom4j and offers simple XPath-based lookups on it.
 *
 * <p>Every {@code parseWithSAX} overload, as well as
 * {@link #parseW3CDom2Dom4j(org.w3c.dom.Document)}, replaces the document held by this
 * instance. The lookup methods operate on the most recently loaded document and therefore
 * require that one of the parse methods has been called first.
 *
 * @author gavin
 */
public class ParseXml {

    /** Sample file used by {@link #main(String[])} when no path argument is supplied. */
    private static final String DEFAULT_XML_PATH = "D:\\workspace\\javaCoreSkill\\src\\person.xml";

    /**
     * dom4j object model representation of an XML document. Note: the interface is used,
     * not its implementation. Stays {@code null} until a parse method has been called.
     */
    private Document doc;

    /**
     * Creates the reader shared by all {@code parseWithSAX} overloads.
     *
     * @return a new {@link SAXReader} with default settings
     */
    private static SAXReader newSaxReader() {
        // NOTE(review): the reader is used with its default configuration. If any input can
        // come from an untrusted source, DTDs / external entities should be disabled on the
        // reader (XXE hardening) - confirm against the dom4j version in use.
        return new SAXReader();
    }

    /**
     * Loads a document from a file.
     *
     * @param aFile the data source
     * @throws DocumentException if parsing fails
     */
    public void parseWithSAX(File aFile) throws DocumentException {
        this.doc = newSaxReader().read(aFile);
    }

    /**
     * Loads a document from a URL.
     *
     * @param aURL the data source
     * @throws DocumentException if parsing fails
     */
    public void parseWithSAX(URL aURL) throws DocumentException {
        this.doc = newSaxReader().read(aURL);
    }

    /**
     * Reads a document from the given InputSource using SAX.
     *
     * @param inputSource the data source
     * @throws DocumentException if parsing fails
     */
    public void parseWithSAX(InputSource inputSource) throws DocumentException {
        this.doc = newSaxReader().read(inputSource);
    }

    /**
     * Reads a document from the given stream using SAX.
     *
     * @param in the stream to read from
     * @throws DocumentException if parsing fails
     */
    public void parseWithSAX(InputStream in) throws DocumentException {
        this.doc = newSaxReader().read(in);
    }

    /**
     * Reads a document from the given stream using SAX.
     *
     * @param in the stream to read from
     * @param systemId the system id passed on to the reader together with the stream
     * @throws DocumentException if parsing fails
     */
    public void parseWithSAX(InputStream in, String systemId) throws DocumentException {
        this.doc = newSaxReader().read(in, systemId);
    }

    /**
     * Reads a document from the given Reader using SAX.
     *
     * @param reader the character source to read from
     * @throws DocumentException if parsing fails
     */
    public void parseWithSAX(Reader reader) throws DocumentException {
        this.doc = newSaxReader().read(reader);
    }

    /**
     * Reads a document from the given URL or filename using SAX.
     *
     * @param systemId URL or filename of the data source
     * @throws DocumentException if parsing fails
     */
    public void parseWithSAX(String systemId) throws DocumentException {
        this.doc = newSaxReader().read(systemId);
    }

    /**
     * Converts a W3C DOM document into a dom4j document and stores it in this instance.
     *
     * @param doc the W3C DOM document to convert
     */
    public void parseW3CDom2Dom4j(org.w3c.dom.Document doc) {
        DOMReader domReader = new DOMReader();
        this.doc = domReader.read(doc);
    }

    /**
     * Returns the document loaded by the most recent parse call.
     *
     * @return the current dom4j document, or {@code null} if nothing has been parsed yet
     */
    public Document getDoc() {
        return doc;
    }

    /**
     * Reads the {@code id} attribute of the node selected by an XPath expression.
     *
     * @param xpathExpression XPath selecting the node whose {@code id} attribute is wanted
     * @return the value of {@code @id} evaluated on the selected node, or {@code null} if the
     *         expression selects no node
     */
    public String getAttributeValue(String xpathExpression) {
        return getAttributeValue(xpathExpression, "id");
    }

    /**
     * Reads a named attribute of the node selected by an XPath expression.
     *
     * @param xpathExpression XPath selecting the node that carries the attribute
     * @param attributeName name of the attribute to read (without the leading {@code @})
     * @return the value of {@code @attributeName} evaluated on the selected node, or
     *         {@code null} if the expression selects no node
     */
    public String getAttributeValue(String xpathExpression, String attributeName) {
        Node node = doc.selectSingleNode(xpathExpression);
        if (node == null) {
            return null;
        }
        return node.valueOf("@" + attributeName);
    }

    /**
     * Reads the text of the node selected by an XPath expression.
     *
     * @param xpathExpression XPath selecting the node whose text is wanted
     * @return the text of the selected node, or {@code null} if the expression selects no node
     */
    public String getElementValue(String xpathExpression) {
        Node node = doc.selectSingleNode(xpathExpression);
        if (node == null) {
            return null;
        }
        return node.getText();
    }

    /**
     * Demo entry point: parses the sample XML file and prints ids, attributes, simple child
     * values and the address of every {@code person} element, followed by two XPath lookups
     * on {@code person2}.
     *
     * @param args optional; {@code args[0]} is the path of the XML file to read. When absent,
     *             the built-in sample path is used.
     * @throws DocumentException if the file cannot be parsed
     */
    public static void main(String[] args) throws DocumentException {
        String path = args.length > 0 ? args[0] : DEFAULT_XML_PATH;
        File file = new File(path);
        if (!file.exists()) {
            // Report the problem instead of exiting silently.
            System.err.println("XML file not found: " + file.getAbsolutePath());
            return;
        }

        ParseXml parser = new ParseXml();
        parser.parseWithSAX(file);
        Element root = parser.getDoc().getRootElement();

        // Iterate through all child elements of root.
        // Iterator<?> keeps this compiling against both raw (dom4j 1.x) and generic
        // (dom4j 2.x) iterator return types.
        for (Iterator<?> i = root.elementIterator(); i.hasNext();) {
            Element element = (Element) i.next();
            System.out.println(element.attributeValue("id"));
            // Iterate through the attributes of the current child element.
            for (Iterator<?> iter = element.attributeIterator(); iter.hasNext();) {
                Attribute attribute = (Attribute) iter.next();
                System.out.println(attribute.getValue());
            }
        }

        // Iterate through child elements of root with element name "person".
        for (Iterator<?> i = root.elementIterator("person"); i.hasNext();) {
            Element person = (Element) i.next();
            System.out.println(person.attributeValue("id"));
            for (Iterator<?> i2 = person.elementIterator(); i2.hasNext();) {
                Element el = (Element) i2.next();
                if (el.isTextOnly()) {
                    System.out.println(el.getName() + ":" + el.getText());
                } else {
                    // Resolve the address relative to the person being visited, so each
                    // person reports its own address. valueOf yields an empty string when
                    // a part is missing, so an incomplete address cannot cause an NPE.
                    String country = person.valueOf("address/country");
                    String province = person.valueOf("address/province");
                    String city = person.valueOf("address/city");
                    System.out.println("country:"+country+"province:"+province+"city:"+city);
                }
            }
        }

        System.out.println("person2元素的id属性值为:" + parser.getAttributeValue("//person2"));
        System.out.println("person2元素的province元素值为:" + parser.getElementValue("//person2/address/province"));
    }
}
参考文档:
w3cschool
待续..........