处理XML一般有两种方式:基于DOM和基于流。相关工具很多,比如SAX、StAX、DOM、JDOM、DOM4J等。SAX和StAX都是基于流的,前者属于推模型(解析器主动回调),后者属于拉模型(由应用程序主动读取下一个事件)。StAX(Streaming API for XML)最初由BEA Systems主导的JSR 173提出,是一种基于流(stream)的处理方式,自Java 6起随JDK提供(javax.xml.stream包),用法和SAX很像。在WebService中一般使用基于流的工具,基于DOM的工具需要把整个文档加载到内存,或多或少会影响一些效率。WS中还要涉及到Java对象和XML之间的转换,可以直接使用JDK提供的JAXB。类似的还有XStream、Jackson、json-lib,这些框架提供了XML、JSON与Java对象之间的转换,可根据具体的需求选择不同的框架。
JAXB的用法很简单:把Java对象转换为XML叫编排(marshal),把XML转换为Java对象叫反编排(unmarshal),实例如下:
package com.tgb.xml;
import java.io.StringReader;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import javax.xml.bind.Unmarshaller;
import org.junit.Test;
/**
 * JAXB demo: converts a {@code Student} to XML (marshal) and back again (unmarshal).
 */
public class TestJaxb {

    /** Marshals a sample {@code Student} and writes the resulting XML to standard output. */
    @Test
    public void test01() {
        try {
            JAXBContext context = JAXBContext.newInstance(Student.class);
            Marshaller toXml = context.createMarshaller();
            Classroom room = new Classroom(1, "计算机", "2012");
            Student student = new Student(1, "这是", "32", room);
            toXml.marshal(student, System.out);
        } catch (JAXBException ex) {
            ex.printStackTrace();
        }
    }

    /** Unmarshals a fixed XML string into a {@code Student} and prints "studentName,classroomName". */
    @Test
    public void test02() {
        try {
            String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?><student><age>32</age><classroom><grade>2012</grade><id>1</id><name>计算机</name></classroom><id>1</id><name>这是</name></student>";
            JAXBContext context = JAXBContext.newInstance(Student.class);
            Unmarshaller fromXml = context.createUnmarshaller();
            Object parsed = fromXml.unmarshal(new StringReader(xml));
            Student student = (Student) parsed;
            String studentName = student.getName();
            String classroomName = student.getClassroom().getName();
            System.out.println(studentName + "," + classroomName);
        } catch (JAXBException ex) {
            ex.printStackTrace();
        }
    }
}
下面主要讲StAX操作XML的实例。首先创建一个XML文档(保存为books.xml并放在classpath下,后面的代码通过getResourceAsStream("books.xml")加载它):
<?xml version="1.0" encoding="ISO-8859-1"?>
<bookstore>
<book category="COOKING">
<title lang="en">
Everyday Italian
</title>
<author>Giada De Laurentiis</author>
<year>2005</year>
<price>30.00</price>
</book>
<book category="CHILDREN">
<title lang="en">Harry Potter</title>
<author>J K. Rowling</author>
<year>2005</year>
<price>29.99</price>
</book>
<book category="WEB">
<title lang="en">XQuery Kick Start</title>
<author>James McGovern</author>
<author>Per Bothner</author>
<author>Kurt Cagle</author>
<author>James Linn</author>
<author>Vaidyanathan Nagarajan</author>
<year>2003</year>
<price>49.99</price>
</book>
<book category="WEB">
<title lang="en">Learning XML</title>
<author>Erik T. Ray</author>
<year>2003</year>
<price>39.95</price>
</book>
</bookstore>
基于光标的查找:
/**
 * Cursor-based traversal of books.xml: prints the name of every start element,
 * the text of every character event, and "/" plus the name of every end element.
 */
@Test
public void test01() {
    XMLInputFactory inputFactory = XMLInputFactory.newInstance();
    InputStream booksStream = TestStax.class.getClassLoader().getResourceAsStream("books.xml");
    try {
        XMLStreamReader cursor = inputFactory.createXMLStreamReader(booksStream);
        while (cursor.hasNext()) {
            switch (cursor.next()) {
                case XMLStreamConstants.START_ELEMENT: // opening tag
                    System.out.println(cursor.getName());
                    break;
                case XMLStreamConstants.CHARACTERS: // text node
                    System.out.println(cursor.getText());
                    break;
                case XMLStreamConstants.END_ELEMENT: // closing tag
                    System.out.println("/" + cursor.getName());
                    break;
                default:
                    // every other event type is ignored
                    break;
            }
        }
    } catch (XMLStreamException ex) {
        ex.printStackTrace();
    } finally {
        if (booksStream != null) {
            try {
                booksStream.close();
            } catch (IOException ex) {
                ex.printStackTrace();
            }
        }
    }
}
/**
 * Cursor-based lookup in books.xml: for each "book" element prints its first
 * attribute as "name:value", and for each "price" element prints its text
 * followed by a blank line.
 */
@Test
public void test02() {
    XMLInputFactory inputFactory = XMLInputFactory.newInstance();
    InputStream booksStream = TestStax.class.getClassLoader().getResourceAsStream("books.xml");
    try {
        XMLStreamReader cursor = inputFactory.createXMLStreamReader(booksStream);
        while (cursor.hasNext()) {
            if (cursor.next() != XMLStreamConstants.START_ELEMENT) {
                continue;
            }
            String tag = cursor.getName().toString();
            if ("book".equals(tag)) {
                // read the name and value of the first attribute
                System.out.println(cursor.getAttributeName(0) + ":" + cursor.getAttributeValue(0));
            } else if ("price".equals(tag)) {
                // read the element's text content
                System.out.println(cursor.getElementText() + "\n");
            }
        }
    } catch (XMLStreamException ex) {
        ex.printStackTrace();
    } finally {
        if (booksStream != null) {
            try {
                booksStream.close();
            } catch (IOException ex) {
                ex.printStackTrace();
            }
        }
    }
}
基于迭代模型查找:
/**
 * Iterator-based (event) traversal of books.xml: prints each title followed by ":"
 * and each price followed by a blank line, then prints how many events the loop
 * itself pulled with {@code nextEvent()}.
 */
@Test
public void test03() {
    XMLInputFactory inputFactory = XMLInputFactory.newInstance();
    InputStream booksStream = TestStax.class.getClassLoader().getResourceAsStream("books.xml");
    try {
        // event-iterator flavour of the StAX API
        XMLEventReader events = inputFactory.createXMLEventReader(booksStream);
        int pulled = 0;
        // the counter lives in the update clause so that "continue" still counts the event
        for (; events.hasNext(); pulled++) {
            XMLEvent current = events.nextEvent();
            // the XMLEvent itself tells us which kind of node it is
            if (!current.isStartElement()) {
                continue;
            }
            // event.asXxx() narrows the event to its concrete kind
            String tag = current.asStartElement().getName().toString();
            if ("title".equals(tag)) {
                System.out.println(events.getElementText() + ":");
            }
            if ("price".equals(tag)) {
                System.out.println(events.getElementText() + "\n");
            }
        }
        System.out.println(pulled);
    } catch (XMLStreamException ex) {
        ex.printStackTrace();
    } finally {
        if (booksStream != null) {
            try {
                booksStream.close();
            } catch (IOException ex) {
                ex.printStackTrace();
            }
        }
    }
}
过滤器的使用:
/**
 * Filtered event traversal of books.xml: an {@link EventFilter} lets through only
 * the start elements named "title" or "price"; each title is printed followed by
 * ":" and each price followed by a blank line, then the number of events the loop
 * received is printed.
 */
@Test
public void test04() {
    XMLInputFactory inputFactory = XMLInputFactory.newInstance();
    InputStream booksStream = TestStax.class.getClassLoader().getResourceAsStream("books.xml");
    try {
        XMLEventReader unfiltered = inputFactory.createXMLEventReader(booksStream);
        // filter-based approach: events we do not need to handle never reach the loop
        EventFilter titleOrPrice = new EventFilter() {
            @Override
            public boolean accept(XMLEvent candidate) {
                if (!candidate.isStartElement()) {
                    return false;
                }
                String tag = candidate.asStartElement().getName().toString();
                return tag.equals("title") || tag.equals("price");
            }
        };
        XMLEventReader events = inputFactory.createFilteredReader(unfiltered, titleOrPrice);
        int received = 0;
        // the counter lives in the update clause so that "continue" still counts the event
        for (; events.hasNext(); received++) {
            XMLEvent current = events.nextEvent();
            if (!current.isStartElement()) {
                continue;
            }
            String tag = current.asStartElement().getName().toString();
            if ("title".equals(tag)) {
                System.out.println(events.getElementText() + ":");
            }
            if ("price".equals(tag)) {
                System.out.println(events.getElementText() + "\n");
            }
        }
        System.out.println(received);
    } catch (XMLStreamException ex) {
        ex.printStackTrace();
    } finally {
        if (booksStream != null) {
            try {
                booksStream.close();
            } catch (IOException ex) {
                ex.printStackTrace();
            }
        }
    }
}
XPath的使用:
/**
 * XPath demo: loads books.xml into a DOM document, selects every {@code book}
 * whose {@code category} attribute is "WEB", and prints the text of the first
 * {@code title} child of each match.
 */
@Test
public void test05() {
    // No XMLInputFactory here: this method uses only the DOM parser and XPath,
    // so the StAX factory the original created was never used.
    InputStream is = null;
    try {
        is = TestStax.class.getClassLoader().getResourceAsStream("books.xml");
        // create the DOM parser
        DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        // parse the stream into a DOM document
        Document doc = db.parse(is);
        // create the XPath evaluator
        XPath xpath = XPathFactory.newInstance().newXPath();
        // first argument is the XPath expression, second is the document to evaluate it against
        NodeList list = (NodeList) xpath.evaluate("//book[@category='WEB']", doc, XPathConstants.NODESET);
        for (int i = 0; i < list.getLength(); i++) {
            // print the title of each matched book
            Element e = (Element) list.item(i);
            System.out.println(e.getElementsByTagName("title").item(0).getTextContent());
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
使用XMLStreamWriter创建xml:
/**
 * Writes a small namespaced XML document to standard output with
 * {@link XMLStreamWriter}: a {@code prefix:person} root element containing a
 * {@code prefix:id} child whose text is "1".
 */
@Test
public void test06() {
    try {
        XMLStreamWriter xsw = XMLOutputFactory.newInstance().createXMLStreamWriter(System.out);
        String ns = "http://www.tgb.com";
        xsw.writeStartDocument("utf-8", "1.0");
        xsw.writeStartElement("prefix", "person", ns);
        // Declare the prefix on the root element; without this the output uses an
        // unbound prefix and the namespace-only writeStartElement below has no
        // prefix to resolve for the URI.
        xsw.writeNamespace("prefix", ns);
        xsw.writeStartElement(ns, "id");
        xsw.writeCharacters("1");
        xsw.writeEndElement();
        xsw.writeEndElement();
        // The end of the document is written last, after all elements are closed.
        xsw.writeEndDocument();
        xsw.flush();
        xsw.close();
    } catch (XMLStreamException e) {
        e.printStackTrace();
    } catch (FactoryConfigurationError e) {
        e.printStackTrace();
    }
}
使用Transformer更新节点信息:
/**
 * Updates a node in memory and serializes the result: loads books.xml into a DOM
 * document, finds the {@code book} whose title is "Learning XML" via XPath, sets
 * the text of its first {@code price} element to "232323", and writes the whole
 * modified document to standard output with a {@link Transformer}. The books.xml
 * resource itself is not changed.
 */
@Test
public void test07() {
    InputStream is = null;
    try {
        is = TestStax.class.getClassLoader().getResourceAsStream("books.xml");
        // create the DOM parser
        DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        // parse the stream into a DOM document
        Document doc = db.parse(is);
        // create the XPath evaluator
        XPath xpath = XPathFactory.newInstance().newXPath();
        Transformer tran = TransformerFactory.newInstance().newTransformer();
        tran.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        tran.setOutputProperty(OutputKeys.INDENT, "yes");
        // first argument is the XPath expression, second is the document to evaluate it against
        NodeList list = (NodeList) xpath.evaluate("//book[title='Learning XML']", doc, XPathConstants.NODESET);
        // locate the price element of the first matching book
        Element be = (Element) list.item(0);
        Element e = (Element) (be.getElementsByTagName("price").item(0));
        e.setTextContent("232323");
        Result result = new StreamResult(System.out);
        // serialize the modified in-memory document to the result
        tran.transform(new DOMSource(doc), result);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // close the resource stream, as the other TestStax methods do
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
以上都是StAX(以及JAXP中的XPath、Transformer)的简单应用,具体可参考JDK文档。结合本文,还可参考《XML解析技术研究(一)》、《XML解析技术研究(二)》、《Java6.0新特性之StAX--全面解析Java XML分析技术》和《使用JAXP进行SAX解析(XMLReaderFactory、XMLReader 、SAXParserFactory与SAXParser)》,写得很棒!
DOM4j解析xml的一篇文章:《使用Dom4j解析XML》,总体来说还是dom4j好用些。