解析方式概述
今天介绍下在 Java 中操作 XML 的常见方案:DOM、SAX
DOM 解析的优缺点和使用场景:
- 优点:解析的时候整个 XML 文档树都在内存当中。支持新增、删除、修改、重新排列等功能。
- 缺点:将整个 XML 文档树都放进内存,如果 XML 文档比较大的话,会对内存形成比较大的压力。
- 使用场合:一旦解析了文档还需要多次访问这些数据,而且资源比较充足(如内存、CPU等)。
SAX 解析优缺点和使用场景:
通过事件驱动,每解析一个节点就触发一个事件。事件过后,如没有保存数据,那么数据就会丢失。
- 优点:无需先将整个 XML 文档解析成文档树并放进内存,即可执行解析操作,占用内存资源少。尤其在嵌入式环境中,极力推荐采用 SAX 解析 XML 文档。
- 缺点:不像DOM一样将文档长期驻留在内存,数据不是持久的,事件过后,如没有保存数据,那么数据就会丢失。
关于 XML 解析的开源库:JDOM、DOM4J
JDOM 解析优缺点:
优点:
1、使用具体类而不是接口,简化了 DOM 的 API。
2、大量使用了 Java 集合类,方便了 Java 开发人员。
缺点:
1、没有较好的灵活性。
2、性能较差。
DOM4J 解析的优缺点:
简单易用,采用Java集合框架,并完全支持 DOM、SAX 和 JAXP
优点:
1、大量使用了Java集合类,方便Java开发人员,同时提供一些提高性能的替代方法。
2、支持 XPath。
3、有很好的性能。
DOM 解析
案例1:
/**
 * DOM example 1: parses {@code student.xml} into an in-memory tree and prints
 * the root element name, the root's direct children, the name / gender / age
 * of every {@code 学生} element, and finally the node type and node value of
 * each direct child of the root.
 */
public class DomPaeseXml {

    /**
     * Runs the example against {@code student.xml} in the working directory.
     *
     * @param args unused
     * @throws Exception if the parser cannot be created or the file cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = dbf.newDocumentBuilder();
        Document document = db.parse(new File("student.xml"));
        // document.getXmlVersion() and document.getXmlStandalone() expose the
        // XML declaration if it is ever needed.

        Element root = document.getDocumentElement();
        System.out.println(root.getNodeName());

        // Direct children of the root; this list includes non-element nodes too.
        NodeList children = root.getChildNodes();
        System.out.println(children.getLength());
        for (int i = 0; i < children.getLength(); i++) {
            System.out.println(children.item(i).getNodeName());
        }
        System.out.println("***************************");

        NodeList students = document.getElementsByTagName("学生");
        for (int i = 0; i < students.getLength(); i++) {
            Element student = (Element) students.item(i);
            System.out.println("姓名:" + firstChildValue(student, "姓名"));
            System.out.println("性别:" + firstChildValue(student, "性别"));
            System.out.println("年龄:" + firstChildValue(student, "年龄"));
            System.out.println("-------------------------------------------------");
        }

        for (int i = 0; i < children.getLength(); i++) {
            Node node = children.item(i);
            System.out.println(node.getNodeType() + " : " + node.getNodeValue());
        }
    }

    /**
     * Returns the node value of the first child of the first descendant element
     * called {@code tagName}.
     *
     * @param parent  element whose descendants are searched
     * @param tagName tag name to look for
     * @return the node value, or an empty string when the element is missing or
     *         has no children (previously both cases threw NullPointerException)
     */
    private static String firstChildValue(Element parent, String tagName) {
        Node field = parent.getElementsByTagName(tagName).item(0);
        if (field == null) {
            return "";
        }
        Node first = field.getFirstChild();
        if (first == null) {
            return "";
        }
        return first.getNodeValue();
    }
}
案例2:解析任意的XML文件
/**
 * DOM example 2: parses {@code student.xml} and rebuilds its markup by walking
 * the tree recursively, then prints the result.
 *
 * <p>Elements, attributes, text, CDATA sections and comments are written back;
 * other node types are skipped. Text and attribute values are re-escaped so
 * that characters such as {@code &} and {@code <} do not produce malformed
 * output.
 */
public class DomParseXml2 {

    /**
     * Runs the example against {@code student.xml} in the working directory.
     *
     * @param args unused
     * @throws Exception if the parser cannot be created or the file cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = dbf.newDocumentBuilder();
        Document doc = db.parse(new File("student.xml"));

        // A local buffer, so repeated calls never see output from an earlier run.
        StringBuilder out = new StringBuilder();
        appendElement(out, doc.getDocumentElement());
        System.out.println(out);
    }

    /** Appends {@code element}, its attributes and all supported descendants to {@code out}. */
    private static void appendElement(StringBuilder out, Element element) {
        out.append('<').append(element.getTagName());

        NamedNodeMap attributes = element.getAttributes();
        for (int i = 0; i < attributes.getLength(); i++) {
            Attr attr = (Attr) attributes.item(i);
            out.append(' ').append(attr.getName()).append("=\"");
            appendEscaped(out, attr.getValue(), true);
            out.append('"');
        }
        out.append('>');

        NodeList children = element.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node node = children.item(i);
            switch (node.getNodeType()) {
                case Node.TEXT_NODE:
                    appendEscaped(out, node.getNodeValue(), false);
                    break;
                case Node.CDATA_SECTION_NODE:
                    // CDATA has its own node type; without this branch its content is lost.
                    out.append("<![CDATA[").append(node.getNodeValue()).append("]]>");
                    break;
                case Node.ELEMENT_NODE:
                    appendElement(out, (Element) node);
                    break;
                case Node.COMMENT_NODE:
                    out.append("<!--").append(((Comment) node).getData()).append("-->");
                    break;
                default:
                    break;
            }
        }
        out.append("</").append(element.getTagName()).append('>');
    }

    /**
     * Appends {@code value} with XML special characters replaced by entities.
     * Double quotes are only escaped inside attribute values.
     */
    private static void appendEscaped(StringBuilder out, String value, boolean inAttribute) {
        if (value == null) {
            return;
        }
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            if (c == '&') {
                out.append("&amp;");
            } else if (c == '<') {
                out.append("&lt;");
            } else if (c == '>') {
                out.append("&gt;");
            } else if (c == '"' && inAttribute) {
                out.append("&quot;");
            } else {
                out.append(c);
            }
        }
    }
}
SAX解析(基于事件)
案例1:
/**
 * SAX example 1: streams {@code student.xml} through a SAX parser and lets
 * {@code MyHandler} react to the parse events.
 */
public class SAXParseXML {

    /**
     * @param args unused
     * @throws Exception if the parser cannot be created or parsing fails
     */
    public static void main(String[] args) throws Exception {
        SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
        File source = new File("student.xml");
        MyHandler eventHandler = new MyHandler();
        saxParser.parse(source, eventHandler);
    }
}
/**
 * Minimal SAX handler that prints one fixed line for each document or element
 * start/end event it receives.
 */
class MyHandler extends DefaultHandler {

    @Override
    public void startDocument() throws SAXException {
        report("document start");
    }

    @Override
    public void startElement(String uri, String localName, String name,
            Attributes attributes) throws SAXException {
        report("element start");
    }

    @Override
    public void endElement(String uri, String localName, String name)
            throws SAXException {
        report("element end");
    }

    @Override
    public void endDocument() throws SAXException {
        report("document end");
    }

    /** Writes a single event line to standard output. */
    private static void report(String event) {
        System.out.println(event);
    }
}
案例2:
/**
 * SAX example 2: streams {@code student.xml} through a SAX parser using
 * {@code MyHandler2} as the event handler.
 */
public class SAXParseXML2 {

    /**
     * @param args unused
     * @throws Exception if the parser cannot be created or parsing fails
     */
    public static void main(String[] args) throws Exception {
        SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
        File source = new File("student.xml");
        MyHandler2 eventHandler = new MyHandler2();
        saxParser.parse(source, eventHandler);
    }
}
/**
 * SAX handler that prints every attribute it encounters and, at the end of
 * each {@code 学生} element, the name, age and gender collected from its
 * {@code 姓名}, {@code 年龄} and {@code 性别} child elements.
 *
 * <p>Character data is accumulated in a buffer and only committed when the
 * enclosing element ends, because a SAX parser is allowed to deliver one run
 * of text through several {@code characters} callbacks. Overwriting the field
 * on each callback would keep only the last chunk.
 */
class MyHandler2 extends DefaultHandler {
    /** Text seen since the most recent start or end tag. */
    private final StringBuilder text = new StringBuilder();
    private String name;
    private String age;
    private String gender;

    /**
     * Prints all attributes of the element as {@code qName=value} lines and
     * clears the per-student fields when a new {@code 学生} element begins, so
     * that a missing child is not reported with the previous student's value.
     */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        text.setLength(0);
        if ("学生".equals(qName)) {
            name = null;
            age = null;
            gender = null;
        }
        for (int i = 0; i < attributes.getLength(); i++) {
            System.out.println(attributes.getQName(i) + "=" + attributes.getValue(i));
        }
    }

    /** Appends the delivered chunk to the text buffer. */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        text.append(ch, start, length);
    }

    /**
     * Commits the buffered text to the matching field, or prints the collected
     * record when a {@code 学生} element closes.
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        if ("姓名".equals(qName)) {
            name = text.toString();
        } else if ("年龄".equals(qName)) {
            age = text.toString();
        } else if ("性别".equals(qName)) {
            gender = text.toString();
        } else if ("学生".equals(qName)) {
            System.out.println("姓名" + "=" + name);
            System.out.println("年龄" + "=" + age);
            System.out.println("性别" + "=" + gender);
            System.out.println();
        }
        // Text that follows this end tag belongs to the parent, not to this element.
        text.setLength(0);
    }
}
JDOM 解析
案例1:
/**
 * JDOM example 1: builds a small document in memory (a {@code root} element
 * holding a comment and one {@code person} with name, age, birthday and a
 * nested address) and writes it pretty-printed to {@code jdom.xml}.
 */
public class JdomTest1 {

    /**
     * @param args unused
     * @throws Exception if {@code jdom.xml} cannot be opened or written
     */
    public static void main(String[] args) throws Exception {
        Document document = new Document();
        Element root = new Element("root");
        root.addContent(new Comment("this is my comment!"));
        document.setRootElement(root);

        Element people = new Element("person");
        people.setAttribute(new Attribute("number", "1"));

        Element element1 = new Element("name");
        element1.setText("zhangsan");
        Element element2 = new Element("age");
        element2.setText("12");
        Element element3 = new Element("birthday");
        element3.setText("2011-11-11");
        Element element4 = new Element("address");
        element4.addContent(new Element("country").setText("China"));
        element4.addContent(new Element("city").setText("nanchang"));
        element4.addContent(new Element("street").setText("bayiguangchang"));

        people.addContent(element1).addContent(element2).addContent(element3).addContent(element4);
        root.addContent(people);

        Format format = Format.getPrettyFormat();
        format.setIndent(" ");
        XMLOutputter outputter = new XMLOutputter(format);

        // Close the stream explicitly so the file handle is released even if output fails.
        FileOutputStream out = new FileOutputStream("jdom.xml");
        try {
            outputter.output(document, out);
        } finally {
            out.close();
        }
    }
}
案例2:
/**
 * JDOM example 2: reads {@code jdom.xml}, prints the root name, the names of
 * the root's children and the attributes of the {@code person} element, then
 * replaces the content of {@code person/address} with the text {@code China}
 * and writes the modified document to {@code jdom2.xml}.
 */
public class JdomTest2 {

    /**
     * @param args unused
     * @throws Exception if {@code jdom.xml} cannot be parsed or {@code jdom2.xml} cannot be written
     */
    public static void main(String[] args) throws Exception {
        SAXBuilder builder = new SAXBuilder();
        Document document = builder.build(new File("jdom.xml"));
        Element root = document.getRootElement();
        System.out.println("root: " + root.getName());

        List children = root.getChildren();
        for (Object child : children) {
            System.out.println(((Element) child).getName());
        }

        // Look the element up once and reuse it for both attributes and address.
        Element person = root.getChild("person");
        List<Attribute> attrs = person.getAttributes();
        for (Attribute attr : attrs) {
            System.out.println(attr.getName() + "=" + attr.getValue());
        }

        Element address = person.getChild("address");
        address.removeContent();
        address.addContent("China");

        XMLOutputter outputter = new XMLOutputter(Format.getPrettyFormat().setIndent(" "));
        // Close the stream explicitly so the file handle is released even if output fails.
        FileOutputStream out = new FileOutputStream("jdom2.xml");
        try {
            outputter.output(document, out);
        } finally {
            out.close();
        }
    }
}
案例3:
package cn.huaxia.xml.jdom;
import java.io.FileOutputStream;
import java.io.FileWriter;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
/**
 * JDOM example 3: builds a contact list document with Chinese element names
 * and writes it to {@code contact.xml} with a {@code gbk} XML declaration.
 */
public class JdomTest3 {

    /**
     * @param args unused
     * @throws Exception if {@code contact.xml} cannot be opened or written
     */
    public static void main(String[] args) throws Exception {
        Document document = new Document();
        Element root = new Element("联系人列表").setAttribute(new Attribute("公司",
                "A集团"));
        document.addContent(root);

        Element contactPerson = new Element("联系人");
        root.addContent(contactPerson);
        contactPerson
                .addContent(new Element("姓名").setText("张三"))
                .addContent(new Element("公司").setText("A公司"))
                .addContent(new Element("电话").setText("021-55556666"))
                .addContent(
                        new Element("地址")
                                .addContent(new Element("街道").setText("5街"))
                                .addContent(new Element("城市").setText("上海"))
                                .addContent(new Element("省份").setText("上海市")));

        XMLOutputter output = new XMLOutputter(Format.getPrettyFormat()
                .setIndent(" ").setEncoding("gbk"));

        // Write to a byte stream so the outputter encodes the bytes with the
        // declared "gbk" encoding. A FileWriter would use the platform default
        // charset, which may not match the XML declaration and would then
        // corrupt the Chinese text when the file is read back.
        FileOutputStream out = new FileOutputStream("contact.xml");
        try {
            output.output(document, out);
        } finally {
            out.close();
        }
    }
}
Dom4J 解析
案例 1:
/**
 * dom4j example 1: builds a {@code root/student} document with name, age and
 * sex children and writes it three ways: to the writer's default target, to
 * {@code student2.xml} through an output stream, and to {@code student3.xml}
 * through an explicitly UTF-8 encoded writer.
 */
public class Dom4JTest {

    /**
     * @param args unused
     * @throws Exception if either output file cannot be opened or written
     */
    public static void main(String[] args) throws Exception {
        Document document = DocumentHelper.createDocument();
        Element element = DocumentHelper.createElement("root");
        document.setRootElement(element);

        Element element2 = element.addElement("student");
        Element element3 = element2.addElement("name");
        Element element4 = element2.addElement("age");
        Element element5 = element2.addElement("sex");
        element3.setText("johnny");
        element4.setText("11");
        element5.setText("男");

        OutputFormat format = new OutputFormat(" ", true, "UTF-8");

        // No explicit target is given here, so this writer is deliberately not closed.
        XMLWriter writer = new XMLWriter(format);
        writer.write(document);

        // File-backed writers are closed so buffered data is flushed and the
        // underlying file handles are released.
        XMLWriter writer2 = new XMLWriter(new FileOutputStream("student2.xml"),
                format);
        try {
            writer2.write(document);
        } finally {
            writer2.close();
        }

        OutputStreamWriter fileWriter = new OutputStreamWriter(new FileOutputStream(
                "student3.xml"), "utf-8");
        System.out.println(fileWriter.getEncoding());
        XMLWriter writer3 = new XMLWriter(fileWriter, format);
        try {
            writer3.write(document);
            writer3.flush();
        } finally {
            writer3.close();
        }
    }
}
案例2:
/**
 * dom4j example 2: reads {@code student2.xml} with {@code SAXReader} and
 * prints its root name, child count, child names and the fields of the
 * {@code student} element; then parses {@code student.xml} with the JAXP DOM
 * parser and converts the result to a dom4j document through {@code DOMReader}.
 */
public class Dom4JTest2 {

    /**
     * @param args unused
     * @throws Exception if either file cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        System.out.println("---------------SAXReader----------------------");
        Document saxDocument = new SAXReader().read(new File("student2.xml"));
        Element root = saxDocument.getRootElement();
        System.out.println("root element name: " + root.getName());

        List<Element> children = root.elements();
        System.out.println(children.size());
        Iterator cursor = root.elementIterator();
        while (cursor.hasNext()) {
            Element child = (Element) cursor.next();
            System.out.println(child.getName());
        }

        // Field lines go to the error stream, as in the original example.
        List<Element> studentFields = root.element("student").elements();
        for (Element field : studentFields) {
            System.err.println(field.getName() + " : " + field.getText());
        }

        DocumentBuilder w3cBuilder = DocumentBuilderFactory.newInstance()
                .newDocumentBuilder();
        org.w3c.dom.Document w3cDocument = w3cBuilder.parse(new File("student.xml"));

        System.out.println("---------------DOMReader----------------------");
        Document converted = new DOMReader().read(w3cDocument);
        System.out.println(converted.getRootElement().getName());
    }
}