DOM对XML解析总结_使用dom解析xml数据实验总结-CSDN博客

本文链接：https://blog.csdn.net/hwb1992/article/details/20299519

今天我用了dom对xml文件进行解析，现在进行一下自己的总结

现在贴上一个最简单的例子

package edu.fjnu.cs.hwb.xml;

import java.io.File;
import java.io.IOException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Minimal DOM example: parses {@code message.xml} from the working directory and
 * prints the tag name and the text of the first {@code <message>} element.
 */
public class MessageXML {

    /**
     * Parses {@code message.xml} and prints two lines: the name of the first
     * {@code <message>} element and the value of its first child node.
     *
     * <p>If the parser cannot be created, or the file cannot be read or parsed,
     * the stack trace is printed and nothing else happens. The document is only
     * used when parsing succeeded, so a failed parse can no longer lead to a
     * {@code NullPointerException}.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Obtain a DOM parser factory. JAXP chooses the concrete parser
        // implementation behind this call, so the code below stays the same
        // no matter which parser is actually installed.
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        try {
            // The DocumentBuilder is the concrete DOM parser; which vendor
            // provides it does not matter to this program.
            DocumentBuilder db = dbf.newDocumentBuilder();

            // The Document is the tree model of the whole XML file. From here
            // on every operation works on this tree, not on the parser.
            Document dom = db.parse(new File("message.xml"));

            printFirstMessage(dom);
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the name and text of the first {@code <message>} element of the
     * given document, or a diagnostic on standard error when there is none.
     *
     * @param dom the parsed document; must not be {@code null}
     */
    private static void printFirstMessage(Document dom) {
        // getElementsByTagName returns every <message> element in the document;
        // item(i) then gives access to each one.
        NodeList nodelist = dom.getElementsByTagName("message");
        if (nodelist.getLength() == 0) {
            System.err.println("No <message> element found in message.xml");
            return;
        }

        Element node = (Element) nodelist.item(0);
        System.out.println(node.getNodeName());

        // In the W3C DOM the text inside a tag is itself a Node, so the text is
        // read from the element's first child rather than from the element.
        // An empty element has no child at all; print an empty line for it.
        Node text = node.getFirstChild();
        System.out.println(text == null ? "" : text.getNodeValue());
    }

}



/**
 * Dom 介绍
 * DOM的基本对象有5个：Document，Node，NodeList，Element和Attr。下面就这些对象的功能和实现的方法作一个大致的介绍。 
 * Node对象是DOM结构中最为基本的对象，代表了文档树中的一个抽象的节点。
 * 在实际使用的时候，很少会真正的用到Node这个对象，
 * 而是用到诸如Element、Attr、Text等Node对象的子对象来操作文档。
 * Node对象为这些对象提供了一个抽象的、公共的根。虽然在Node对象中定义了对其子节点进行存取的方法，
 * 但是有一些Node子对象，比如Text对象，它并不存在子节点，这一点是要注意的。 
 */

附上xml

<?xml version="1.0" encoding="UTF-8"?>
<messages>
<message>Hello World</message>
</messages>

现在说下比较复杂的操作:

先看下xml文件:

<?xml version="1.0" encoding="UTF-8" standalone="no"?><links>
<link>
<text>JSP Insider</text> 
<url newWindow="no">http://www.jspinsider.com</url>
<author>JSP Insider</author>
<date>
<day>2</day>
<month>1</month>
<year>2001</year>
</date>
<description>A JSP information site.</description>
</link>
<link>
<text>The makers of Java</text>
<url newWindow="no">http://java.sun.com</url>
<author>Sun Microsystems</author>
<date>
<day>3</day>
<month>1</month>
<year>2001</year>
</date>
<description>Sun Microsystem's website.</description>
</link>
<link>
<text>The standard JSP container</text>
<url newWindow="no">http://jakarta.apache.org</url>
<author>Apache Group</author>
<date>
<day>4</day>
<month>1</month>
<year>2001</year>
</date>
<description>Some great software.</description>
</link>
</links>

再看下代码：

/**
 * 
 */
package edu.fjnu.cs.hwb.xml;

import java.io.File;
import java.io.IOException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * DOM example for a nested document: parses {@code link.xml} from the working
 * directory and prints the fields of every {@code <link>} element.
 *
 * <p>Expected structure of {@code link.xml}:
 * <pre>
 * &lt;links&gt;
 *   &lt;link&gt;
 *     &lt;text&gt;JSP Insider&lt;/text&gt;
 *     &lt;url newWindow="no"&gt;http://www.jspinsider.com&lt;/url&gt;
 *     &lt;author&gt;JSP Insider&lt;/author&gt;
 *     &lt;date&gt;
 *       &lt;day&gt;2&lt;/day&gt;
 *       &lt;month&gt;1&lt;/month&gt;
 *       &lt;year&gt;2001&lt;/year&gt;
 *     &lt;/date&gt;
 *     &lt;description&gt;A JSP information site.&lt;/description&gt;
 *   &lt;/link&gt;
 * &lt;/links&gt;
 * </pre>
 *
 * <p>Steps:
 * <ol>
 *   <li>Obtain a {@code DocumentBuilderFactory}.</li>
 *   <li>Obtain a {@code DocumentBuilder} (the DOM parser) from it.</li>
 *   <li>Parse the XML file to get the {@code Document}.</li>
 *   <li>Call {@code getElementsByTagName(tag)} on the document to get a
 *       {@code NodeList} of all elements with that tag, e.g. {@code link}.</li>
 *   <li>Fetch one entry with {@code NodeList.item(index)} and cast it to
 *       {@code Element}; it represents that {@code <link>} and everything
 *       below it.</li>
 *   <li>Call {@code getElementsByTagName(tag)} on that element to find the
 *       child tag of interest, e.g. {@code text}.</li>
 *   <li>If the tag contains only text, read it with
 *       {@code nodelist.item(0).getFirstChild().getNodeValue()}. If the tag has
 *       child tags of its own, cast first and descend:
 *       {@code Element e2 = (Element) nodelist.item(0);
 *       e2.getElementsByTagName(tag).item(0).getFirstChild().getNodeValue()}.</li>
 * </ol>
 *
 * <p>Because {@code getElementsByTagName} searches all descendants,
 * {@code day}/{@code month}/{@code year} could also be looked up directly on
 * the {@code <link>} element instead of going through {@code <date>}; both
 * approaches read the same values for this document. The index {@code 0} used
 * throughout is specific to this document layout and must be adapted for
 * other XML formats.
 *
 * @author hwb
 */
public class LinkXML {

    /**
     * Parses {@code link.xml} and prints, for each {@code <link>}: text, url,
     * author, the date as {@code day-month-year}, and the description, one per
     * line. Before the first link, the runtime class of the first link node is
     * printed.
     *
     * <p>If the parser cannot be created, or the file cannot be read or parsed,
     * the stack trace is printed and nothing else happens.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        try {
            DocumentBuilder db = dbf.newDocumentBuilder();
            Document dom = db.parse(new File("link.xml"));
            // Only reached when parsing succeeded, so dom is never null here.
            printLinks(dom);
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints every {@code <link>} element of the given document.
     *
     * @param dom the parsed document; must not be {@code null}
     */
    private static void printLinks(Document dom) {
        NodeList links = dom.getElementsByTagName("link");

        // Show which concrete class the parser uses for element nodes.
        // Skipped when the document has no <link>, where item(0) is null.
        if (links.getLength() > 0) {
            System.out.println(links.item(0).getClass());
        }

        for (int i = 0; i < links.getLength(); i++) {
            Element link = (Element) links.item(i);
            System.out.println(textOf(link, "text"));
            System.out.println(textOf(link, "url"));
            System.out.println(textOf(link, "author"));

            // <date> has child tags of its own, so descend into it first.
            Element date = firstElement(link, "date");
            System.out.println(textOf(date, "day")
                    + "-" + textOf(date, "month")
                    + "-" + textOf(date, "year"));

            System.out.println(textOf(link, "description"));
        }
    }

    /**
     * Returns the first descendant of {@code parent} with the given tag name.
     *
     * @param parent element to search in; may be {@code null}
     * @param tag    tag name to look for
     * @return the first matching element, or {@code null} if {@code parent} is
     *         {@code null} or has no such descendant
     */
    private static Element firstElement(Element parent, String tag) {
        if (parent == null) {
            return null;
        }
        // getElementsByTagName only ever returns elements, so the cast is safe;
        // item(0) is null when the list is empty.
        return (Element) parent.getElementsByTagName(tag).item(0);
    }

    /**
     * Returns the value of the first child node of the first descendant of
     * {@code parent} with the given tag name.
     *
     * @param parent element to search in; may be {@code null}
     * @param tag    tag name to look for
     * @return the node value, or an empty string when the element is missing
     *         or has no child node
     */
    private static String textOf(Element parent, String tag) {
        Element element = firstElement(parent, tag);
        if (element == null) {
            return "";
        }
        // The text inside a tag is its own child Node in the DOM.
        Node text = element.getFirstChild();
        return text == null ? "" : text.getNodeValue();
    }

}

ps.这个例子我是在百度上面找的...但是自己写了点注释和心得。如果冒犯了原作者，请跟我说下，我会删除掉。