XML文档解析

版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/lan_xuewei/article/details/79143983

XML文件如下:

<?xml version="1.0" encoding="UTF-8" ?>
<bookstore>
    <book id="1">
        <name>活着</name>
        <author>余华</author>
        <year>2012</year>
        <price>89</price>
    </book>

    <book id="2">
        <name>平凡的世界</name>
        <author>路遥</author>
        <year>2015</year>
        <price>98</price>
    </book>
</bookstore>

1、使用DOM方式解析,代码如下:

package com.lanxuewei.codeonline.demo;

import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;

/**
 * Demonstrates DOM-based XML parsing: loads {@code xmlTest.xml}, then prints every
 * {@code book} element together with its attributes and its child elements.
 */
public class DomTest {

    /**
     * Parses {@code xmlTest.xml} (resolved by the parser from the relative name) and
     * dumps all {@code book} elements to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        try {
            DocumentBuilder db = dbf.newDocumentBuilder();
            Document document = db.parse("xmlTest.xml");               // load the XML file
            NodeList bookList = document.getElementsByTagName("book"); // all <book> elements
            int bookCount = bookList.getLength();
            System.out.println("bookNum: " + bookCount);
            for (int i = 0; i < bookCount; i++) {
                int bookNumber = i + 1;                                // 1-based for display
                System.out.println("---开始遍历第" + bookNumber + "本书---");
                Node book = bookList.item(i);
                printAttributes(book);
                printChildElements(book, bookNumber);
                System.out.println("---结束遍历第" + bookNumber + "本书---");
            }
        } catch (ParserConfigurationException | SAXException | IOException e) {
            e.printStackTrace();
        }
    }

    /** Prints the name and value of every attribute of {@code element}, one per line. */
    private static void printAttributes(Node element) {
        NamedNodeMap attrs = element.getAttributes();
        for (int j = 0; j < attrs.getLength(); j++) {
            Node attr = attrs.item(j);
            System.out.print("属性名:" + attr.getNodeName());
            System.out.println(",属性值:" + attr.getNodeValue());
        }
    }

    /**
     * Prints the child-node count of {@code book} followed by the name and value of
     * each child that is an element.
     *
     * @param book       the book node whose children are listed
     * @param bookNumber 1-based position of the book, used only in the output text
     */
    private static void printChildElements(Node book, int bookNumber) {
        NodeList childNodes = book.getChildNodes();
        // The count includes the whitespace text nodes between the tags.
        System.out.println("第" + bookNumber +
                "本书有" + childNodes.getLength() + "个节点");
        for (int k = 0; k < childNodes.getLength(); k++) {
            Node child = childNodes.item(k);
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;   // skip whitespace/text nodes, only elements are reported
            }
            // The printed position is the index among ALL child nodes, not just elements.
            System.out.print("第" + (k+1) + "个节点的节点名:" + child.getNodeName());
            System.out.println(",节点值:" + firstChildValue(child));
        }
    }

    /**
     * Returns the node value of the first child of {@code element}, or an empty string
     * when the element has no children at all (e.g. {@code <name/>}), which previously
     * caused a {@link NullPointerException}.
     *
     * <p>Alternative: {@code element.getTextContent()} would return the concatenated
     * text of all descendants instead of only the first child's value.
     */
    private static String firstChildValue(Node element) {
        Node first = element.getFirstChild();
        return first == null ? "" : first.getNodeValue();
    }
}

运行结果如下:
(此处原为DOM解析运行结果的截图)

2、使用SAX方式解析,代码如下:
(1)Main方法

import org.xml.sax.SAXException;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.IOException;

public class SaxTest {
    /**
     * Runs a SAX parse of {@code xmlTest.xml}, delegating all event handling to a
     * fresh {@link SaxParserHandler}. Any parser-configuration, SAX or I/O failure
     * is reported by printing its stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        try {
            SAXParser parser = factory.newSAXParser();
            SaxParserHandler handler = new SaxParserHandler();
            parser.parse("xmlTest.xml", handler);   // events are pushed to the handler
        } catch (ParserConfigurationException | SAXException | IOException e) {
            e.printStackTrace();
        }
    }
}

(2)SaxParserHandler

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX event handler that prints the structure of a bookstore document: a banner at
 * document start/end, a numbered header and the {@code id} attribute for each
 * {@code book} element, and the name and text of every other element except the
 * {@code bookstore} root.
 */
public class SaxParserHandler extends DefaultHandler{

    /** Number of {@code book} start tags seen so far; used to number the books in the output. */
    int bookIndex = 0;

    /**
     * Text collected since the last start/end tag. A parser may report one run of
     * character data through several {@link #characters} calls, so the text is
     * buffered here and printed as a whole by {@link #flushText()}.
     */
    private final StringBuilder textBuffer = new StringBuilder();

    /**
     * Announces the start of parsing.
     *
     * @throws SAXException declared by the overridden method
     */
    @Override
    public void startDocument() throws SAXException {
        super.startDocument();
        System.out.println("Sax解析开始...");
    }

    /**
     * Announces the end of parsing.
     *
     * @throws SAXException declared by the overridden method
     */
    @Override
    public void endDocument() throws SAXException {
        super.endDocument();
        System.out.println("Sax解析结束...");
    }

    /**
     * Handles a start tag: prints a header and the {@code id} attribute for
     * {@code book}, prints the element name for anything else except {@code bookstore}.
     *
     * @param uri        namespace URI reported by the parser (passed to super only)
     * @param localName  local name reported by the parser (passed to super only)
     * @param qName      qualified element name; drives the branching
     * @param attributes attributes attached to the element
     * @throws SAXException declared by the overridden method
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        super.startElement(uri, localName, qName, attributes);
        flushText();    // emit text that preceded this tag before printing the tag itself
        if ("book".equals(qName)) {
            bookIndex++;
            System.out.println("---开始遍历第" + bookIndex + "本书---");
            String bookId = attributes.getValue("id");
            System.out.println("bookId = " + bookId);
        } else if (!"bookstore".equals(qName)) {
            // No newline here: the element's text is appended to the same line.
            System.out.print("节点名:" + qName);
        }
    }

    /**
     * Handles an end tag: prints the element's pending text, then a footer when the
     * element is {@code book}.
     *
     * @param uri       namespace URI reported by the parser (passed to super only)
     * @param localName local name reported by the parser (passed to super only)
     * @param qName     qualified element name
     * @throws SAXException declared by the overridden method
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        super.endElement(uri, localName, qName);
        flushText();
        if ("book".equals(qName)) {
            System.out.println("---结束遍历第" + bookIndex + "本书---");
        }
    }

    /**
     * Buffers a chunk of character data; nothing is printed until the next tag.
     *
     * @param ch     character array owned by the parser
     * @param start  offset of the first character of this chunk
     * @param length number of characters in this chunk
     * @throws SAXException declared by the overridden method
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        super.characters(ch, start, length);
        textBuffer.append(ch, start, length);
    }

    /**
     * Prints the buffered text on one line if it contains anything other than
     * whitespace, then clears the buffer. The text is printed untrimmed.
     */
    private void flushText() {
        String value = textBuffer.toString();
        textBuffer.setLength(0);
        if (!value.trim().isEmpty()) {
            // Label text kept as-is for output compatibility (it is the element's text).
            System.out.println(",属性值:" + value);
        }
    }
}

运行结果如下:
(此处原为SAX解析运行结果的截图)

展开阅读全文

一个超复杂的xml文档解析

02-04

[code=XML]1Your request was successfully received and processed. Here is the response to your request rnrn rn Client_Weekrn rn SiteMonrn rn Cam_Mon_Child_0rn Mon__Mailingrn StandardMailingrn Mon__Mailing_1rn StandardMailingrn Mon__Mailing_2rn StandardMailingrn rn rn rn Site_Weekrn rn Cam_Weekrn rn rn Cam_Week_Child_0rn Weekly__Mailingrn StandardMailingrn Weekly__Mailing_1rn StandardMailingrn Weekly__Mailing_2rn StandardMailingrn rn rn WeeklyCampaign_2009_52rn rn rn C_20080209rn investing_20080209_Other-Copyrn StandardMailingrn investing_20080209_Global-Copyrn StandardMailingrn investing_20080209_Otherrn StandardMailingrn investing_20080209_Neteasern StandardMailingrn investing_20080209_Globalrn StandardMailingrn rn rn C_20080203rn investing_20080203_Neteasern StandardMailingrn investing_20080203_Globalrn StandardMailingrn rn rn C_20071026rn news_20071026_Otherrn StandardMailingrn news_20071026_Neteasern StandardMailingrn news_20071026_Globalrn StandardMailingrn rnrnrnrn";[/code]rnrn这是源XML文档,现在我要把每个ClientName,SiteName,Campaign,MailingName的值按照其对应关系存进一个数组中去。rn请高手帮忙解决一下,我用SimpleXMLElement和DOMDocument都没有解析成功,老提示错误。rn这是我用DOMDocument写的rn[code=PHP]rnfunction ANALYSIS_XML($XMLstr)rn rn $XMLDoc = new DOMDocument();rn $XMLDoc->Load($XMLstr);rn $ClientDom = $XMLDoc->getElementsByTagName("GetObjectNamesRs");rn rn $datatable=array("Client","Site","Campaign","Mailing");rn rn $ClientName="";rn $SiteName="";rn $CamName="";rn $i=0;rn rn foreach($ClientDom as $ClientEle)rn rn $ClientName=$ClientEle->getElementsByTagName("ClientName");rn $SiteDom=$ClientEle->getElementsByTagNam("SiteTree");rn if(isset($SiteDom))rn rn foreach($SiteDom as $SiteEle)rn rn $SiteName=$SiteEle->getElementsByTagName("SiteName");rn $CamDom=$SiteEle->getElementsByTagName("CampaignTree");rn if(isset($CamDom))rn rn $CamName=$CamEle->getElementsByTagName("CampaignName");rn if(isset($CamDom->MailingName))rn rn foreach($CamDom->MailingName as $MailName)rn rn $datatable[$i] = 
"$ClientName*|*$SiteName*|*$CamName*|*$MailName";rn $i++;rn rn rn rn rn rn rn rn return $datatable;rn rnrn[/code]rnrnrn 论坛

关于sax对xml文档解析的问题

03-10

MySAXHandle.java如下rnrnimport org.xml.sax.helpers.DefaultHandler;rnimport org.xml.sax.Attributes;rnrnpublic class MySAXHandle extends DefaultHandlerrnrn public void startDocument()rn rn System.out.println("Start document: ");rn rn rn public void endDocument()rn rn System.out.println("End document: "); rn rn rn public void startElement(String uri,String localName,String qname,Attributes attr)rn rn System.out.println("Start element:local name: "+localName+" qname: "+qname+" uri: "+uri);rn rn rn public void endElement(String uri,String localName,String qname)rn rn System.out.println("End element:local name: "+localName+" qname: "+qname+" uri: "+uri);rn rn rn public void characters(char[] ch,int start,int length)rn rn System.out.println("Characters: "+new String(ch,start,length));rn rn rn public void ignorableWhitespace(char[] ch,int start,int length)rn rn System.out.println("Ignorable whitespace: "+new String(ch,start,length));rn rnrnrnrnTrySAXHandler.java如下rnrnimport javax.xml.parsers.SAXParserFactory;rnimport javax.xml.parsers.SAXParser;rnimport javax.xml.parsers.ParserConfigurationException;rnimport org.xml.sax.SAXException;rnimport java.io.File;rnimport java.io.IOException;rnrnpublic class TrySAXHandlerrnrn public static void main(String[] args)rn rn if(args.length==0)rn rn System.out.println("No file to process. 
Usage is:"+"\njava TrySax \"filename\" ");rn return;rn rn File xmlFile=new File(args[0]);rn process(xmlFile);rn rn rn private static void process(File file)rn rn SAXParserFactory spf=SAXParserFactory.newInstance();rn SAXParser parser=null;rn spf.setNamespaceAware(true);rn spf.setValidating(true);rn tryrn rn parser=spf.newSAXParser();rn rn catch(SAXException e)rn rn e.printStackTrace(System.err);rn System.exit(1);rn rn catch(ParserConfigurationException e)rn rn e.printStackTrace(System.err);rn System.exit(1);rn rn rn System.out.println("\nStarting parsing of "+file+"\n");rn MySAXHandle handler=new MySAXHandle();rn tryrn rn parser.parse(file, handler);rn rn catch(IOException e)rn rn e.printStackTrace(System.err);rn rn catch(SAXException e)rn rn e.printStackTrace(System.err);rn rn rnrnrncircle.xml如下rnrnrnrn 15rn rn 30rn 50rn rnrnrnjava TrySAXHandler "circle.xml"的结果是rnrnParser will be namespace awarernParser will validate XMLrnParser object is: org.apache.crimson.jaxp.SAXParserImprnrnStarting parsing of circle.xmlrnrnStart document:rnStart element: local name: circle qname: circle uri:rnCharacters:rnCharacters:rnrnCharacters:rnStart element: local name: radius qname: radius uri:rnCharacters: 15rnEnd element: local name: radius qname: radius uri:rnCharacters:rnCharacters:rnrn在circle元素和radius元素中应该只有一个Characters (white space),可是控制台上显示了3个,这是什么原因阿? 论坛

没有更多推荐了,返回首页