dom4j解析XML

最新推荐文章于 2022-04-20 11:25:48 发布

weixin_33908217

最新推荐文章于 2022-04-20 11:25:48 发布

阅读量73

点赞数

文章标签： python java

原文链接：https://my.oschina.net/zhangxiaoc/blog/705313

版权

2019独角兽企业重金招聘Python工程师标准>>>

package com.dom4jdemo.test;

import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.XPath;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
import org.dom4j.Namespace;

/**
*
* Project: dom4jDemo
*
* @title com.dom4jdemo.test.Test.java
* @Description: dom4j解析xml的Demo类
* @author 张宇佳
* @created 2016年6月27日下午1:53:15
*/
@SuppressWarnings("all")
public class Test {
   // // XML文档内容
   // /
   // <?xml version="1.0" encoding="UTF-8" standalone="no"?>
   // <!DOCTYPE Books SYSTEM "books.dtd">
   // <Books>
   // <book id="a">
   // <title>How to Program in JAVA</title>
   // <author>James Green</author>
   // <price>20</price>
   // <publishHouse>清华大学出版社</publishHouse>
   // <description>是一本莫须有的书</description>
   // </book>
   // <book id="b">
   // <title>How to Program in C#</title>
   // <author>李逍遥</author>
   // <price>12</price>
   // <publishHouse>清华大学出版社</publishHouse>
   // <description>不存在这本书哦</description>
   // </book>
   // </Books>
   // /

   /**
    * Parses a hard-coded XML message and runs {@link #parseXml(Element)} on its root element.
    *
    * <p>The message is a {@code <Books>} document holding two {@code <book>} entries whose
    * {@code id} attributes are {@code a} and {@code b}.
    *
    * @author 张宇佳
    * @created 2016-06-27 13:53:55
    * @throws IllegalStateException if the embedded XML text cannot be parsed
    */
   public static void parseXmlMsg() {
       // The buffer never leaves this method, so the unsynchronized StringBuilder is enough.
       StringBuilder xml = new StringBuilder();
       xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>");
       xml.append("<Books>");
       xml.append("<book id=\"a\">");
       xml.append("<title>How to Program in JAVA</title>");
       xml.append("<author>James Green</author>");
       xml.append("<price>20</price>");
       xml.append("<publishHouse>清华大学出版社</publishHouse>");
       xml.append("<description>是一本莫须有的书</description>");
       xml.append("</book>");
       xml.append("<book id=\"b\">");
       xml.append("<title>How to Program in C#</title>");
       xml.append("<author>李逍遥</author>");
       xml.append("<price>12</price>");
       xml.append("<publishHouse>清华大学出版社</publishHouse>");
       xml.append("<description>不存在这本书哦</description>");
       xml.append("</book>");
       xml.append("</Books>");

       Document doc;
       try {
           // Turn the XML text into a dom4j document object.
           doc = DocumentHelper.parseText(xml.toString());
       } catch (DocumentException e) {
           // Fail with the real cause instead of continuing with a null document,
           // which would only surface later as a NullPointerException.
           throw new IllegalStateException("Failed to parse the embedded XML message", e);
       }
       parseXml(doc.getRootElement());
   }

   /**
    * Builds an XML document in memory, queries and edits it, then writes it to disk and reads it back.
    *
    * <p>Steps, in order:
    * <ol>
    * <li>create a {@code ROOT} element containing three {@code Book} elements;</li>
    * <li>print the document and run two XPath lookups against it;</li>
    * <li>change the {@code id} attribute of the book {@code book2} to {@code abcd2};</li>
    * <li>remove the first {@code price} element found under a book priced above 100;</li>
    * <li>pretty-print the document to {@code D:\JavaCreateDocument.xml} as UTF-8;</li>
    * <li>re-read that file through {@link #parseXmlFile(String)}.</li>
    * </ol>
    *
    * <p>A failure while writing the file is reported on standard error and does not stop the
    * method; the output stream is closed whether or not the write succeeds.
    *
    * @author 张宇佳
    * @created 2016-06-27 13:54:11
    */
   public static void createDocument() {
       Document document = DocumentHelper.createDocument();
       document.setXMLEncoding("utf-8");

       Element rootEle = DocumentHelper.createElement("ROOT");
       rootEle.add(createBookElement("book1", "Java", "作者EDG", null, "25"));
       rootEle.add(createBookElement("book2", "Maven", "作者RNG", null, "160"));
       // Only the third book carries an id attribute on its author element.
       rootEle.add(createBookElement("book3", "dom4J", "作者SHR", "author1", "122"));
       document.add(rootEle);

       // Snapshot of the document text before any modification; printed again further down.
       String xmlDocStr = document.asXML();
       System.err.println(xmlDocStr);

       System.err
               .println("--------------------------------------------------------------------");
       // XPath: the name element of the Book under ROOT whose id attribute equals book1.
       Node node = document.selectSingleNode("/ROOT/Book[@id='book1']/name");
       System.err.println(node.getText());
       System.err
               .println("--------------------------------------------------------------------");
       // XPath: the first Book under ROOT whose price text is greater than 80; the second
       // lookup then picks the price element inside that Book.
       Node node1 = document.selectSingleNode("/ROOT/Book[price>80]")
               .selectSingleNode("price");
       System.err.println(node1.getText());

       System.out
               .println("----------------------------------------- 修改节点包含属性id=book2的节点为 id=abcd2 --------------------------------------");
       System.out.println("现文档内容：");
       System.out.println(document.asXML());
       Element updateEle = getEleByAttr(rootEle, "id", "book2");
       updateAttr(updateEle, "id", "abcd2");
       System.out
               .println("---------------------------------------------修改后的文档内容-------------------------------------------------------");
       System.out.println(document.asXML());
       System.out
               .println("------------------------------------ 开始测试移出节点 --------------------------------");
       System.out.println("XML原文档内容：" + xmlDocStr);
       System.out.println("移出Root元素下Book元素下Price>100的第一个price元素节点");
       // "//" searches from the document root at any depth.
       Node bigNode = document.selectSingleNode("//Book[price>100]/price");
       System.out.println();
       bigNode.getParent().remove(bigNode); // detach the node from its parent element
       System.out.println("XML现文档内容：" + document.asXML());

       System.out
               .println("-----------------------------------------------------将xml写入xml文件中-------------------------------------------------");

       String outputPath = "D:\\JavaCreateDocument.xml";
       // Indented output; OutputFormat.createCompactFormat() would give the unindented form.
       OutputFormat of = OutputFormat.createPrettyPrint();
       of.setEncoding("UTF-8"); // the document contains Chinese text
       document.setXMLEncoding("UTF-8");
       File file = new File(outputPath);
       if (file.exists()) {
           file.delete(); // start from a fresh file
       }
       FileOutputStream fileOut = null;
       try {
           fileOut = new FileOutputStream(file);
           XMLWriter out = new XMLWriter(fileOut, of);
           out.write(document);
           out.flush();
           out.close();
           System.out.println("文件生成成功！");
       } catch (Exception e) {
           // Best effort, as before: report the failure and carry on with the demo.
           e.printStackTrace();
       } finally {
           // Release the file handle even when construction of the writer or the write itself
           // failed; closing an already closed FileOutputStream is harmless.
           if (fileOut != null) {
               try {
                   fileOut.close();
               } catch (java.io.IOException closeError) {
                   closeError.printStackTrace();
               }
           }
       }

       System.out
               .println("----------------------------------------读取生成的XML文档------------------------------------------");
       parseXmlFile(outputPath);
   }

   /**
    * Builds one {@code Book} element with {@code name}, {@code author} and {@code price} children.
    *
    * @param id value of the book's {@code id} attribute
    * @param name text of the {@code name} child
    * @param author text of the {@code author} child
    * @param authorId value of the author's {@code id} attribute, or {@code null} to omit it
    * @param price text of the {@code price} child
    * @return the detached {@code Book} element
    */
   private static Element createBookElement(String id, String name, String author,
           String authorId, String price) {
       Element book = DocumentHelper.createElement("Book");
       book.addAttribute("id", id);
       book.add(createTextElement("name", name));
       Element authorEle = createTextElement("author", author);
       if (authorId != null) {
           authorEle.addAttribute("id", authorId);
       }
       book.add(authorEle);
       book.add(createTextElement("price", price));
       return book;
   }

   /**
    * Creates a detached element named {@code tag} whose text content is {@code text}.
    */
   private static Element createTextElement(String tag, String text) {
       Element element = DocumentHelper.createElement(tag);
       element.setText(text);
       return element;
   }

   /**
    * Reads {@code D:\Books.xml} and runs {@link #parseXml(Element)} on its root element.
    *
    * <p>Loading of an external DTD referenced by the file is switched off before reading.
    *
    * @author 张宇佳
    * @created 2016-06-27 13:54:26
    * @throws IllegalStateException if the reader cannot be configured or the file cannot be
    *         read or parsed
    */
   public static void parseXmlFile() {
       String filePath = "D:\\Books.xml";
       System.err.println("XML文档路径：" + filePath);
       SAXReader read = new SAXReader();
       Document doc;
       try {
           // Ignore the external DTD the document may reference.
           read.setFeature(
                   "http://apache.org/xml/features/nonvalidating/load-external-dtd",
                   false);
           doc = read.read(new File(filePath));
       } catch (Exception e) {
           // Stop here with the real cause; continuing with a null document would only
           // produce a NullPointerException on the next line.
           throw new IllegalStateException("Failed to read XML file: " + filePath, e);
       }
       parseXml(doc.getRootElement());
   }

   /**
    * Reads the XML file at the given path and prints its full text to standard output.
    *
    * <p>Loading of an external DTD referenced by the file is switched off before reading.
    *
    * @author 张宇佳
    * @created 2016-06-27 13:54:26
    * @param filepath path of the file to read; {@code D:\Books.xml} is used when this is
    *        {@code null} or empty
    * @throws IllegalStateException if the reader cannot be configured or the file cannot be
    *         read or parsed
    */
   public static void parseXmlFile(String filepath) {
       String filePath = "D:\\Books.xml"; // default when no path is supplied
       if (filepath != null && !filepath.equals("")) {
           filePath = filepath;
       }
       System.err.println("XML文档路径：" + filePath);
       SAXReader read = new SAXReader();
       Document doc;
       try {
           // Ignore the external DTD the document may reference.
           read.setFeature(
                   "http://apache.org/xml/features/nonvalidating/load-external-dtd",
                   false);
           doc = read.read(new File(filePath));
       } catch (Exception e) {
           // Stop here with the real cause; continuing with a null document would only
           // produce a NullPointerException when the text is printed.
           throw new IllegalStateException("Failed to read XML file: " + filePath, e);
       }
       System.out
               .println("---------------------------------------------文档内容------------------------------------------------");
       System.out.println(doc.asXML());
   }

   /**
    * Demonstrates element lookup and traversal below the given root element.
    *
    * <p>Prints, in order: the children of the element whose {@code id} attribute is {@code b}
    * (as found by {@link #getEleByAttr(Element, String, String)}), the text of the
    * {@code title} of the first {@code book} child, and then the trimmed text, name and string
    * value of every direct child of {@code root}. A missing {@code book} or {@code title}
    * element is reported instead of causing a failure.
    *
    * @author 张宇佳
    * @created 2016-06-27 15:15:33
    * @param root element whose children are inspected; must not be {@code null}
    */
   public static void parseXml(Element root) {
       // First <book> child and its <title>; either may be absent for other document shapes.
       Element firstBook = root.element("book");
       Element elementTitle = (firstBook == null) ? null : firstBook.element("title");

       Element ele = getEleByAttr(root, "id", "b");
       if (ele != null) {
           System.err.println("root节点下的具有id=b属性的节点名字:" + ele.getName()
                   + "\n循环子节点:");
           for (Iterator it = ele.elementIterator(); it.hasNext();) {
               Element e = (Element) it.next();
               System.out.println("节点的名字:" + e.getName());
               System.out.println("节点中的文字:" + e.getTextTrim());
               System.out.println();
           }
           // NOTE(review): this message announces a modification, but nothing is modified here.
           System.out.println("修改id=b属性的节点为id=b1并循环该遍历同级别节点");
       } else {
           System.err.println("未找到节点信息");
       }
       System.err
               .println("------------------------------------------------------------------------");

       if (elementTitle != null) {
           System.err.println("book节点的文字：" + elementTitle.getTextTrim());
       } else {
           System.err.println("未找到book节点下的title节点");
       }

       // Walk every direct child of the root.
       for (Iterator nodesIter = root.elements().iterator(); nodesIter.hasNext();) {
           Element node = (Element) nodesIter.next();
           // Text directly inside the element, with surrounding whitespace removed.
           System.err.println("根节点下的子节点包含的文本：" + node.getTextTrim());
           System.err.println("根节点下的子节点名字:" + node.getName());
           // String value of the element as reported by getStringValue().
           System.err.println("根节点下的子节点中的所有String值:" + node.getStringValue());
       }
   }

   /**
    * Finds a descendant element that carries the given attribute with the given value.
    *
    * <p>Direct children of {@code node} are checked first, in document order. Only when none
    * of them matches is each child's subtree searched, again in document order, so a match
    * among the direct children always wins over a deeper one.
    *
    * @author 张宇佳
    * @created 2016-06-27 14:44:26
    * @param node element whose descendants are searched; {@code null} yields {@code null}
    * @param attrName name of the attribute to look for
    * @param attrValue value the attribute must have
    * @return the matching element, or {@code null} when no descendant matches
    */
   public static Element getEleByAttr(Element node, String attrName,
           String attrValue) {
       if (node == null) {
           return null;
       }
       // Pass 1: the direct children.
       for (Iterator it = node.elementIterator(); it.hasNext();) {
           Element child = (Element) it.next();
           Attribute attribute = child.attribute(attrName); // null when the child lacks it
           if (attribute != null) {
               String value = attribute.getValue();
               if (value != null && value.equals(attrValue)) {
                   return child;
               }
           }
       }
       // Pass 2: nothing on this level, so descend into every child and hand the
       // first hit back to the caller.
       for (Iterator it = node.elementIterator(); it.hasNext();) {
           Element found = getEleByAttr((Element) it.next(), attrName, attrValue);
           if (found != null) {
               return found;
           }
       }
       return null;
   }

   /**
    * Sets a new value on an existing attribute of an element.
    *
    * <p>The element's attributes are scanned in iteration order and the first one whose name
    * equals {@code attrName} receives the new value. Nothing is added when the element has
    * no attribute with that name.
    *
    * @author 张宇佳
    * @created 2016-06-28 09:26:08
    * @param node element whose attribute is changed
    * @param attrName name of the attribute to change
    * @param attrValue value to store in the attribute
    * @return {@code node} when the attribute was found and updated, otherwise {@code null}
    */
   public static Element updateAttr(Element node, String attrName,
           String attrValue) {
       for (Iterator attrs = node.attributeIterator(); attrs.hasNext();) {
           Attribute candidate = (Attribute) attrs.next();
           String candidateName = candidate.getName();
           if (candidateName == null || !candidateName.equals(attrName)) {
               continue; // not the attribute being looked for
           }
           candidate.setValue(attrValue);
           return node;
       }
       return null;
   }

   /**
    * Placeholder for a dom4j + XPath example that does not involve namespaces.
    *
    * <p>The body is empty; calling this method does nothing.
    *
    * @author 张宇佳
    * @created 2016-06-29 10:04:44
    */
   public static void testXpath() {

}

   /**
    * Demonstrates dom4j + XPath on a document that uses XML namespaces.
    *
    * <p>Reads {@code D:\applicationContext.xml}, prints the name, text and attributes of each
    * direct child of the root, then selects the first {@code property} element in the default
    * namespace with XPath and prints its attributes.
    *
    * <p>XPath has no notion of a default namespace, so the namespace declared without a prefix
    * on the root element is registered under the prefix {@code ns}. The prefix map is attached
    * to the individual XPath expression rather than to the shared document factory, so no
    * other XPath evaluation is affected.
    *
    * @author 张宇佳
    * @created 2016-06-29 10:05:25
    * @throws Exception if the file cannot be read or parsed, or the XPath cannot be evaluated
    */
   public static void testXpathAndNamespace() throws Exception {
       SAXReader reader = new SAXReader();
       File file = new File("D:\\applicationContext.xml");
       Document doc = reader.read(file);
       Element rootNode = doc.getRootElement();

       System.out.println("-------循环根节点下的子节点");
       for (Iterator childNodeIter = rootNode.elementIterator(); childNodeIter.hasNext();) {
           Element childNode = (Element) childNodeIter.next();
           String nodeNameStr = childNode.getName();
           String nodeTextStr = childNode.getTextTrim();
           // The text part is left out entirely when the element has no text of its own.
           System.out.println("节点的名字："+nodeNameStr+ (nodeTextStr.equals("")?"":",节点内的文字："+nodeTextStr)+ ",节点的属性：");
           System.out.println();
           for (Iterator childNodePropIter = childNode.attributeIterator(); childNodePropIter.hasNext();) {
               Attribute attr = (Attribute) childNodePropIter.next();
               System.out.print(attr.getName()+" = "+attr.getValue());
           }
           System.out.println();
       }

       System.out.println("-----------------------------------------------XPath获取节点---------------------------------------------------");
       // Prefix -> URI for every namespace declared on the root element itself.
       Map<String, String> nsMap = new HashMap<String, String>();
       List<Namespace> nsList = rootNode.declaredNamespaces();
       for (Namespace namespace : nsList) {
           String nsKeyStr = namespace.getPrefix();
           if (nsKeyStr.equals("")) {
               nsKeyStr = "ns"; // stand-in prefix for the default namespace
           }
           nsMap.put(nsKeyStr, namespace.getURI());
       }

       XPath propertyPath = rootNode.createXPath("//ns:property");
       propertyPath.setNamespaceURIs(nsMap);
       Node propNode = propertyPath.selectSingleNode(rootNode);
       if (!(propNode instanceof Element)) {
           // Nothing matched: report it instead of failing on a null node.
           System.out.println("未找到property节点");
           return;
       }

       System.out.println("属性：");
       for (Iterator propNodeAttrItor = ((Element) propNode).attributeIterator(); propNodeAttrItor.hasNext();) {
           Attribute propNodeAttr = (Attribute) propNodeAttrItor.next();
           System.out.print(propNodeAttr.getName() + " = "+ propNodeAttr.getValue() + ",");
       }
       System.out.println("");

   }

   /**
    * Runs the demos one after another: file parsing, in-memory message parsing,
    * document creation, and the namespace-aware XPath example.
    *
    * @param args command-line arguments; not read
    * @throws Exception if any of the demos fails
    */
   public static void main(String[] args) throws Exception{
       parseXmlFile();
       parseXmlMsg();
       createDocument();
       testXpathAndNamespace();
   }

}

示例代码：

360云盘 https://yunpan.cn/ckrhe9VDjXy6p 访问密码：f9d5

转载于:https://my.oschina.net/zhangxiaoc/blog/705313