Java版本Xpath解析Xml文件文本用法说明

最新推荐文章于 2023-09-03 20:48:45 发布

云水-禅心

最新推荐文章于 2023-09-03 20:48:45 发布

阅读量345

点赞数

分类专栏： Android java 文章标签： java android

本文链接：https://blog.csdn.net/zhanghao_Hulk/article/details/108854962

版权

Android 同时被 2 个专栏收录

87 篇文章 2 订阅

订阅专栏

java

34 篇文章 1 订阅

订阅专栏

Java或者Android开发中经常使用到xml解析, 本文讲述Xpath解析方式的用法.

本文通过代码的方式呈现,不讲理论了. 每个函数和关键代码块都有注释,请看官们注意看即可

类解释:

XpathParser: Xpath核心解析类,通过该类可以实现xml解析,并可以通过各种方式获取节点和进行条件查询

XpathUtils: xpath解析工具类, 主要解析节点和节点列表信息为常用的对象NodeInfo

NodeInfo: 节点信息类

XpathTest: java版本的测试类

XmlPrinter: 信息打印器

首先看java测试程序,注意看代码注释:

package com.hulk.xpath;

import java.util.List;
import java.util.Map;
import java.util.Set;

import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Command-line demo that drives {@link XpathParser} and {@link XpathUtils}.
 *
 * <p>It parses an XML file, then prints the root node, the root's attributes,
 * the root's child elements and the results of several XPath expressions.
 * The expressions used below address {@code /rss/channel} and
 * {@code //bookstore/book}, so the input file is expected to contain such
 * elements for the queries to return anything.
 *
 * <p>Usage: {@code java com.hulk.xpath.XpathTest [xmlFile]}; when no argument
 * is given the file {@code rss_demo.xml} in the working directory is used.
 *
 * @author hulk
 */
public class XpathTest {

	private final static String TAG = "XpathTest";
	private final static boolean DEBUG = true;
	static XpathParser sXpathParser;

	/**
	 * Runs the demo.
	 *
	 * @param args optional; {@code args[0]} is the path of the XML file to parse
	 * @throws Exception if the file cannot be read or parsed, or an XPath
	 *         expression cannot be evaluated
	 */
	public static void main(String[] args) throws Exception {
		XmlPrinter.print(TAG, "Test XpathParser");

		// Parse the XML document.
		log("执行xml解析");
		sXpathParser = new XpathParser();
		// An in-memory string can be parsed instead with sXpathParser.parseXml(...).
		String filePath = (args != null && args.length > 0) ? args[0] : "rss_demo.xml";
		sXpathParser.parseFile(filePath);
		Node node;

		// Fetch the root node without naming it.
		log("获取根节点");
		node = sXpathParser.getRootNode();
		XmlPrinter.print("Root", node, false);

		// Fetch the root node by its element name.
		log("通过名称获取根节点");
		node = sXpathParser.getRootNode("rss");
		XmlPrinter.print("Root rss", node, false);

		// Read the attributes of the node fetched above.
		log("解析节点列表的节点和器子节点的全部信息");
		Map<String, String> attrMap = XpathUtils.parseNodeAttributes(node);
		// parseNodeAttributes returns null when the node was not found, so avoid
		// calling toString() on the result directly.
		XmlPrinter.log("Node attr map", String.valueOf(attrMap));

		// Print every element that is a direct child of the root.
		log("打印根节点下的所有元素节点");
		Set<Node> set = sXpathParser.getRootChildNodes();
		XmlPrinter.print("RootChildNodes", set, false);

		// Select every node directly under /rss/channel.
		log("获取rss/channel/*下所有节点");
		NodeList nodeList = sXpathParser.getExpressionNodeList("/rss/channel/*");
		XmlPrinter.print("//rss/channel/* ExpressionNodeList", nodeList, false);

		// Convert the selected nodes (and their descendants) into NodeInfo objects.
		log("解析节点列表的节点和器子节点的全部信息");
		List<NodeInfo> nodeInfoList = XpathUtils.parseNodeList(nodeList);
		XmlPrinter.print("//rss/channel/* nodeInfoList", nodeInfoList);

		// Select elements with positional and conditional XPath expressions.
		log("通过计算表达式获取元素方式");

		// The first book.
		log("获取第一本书信息");
		String expression = "//bookstore/book[1]";
		nodeList = sXpathParser.getExpressionNodeList(expression);
		XmlPrinter.print(expression, nodeList, true);

		// The title of the first book.
		log("获取第一本书标题");
		expression = "//bookstore/book[1]/title";
		nodeList = sXpathParser.getExpressionNodeList(expression);
		XmlPrinter.print(expression, nodeList, true);

		// Titles of the books whose price is above 35.00.
		log("价格条件筛选");
		expression = "//bookstore/book[price>35.00]/title";
		nodeList = sXpathParser.getExpressionNodeList(expression);
		XmlPrinter.print(expression, nodeList, true);
	}

	/**
	 * Prints a progress message under this class's tag when {@link #DEBUG} is on.
	 *
	 * @param msg message to print
	 */
	private static void log(String msg) {
		if (DEBUG) {
			XmlPrinter.log(TAG, msg);
		}
	}
}

接下来直接上代码, 先看解析类:

package com.hulk.xpath;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

/**
 * XML query helper built on the JDK DOM parser and {@code javax.xml.xpath}.
 *
 * <p>Load a document through one of the constructors, {@link #parseXml(String)}
 * or {@link #parseFile(String)}, then query it with the {@code get...} methods.
 * Each successful parse replaces the previously loaded document.
 *
 * <p>ref: https://my.oschina.net/cloudcoder/bprint/223359
 * <p>Sample input: rss_demo.xml
 *
 * <p>Created: 2019-04-28 17:56
 *
 * @author zhanghao
 */
public class XpathParser {

    public static final String TAG = "XpathParser";
    public static boolean DEBUG = false;

    /** DOM of the most recently parsed document; {@code null} until a parse succeeds. */
    private Document doc;
    /** XPath evaluator, created together with {@link #doc}; {@code null} until a parse succeeds. */
    private XPath xpath;

    /**
     * Creates a parser with no document loaded. Call {@link #parseXml(String)}
     * or {@link #parseFile(String)} before querying.
     */
    public XpathParser() {
        // nothing to initialise until a document is supplied
    }

    /**
     * Creates a parser and loads the given XML text.
     *
     * @param xmlText XML document as a string
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws SAXException if the text is not well-formed XML
     * @throws IOException if reading the text fails
     */
    public XpathParser(String xmlText) throws ParserConfigurationException, SAXException, IOException {
        doParseSource(toInputSource(xmlText));
    }

    /**
     * Creates a parser and loads the XML read from the given stream. The stream
     * is not closed here; the caller remains responsible for it.
     *
     * @param input byte stream, e.g. {@code new FileInputStream(new File(xmlFilePath))}
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws SAXException if the content is not well-formed XML
     * @throws IOException if reading the stream fails
     */
    public XpathParser(InputStream input) throws ParserConfigurationException, SAXException, IOException {
        doParse(input);
    }

    /**
     * Loads the given XML text into this parser.
     *
     * <p>This is an ordinary method that happens to share the class name (it was
     * declared with a {@code void} return type, so it never was a constructor).
     * It is kept only so existing calls keep compiling.
     *
     * @param xmlText XML document as a string
     * @throws Exception if parsing fails
     * @deprecated use the {@code String} constructor or {@link #parseXml(String)}
     */
    @Deprecated
    public void XpathParser(String xmlText) throws Exception {
        parseXml(xmlText);
    }

    /**
     * Loads the XML read from the given stream into this parser.
     *
     * <p>This is an ordinary method that happens to share the class name (it was
     * declared with a {@code void} return type, so it never was a constructor).
     * It is kept only so existing calls keep compiling.
     *
     * @param input byte stream, e.g. {@code new ByteArrayInputStream(xmlText.getBytes())}
     *        or {@code new FileInputStream(new File(xmlFilePath))}
     * @throws Exception if parsing fails
     * @deprecated use the {@code InputStream} constructor
     */
    @Deprecated
    public void XpathParser(InputStream input) throws Exception {
        doParse(input);
    }

    /**
     * Parses XML text and makes it the current document.
     *
     * <p>The text is handed to the parser as characters, so the result does not
     * depend on the platform default charset or on the {@code encoding}
     * attribute of the XML declaration.
     *
     * @param xmlText XML document as a string
     * @throws Exception if the text cannot be parsed
     */
    public void parseXml(String xmlText) throws Exception {
        doParseSource(toInputSource(xmlText));
    }

    /**
     * Parses an XML file and makes it the current document. The file is closed
     * before this method returns, whether or not parsing succeeds.
     *
     * @param xmlFilePath path of the file, e.g. "demo.xml"
     * @throws Exception if the file cannot be opened or parsed
     */
    public void parseFile(String xmlFilePath) throws Exception {
        try (InputStream in = new FileInputStream(new File(xmlFilePath))) {
            doParse(in);
        }
    }

    /**
     * Wraps XML text in a character-stream {@link InputSource}.
     *
     * @param xmlText XML document as a string
     * @return source reading the text as characters
     */
    private static InputSource toInputSource(String xmlText) {
        // A byte-order mark only has meaning in a byte stream; drop it so the
        // parser does not see a stray character ahead of the XML declaration.
        String text = xmlText.startsWith("\uFEFF") ? xmlText.substring(1) : xmlText;
        return new InputSource(new StringReader(text));
    }

    /**
     * Creates a non-validating DOM builder.
     *
     * @return a new builder
     * @throws ParserConfigurationException if the builder cannot be created
     */
    private static DocumentBuilder newDocumentBuilder() throws ParserConfigurationException {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setValidating(false);
        // NOTE(review): DTDs and external entities are left at the factory
        // defaults. If this class is ever fed untrusted XML, harden the factory
        // against XXE (e.g. disallow DOCTYPE declarations) - not done here
        // because it would reject documents that currently parse.
        return dbf.newDocumentBuilder();
    }

    /**
     * Parses a byte stream and installs the resulting document.
     *
     * @param is input stream; not closed by this method
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws SAXException if the content is not well-formed XML
     * @throws IOException if reading the stream fails
     */
    private void doParse(InputStream is) throws ParserConfigurationException, SAXException, IOException {
        install(newDocumentBuilder().parse(is));
    }

    /**
     * Parses an {@link InputSource} and installs the resulting document.
     *
     * @param source input source
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws SAXException if the content is not well-formed XML
     * @throws IOException if reading the source fails
     */
    private void doParseSource(InputSource source) throws ParserConfigurationException, SAXException, IOException {
        install(newDocumentBuilder().parse(source));
    }

    /**
     * Stores a freshly parsed document and creates the XPath evaluator for it.
     * Only reached when parsing succeeded, so a failed parse leaves the
     * previously loaded document untouched.
     *
     * @param parsed the parsed document
     */
    private void install(Document parsed) {
        doc = parsed;
        xpath = XPathFactory.newInstance().newXPath();
    }

    /**
     * Returns the XPath evaluator, failing with a descriptive error when no
     * document has been parsed yet.
     *
     * @return the evaluator
     * @throws IllegalStateException if nothing has been parsed
     */
    private XPath requireXPath() {
        if (xpath == null) {
            throw new IllegalStateException(
                    "No document loaded: call parseXml(...) or parseFile(...) first");
        }
        return xpath;
    }

    /**
     * Returns the root element of the document.
     *
     * @return the root element
     * @throws XPathExpressionException if the expression cannot be evaluated
     * @throws IllegalStateException if no document has been parsed
     */
    public Node getRootNode() throws XPathExpressionException {
        // "/*" selects the root element whatever its name is.
        return (Node) requireXPath().evaluate("/*", doc, XPathConstants.NODE);
    }

    /**
     * Returns the root element with the given name, e.g. "rss" evaluates the
     * expression "/rss".
     *
     * <p>An XML document can have only one root element; a file with several
     * fails at parse time with an error such as:
     * [Fatal Error] :40:2: The markup in the document following the root element must be well-formed.
     *
     * @param rootNodeName expected name of the root element
     * @return the root element, or {@code null} when the root has a different name
     * @throws XPathExpressionException if the resulting expression is invalid
     * @throws IllegalStateException if no document has been parsed
     */
    public Node getRootNode(String rootNodeName) throws XPathExpressionException {
        return (Node) requireXPath().evaluate("/" + rootNodeName, doc, XPathConstants.NODE);
    }

    /**
     * Evaluates an arbitrary XPath expression and returns every matching node.
     * This is the general-purpose query entry point.
     *
     * <p>Examples:
     * <br>(1) "/rss/channel/*" selects all children of /rss/channel;
     * <br>(2) "//bookstore/book[price>35.00]/title" selects the titles of the
     * books whose price exceeds the given value.
     *
     * @param expression XPath expression
     * @return the matching nodes
     * @throws XPathExpressionException if the expression is invalid
     * @throws IllegalStateException if no document has been parsed
     */
    public NodeList getExpressionNodeList(String expression) throws XPathExpressionException {
        return (NodeList) requireXPath().evaluate(expression, doc, XPathConstants.NODESET);
    }

    /**
     * Returns every element, at any depth, whose name equals {@code name}.
     *
     * @param name element name, e.g. "title"
     * @return the matching elements
     * @throws XPathExpressionException if the resulting expression is invalid
     *         (for instance when {@code name} contains a single quote)
     * @throws IllegalStateException if no document has been parsed
     */
    public NodeList getPartNodeList(String name) throws XPathExpressionException {
        String expression = "//*[name() = '" + name + "']";
        return (NodeList) requireXPath().evaluate(expression, doc, XPathConstants.NODESET);
    }

    /**
     * Returns every element that has at least one child element.
     *
     * @return the matching elements
     * @throws XPathExpressionException if the expression cannot be evaluated
     * @throws IllegalStateException if no document has been parsed
     */
    public NodeList haveChildNodes() throws XPathExpressionException {
        return (NodeList) requireXPath().evaluate("//*[*]", doc, XPathConstants.NODESET);
    }

    /**
     * Returns every element located at the given depth, where the root element
     * is depth 1.
     *
     * @param levels depth to select, e.g. 3 for all third-level elements
     * @return the elements at that depth
     * @throws XPathExpressionException if the expression cannot be evaluated
     * @throws IllegalArgumentException if {@code levels} is not positive
     * @throws IllegalStateException if no document has been parsed
     */
    public NodeList getLevelElements(int levels) throws XPathExpressionException, IllegalArgumentException {
        if (levels <= 0) {
            throw new IllegalArgumentException("Invalid levels: " + levels);
        }
        // One "/*" step per level, e.g. three levels give "/*/*/*".
        StringBuilder expression = new StringBuilder();
        for (int i = 0; i < levels; i++) {
            expression.append("/*");
        }
        return (NodeList) requireXPath().evaluate(expression.toString(), doc, XPathConstants.NODESET);
    }

    /**
     * Returns the element children of the root element, skipping text,
     * comment and other non-element nodes.
     *
     * @return the child elements in document order, or {@code null} when no
     *         document has been parsed
     */
    public Set<Node> getRootChildNodes() {
        if (doc == null) {
            return null;
        }
        // Insertion-ordered so callers iterate in document order.
        Set<Node> set = new LinkedHashSet<>();
        NodeList nodeList = doc.getDocumentElement().getChildNodes();
        for (int i = 0; i < nodeList.getLength(); i++) {
            Node node = nodeList.item(i);
            if (XpathUtils.isElementNode(node.getNodeType())) {
                set.add(node);
            }
        }
        return set;
    }

    /**
     * Prints a message under this class's tag when {@link #DEBUG} is on.
     *
     * @param msg message to print
     */
    private static void log(String msg) {
        if (DEBUG) {
            XmlPrinter.log(TAG, msg);
        }
    }

    /**
     * Turns debug logging for this class on or off.
     *
     * @param debug {@code true} to enable logging
     */
    public static void setDebug(boolean debug) {
        DEBUG = debug;
    }
}

工具套件类:

package com.hulk.xpath;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Static helpers that convert DOM nodes into {@link NodeInfo} objects and
 * classify nodes by type.
 */
public class XpathUtils {

	private static final String TAG = "XpathUtils";

	private static boolean DEBUG = false;

	/**
	 * Converts an element node into a {@link NodeInfo} holding its name, type,
	 * attributes and either its text content or its parsed child elements.
	 *
	 * <p>When the element has at least one child element, the children are
	 * parsed recursively into {@code childs}; text placed directly between
	 * those children is not recorded. Otherwise the element's text content is
	 * stored in {@code content}.
	 *
	 * @param node node to convert; may be {@code null}
	 * @return the converted node, or {@code null} when {@code node} is
	 *         {@code null}, is not an element, or has no child nodes at all
	 */
	public static NodeInfo parseNode(Node node) {
		if (node == null) {
			return null;
		}
		int type = node.getNodeType();
		String name = node.getNodeName();
		if (!isElementNode(type)) {
			// Only element nodes are converted; text, comments etc. are skipped.
			log("parseNode: Ignored not element node=" + name + ",type=" + type);
			return null;
		}
		if (!node.hasChildNodes()) {
			log("parseNode: Ignored has no Child node=" + name);
			return null;
		}
		log("parseNode: Start parse node name: " + name);
		NodeInfo info = new NodeInfo(name);
		info.type = type;
		NamedNodeMap attrsMap = node.getAttributes();
		if (attrsMap != null && attrsMap.getLength() > 0) {
			info.attrs = parseAttributes(attrsMap);
			log("parseNode: parsed attrs=" + info.attrs);
		}
		NodeList childNodes = node.getChildNodes();
		if (hasElementChild(childNodes)) {
			// getTextContent() on such a node would concatenate the text of all
			// descendants, so descend into the children instead, e.g.:
			/*
			<item language="Java">
				<title>Java Examples</title>
				<link>http://www.javacodegeeks.com/</link>
			</item>
			*/
			// The decision is based on the kind of children, not their count:
			// unindented XML such as <a><b>x</b></a> gives <a> exactly one child,
			// and that child is an element, not text.
			log("parseNode: recursion to parse child nodes for node name: " + name);
			info.childs = parseNodeList(childNodes);
		} else {
			// Only text-like children, e.g. the "Java Examples" text node inside
			// <title>Java Examples</title>, possibly split over several nodes.
			String content = node.getTextContent();
			info.content = content;
			log("parseNode: parsed content=" + content);
		}
		return info;
	}

	/**
	 * Tells whether a child list contains at least one element node.
	 *
	 * @param childNodes child list to inspect
	 * @return {@code true} when any entry is an element
	 */
	private static boolean hasElementChild(NodeList childNodes) {
		int length = childNodes.getLength();
		for (int i = 0; i < length; i++) {
			if (isElementNode(childNodes.item(i))) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Copies an attribute map into a name-to-value {@link Map}.
	 *
	 * @param attrNodeMap attributes to copy; may be {@code null}
	 * @return the attributes, or {@code null} when {@code attrNodeMap} is {@code null}
	 */
	public static Map<String, String> parseAttributes(NamedNodeMap attrNodeMap) {
		if (attrNodeMap == null) {
			log("parseAttributes: Ignored attrNodeMap is null");
			return null;
		}
		Map<String, String> attrs = new HashMap<>();
		int length = attrNodeMap.getLength();
		for (int i = 0; i < length; i++) {
			Node attr = attrNodeMap.item(i);
			attrs.put(attr.getNodeName(), attr.getNodeValue());
		}
		return attrs;
	}

	/**
	 * Returns the attributes of a node as a name-to-value {@link Map}.
	 *
	 * @param node node to read; may be {@code null}
	 * @return the attributes, or {@code null} when {@code node} is {@code null}
	 *         or exposes no attribute map
	 */
	public static Map<String, String> parseNodeAttributes(Node node) {
		if (node == null) {
			return null;
		}
		return parseAttributes(node.getAttributes());
	}

	/**
	 * Converts every node of a DOM {@link NodeList} with {@link #parseNode(Node)}.
	 *
	 * @param nodeList nodes to convert; may be {@code null}
	 * @return the converted nodes, without the entries {@code parseNode}
	 *         rejected; empty when {@code nodeList} is {@code null}
	 */
	public static List<NodeInfo> parseNodeList(NodeList nodeList) {
		List<NodeInfo> list = new ArrayList<>();
		if (nodeList == null) {
			return list;
		}
		int length = nodeList.getLength();
		log("parseNodeList: length=" + length);
		for (int i = 0; i < length; i++) {
			// parseNode tolerates null entries and returns null for them.
			NodeInfo info = parseNode(nodeList.item(i));
			if (info != null) {
				list.add(info);
			}
		}
		return list;
	}

	/**
	 * Converts every node of a {@link List} with {@link #parseNode(Node)}.
	 *
	 * @param nodeList nodes to convert; may be {@code null}
	 * @return the converted nodes, without the entries {@code parseNode}
	 *         rejected; empty when {@code nodeList} is {@code null}
	 */
	public static List<NodeInfo> parseNodeList(List<Node> nodeList) {
		List<NodeInfo> list = new ArrayList<>();
		if (nodeList == null) {
			return list;
		}
		log("parseNodeList: length=" + nodeList.size());
		for (Node node : nodeList) {
			// parseNode tolerates null entries and returns null for them.
			NodeInfo info = parseNode(node);
			if (info != null) {
				list.add(info);
			}
		}
		return list;
	}

	/**
	 * Tells whether a node is an element, e.g. {@code <title>Java Examples</title>}.
	 *
	 * @param node node to test; may be {@code null}
	 * @return {@code true} for a non-null element node
	 */
	public static boolean isElementNode(Node node) {
		return node != null && isElementNode(node.getNodeType());
	}

	/**
	 * Tells whether a node type code denotes an element.
	 *
	 * @param nodeType value of {@link Node#getNodeType()}
	 * @return {@code true} when it equals {@link Node#ELEMENT_NODE}
	 */
	public static boolean isElementNode(int nodeType) {
		return nodeType == Node.ELEMENT_NODE;
	}

	/**
	 * Tells whether a node type code denotes an attribute.
	 *
	 * @param nodeType value of {@link Node#getNodeType()}
	 * @return {@code true} when it equals {@link Node#ATTRIBUTE_NODE}
	 */
	public static boolean isAttributeNode(int nodeType) {
		return nodeType == Node.ATTRIBUTE_NODE;
	}

	/**
	 * Tells whether a node type code denotes a text node.
	 *
	 * @param nodeType value of {@link Node#getNodeType()}
	 * @return {@code true} when it equals {@link Node#TEXT_NODE}
	 */
	public static boolean isTextNode(int nodeType) {
		return nodeType == Node.TEXT_NODE;
	}

	/**
	 * Prints a message under this class's tag when debug logging is on.
	 *
	 * @param msg message to print
	 */
	private static void log(String msg) {
		if (DEBUG) {
			XmlPrinter.log(TAG, msg);
		}
	}

	/**
	 * Turns debug logging for this class on or off.
	 *
	 * @param debug {@code true} to enable logging
	 */
	public static void setDebug(boolean debug) {
		DEBUG = debug;
	}
}

xml的Xpath解析不复杂,找到好的解析思路和工具类就很方便了.

具体代码请下载github:git@github.com:zhanghulk/HulkXPath.git

或者下载csdn资源: https://download.csdn.net/download/zhanghao_Hulk/12895617

有任何问题请在下面留言交流

云水-禅心

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Java版本Xpath解析Xml文件文本用法说明

Java或者Android开发中经常使用到xml解析, 本文讲述Xpath解析方式的用法.本文通过代码的方式呈现,不讲理论了. 每个函数和关键代码块都有注释,请看官们注意看即可类解释:XpathParser: Xpath核心解析类,通过该类可以实现xml解析,并可以通过各种方式获取节点和进行条件查询XpathUtils: xpath解析工具类, 主要解析节点和节点列表信息为常用的对象NodeInfoNodeInfo: 节点信息类XpathTest: java版本的测试类XmlPrinter.
复制链接

扫一扫