Java或者Android开发中经常使用到xml解析, 本文讲述Xpath解析方式的用法.
本文通过代码的方式呈现,不讲理论了. 每个函数和关键代码块都有注释,请看官们注意看即可
类解释:
XpathParser: Xpath核心解析类,通过该类可以实现xml解析,可以通过各种方式获取节点和条件查询
XpathUtils: xpath解析工具类, 主要解析节点和节点列表信息为常用的对象NodeInfo
NodeInfo: 节点信息类
XpathTest: java版本的测试类
XmlPrinter: 信息打印器
首先看java测试程序,注意看代码注释:
package com.hulk.xpath;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Command-line demo for {@link XpathParser}: parses {@code rss_demo.xml} and prints the
 * result of several XPath queries through {@code XmlPrinter}.
 *
 * @author hulk
 */
public class XpathTest {
    private final static String TAG = "XpathTest";
    private final static boolean DEBUG = true;
    static XpathParser sXpathParser;

    /**
     * Runs the demo queries in sequence.
     *
     * @param args unused
     * @throws Exception if the file cannot be read or parsed, or an XPath expression is invalid
     */
    public static void main(String[] args) throws Exception {
        XmlPrinter.print(TAG, "Test XpathParser");

        // Parse the XML document.
        log("执行xml解析");
        sXpathParser = new XpathParser();
        //sXpathParser.parseXml(XML.RSS_XML.trim());
        String filePath = "rss_demo.xml";
        sXpathParser.parseFile(filePath);

        // Fetch the root node.
        log("获取根节点");
        Node node = sXpathParser.getRootNode();
        XmlPrinter.print("Root", node, false);

        // Fetch the root node by its name.
        log("通过名称获取根节点");
        node = sXpathParser.getRootNode("rss");
        XmlPrinter.print("Root rss", node, false);

        // Parse the attributes of the root node. The map is null when the root is not
        // named "rss" (node == null), so print it null-safely instead of calling toString().
        log("解析根节点的属性信息");
        Map<String, String> attrMap = XpathUtils.parseNodeAttributes(node);
        XmlPrinter.log("Node attr map", String.valueOf(attrMap));

        // Print every element node directly under the root.
        log("打印根节点下的所有元素节点");
        Set<Node> set = sXpathParser.getRootChildNodes();
        XmlPrinter.print("RootChildNodes", set, false);

        // All nodes under /rss/channel/.
        log("获取rss/channel/*下所有节点");
        NodeList nodeList = sXpathParser.getExpressionNodeList("/rss/channel/*");
        XmlPrinter.print("//rss/channel/* ExpressionNodeList", nodeList, false);

        // Convert the node list (nodes and their children) into NodeInfo objects.
        log("解析节点列表的节点和器子节点的全部信息");
        List<NodeInfo> nodeInfoList = XpathUtils.parseNodeList(nodeList);
        XmlPrinter.print("//rss/channel/* nodeInfoList", nodeInfoList);

        // Queries driven by computed XPath expressions.
        log("通过计算表达式获取元素方式");

        // The first book.
        log("获取第一本书信息");
        String expression = "//bookstore/book[1]";
        nodeList = sXpathParser.getExpressionNodeList(expression);
        XmlPrinter.print(expression, nodeList, true);

        // Title of the first book.
        log("获取第一本书标题");
        expression = "//bookstore/book[1]/title";
        nodeList = sXpathParser.getExpressionNodeList(expression);
        XmlPrinter.print(expression, nodeList, true);

        // Filter by a price condition.
        log("价格条件筛选");
        expression = "//bookstore/book[price>35.00]/title";
        nodeList = sXpathParser.getExpressionNodeList(expression);
        XmlPrinter.print(expression, nodeList, true);
    }

    /** Prints {@code msg} under this class's tag when {@link #DEBUG} is enabled. */
    private static void log(String msg) {
        if (DEBUG) {
            XmlPrinter.log(TAG, msg);
        }
    }
}
接下来直接上代码, 先看解析类:
package com.hulk.xpath;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
 * XPath-based XML parser for Java.
 *
 * <p>Load a document with one of the constructors or with {@link #parseXml(String)} /
 * {@link #parseFile(String)}, then query it with the {@code get...} methods. The query
 * methods require a document to have been loaded first.
 *
 * <p>ref: https://my.oschina.net/cloudcoder/bprint/223359
 * <p>Example input: rss_demo.xml
 *
 * @Time: 2019-04-28 17:56
 * @author: zhanghao
 */
public class XpathParser {
    public static final String TAG = "XpathParser";
    public static boolean DEBUG = false;

    private Document doc;
    private XPath xpath;

    /**
     * Creates an empty parser; call {@link #parseXml(String)} or {@link #parseFile(String)}
     * before querying.
     */
    public XpathParser() {
        //do something
    }

    /**
     * Creates a parser and loads the given XML text.
     *
     * @param xmlText the XML document as text
     * @throws Exception if the text cannot be parsed
     */
    public XpathParser(String xmlText) throws Exception {
        doParseText(xmlText);
    }

    /**
     * Creates a parser and loads the XML document read from the given stream.
     *
     * @param input the input stream, e.g. {@code new FileInputStream(new File(xmlFilePath))};
     *              the caller remains responsible for closing it
     * @throws Exception if the stream cannot be read or parsed
     */
    public XpathParser(InputStream input) throws Exception {
        doParse(input);
    }

    /**
     * Loads the given XML text into this parser.
     *
     * <p>This was declared with a {@code void} return type, which makes it an ordinary method
     * rather than a constructor. It is kept so existing callers still compile.
     *
     * @param xmlText the XML document as text
     * @throws Exception if the text cannot be parsed
     * @deprecated use {@link #XpathParser(String)} or {@link #parseXml(String)}
     */
    @Deprecated
    public void XpathParser(String xmlText) throws Exception {
        parseXml(xmlText);
    }

    /**
     * Loads the XML document read from the given stream into this parser.
     *
     * <p>This was declared with a {@code void} return type, which makes it an ordinary method
     * rather than a constructor. It is kept so existing callers still compile.
     *
     * @param input the input stream; the caller remains responsible for closing it
     * @throws Exception if the stream cannot be read or parsed
     * @deprecated use {@link #XpathParser(InputStream)}
     */
    @Deprecated
    public void XpathParser(InputStream input) throws Exception {
        doParse(input);
    }

    /**
     * Parses XML text and initializes the Document and XPath objects.
     *
     * @param xmlText the XML document as text
     * @throws Exception if the text cannot be parsed
     */
    public void parseXml(String xmlText) throws Exception {
        doParseText(xmlText);
    }

    /**
     * Parses an XML file and initializes the Document and XPath objects.
     *
     * @param xmlFilePath path of the XML file, e.g. "demo.xml"
     * @throws Exception if the file cannot be opened, read or parsed
     */
    public void parseFile(String xmlFilePath) throws Exception {
        // try-with-resources so the file handle is released even when parsing fails.
        try (InputStream in = new FileInputStream(new File(xmlFilePath))) {
            doParse(in);
        }
    }

    /**
     * Parses the stream and initializes the Document and XPath objects.
     *
     * @param is the input stream; it is not closed here
     * @throws ParserConfigurationException if a DocumentBuilder cannot be created
     * @throws SAXException if the content is not well-formed XML
     * @throws IOException if reading the stream fails
     */
    private void doParse(InputStream is) throws ParserConfigurationException, SAXException, IOException {
        doc = newDocumentBuilder().parse(is);
        xpath = XPathFactory.newInstance().newXPath();
    }

    /**
     * Parses in-memory XML text and initializes the Document and XPath objects.
     *
     * <p>The text is handed to the parser as a character stream. Encoding it to bytes with
     * the platform default charset can disagree with the encoding the document declares
     * (or with the UTF-8 default assumed when none is declared).
     */
    private void doParseText(String xmlText) throws ParserConfigurationException, SAXException, IOException {
        StringReader reader = new StringReader(xmlText);
        if (xmlText.startsWith("\uFEFF")) {
            // Skip a leading byte-order mark: as a character it is not valid before the prolog.
            reader.skip(1);
        }
        doc = newDocumentBuilder().parse(new InputSource(reader));
        xpath = XPathFactory.newInstance().newXPath();
    }

    /**
     * Creates a non-validating DocumentBuilder.
     *
     * <p>NOTE(review): DTDs and external entities are left at the factory defaults. If this
     * class is ever fed untrusted XML, harden the factory against XXE first.
     */
    private static DocumentBuilder newDocumentBuilder() throws ParserConfigurationException {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setValidating(false);
        return dbf.newDocumentBuilder();
    }

    /**
     * Returns the root element, selected with the expression {@code /*}.
     *
     * @return the root element node
     * @throws XPathExpressionException if the expression cannot be evaluated
     */
    public Node getRootNode() throws XPathExpressionException {
        // Root expression: "/*", or "/rss" when the actual root name is known.
        return (Node) xpath.evaluate("/*", doc, XPathConstants.NODE);
    }

    /**
     * Returns the root element by name, i.e. evaluates {@code "/" + rootNodeName}.
     *
     * <p>An XML file may only have one root element; with more than one, parsing fails with
     * an error such as:
     * {@code [Fatal Error] :40:2: The markup in the document following the root element must be well-formed.}
     *
     * @param rootNodeName name of the root element, e.g. "rss" (or "*" for any name)
     * @return the matching root node, or {@code null} when the root has a different name
     * @throws XPathExpressionException if the resulting expression is invalid
     */
    public Node getRootNode(String rootNodeName) throws XPathExpressionException {
        return (Node) xpath.evaluate("/" + rootNodeName, doc, XPathConstants.NODE);
    }

    /**
     * Evaluates a custom XPath expression against the document and returns all matches.
     * This is the general-purpose query entry point.
     *
     * @param expression the XPath expression, for example:
     * <p>(1) "/rss/channel/*": all child elements of /rss/channel;
     * <p>(2) "//bookstore/book[price>35.00]/title": titles of all books above the given
     *        price (the leading "//" matches bookstore at any depth)
     * @return the matching nodes
     * @throws XPathExpressionException if the expression is invalid
     */
    public NodeList getExpressionNodeList(String expression) throws XPathExpressionException {
        return (NodeList) xpath.evaluate(expression, doc, XPathConstants.NODESET);
    }

    /**
     * Returns every element with the given name, at any depth.
     *
     * <p>The name is inserted verbatim into a single-quoted XPath string literal, so it must
     * not contain a single quote.
     *
     * @param name element name, e.g. "title"
     * @return the matching elements
     * @throws XPathExpressionException if the resulting expression is invalid
     */
    public NodeList getPartNodeList(String name) throws XPathExpressionException {
        String expression = "//*[name() = '" + name + "']";
        return (NodeList) xpath.evaluate(expression, doc, XPathConstants.NODESET);
    }

    /**
     * Returns every element that has at least one child element.
     *
     * @return the matching elements
     * @throws XPathExpressionException if the expression cannot be evaluated
     */
    public NodeList haveChildNodes() throws XPathExpressionException {
        return (NodeList) xpath.evaluate("//*[*]", doc, XPathConstants.NODESET);
    }

    /**
     * Returns all elements at the given depth.
     *
     * @param levels depth to select, e.g. 3 selects every element on the third level
     * @return the elements at that depth
     * @throws XPathExpressionException if the expression cannot be evaluated
     * @throws IllegalArgumentException if {@code levels} is not positive
     */
    public NodeList getLevelElements(int levels) throws XPathExpressionException, IllegalArgumentException {
        if (levels <= 0) {
            throw new IllegalArgumentException("Invalid levels: " + levels);
        }
        // One "/*" step per level, e.g. "/*/*/*" for level 3.
        StringBuilder expression = new StringBuilder(levels * 2);
        for (int i = 0; i < levels; i++) {
            expression.append("/*");
        }
        return (NodeList) xpath.evaluate(expression.toString(), doc, XPathConstants.NODESET);
    }

    /**
     * Returns the element nodes that are direct children of the root element, skipping
     * text, comment and other non-element nodes.
     *
     * @return the child elements in document order, or {@code null} when no document has
     *         been parsed yet
     */
    public Set<Node> getRootChildNodes() {
        if (doc == null) {
            return null;
        }
        // LinkedHashSet keeps the children in document order; a plain HashSet would not.
        Set<Node> set = new LinkedHashSet<>();
        NodeList nodeList = doc.getDocumentElement().getChildNodes();
        for (int i = 0; i < nodeList.getLength(); i++) {
            Node node = nodeList.item(i);
            if (XpathUtils.isElementNode(node.getNodeType())) {
                set.add(node);
            }
        }
        return set;
    }

    /** Prints {@code msg} under this class's tag when {@link #DEBUG} is enabled. */
    private static void log(String msg) {
        if (DEBUG) {
            XmlPrinter.log(TAG, msg);
        }
    }

    /** Turns debug logging for this class on or off. */
    public static void setDebug(boolean debug) {
        DEBUG = debug;
    }
}
工具套件类:
package com.hulk.xpath;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Static helpers that convert DOM nodes, node lists and attribute maps into
 * {@code NodeInfo} objects and plain maps.
 */
public class XpathUtils {
    private static final String TAG = "XpathUtils";
    private static boolean DEBUG = false;

    /**
     * Converts an element node into a {@code NodeInfo} holding its name, type, attributes
     * and either its text content or its parsed child elements.
     *
     * <p>An element with at least one child element is treated as a container: its child
     * nodes are parsed recursively into {@code childs}. Any other element is treated as a
     * leaf and its text is stored in {@code content}.
     *
     * @param node the node to convert
     * @return the node info, or {@code null} when {@code node} is null, is not an element,
     *         or has no child nodes at all (such elements are skipped, attributes included)
     */
    public static NodeInfo parseNode(Node node) {
        if (node == null) {
            return null;
        }
        int type = node.getNodeType();
        String name = node.getNodeName();
        if (!isElementNode(type)) {
            // Nodes that are not elements are ignored.
            log("parseNode: Ignored not element node=" + name + ",type=" + type);
            return null;
        }
        if (!node.hasChildNodes()) {
            log("parseNode: Ignored has no Child node=" + name);
            return null;
        }
        log("parseNode: Start parse node name: " + name);
        NodeInfo info = new NodeInfo(name);
        info.type = type;

        NamedNodeMap attrsMap = node.getAttributes();
        if (attrsMap != null && attrsMap.getLength() > 0) {
            info.attrs = parseAttributes(attrsMap);
            log("parseNode: parsed attrs=" + info.attrs);
        }

        NodeList childNodes = node.getChildNodes();
        if (hasElementChild(childNodes)) {
            // Container element: getTextContent() would return the text of every descendant
            // glued together, so parse the children recursively instead, e.g.:
            /*
            <item language="Java">
            <title>Java Examples</title>
            <link>http://www.javacodegeeks.com/</link>
            </item>
            */
            log("parseNode: recursion to parse child nodes for node name: " + name);
            info.childs = parseNodeList(childNodes);
        } else {
            // Leaf element: the #text node is itself a child of the element, as in
            // <title>Java Examples</title>. The text may also be split over several
            // text/CDATA nodes, so the decision is not based on the child count.
            String content = node.getTextContent();
            info.content = content;
            log("parseNode: parsed content=" + content);
        }
        return info;
    }

    /** Returns whether the list contains at least one element node. */
    private static boolean hasElementChild(NodeList children) {
        for (int i = 0; i < children.getLength(); i++) {
            if (isElementNode(children.item(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Converts an attribute node map into a name-to-value map.
     *
     * @param attrNodeMap the attributes to convert
     * @return the attribute names mapped to their values, or {@code null} when
     *         {@code attrNodeMap} is null
     */
    public static Map<String, String> parseAttributes(NamedNodeMap attrNodeMap) {
        if (attrNodeMap == null) {
            log("parseAttributes: Ignored attrNodeMap is null");
            return null;
        }
        Map<String, String> attrs = new HashMap<String, String>();
        int length = attrNodeMap.getLength();
        for (int i = 0; i < length; i++) {
            Node attr = attrNodeMap.item(i);
            attrs.put(attr.getNodeName(), attr.getNodeValue());
        }
        return attrs;
    }

    /**
     * Converts the attributes of a node into a name-to-value map.
     *
     * @param node the node whose attributes are read
     * @return the attribute map, or {@code null} when {@code node} is null or exposes no
     *         attribute map
     */
    public static Map<String, String> parseNodeAttributes(Node node) {
        if (node == null) {
            return null;
        }
        return parseAttributes(node.getAttributes());
    }

    /**
     * Converts every node of a DOM node list with {@link #parseNode(Node)}.
     *
     * @param nodeList the nodes to convert; may be null
     * @return the converted nodes, without the ones {@code parseNode} skipped; an empty
     *         list when {@code nodeList} is null
     */
    public static List<NodeInfo> parseNodeList(NodeList nodeList) {
        List<NodeInfo> list = new ArrayList<NodeInfo>();
        if (nodeList == null) {
            log("parseNodeList: Ignored nodeList is null");
            return list;
        }
        int length = nodeList.getLength();
        log("parseNodeList: length=" + length);
        for (int i = 0; i < length; i++) {
            // parseNode returns null for null and skipped nodes.
            NodeInfo info = parseNode(nodeList.item(i));
            if (info != null) {
                list.add(info);
            }
        }
        return list;
    }

    /**
     * Converts every node of a Java list with {@link #parseNode(Node)}.
     *
     * @param nodeList the nodes to convert; may be null
     * @return the converted nodes, without the ones {@code parseNode} skipped; an empty
     *         list when {@code nodeList} is null
     */
    public static List<NodeInfo> parseNodeList(List<Node> nodeList) {
        List<NodeInfo> list = new ArrayList<NodeInfo>();
        if (nodeList == null) {
            log("parseNodeList: Ignored nodeList is null");
            return list;
        }
        log("parseNodeList: length=" + nodeList.size());
        for (Node node : nodeList) {
            // parseNode returns null for null and skipped nodes.
            NodeInfo info = parseNode(node);
            if (info != null) {
                list.add(info);
            }
        }
        return list;
    }

    /**
     * Returns whether the node is an element node such as
     * {@code <title>Java Examples</title>}.
     *
     * @param node the node to test; may be null
     * @return {@code true} for an element node, {@code false} otherwise or for null
     */
    public static boolean isElementNode(Node node) {
        if (node == null) {
            return false;
        }
        return isElementNode(node.getNodeType());
    }

    /**
     * Returns whether the node type denotes an element node.
     *
     * @param nodeType a value from {@link Node#getNodeType()}
     * @return {@code true} when it equals {@link Node#ELEMENT_NODE}
     */
    public static boolean isElementNode(int nodeType) {
        return nodeType == Node.ELEMENT_NODE;
    }

    /**
     * Returns whether the node type denotes an attribute node.
     *
     * @param nodeType a value from {@link Node#getNodeType()}
     * @return {@code true} when it equals {@link Node#ATTRIBUTE_NODE}
     */
    public static boolean isAttributeNode(int nodeType) {
        return nodeType == Node.ATTRIBUTE_NODE;
    }

    /**
     * Returns whether the node type denotes a text node.
     *
     * @param nodeType a value from {@link Node#getNodeType()}
     * @return {@code true} when it equals {@link Node#TEXT_NODE}
     */
    public static boolean isTextNode(int nodeType) {
        return nodeType == Node.TEXT_NODE;
    }

    /** Prints {@code msg} under this class's tag when debug logging is enabled. */
    private static void log(String msg) {
        if (DEBUG) {
            XmlPrinter.log(TAG, msg);
        }
    }

    /** Turns debug logging for this class on or off. */
    public static void setDebug(boolean debug) {
        DEBUG = debug;
    }
}
xml的Xpath解析不复杂,找到好的解析思路和工具类就很方便了.
具体代码请下载github:git@github.com:zhanghulk/HulkXPath.git
或者下载csdn资源: https://download.csdn.net/download/zhanghao_Hulk/12895617
有任何问题请在下面留言交流