Java-XPath(code)

/*
* Created on 2005/01/24
*
* Introduce XPath
*/
package org.brunt.xml.xpath;

import java.io.StringBufferInputStream;
import java.io.StringReader;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.apache.xpath.XPathAPI;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.traversal.NodeIterator;
import org.xml.sax.InputSource;
/**
 * Demonstrates XPath 1.0 by evaluating every expression in {@link #XPATH}
 * against an XML document and printing the element and attribute nodes
 * that each expression selects.
 *
 * <p>Expressions are evaluated with the standard JAXP API
 * ({@code javax.xml.xpath}), so no XPath library beyond the JDK is needed.
 *
 * @author ht
 */
public class TestXPath {

    /** Tutorial page the sample expressions follow. */
    public static final String description =
        " http://www.zvon.org/xxl/XPathTutorial/General_chi/examples.html";

    /** Sample XPath expressions, grouped by the feature they illustrate. */
    public static final String[] XPATH = {
        // example1: a leading '/' denotes an absolute path
        "/AAA", "/AAA/XXX/DDD", "/AAA/XXX/DDDNO",

        // example2: a leading '//' selects matching elements anywhere in the document
        "//BBB", "//BBB/BBB", "//BBB/BBBNO",

        // example3: '*' selects all child elements
        "/AAA/XXX/DDD/*", "//DDD/*",

        // example4: '[]' selects by position
        "/AAA/XXX/DDD/BBB[1]",
        "/AAA/XXX/DDD/BBB[2]",
        "/AAA/XXX/DDD/BBB[last()]", // last()

        // example5: '@' selects attributes
        "//@name",
        "/AAA/@id",
        "//DDD/@name",
        "//DDD/@color",
        "//BBB[ @*]",
        "//BBB[not( @*)]", // not()

        // example6: filter on attribute value
        "//BBB[ @color='yellow']",
        "//BBB[ @color='red']",
        "//BBB[normalize-space( @color)='red']", // normalize-space()

        // example7: count()
        "//*[count(BBB)=3]", // count()
        "//*[count(*)=1]", // *

        // example8: name() starts-with() contains()
        "//*[name()='BBB']", // name()
        "//*[starts-with(name(),'D')]", // starts-with()
        "//*[contains(name(),'E')]", // contains()

        // example9: string-length()
        "//*[string-length(name()) = 5]", // string-length()
        "//*[string-length(name()) < 5]",

        // example10: '|' unions several paths
        "//BBB[ @id='b1'] | //EEE | //FFF",

        // example11: child axis
        "/child::AAA", // child::
        "/AAA", // /child::AAA == /AAA

        // example12: descendant axis
        "/AAA/XXX/DDD/descendant::*", // descendant::
        "//DDD/descendant::*",

        // example13: parent axis
        "//BBB/parent::*", // parent::

        // example14: ancestor axis
        "/AAA/XXX/DDD/BBB/ancestor::*", // ancestor::
        "/AAA/XXX/DDD/BBB/ancestor::XXX",

        // example15: following-sibling axis
        "/AAA/XXX/following-sibling::*", // following-sibling::

        // example16: preceding-sibling axis
        "/AAA/CCC/preceding-sibling::*", // preceding-sibling::

        // example17: following axis
        "/AAA/XXX/following::*", // following::

        // example18: preceding axis
        "/AAA/XXX/preceding::*", // preceding::

        // example19: descendant-or-self axis
        "/AAA/XXX/descendant-or-self::*", // descendant-or-self::

        // example20: ancestor-or-self axis
        "/AAA/XXX/DDD/EEE/ancestor-or-self::*", // ancestor-or-self::

        // example21: self axis, and a union of all axes around EEE
        "//BBB/self::*",
        "//EEE/ancestor::* | //EEE/descendant::* | //EEE/following::* | //EEE/preceding::* | //EEE/self::*",

        // example22: numeric functions on position
        "//BBB[position() mod 2 = 0 ]", // position() mod
        "//BBB[ position() = floor(last() div 2 + 0.5) or position() = ceiling(last() div 2 + 0.5) ]",
    };

    /** Sample document the expressions are run against when no input is supplied. */
    public static final String DEFAULT_XML_STRING =
        " <AAA id=\"a1\"> \n"
            + " <XXX id=\"x1\"> \n"
            + " <DDD id=\"d1\" name=\"d1name\"> \n"
            + " <BBB id=\"b1\"/> \n"
            + " <BBB id=\"b2\"/> \n"
            + " <BBB/> \n"
            + " <EEE id=\"e1\"/> \n"
            + " <FFF id=\"f1\"/> \n"
            + " </DDD> \n"
            + " <DEE id=\"dee1\">hello \n"
            + " </DEE> \n"
            + " <TEST1 id=\"test1\"/> \n"
            + " <TEST2 id=\"test2\"/> \n"
            + " </XXX> \n"
            + " <CCC id=\"c1\"> \n"
            + " <DDD id=\"d2\" name=\"d2name\" color=\"red\"> \n"
            + " <BBB id=\"b3\"/> \n"
            + " <BBB id=\"b4\"/> \n"
            + " <BBB id=\"b10\" color=\"yellow\"/> \n"
            + " <BBB id=\"b11\" color=\"red\"/> \n"
            + " <BBB id=\"b12\" color=\" red \"/> \n"
            + " <EEE id=\"e2\" name=\"e2name\"/> \n"
            + " <FFF id=\"f2\"/> \n"
            + " </DDD> \n"
            + " </CCC> \n"
            + " <CCC> \n"
            + " <BBB id=\"b5\"> \n"
            + " <BBB id=\"b6\"> \n"
            + " <BBB id=\"b7\"/> \n"
            + " </BBB> \n"
            + " </BBB> \n"
            + " </CCC> \n"
            + " </AAA> \n ";

    /** Prints {@link #DEFAULT_XML_STRING} to standard output. */
    public static void printDefaultXml() {
        System.out.println(DEFAULT_XML_STRING);
    }

    /**
     * Wraps {@link #DEFAULT_XML_STRING} in a fresh {@link InputSource}.
     *
     * <p>A character stream is used rather than the deprecated
     * {@code StringBufferInputStream}, which discards the high byte of each char.
     *
     * @return a new input source reading the sample document
     */
    public static InputSource getDefaultInputSource() {
        return new InputSource(new StringReader(DEFAULT_XML_STRING));
    }

    /**
     * Parses the given input into a DOM document using a namespace-aware parser.
     *
     * @param is the XML input to parse
     * @return the parsed document
     * @throws Exception if the parser cannot be created or the input cannot be parsed
     */
    public static Document getDocument(InputSource is) throws Exception {
        DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
        dfactory.setNamespaceAware(true);
        return dfactory.newDocumentBuilder().parse(is);
    }

    /**
     * Prints each element or attribute node of the list; other node types are skipped.
     *
     * @param nodes the nodes selected by an XPath expression
     */
    private static void printNodes(NodeList nodes) {
        for (int i = 0; i < nodes.getLength(); i++) {
            Node node = nodes.item(i);
            short type = node.getNodeType();
            if (type == Node.ELEMENT_NODE) {
                System.out.print("\t[NodeName]:" + node.getNodeName());
                // DOM defines getNodeValue() as null for element nodes; printed as-is.
                System.out.print("\t[NodeValue]:" + node.getNodeValue());
                printNodeAttrId(node);
            } else if (type == Node.ATTRIBUTE_NODE) {
                System.out.println(
                    "\t\t[ATTR]:" + node.getNodeName() + "=" + node.getNodeValue());
            }
        }
    }

    /**
     * Prints the value of the node's {@code id} attribute, or the text
     * {@code null} when the node has no such attribute.
     *
     * @param node the node whose id is printed
     */
    private static void printNodeAttrId(Node node) {
        String id = "null";
        if (node.hasAttributes()) {
            Node attrNode = node.getAttributes().getNamedItem("id");
            if (attrNode != null) {
                id = attrNode.getNodeValue();
            }
        }
        System.out.println("\t\t[NodeId]:" + id);
    }

    /**
     * Runs every sample expression against the built-in sample document.
     *
     * @throws Exception if parsing or evaluation fails
     */
    public void run() throws Exception {
        run(null);
    }

    /**
     * Runs every sample expression against the given input.
     *
     * @param is the XML input, or {@code null} to use the built-in sample document
     * @throws Exception if parsing or evaluation fails
     */
    public void run(InputSource is) throws Exception {
        Document doc = getDocument(is == null ? getDefaultInputSource() : is);
        for (int i = 0; i < XPATH.length; i++) {
            run(XPATH[i], doc);
        }
    }

    /**
     * Evaluates one expression against the document and prints the selected nodes,
     * preceded by the expression itself and followed by a blank separator.
     *
     * @param xpath the XPath 1.0 expression; it must evaluate to a node-set
     * @param doc the document used as the context node
     * @throws Exception if the expression is invalid or does not yield a node-set
     */
    public void run(String xpath, Document doc) throws Exception {
        System.out.println("[XPATH]:" + xpath);

        NodeList nodes = (NodeList) XPathFactory.newInstance()
            .newXPath()
            .evaluate(xpath, doc, XPathConstants.NODESET);
        printNodes(nodes);

        System.out.println("\n");
    }

    /**
     * Prints the sample document, then the result of every sample expression.
     *
     * @param args ignored
     * @throws Exception if parsing or evaluation fails
     */
    public static void main(String[] args) throws Exception {
        printDefaultXml();

        TestXPath txpath = new TestXPath();
        txpath.run();
    }
}


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
XPath是一种XML文档的定位方法,也可以用于HTML文档的定位,Selenium中也可以使用XPath来定位网页元素。下面是使用XPath定位元素的详细步骤:

1. 打开浏览器并访问网页:

```python
from selenium import webdriver

driver = webdriver.Chrome()
driver.get("http://www.example.com")
```

2. 使用XPath定位元素:

```python
# 通过元素id定位
element = driver.find_element_by_xpath('//*[@id="element_id"]')
# 通过元素name定位
element = driver.find_element_by_xpath('//*[@name="element_name"]')
# 通过元素class定位
element = driver.find_element_by_xpath('//*[@class="element_class"]')
# 通过元素标签名定位
element = driver.find_element_by_xpath('//tag_name')
# 通过元素属性定位
element = driver.find_element_by_xpath('//*[@attribute_name="attribute_value"]')
# 通过元素文本内容定位
element = driver.find_element_by_xpath('//*[text()="text_content"]')
# 通过元素部分文本内容定位
element = driver.find_element_by_xpath('//*[contains(text(), "text_content")]')
```

3. 对元素进行操作:

```python
# 输入文本
element.send_keys("text_input")
# 点击元素
element.click()
# 获取元素文本
print(element.text)
# 获取元素属性值
print(element.get_attribute("attribute_name"))
```

注意事项:

- XPath定位需要用到浏览器的开发者工具,在开发者工具中可以查看元素的XPath路径。
- XPath路径中的引号需要用不同类型的引号包裹,例如在单引号内使用双引号包裹。
- 如果XPath路径中包含斜杠(/),则需要使用双斜杠(//)或者使用单引号包裹整个XPath路径。
- 在XPath路径中没有找到元素时,会抛出NoSuchElementException异常。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值