xpath

from  scrapy.selector  import  Selector, HtmlXPathSelector
from  scrapy.http  import  HtmlResponse
html  =  """<!DOCTYPE html>
<html>
     <head lang="en">
         <meta charset="UTF-8">
         <title></title>
     </head>
     <body>
         <ul>
             <li class="item-"><a id='i1' href="link.html">first item</a></li>
             <li class="item-0"><a id='i2' href="llink.html">first item</a></li>
             <li class="item-1"><a href="llink2.html">second item<span>vv</span></a></li>
         </ul>
         <div><a href="llink2.html">second item</a></div>
     </body>
</html>
"""
response  =  HtmlResponse(url = 'http://example.com' , body = html,encoding = 'utf-8' )
# hxs = HtmlXPathSelector(response)
# print(hxs)
# hxs = Selector(response=response).xpath('//a')
# print(hxs)
# hxs = Selector(response=response).xpath('//a[2]')
# print(hxs)
# hxs = Selector(response=response).xpath('//a[@id]')
# print(hxs)
# hxs = Selector(response=response).xpath('//a[@id="i1"]')
# print(hxs)
# hxs = Selector(response=response).xpath('//a[@href="link.html"][@id="i1"]')
# print(hxs)
# hxs = Selector(response=response).xpath('//a[contains(@href, "link")]')
# print(hxs)
# hxs = Selector(response=response).xpath('//a[starts-with(@href, "link")]')
# print(hxs)
# hxs = Selector(response=response).xpath('//a[re:test(@id, "i\d+")]')
# print(hxs)
# hxs = Selector(response=response).xpath('//a[re:test(@id, "i\d+")]/text()').extract()
# print(hxs)
# hxs = Selector(response=response).xpath('//a[re:test(@id, "i\d+")]/@href').extract()
# print(hxs)
# hxs = Selector(response=response).xpath('/html/body/ul/li/a/@href').extract()
# print(hxs)
# hxs = Selector(response=response).xpath('//body/ul/li/a/@href').extract_first()
# print(hxs)
 
# ul_list = Selector(response=response).xpath('//body/ul/li')
# for item in ul_list:
#     v = item.xpath('./a/span')
#     # 或
#     # v = item.xpath('a/span')
#     # 或
#     # v = item.xpath('*/a/span')
#     print(v)

转载于:https://www.cnblogs.com/BensonChang/p/9222243.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值