初始化
from pyquery import PyQuery
# Sample document shared by the examples that follow. The wrapper <div>
# carries id="container" and the <ul> carries class="list" so that the
# '#container .list .item-1' selector used further down has nodes to match.
html = '''
<div id="container">
<ul class="list">
<li class ="item-0" > first item</li >
<li class ="item-1" > <a href ="link2.html" > second item</a > </li >
<li class ="item-0 active" > <a href ="link3.html" > <span class ="bold" > third item</span > </a > </li >
<li class ="item-1 active" > <a href ="link4.html" > fourth item</a > </li >
<li class ="item-0" > <a href ="link5.html" > fifth item</a > </li >
</ul >
</div >
'''
ht = PyQuery(html)
print(ht('li'))  # pass a CSS selector directly
URL 初始化和文档初始化我觉得不常用，还是把请求和解析分开比较好。
基本CSS选择器
# Descendant selector: .item-1 nodes inside .list inside #container.
container_items = ht('#container .list .item-1')
print(container_items)
父子
# Select the .item-1 nodes once, then show their children and their parents.
item_one = ht.find('.item-1')
print(item_one.children())
print(item_one.parent())
兄弟节点
# Siblings of the <li> that carries both the item-1 and active classes.
active_item = ht('.item-1.active')
print(active_item.siblings())
----------------------------------------------------------------------
<li class ="item-0 active" > <a href ="link3.html" > <span class ="bold" > third item</span > </a > </li >
<li class ="item-1" > <a href ="link2.html" > second item</a > </li >
<li class ="item-0" > first item</li >
<li class ="item-0" > <a href ="link5.html" > fifth item</a > </li >
遍历
# Walk over every matched <li>; the loop body must be indented to be valid.
for i in ht('li').items():
    print(i)
获取信息
# The recorded output below shows three results (attribute, text, element),
# so all three reads are spelled out here.
link = ht('.item-1.active a')
print(link.attr('href'))  # read an attribute
print(link.text())  # text content of the node
print(link)  # the element itself
link4.html
fourth item
<a href="link4.html">fourth item</a>
DOM操作
addClass、removeClass
# Query the list document held in `ht`; `doc` is not defined at this point.
li = ht('.item-0.active')
print(li)
li.removeClass('active')  # drop the class from the selected node
print(li)
li.addClass('active')  # and put it back
print(li)
添加属性
# Query the list document held in `ht`; `doc` is not defined at this point.
li = ht('.item-0.active')
print(li)
li.attr('name', 'link')  # two-argument form sets the attribute
print(li)
li.css('font-size', '14px')  # two-argument form sets an inline style
print(li)
删除指定标签,这个例子经典
html = '''
<div class="wrap">
Hello, World
<p>This is a paragraph.</p>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
wrap = doc('.wrap')
print(wrap.text())  # text of the wrapper including the <p>
wrap.find('p').remove()  # drop the <p> so only the wrapper's own text is left
print(wrap.text())  # second print produces the "Hello, World" line recorded below
Hello, World This is a paragraph.
Hello, World
伪类选择器
html = '''
<div class ="wrap" >
<div id ="container" >
<ul class ="list" >
<li class ="item-0" >first item </li>
<li class ="item-1" ><a href="link2.html" >second item </a></li>
<li class ="item-0 active" ><a href="link3.html" ><span class ="bold" >third item </span></a></li>
<li class ="item-1 active" ><a href="link4.html" >fourth item </a></li>
<li class ="item-0" ><a href="link5.html" >fifth item </a></li>
</ul>
</div >
</div >
'''
from pyquery import PyQuery
q = PyQuery(html)
# Pseudo-class names must be written without inner spaces
# ('li:first-child', not 'li:first -child'), otherwise the selector is
# parsed as something else.
print(q('li:first-child'))
print(q('li:last-child'))
print(q('li:nth-child(2)'))
# NOTE(review): the original separator string was truncated (`print('`);
# a blank line is emitted instead -- restore the intended text if known.
print()
print(q('li:gt(2)'))
<li class ="item-0" >first item </li>
<li class ="item-0" ><a href="link5.html" >fifth item </a></li>
<li class ="item-1" ><a href="link2.html" >second item </a></li>
<li class ="item-1 active" ><a href="link4.html" >fourth item </a></li>
<li class ="item-0" ><a href="link5.html" >fifth item </a></li>
还有
# Query `q`, the list document built just above; `doc` holds the
# "Hello, World" sample, which contains no <li> elements.
li = q('li:nth-child(2n)')
print(li)
li = q('li:contains(second)')
print(li)