catalog
Initialization
html Initialization
from pyquery import PyQuery as pq
doc = pq(html)
print(doc("li"))
url Initialization
from pyquery import PyQuery as pq
doc = pq(url = 'http://www.baidu.com')
print(doc("li"))
file Initialization
from pyquery import PyQuery as pq
doc = pq(filename = 'demo.html')
print(doc("li"))
basic CSS selector
select
basic select method
.class
#id
li
use a space to combine selectors (descendant selector), e.g. ".list li"
from pyquery import PyQuery as pq
doc = pq(html)
li = doc(".list")
li.find('li')
child node
from pyquery import PyQuery as pq
doc = pq(html)
li = doc(".list")
li.children(".active")
parent node
from pyquery import PyQuery as pq
doc = pq(html)
li = doc(".list")
li.parent(".active")
li.parents(".active")
sibling node
from pyquery import PyQuery as pq
doc = pq(html)
li = doc(".list")
li.siblings(".active")
traversal
from pyquery import PyQuery as pq
doc = pq(html)
lis = doc('.list').items()
for li in lis:
print(li)
get Information
get attrs
from pyquery import PyQuery as pq
doc = pq(html)
lis = doc('.list')
print(lis.attr('href'))
get text
from pyquery import PyQuery as pq
doc = pq(html)
lis = doc('.list')
print(lis.text())
get HTML
from pyquery import PyQuery as pq
doc = pq(html)
lis = doc('.list')
print(lis.html())
DOM operation
adjust the nodes
addClass, removeClass
attr, css
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('.list')
li.attr('name','link')
#add attribute name = link
li.css('font-size','14px')
#add attribute style = font-size:14px
remove
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('.list')
li.find('p').remove()
other DOM operation
link.
pseudo-class selectors
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('li:first-child')#li that is the first child of its parent
li = doc('li:last-child')#li that is the last child of its parent
li = doc('li:nth-child(2)')#li that is the second child of its parent
li = doc('li:nth-child(2n)')
li = doc('li:gt(2)')#li elements whose index is greater than 2 (zero-based)
li = doc('li:contains(second)')#li elements containing the text 'second'
link.
official document
http://pyquery.readthedocs.io/