'''
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('.list .item-0.active')  # 选择器中的空格表示后代关系(在 .list 内部查找);不加空格表示同一个元素同时具有这些 class
print(li.siblings())  # .siblings() 返回兄弟元素,即同级别的元素,不包括自己
'''
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('.item-0.active')
print(li)
li.attr('name', 'link')  # 增加一个属性
print(li)
li.css('font-size', '14px')  # 增加一个 CSS 样式
print(li)

结果:
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-0 active" name="link"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-0 active" name="link" style="font-size: 14px"><a href="link3.html"><span class="bold">third item</span></a></li>
remove
html = '''
<div class="wrap">
    Hello, World
    <p>This is a paragraph.</p>
</div>
'''
from pyquery import PyQuery as pq
doc = pq(html)
wrap = doc('.wrap')
print(wrap.text())
wrap.find('p').remove()  # 找到 p 标签然后删除
print(wrap.text())

结果:
Hello, World This is a paragraph.
Hello, World
'''
from pyquery import PyQuery as pq
doc = pq(html)
li = doc('li:first-child')
print(li)
li = doc('li:last-child')
print(li)
li = doc('li:nth-child(2)')
print(li)
li = doc('li:gt(2)')
print(li)
li = doc('li:nth-child(2n)')
print(li)
li = doc('li:contains(second)')
print(li)

结果:
<li class="item-0">first item</li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
人生苦短,我用Python。

PyQuery是强大而又灵活的网页解析库。如果你觉得正则写起来太麻烦,如果你觉得BeautifulSoup语法太难记,如果你熟悉jQuery的语法,那么PyQuery就是你绝佳的选择。

一、初始化方式

初始化方式有三种:可以传入字符串、传入URL、传入文件。

字符串初始化

html = ''' first item second item third item fourth item