1.处理子标签和其他后代标签
子标签(child)是父标签的直接下一级标签;后代标签(descendant)是父标签下面所有层级的标签,子标签只是后代标签的一个子集。
from urllib.request import urlopen
from bs4 import BeautifulSoup
# Fetch the sample page and print every direct child of the giftList table.
# The response is used as a context manager so the connection is closed
# once the document has been parsed.
with urlopen("http://www.pythonscraping.com/pages/page3.html") as html:
    # Name the parser explicitly: without it bs4 picks whichever parser is
    # installed (and emits a warning), so results can differ across machines.
    bsObj = BeautifulSoup(html, "html.parser")

# .children yields only the table's direct children (not deeper
# descendants); this can include whitespace text nodes between the rows.
for child in bsObj.find("table", {"id": "giftList"}).children:
    print(child)
2.处理兄弟标签:
from urllib.request import urlopen
from bs4 import BeautifulSoup
# Fetch the sample page and print every node that follows the first row of
# the giftList table. The response is closed as soon as parsing is done.
with urlopen("http://www.pythonscraping.com/pages/page3.html") as html:
    # Explicit parser keeps the parse tree the same on every machine and
    # avoids bs4's "no parser specified" warning.
    bsObj = BeautifulSoup(html, "html.parser")

# .tr is the first <tr> inside the table; .next_siblings iterates over the
# nodes after it at the same level, so the first row itself is not printed.
for sibling in bsObj.find("table", {"id": "giftList"}).tr.next_siblings:
    print(sibling)
3.处理父标签:
from urllib.request import urlopen
from bs4 import BeautifulSoup
# Fetch the sample page and print the text of the node that precedes the
# parent of a specific product image. The response is closed after parsing.
with urlopen("http://www.pythonscraping.com/pages/page3.html") as html:
    # Explicit parser keeps the parse tree the same on every machine and
    # avoids bs4's "no parser specified" warning.
    bsObj = BeautifulSoup(html, "html.parser")

# Navigation: <img> -> its parent tag -> the node immediately before that
# parent. The singular .previous_sibling is required here: the plural
# .previous_siblings is a generator and has no get_text() method.
# NOTE(review): on this page the preceding node is presumably the price
# cell of the same row - confirm against the page markup.
print(
    bsObj.find("img", {"src": "../img/gifts/img1.jpg"})
    .parent.previous_sibling.get_text()
)