# Navigating trees (导航树)
from urllib.request import urlopen
from urllib.error import HTTPError ,URLError
from bs4 import BeautifulSoup
# Fetch the example page. The context manager closes the HTTP response once
# its body has been read instead of leaving it open for the rest of the run.
with urlopen("http://www.pythonscraping.com/pages/page3.html") as html:
    # Name the parser explicitly: otherwise BeautifulSoup picks whichever
    # parser happens to be installed (and warns about having to guess), so
    # the resulting tree could differ from machine to machine.
    bsObj = BeautifulSoup(html.read(), "html.parser")

# Children are the tags exactly one level below a parent; descendants are the
# tags at every level beneath it. In the example HTML, tr is a child of table,
# while tr, th, td, img and span are all descendants of table.
# All img tags inside the first div could be listed with:
#     print(bsObj.div.findAll("img"))

# The gift table is the starting point for the child, descendant and sibling
# walks below, so look it up once.
gift_table = bsObj.find("table", {"id": "giftList"})

# Print the direct children of the table.
print("------------")
for child in gift_table.children:
    print(child)
print("+++++++++++++++++++++")

# Print every descendant of the table.
for child in gift_table.descendants:
    print(child)

# Walk the siblings of the table's first tr. next_siblings never includes the
# tag itself and only yields the siblings that come after it.
for sibling in gift_table.tr.next_siblings:
    print(sibling)

# Go from the image up to its parent, step to that parent's previous sibling,
# and print that sibling's text.
gift_image = bsObj.find("img", {"src": "../img/gifts/img1.jpg"})
print(gift_image.parent.previous_sibling.get_text())