# Basic BeautifulSoup usage: parse a small HTML document and look up tags by
# name, by id, and by CSS class.

# Import the BeautifulSoup class.
from bs4 import BeautifulSoup as bs

# Sample document. It is assembled from adjacent string literals so the source
# stays readable while the parsed text remains a single line.
html_doc = (
    " <html><head><title>The Dormouse's story</title></head> "
    "<body> "
    '<p class="title"><b>'
    "The Dormouse's story</b></p> "
    '<p class="story">Once upon a time there were three little sisters; '
    'and their names were '
    '<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>, '
    '<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and '
    '<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>; '
    'and they lived at the bottom of a well.</p> '
    '<p class="story">...</p> '
)

# Create the BeautifulSoup object using the built-in "html.parser" parser.
soup = bs(html_doc, "html.parser")

# Tip: soup.prettify() returns the document as nicely indented text; wrap it in
# print(...) to inspect the parsed tree.

# Print the text of <title>. `.string` only works when the tag holds a single
# piece of text, as in <title>The Dormouse's story</title>. When the tag also
# contains other tags, e.g.
#     <title>The Dormouse's story <a> hello world </a></title>
# use get_text() instead.
print(soup.title.string)

# Attribute-style access returns only the first <a> tag in the document.
print(soup.a)

# Look a tag up by id="link2" and print its text content.
print(soup.find(id="link2").get_text())

# Print every <a> tag; the matches come back as a list. `find_all` is the
# current spelling of the legacy `findAll` alias. To print only the text of
# each link, iterate over the result and print each element's `.string`.
print(soup.find_all("a"))

# `class` is a Python keyword, so soup.find("p", class="story") is a syntax
# error; pass the attribute in a dict instead. This prints the first <p> with
# class="story". Because the <a> tags are nested inside that <p>, they are
# printed too; use get_text() to obtain only the text.
print(soup.find("p", {"class": "story"}))
# Title: BeautifulSoup basic usage
# Page metadata: latest recommended article published on 2023-10-10 11:18:00