# Fetch the example page and print its raw, undecoded body.
# urlopen is imported here because this statement runs before the file's
# own import of it further down; without this the call raises NameError.
from urllib.request import urlopen

with urlopen("http://pythonscraping.com/pages/page1.html") as html:
    # Leaving the with-block closes the response once the body is read.
    print(html.read())
from bs4 import BeautifulSoup
from urllib.request import urlopen
# Download the example page and parse it with the built-in 'html.parser'
# backend, then print the resulting BeautifulSoup object.
with urlopen("http://pythonscraping.com/pages/page1.html") as html:
    # The whole body is read inside the with-block so the response can be
    # closed before the parsed document is used.
    bsobj = BeautifulSoup(html.read(), 'html.parser')
print(bsobj)
# Two kinds of failure: 1) the page does not exist on the server, 2) the server itself does not exist
from bs4 import BeautifulSoup
from urllib.request import urlopen
def getTitle(url):
    """Fetch *url* and return the first ``<h1>`` tag inside its ``<body>``.

    Args:
        url: Address of the page to download; passed unchanged to ``urlopen``.

    Returns:
        The value of ``bsobj.body.h1`` for the parsed page, or ``None`` when
        the page cannot be fetched (HTTP error status or unreachable server)
        or when the parsed document has no ``<body>`` to look inside.
        Note that ``bsobj.body.h1`` may itself be ``None`` if the body holds
        no ``<h1>``; that value is returned as-is.
    """
    # Imported locally: the surrounding file never imports the urllib error
    # types, so referencing them in an except clause would raise NameError.
    from urllib.error import URLError

    try:
        html = urlopen(url)
    except URLError:
        # URLError covers an unreachable/unknown server; HTTPError (page
        # missing on the server, 4xx/5xx) is a subclass, so it lands here too.
        return None

    try:
        # Close the response as soon as the body has been read and parsed.
        with html:
            bsobj = BeautifulSoup(html.read(), 'html.parser')
        title = bsobj.body.h1
    except AttributeError:
        # bsobj.body was None (no <body> tag), so ".h1" could not be resolved.
        return None
    return title
# Run getTitle against the example page and report what came back.
title = getTitle("http://pythonscraping.com/pages/page1.html")
# Identity check: None is a singleton, and "==" could be intercepted by the
# returned object's own __eq__.
if title is None:
    print("Title is none")
else:
    print(title.text)