# Web scraper exception handling:
from urllib.request import urlopen
from urllib.error import HTTPError,URLError
from bs4 import BeautifulSoup
def getTitle(url):
    """Fetch *url* and return the first ``<h1>`` tag inside its ``<body>``.

    Args:
        url: Address of the page to download; passed straight to ``urlopen``.

    Returns:
        The first ``<h1>`` element found under ``<body>``, or ``None`` when
        the page cannot be opened (``HTTPError`` / ``URLError``), when the
        document has no ``<body>``, or when the body contains no ``<h1>``.
    """
    try:
        response = urlopen(url)
    except (HTTPError, URLError):
        # Server returned an error status, or the host/resource is unreachable.
        return None

    # Close the connection as soon as the payload has been read.
    with response:
        markup = response.read()

    try:
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        document = BeautifulSoup(markup, "html.parser")
        # document.body is None when the page has no <body>; the attribute
        # access on None raises AttributeError, handled below.
        title = document.body.h1
    except AttributeError:
        return None
    return title
# Fetch the sample page and print its first <h1>, or a fallback message when
# getTitle() reports failure by returning None.
# NOTE(review): "pages1.html" may be a typo for "page1.html" - confirm the URL.
title = getTitle("http://www.pythonscraping.com/pages/pages1.html")
if title is None:
    print("title could not be found")
else:
    print(title)