from urllib.error import HTTPError, URLError
from urllib.request import urlopen

from bs4 import BeautifulSoup


# A collection of commonly used helper functions.
def getTitle(url):
    """Fetch *url* and return the first ``<h1>`` tag inside its ``<body>``.

    Args:
        url: Address of the page to download.

    Returns:
        The ``<h1>`` element found under ``<body>``, or ``None`` when the
        page cannot be fetched (HTTP error status or unreachable server),
        when the document has no ``<body>``, or when ``<body>`` has no
        ``<h1>``.
    """
    # Check that the server is reachable and that the page exists on it.
    # HTTPError covers error status codes (404, 500, ...); URLError covers
    # the case where the server itself cannot be reached.
    try:
        # The context manager closes the connection once the body is read.
        with urlopen(url) as response:
            markup = response.read()
    except (HTTPError, URLError):
        return None

    # Check that the expected tags exist. If <body> is missing, `.body`
    # is None and accessing `.h1` on it raises AttributeError.
    try:
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        title = BeautifulSoup(markup, "html.parser").body.h1
    except AttributeError:
        return None
    return title


title = getTitle("http://www.pythonscraping.com/pages/page1.html")
if title is None:
    print("Title could not be found")
else:
    print(title)
BeautifulSoup的简单爬取应用(常用自定义函数收集)
最新推荐文章于 2023-04-15 14:02:49 发布