bs4 的链式调用很赞,但链条中间任何一步没找到节点(返回 None)时,后续调用就会抛异常,所以我把 soup 包装了一下:
class MY_SOUP():
    """Null-safe wrapper around a parsed node, for chained lookups.

    The wrapper holds either a node object (anything exposing ``string``,
    ``find``, ``find_all``, ``get`` and ``get_text``) or a falsy value,
    normally ``None``, which stands for "nothing was found".  Every method
    works in both states, so a chain such as
    ``wrapped.find(...).find(...).get('href')`` yields ``None`` at the end
    instead of raising when an intermediate lookup misses.

    Attributes:
        soup: The wrapped node, or ``None`` when there is no node.
        string: ``soup.string`` with surrounding whitespace stripped, or
            ``None`` when there is no node or the node has no string.
    """

    def __init__(self, soup):
        """Wrap ``soup`` and cache its stripped ``string`` value."""
        self.soup = soup
        text = soup.string if soup else None
        self.string = text.strip() if text else None

    def find(self, *args, **kw):
        """Forward to the wrapped node's ``find`` and wrap the result.

        Always returns a wrapper, never ``None``: a miss (or a call on an
        already-empty wrapper) produces an empty wrapper so the chain can
        continue.
        """
        # Guard the empty state; without it a second chained find() would
        # call None.find and raise AttributeError.
        if not self.soup:
            return type(self)(None)
        ret = self.soup.find(*args, **kw)
        # Build the result with this class itself (the previous code
        # referenced an undefined name here).
        return type(self)(ret if ret else None)

    def find_all(self, *args, **kw):
        """Forward to the wrapped node's ``find_all``.

        Returns the underlying result unchanged (its items are not
        wrapped), or an empty list when there is no node.
        """
        if not self.soup:
            return []
        return self.soup.find_all(*args, **kw)

    def get_text(self):
        """Return the node's stripped text, or ``None`` when empty."""
        if self.soup:
            return self.soup.get_text().strip()
        return None

    def get(self, *args, **kw):
        """Forward to the wrapped node's ``get``; ``None`` when empty."""
        if self.soup:
            return self.soup.get(*args, **kw)
        return None
# Parse the page, then wrap the summary container in MY_SOUP so the lookups
# below go through the wrapper instead of the raw node.
soup = BeautifulSoup(html, 'lxml')
summary_soup = MY_SOUP(soup.find('div', class_='summary'))

# Fetch the href of each social link, in the same order as the names on the
# left-hand side.  The wrapper is intended to make these chains yield None
# rather than raise when a tag is absent.
twitter_url, facebook_url, linkedin_url = (
    summary_soup.find('a', css_class).get('href')
    for css_class in ('twitter_url', 'facebook_url', 'linkedin_url')
)

# Two-step chain: the name container first, then the anchor inside it.
name = summary_soup.find('div', class_='name').find('a').string
...
参考 @prolifes