Beautiful Soup 4
basic
from bs4 import BeautifulSoup
from pprint import pprint
import re
......
# Parse the HTML document with the lxml parser, then print the
# pretty-printed form of the resulting tree.
soup = BeautifulSoup(html_doc, 'lxml')
pretty_html = soup.prettify()
print(pretty_html)
find
# Look up the first <a> tag once, then show three views of what it holds:
# its list of children, its .string value, and its text.
first_link = soup.find('a')
pprint(first_link.contents)
pprint(first_link.string)
pprint(first_link.get_text())
find_all
# Filter on a single attribute by passing it as a keyword argument.
print(soup.find_all(id='link3'))
# Cap the number of results with limit.
pprint(soup.find_all('a', limit=1))
# Filter an attribute with a regex. Beautiful Soup applies the pattern with
# search(), so no leading '.*' is needed to match anywhere in the value.
print(soup.find_all(href=re.compile(r'elsie')))
# Filter on several attributes at once through the attrs dict; this form also
# accepts 'class', which is a reserved word and cannot be a keyword argument.
pprint(soup.find_all(attrs={'class': 'sister', 'id': 'link3'}))