# Notes: regex = XPath = BeautifulSoup (three roughly equivalent ways to extract data from HTML)
from bs4 import BeautifulSoup as bsf
import urllib.request
# Download the page and decode it as UTF-8, dropping any undecodable bytes.
# urlopen() requires a full URL including the scheme: a bare 'xxxx.com' raises
# ValueError ("unknown url type"). The with-block closes the HTTP response.
with urllib.request.urlopen('http://xxxx.com') as response:
    data = response.read().decode('utf-8', 'ignore')

# Name the parser explicitly: without it bs4 picks whichever parser happens to
# be installed and emits GuessedAtParserWarning, so results vary by machine.
bs = bsf(data, 'html.parser')
print(bs.prettify())  # pretty-printed (indented) markup

# --- Tag access: bs.<tag name> is the first tag with that name ---
bs.title  # the whole tag, e.g. <title>hello</title>
bs.title.name  # the tag name, e.g. 'title'
bs.title.string  # the text inside the tag, e.g. 'hello'
bs.a.attrs  # dict of every attribute on the first <a>

# Value of class="xxx" on the first <a>. Because class is a multi-valued
# attribute, bs4 returns it as a list (e.g. ['xxx']). bs.a["class"] performs the
# same lookup but raises KeyError when the attribute is missing, whereas
# .get() returns None.
bs.a.get("class")

# --- Searching ---
bs.find_all('a')  # every <a> tag in the document
bs.find_all(['a', 'u'])  # every <a> and every <u> tag

# --- Direct children of the first <ul> ---
k1 = bs.ul.contents  # children as a list
k2 = bs.ul.children  # the same children as a one-shot iterator
children = list(k2)  # materialize the iterator; k2 is exhausted afterwards