1. 正则表达式
(1)
# Capture the contents of the <td class="w2p_fw"> cell that follows the
# <tr id="places_area__row"> tag in `html`; findall returns the captured
# groups as a list of strings.
# The pattern is a raw string so that "\s" reaches the regex engine as-is
# instead of being treated as an (invalid) Python string escape.
re.findall(r'<tr id="places_area__row">.*?<td\s*class=["\']w2p_fw["\']>(.*?)</td>', html)
(2)
import re

# Compile the literal pattern once so the compiled object can be reused.
pattern = re.compile("hello")
# Alternative: re.findall(pattern, "hello world! hello") collects every
# occurrence and returns them as a list.
# match() only tries to match at the beginning of the string: it returns a
# single match object on success and None when there is no match.
match = re.match(pattern, "hello world! hello")
print(match)
2. BeautifulSoup(bs4)
转Python中使用Beautiful Soup库的超详细教程:http://www.jb51.net/article/65287.htm
from bs4 import BeautifulSoup
>>> soup = BeautifulSoup(html, "html.parser") #用html解析器对得到的html文本进行解析
>>> tr = soup.find(attrs={"id":"places_area__row"})
>>> tr
<tr id="places_area__row"