# Parse HTML text with BeautifulSoup (bs4) to extract hyperlink URLs, which can then be used to fetch the next level of data.
from bs4 import BeautifulSoup as BS
import requests
# Fetch the demo page and parse it so that its hyperlinks can be inspected.
url = 'http://python123.io/ws/demo.html'
# A timeout keeps the script from hanging forever on an unresponsive server.
r = requests.get(url, timeout=10)
# Fail loudly on an HTTP error status instead of parsing an error page.
r.raise_for_status()
demo = r.text
soup = BS(demo, 'html.parser')

# Print every <a> tag, the tag object's type, and its href attribute.
for link in soup.find_all('a'):
    print(link)
    print(type(link))
    # Tag attributes can be added, removed or modified, and they are accessed
    # the same way as a dict, so .get() can be used to pull out the href
    # (it returns None when the attribute is absent).
    print(link.get('href'))

# A string passed as the second positional argument filters by CSS class:
# this prints the list of <a> tags whose class is "py2".
print(soup.find_all('a', 'py2'))