BeautifulSoup
- 安装
- 打开cmd命令行
- pip3 install beautifulsoup4
- pip3 install lxml（下面的案例使用 'lxml' 解析器，需要额外安装）
- 案例一
import requests
from bs4 import BeautifulSoup
def get_html(url, timeout=10):
    """Fetch *url* and return the decoded page text, or ``None`` on failure.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout unless one is passed, so without it a stalled
            server would block this call indefinitely.

    Returns:
        The response body as text when the server answers with HTTP 200;
        otherwise ``None`` after printing an error message.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection failures and timeouts are reported the same way as a
        # bad status code, so a caller's ``is not None`` check covers both.
        print("网络访问出错")
        return None

    if response.status_code != 200:
        print("网络访问出错")
        return None

    # Decode with the encoding guessed from the body rather than the one
    # declared in the headers; only worth doing for a successful response.
    response.encoding = response.apparent_encoding
    return response.text
def parse_html(html):
    """Parse *html* and print a few elements to demonstrate BeautifulSoup.

    Prints every ``<a>`` tag under the element with id ``syncad_1``, then the
    ``<title>`` tag and its string, the first ``<p>`` tag and its tag name,
    and the tag name of the first ``<a>`` tag's parent. ``None`` is printed
    in place of any value whose tag is missing from the document.

    Args:
        html: Markup of the page to inspect.
    """
    soup = BeautifulSoup(html, 'lxml')

    for link in soup.select('#syncad_1 a'):
        print(link)

    # Attribute-style lookups such as ``soup.title`` give None when the tag
    # is absent, so each one is checked before reading an attribute from it.
    title = soup.title
    print(title)
    print(title.string if title is not None else None)

    first_paragraph = soup.p
    print(first_paragraph)
    print(first_paragraph.name if first_paragraph is not None else None)

    first_anchor = soup.a
    parent = first_anchor.parent if first_anchor is not None else None
    print(parent.name if parent is not None else None)
if __name__ == "__main__":
    # Example 1: download the Sina news front page and print parts of it.
    url = "http://news.sina.com.cn"
    html = get_html(url)

    # get_html hands back None when the page could not be fetched; only
    # parse when there is markup to work with.
    if html is not None:
        parse_html(html)
import requests
from bs4 import BeautifulSoup
def get_html(url, timeout=10):
    """Fetch *url* and return the decoded page text, or ``None`` on failure.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout unless one is passed, so without it a stalled
            server would block this call indefinitely.

    Returns:
        The response body as text when the server answers with HTTP 200;
        otherwise ``None`` after printing an error message.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection failures and timeouts are reported the same way as a
        # bad status code, so a caller's ``is not None`` check covers both.
        print("网络访问出错")
        return None

    if response.status_code != 200:
        print("网络访问出错")
        return None

    # Decode with the encoding guessed from the body rather than the one
    # declared in the headers; only worth doing for a successful response.
    response.encoding = response.apparent_encoding
    return response.text
def parse_html(html):
    """Yield the child count of the first ``<div>`` in *html*, then each child.

    Args:
        html: Markup of the page to inspect.

    Yields:
        First the number of direct children of the document's first
        ``<div>``, then those children one at a time. Nothing is yielded
        when the document contains no ``<div>``.
    """
    soup = BeautifulSoup(html, 'lxml')

    container = soup.div
    if container is None:
        # No <div> in the page: finish quietly instead of failing with
        # AttributeError on ``None.contents``.
        return

    yield len(container.contents)
    yield from container.children
if __name__ == "__main__":
    # Example 2: download the Baidu Music front page and print whatever
    # parse_html yields, one item per line.
    url = "http://music.baidu.com"
    html = get_html(url)

    # get_html hands back None when the page could not be fetched.
    if html is not None:
        for text in parse_html(html):
            print(text)