利用 BeautifulSoup 统计网页 tag 个数
#!/usr/bin/env python3
import bs4
def _count(soup):
    """Count the tags in the tree rooted at a BeautifulSoup node.

    The root node itself is counted, followed by every descendant reached
    through ``.contents`` whose ``name`` is truthy. Children with a falsy
    ``name`` are skipped (presumably text nodes such as NavigableString --
    confirm against bs4).

    Arguments:
        soup -- a node exposing ``.name`` and ``.contents``

    Returns:
        dict mapping each name to its number of occurrences; keys appear in
        the order the names are first met in a pre-order walk of the tree.
    """
    counts = {}
    # An explicit stack replaces recursion so that deeply nested markup
    # cannot exhaust Python's recursion limit.
    stack = [soup]
    while stack:
        node = stack.pop()
        counts[node.name] = counts.get(node.name, 0) + 1
        named_children = [child for child in (node.contents or ()) if child.name]
        # Pushed in reverse so pop() visits children in document order,
        # which keeps the key order of the result stable.
        stack.extend(reversed(named_children))
    return counts
def count(s):
    """Count the tags inside the <body> of an HTML document.

    The markup is parsed with BeautifulSoup using the ``lxml`` parser and
    the resulting ``<body>`` element is handed to ``_count``.

    Arguments:
        s: str -- the HTML code

    Returns:
        The result of ``_count`` for the ``<body>`` element, or an empty
        dict when the parsed document has no ``<html>`` or ``<body>``.
    """
    soup = bs4.BeautifulSoup(s, 'lxml')
    html = soup.html
    # Attribute-style tag lookups give None when the tag is absent (e.g. for
    # empty input), so guard both steps instead of raising AttributeError.
    body = html.body if html is not None else None
    if body is None:
        return {}
    return _count(body)
# Read an HTML file from disk (the markup could equally be fetched with
# `requests`) and print the tag counts of its body.
# Guarded so that importing this module for `count` does not touch the disk.
if __name__ == '__main__':
    # NOTE(review): no encoding is passed, so the platform default is used --
    # confirm it matches the encoding of the saved page.
    with open('zzjc.htm') as fo:
        s = fo.read()
    # The file is already closed here; parsing only needs the text.
    print(count(s))