这是一种方式:
def dictify(ul):
    """Convert a nested HTML list into a nested dictionary.

    Each direct ``<li>`` child of ``ul`` contributes one entry. The key is
    the first non-blank string found inside the item. The value is the
    result of converting the first ``<ul>`` found inside the item, or
    ``None`` when the item holds no nested list.

    Args:
        ul: A parsed list element exposing ``find_all``; its ``<li>``
            children must expose ``stripped_strings`` and ``find``
            (as BeautifulSoup tags do).

    Returns:
        A dict mapping item text to a nested dict or ``None``.
    """
    result = {}
    # recursive=False keeps this level to direct children only; deeper
    # items are handled by the recursive call on their own list.
    for li in ul.find_all("li", recursive=False):
        key = next(li.stripped_strings, None)
        if key is None:
            # An item with no text anywhere inside it cannot provide a key.
            continue
        # Use a separate name so the parameter being iterated is not rebound.
        nested = li.find("ul")
        result[key] = dictify(nested) if nested is not None else None
    return result
使用示例:
>>> from bs4 import BeautifulSoup
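>>> soup = BeautifulSoup("""
... <ul>
...   <li>Operating System
...     <ul>
...       <li>Linux
...         <ul>
...           <li>Debian</li>
...           <li>Fedora</li>
...           <li>Ubuntu</li>
...         </ul>
...       </li>
...       <li>Windows</li>
...       <li>OS X</li>
...     </ul>
...   </li>
...   <li>Programming Languages
...     <ul>
...       <li>Python</li>
...       <li>C#</li>
...       <li>Ruby</li>
...     </ul>
...   </li>
... </ul>
... """)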
>>> ul = soup.body.ul
>>> from pprint import pprint
>>> pprint(dictify(ul), width=1)
{u'Operating System': {u'Linux': {u'Debian': None,
                                  u'Fedora': None,
                                  u'Ubuntu': None},
                       u'OS X': None,
                       u'Windows': None},
 u'Programming Languages': {u'C#': None,
                            u'Python': None,
                            u'Ruby': None}}