由于你原来的 HTML 不合规范，我稍微改了一下。
下面是用 lxml 实现的。
from lxml import html

# NOTE(review): the tags of this sample document were missing from the source
# (only the text nodes survived), so no element carried id "a1" and the lookup
# below raised KeyError. The markup here is a reconstruction around the
# surviving text -- confirm it against the intended sample.
doc = '''
<html><head><title>The Dormouse's story</title></head><body><p>p1p1p1</p>
<p><b id="a1">b1b1b1</b></p>
<p>p2p2p2</p>
<p>p4p4p4</p>
</body></html>
'''

# Parse the document and pick the element after which content gets removed.
tree = html.fromstring(doc)
a = tree.get_element_by_id("a1")

# Show the selected element and the whole document before any modification.
# Both are decoded so that text, not a bytes repr, is printed.
print(html.tostring(a).decode())
print(html.tostring(tree).decode())
def dropnode(e=None):
    """Drop everything that follows ``e``, walking up until ``body``.

    Every following sibling of ``e`` is dropped, then every following
    sibling of its parent, of its grandparent, and so on. The walk stops
    when it reaches an element whose tag is ``'body'`` (that element's own
    siblings are left untouched) or when it runs past the root.

    Args:
        e: Element to start from, or ``None`` for a no-op. It must provide
            ``tag``, ``getnext()`` and ``getparent()``; its following
            siblings must provide ``drop_tree()``.

    Returns:
        None. The tree is modified in place.
    """
    # NOTE(review): with lxml.html elements, drop_tree() is documented to
    # keep the dropped element's tail text (it is merged into the preceding
    # content), so bare text between the removed siblings presumably stays
    # in the document -- confirm this is the intended result.

    # Climb the ancestor chain with a loop rather than recursion so that
    # very deep trees cannot exhaust the interpreter's recursion limit.
    while e is not None and e.tag != 'body':
        # Re-query getnext() after each removal: dropping a sibling detaches
        # it, so the element that followed it becomes the new next sibling.
        nd = e.getnext()
        while nd is not None:
            nd.drop_tree()
            nd = e.getnext()
        e = e.getparent()
# Remove everything that follows the selected element, then print what is
# left of the document.
dropnode(a)
remaining_markup = html.tostring(tree).decode()
print(remaining_markup)