用正则表达式分段提取中文内容——目前还没完全看懂，先记录下来

最新推荐文章于 2024-07-07 03:13:51 发布

assistx

最新推荐文章于 2024-07-07 03:13:51 发布

阅读量923

点赞数

#-*- coding: UTF-8 -*-
import re

# Sample HTML fragment: the text inside <h2>, <p> and <h3> is what we want
# to pull out, one named group per section.
s = '''<html>
<head>
</head>
<body>.....
<li>......</li>
<h2>
我需要的内容h2
</h2>
<p>
我需要的内容p
</p>
<h3>
我需要的内容h3
</h3>'''

# Each named group lazily captures the text between one opening tag and its
# closing tag. The `.*?<h3>` before the H3 group skips the whitespace and the
# opening tag itself; without it the group would capture "<h3>" as well.
res = r'.*?<h2>(?P<H2>.*?)</h2>.*?<p>(?P<P>.*?)</p>.*?<h3>(?P<H3>.*?)</h3>'
# re.S lets "." match newlines, since every section spans several lines.
target = re.compile(res, re.S | re.M)
match = target.search(s)
print(match)
if match:
    # Captured text keeps the newlines that surround it in the source HTML.
    for k, v in match.groupdict().items():
        print(k, ': ', v)
        print('=====================')