您可以在 Python 中使用以下代码,分两步完成。首先,按由至少 4 个大写字母组成的单词拆分输入;然后,在每个匹配项的两侧各查找最多 4 个单词。
import re
# Sample input; the all-caps words of four or more letters are the matches.
text = 'Lorem IPSUM is simply DUMMY text of the printing and typesetting INDUSTRY'

# Step 1 pattern: a whole word of at least four upper-case ASCII letters.
# The capturing group makes re.split() keep the matched words in its output.
re1 = r'\b([A-Z]{4,})\b'
# Step 2 pattern: up to four words, each optionally preceded by whitespace.
re2 = r'(?:\s*\w+\b){,4}'

# With one capturing group, re.split() returns an odd-length list that
# alternates: even indexes hold the text between matches, odd indexes hold
# the matched words. Every odd index therefore has a neighbour on each side.
arr = re.split(re1, text)

result = []
for i in range(1, len(arr), 2):
    # re2 can match the empty string, so re.search() never returns None here.
    # NOTE(review): re.search() scans from the start of the segment, so the
    # "before" context is the first (up to) four words of the preceding
    # segment, which are not necessarily the words adjacent to the match.
    before = re.search(re2, arr[i - 1]).group()
    after = re.search(re2, arr[i + 1]).group()
    result.append((before, arr[i], after))

print(result)
输出:
[('Lorem', 'IPSUM', ' is simply'), (' is simply', 'DUMMY', ' text of the printing'), (' text of the printing', 'INDUSTRY', '')]