比较好理解的做法如下。
# Straightforward two-phase approach: first record where every key occurs in
# the text, then stitch the matched keys back together in positional order.
text = 'aaabbb/&edfg cccaaa'
keys = ['aaa', 'bbb', 'ccc']
found = {k: [] for k in keys}
total_length = len(text)
# Phase 1: scan the text once per key and store the start index of every
# occurrence of that key.
for key in keys:
    if not key:
        # An empty key would match everywhere without ever advancing `i`.
        continue
    length, i = len(key), 0
    while i + length <= total_length:
        if text[i:i + length] == key:
            found[key].append(i)
            i += length  # jump past the match so a key never overlaps itself
        else:
            i += 1
print(found)
# {'aaa': [0, 16], 'bbb': [3], 'ccc': [13]}
# Phase 2: flatten the table into (position, key order, key) tuples sorted by
# position. Ties at the same position keep the order of `keys`.
matches = sorted(
    (pos, order, key)
    for order, (key, positions) in enumerate(found.items())
    for pos in positions
)
pieces, next_match_index = [], -1
for pos, _, key in matches:
    # A key that starts exactly where the previous one ended is glued on
    # directly; any other key is separated by a single space. The very first
    # key never gets a separator, so the result has no leading space.
    if pieces and pos != next_match_index:
        pieces.append(' ')
    pieces.append(key)
    next_match_index = pos + len(key)
result = ''.join(pieces)
print(result)
# aaabbb cccaaa
优化循环次数后的写法如下:对上面的示例输入结果一样,但没那么好懂。注意下面示例字符串的开头是 4 个 a(上面是 3 个),所以输出中 aaa 和 bbb 之间会多一个空格。
# Single-pass variant: walk the text once and, at each position, look for the
# first key (in list order) that matches there.
text = 'aaaabbb/&edfg cccaaa'
keys = ['aaa', 'bbb', 'ccc']
total_length = len(text)
pieces, next_match_index, i = [], -1, 0
while i < total_length:
    # str.startswith(key, i) compares in place and is simply False when the
    # key would run past the end of the text, so no explicit bounds check is
    # needed. Empty keys are ignored because they would match everywhere.
    key = next((k for k in keys if k and text.startswith(k, i)), None)
    if key is None:
        i += 1
        continue
    # A key that starts exactly where the previous one ended is glued on
    # directly; any other key is separated by a single space. The very first
    # key never gets a separator, so the result has no leading space.
    if pieces and i != next_match_index:
        pieces.append(' ')
    pieces.append(key)
    # Resume right after this key: that is both where an adjacent key would
    # have to start and the next position worth checking.
    next_match_index = i + len(key)
    i = next_match_index
result = ''.join(pieces)
print(result)
# aaa bbb cccaaa