# Reference article: http://www.ruanyifeng.com/blog/2013/05/Knuth–Morris–Pratt_algorithm.html
# -*- coding: UTF-8 -*-
__author__ = "jiang"

# Text that findString() scans.
text1 = "BBC ABCDAB SDFSDAETASFSAFAafgsdfhXZVVBAABAGAABCDABCDABDE"

# Pattern that findString() looks for inside text1.
findStr = "ABCDABD"

# Filled by partTable(): maps each non-empty prefix of findStr to the
# partial-match value that partTable() computes for it.
data_index = dict()
def findString():
    """Search ``text1`` for the first occurrence of ``findStr`` (KMP search).

    ``partTable()`` is called first so that ``data_index`` holds the
    partial-match value of every prefix of ``findStr``.  The text is then
    scanned left to right; on a mismatch the pattern is shifted forward by
    ``matched - overlap`` characters, where ``overlap`` is the table value of
    the prefix that had matched so far, and the comparison resumes after the
    overlapping characters instead of starting over.

    When a match is found a ``'success find ...'`` line is printed.

    Returns:
        The index in ``text1`` at which the first match starts, or ``-1``
        when ``findStr`` does not occur in ``text1``.
    """
    partTable()  # populate data_index for every prefix of findStr

    pattern_len = len(findStr)
    text_len = len(text1)
    index = 0    # position in text1 where the current alignment starts
    matched = 0  # number of leading findStr characters matched at `index`

    # Stop as soon as the pattern no longer fits in the remaining text.
    while index + pattern_len <= text_len:
        if matched == pattern_len:
            # Keep the values the original success message reported:
            # both counters sit one past the pattern length on success.
            ptr_index = findIndex = matched + 1
            print('success find ' + findStr + ", ptr_index = " + str(ptr_index) + ", " + str(index) + ", " + str(findIndex))
            return index

        if text1[index + matched] == findStr[matched]:
            matched += 1
            continue

        if matched == 0:
            # Nothing matched at this alignment: slide by one character.
            index += 1
        else:
            # Look up the prefix that actually matched (not the one that
            # includes the mismatching character) and advance relative to
            # the current position; the overlapping part stays matched.
            overlap = data_index[findStr[:matched]]
            index += matched - overlap
            matched = overlap

    return -1
def partTable(pattern=None):
    """Build the KMP partial-match table for a pattern.

    For every non-empty prefix of the pattern the table stores the length of
    the longest string that is both a proper prefix and a proper suffix of
    that prefix.  Trace lines (``td=...`` and ``prefix->suffix``) and the
    finished table are printed while it is built.

    Args:
        pattern: The string to analyse.  When omitted (``None``) the
            module-level ``findStr`` is analysed and the entries are written
            into the module-level ``data_index`` dict.

    Returns:
        The dict mapping each non-empty prefix of the pattern to its
        partial-match value.  This is ``data_index`` itself when ``pattern``
        is omitted, otherwise a new dict.
    """
    if pattern is None:
        pattern = findStr
        table = data_index  # fill the shared module-level table in place
    else:
        table = {}

    for end in range(1, len(pattern) + 1):
        prefix = pattern[:end]
        print('td=' + prefix)
        longest = 0  # a single character has no proper prefix/suffix
        for size in range(1, end):
            head = prefix[:size]
            tail = prefix[end - size:]
            print(head + '->' + tail)
            if head == tail:
                # Sizes are visited in increasing order, so the last hit is
                # the longest overlap.  Summing every hit would over-count
                # for repetitive patterns, e.g. 'AAA' -> 1 + 2 = 3, not 2.
                longest = size
        table[prefix] = longest

    print(str(table))
    return table
# Run the search once at module load (this also happens when the file is imported).
findString()