1.KMP算法
代码
def
compute_prefix_function(p):
m = len(p)
pi = [0] * m
k = 0
for q in range( 1 , m):
while k > 0 and p[k] != p[q]:
k = pi[k - 1 ]
if p[k] == p[q]:
k = k + 1
pi[q] = k
return pi
def kmp_matcher(t, p):
n = len(t)
m = len(p)
pi = compute_prefix_function(p)
q = 0
for i in range(n):
while q > 0 and p[q] != t[i]:
q = pi[q - 1 ]
if p[q] == t[i]:
q = q + 1
if q == m:
return i - m + 1
return - 1
m = len(p)
pi = [0] * m
k = 0
for q in range( 1 , m):
while k > 0 and p[k] != p[q]:
k = pi[k - 1 ]
if p[k] == p[q]:
k = k + 1
pi[q] = k
return pi
def kmp_matcher(t, p):
n = len(t)
m = len(p)
pi = compute_prefix_function(p)
q = 0
for i in range(n):
while q > 0 and p[q] != t[i]:
q = pi[q - 1 ]
if p[q] == t[i]:
q = q + 1
if q == m:
return i - m + 1
return - 1
2.BM算法例子
代码
def
BoyerMooreHorspool(pattern, text):
m = len(pattern)
n = len(text)
if m > n: return - 1
skip = []
for k in range( 256 ): skip.append(m)
for k in range(m - 1 ): skip[ord(pattern[k])] = m - k - 1
skip = tuple(skip)
k = m - 1
while k < n:
j = m - 1 ; i = k
while j >= 0 and text[i] == pattern[j]:
j -= 1 ; i -= 1
if j == - 1 : return i + 1
k += skip[ord(text[k])]
return - 1
if __name__ == ' __main__ ' :
text = " this is the string to search in "
pattern = " the "
s = BoyerMooreHorspool(pattern, text)
print ' Text: ' ,text
print ' Pattern: ' ,pattern
if s > - 1 :
print ' Pattern \" ' + pattern + ' \" found at position ' ,s
m = len(pattern)
n = len(text)
if m > n: return - 1
skip = []
for k in range( 256 ): skip.append(m)
for k in range(m - 1 ): skip[ord(pattern[k])] = m - k - 1
skip = tuple(skip)
k = m - 1
while k < n:
j = m - 1 ; i = k
while j >= 0 and text[i] == pattern[j]:
j -= 1 ; i -= 1
if j == - 1 : return i + 1
k += skip[ord(text[k])]
return - 1
if __name__ == ' __main__ ' :
text = " this is the string to search in "
pattern = " the "
s = BoyerMooreHorspool(pattern, text)
print ' Text: ' ,text
print ' Pattern: ' ,pattern
if s > - 1 :
print ' Pattern \" ' + pattern + ' \" found at position ' ,s
这两个算法主要应用于字符串匹配。网上评论说BM性能优于KMP,我没验证过。改天可以用cProfile测试一下。
ps:今天分别用这两个算法,查找了69K文档的最后一行字符串,KMP用了0.053个CPU时间,BM仅用了0.025个CPU时间。
其实我非常想看一下sunday算法,据说是BM的改进,提升不少性能。研究了一下算法,是人性多了。但是现在网上没有它的python实现,改天尝试搞一个出来。