Rabin-Karp算法
# coding: utf-8
class RabinKarp(object):
    """Rabin-Karp substring search based on a rolling polynomial hash.

    The pattern and every length-M window of the text are hashed as a
    base-R number modulo Q. A window whose hash equals the pattern hash is
    then compared character by character (see ``check``), so a hash
    collision can never produce a wrong answer.

    Building the object hashes the pattern in O(M). ``search`` does constant
    work per text position plus an O(M) comparison on every hash match.

    The constructor and ``search`` print the intermediate hash values as a
    trace of the algorithm.

    Attributes:
        pat: the pattern being searched for.
        txt: the text being searched.
        M: length of the pattern.
        Q: modulus of the hash (997).
        R: radix of the hash (10, matching decimal-digit input).
        RM: R^(M-1) % Q, the weight of a window's leading character.
        pat_hash: hash of the pattern.
    """

    def __init__(self, pat, txt):
        """Store the inputs and precompute the pattern hash.

        Args:
            pat: pattern to look for.
            txt: text to search in.
        """
        self.M = len(pat)
        self.Q = 997
        self.R = 10
        self.txt = txt
        self.pat = pat
        # RM = R^(M-1) % Q. The exponent is clamped at 0 so that an empty
        # pattern yields 1 instead of asking pow() for a negative power.
        self.RM = pow(self.R, max(self.M - 1, 0), self.Q)
        print('RM: %s' % self.RM)
        self.pat_hash = self.hash(pat, self.M)
        print('pat hash: %s' % self.pat_hash)

    @staticmethod
    def _char_value(ch):
        """Return the integer that ``ch`` contributes to the hash.

        Anything ``int()`` accepts (decimal digits in particular) keeps its
        numeric value; any other character falls back to its code point so
        that arbitrary text can be hashed instead of raising ValueError.
        """
        try:
            return int(ch)
        except (TypeError, ValueError):
            return ord(ch)

    def hash(self, key, m):
        """Return the hash of the first ``m`` items of ``key``.

        Uses Horner's rule, reducing modulo Q at every step so the
        intermediate value stays small.
        """
        h = 0
        for ch in key[:m]:
            h = (self.R * h + self._char_value(ch)) % self.Q
        return h

    def check(self, i):
        """Return True if the text window starting at ``i`` equals the pattern."""
        return self.txt[i:i + self.M] == self.pat

    def search(self):
        """Return the index of the first occurrence of the pattern, or -1.

        An empty pattern matches at index 0.
        """
        N = len(self.txt)
        # A text shorter than the pattern cannot contain it; bail out before
        # hashing anything.
        if N < self.M:
            return -1

        txt_hash = self.hash(self.txt, self.M)
        print('txt hash: %s' % txt_hash)
        if txt_hash == self.pat_hash and self.check(0):
            return 0

        value = self._char_value
        for i in range(self.M, N):
            # Remove the contribution of the character leaving the window.
            # Python's % always returns a non-negative result here, so the
            # subtraction cannot leave txt_hash negative.
            leading = value(self.txt[i - self.M])
            txt_hash = (txt_hash + self.Q - self.RM * leading) % self.Q
            # Shift the window one place and append the entering character.
            txt_hash = (txt_hash * self.R + value(self.txt[i])) % self.Q
            print(txt_hash)

            start = i - self.M + 1
            if txt_hash == self.pat_hash and self.check(start):
                return start
        return -1
if __name__ == '__main__':
    # Demo: find '26535' in the leading digits of pi and print the index of
    # the first match. print() with a single argument behaves the same on
    # Python 2 and Python 3.
    rk = RabinKarp('26535', '3141592653589793')
    print(rk.search())
运行结果:
RM: 30
pat hash: 613
txt hash: 508
201
715
971
442
929
613
6
小结
预处理模式字符串(计算其散列值)的时间为 O(m);查找阶段的平均时间复杂度为线性级别 O(n)(约 7n 次运算)。由于散列值相等时还要逐字符校验,在散列冲突极多的最坏情况下时间复杂度会退化为 O(nm)。