KMP字符串匹配算法--Python版

参考大神文章:http://www.ruanyifeng.com/blog/2013/05/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm.html

# -*- coding: UTF-8 -*-
__author__ = 'jiang'

# Shared lookup table: partTable() stores one entry per prefix of findStr
# here, and findString() reads those entries while searching.
data_index = dict()

# Pattern that is searched for.
findStr = 'ABCDABD'

# Text that is scanned for the pattern.
text1 = 'BBC ABCDAB SDFSDAETASFSAFAafgsdfhXZVVBAABAGAABCDABCDABDE'

def findString():
    """Find the first occurrence of ``findStr`` in ``text1`` using KMP.

    Calls ``partTable()`` to fill ``data_index`` and then slides the pattern
    along the text.  After a mismatch that follows ``matched`` matching
    characters, the alignment advances by
    ``matched - data_index[findStr[:matched]]`` and the characters covered by
    that border are not compared again.

    This relies on ``data_index[p]`` being the length of the longest string
    that is both a proper prefix and a proper suffix of ``p``.

    Returns:
        The offset in ``text1`` where the pattern starts, or ``-1`` when the
        pattern does not occur.  An empty pattern matches at offset 0.  On
        success the report line is printed before returning.
    """
    partTable()
    text_len = len(text1)
    pat_len = len(findStr)
    index = 0    # offset in text1 where the current alignment starts
    matched = 0  # leading characters of findStr known to match at index

    # Stop as soon as the pattern no longer fits into the remaining text.
    while index + pat_len <= text_len:
        if matched == pat_len:
            # The report prints "matched length + 1" twice: these are the
            # values the two cursor counters held when a match was found.
            print('success find ' + findStr + ", ptr_index = " + str(matched + 1) + ", " + str(index) + ", " + str(matched + 1))
            return index
        if text1[index + matched] == findStr[matched]:
            matched += 1
            continue
        if matched == 0:
            # Nothing matched at this alignment: move one character on.
            index += 1
            continue
        # Look up the prefix that actually matched (not the one that
        # includes the mismatching character) and shift relative to the
        # current alignment so the search never moves backwards.
        keep = data_index[findStr[:matched]]
        index += matched - keep
        matched = keep
    return -1


def partTable(pattern=None, table=None):
    """Build the KMP partial-match table for every prefix of a pattern.

    For each non-empty prefix ``p`` of the pattern, the entry ``table[p]`` is
    set to the length of the longest string that is both a proper prefix and
    a proper suffix of ``p`` (0 when there is none, which is always the case
    for a single character).

    The value must be the longest such length, not the sum of all of them:
    for "AAA" the common strings are "A" and "AA", so the entry is 2.

    The prefix/suffix pairs and the finished table are printed as trace
    output.

    Args:
        pattern: String to analyse.  Defaults to the module-level ``findStr``.
        table: Dict that is filled in place.  Defaults to the module-level
            ``data_index``.

    Returns:
        The dict that was filled.
    """
    if pattern is None:
        pattern = findStr
    if table is None:
        table = data_index

    for end in range(1, len(pattern) + 1):
        td = pattern[:end]
        print('td=' + td)
        longest = 0
        for i in range(1, end):
            pre_td = td[:i]
            sufx_td = td[end - i:]
            print(pre_td + '->' + sufx_td)
            if pre_td == sufx_td:
                # i only grows, so the last hit is the longest one.
                longest = i
        table[td] = longest
    print(str(table))
    return table



# Module-level call: the demo search runs as soon as this file is executed
# or imported.
findString()
        

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值