python3 查找一段文字内指定字符串具体位置

使用 Aho-Corasick(AC 自动机)算法,在一段文字内同时查找多个指定字符串,返回每处匹配的起始下标及匹配到的字符串。

class TrieNode:
    """One node of the keyword trie used by the matcher.

    Attributes are fixed via ``__slots__`` (no per-instance ``__dict__``):

    * ``value`` -- the label passed to the constructor.
    * ``next``  -- dict of child nodes, initially empty.
    * ``fail``  -- fallback node, ``None`` until assigned by the builder.
    * ``emit``  -- words ending at this node, ``None`` until assigned.
    """

    __slots__ = ['value', 'next', 'fail', 'emit']

    def __init__(self, value):
        # Links start unset; whoever builds the trie fills them in later.
        self.fail = self.emit = None
        self.next = {}
        self.value = value


class AhoCorasic(object):
    """Multi-keyword substring matcher built on an Aho-Corasick automaton.

    The automaton is built once from a list of words; ``search`` then scans
    a text in a single left-to-right pass and reports every occurrence of
    every word, including occurrences that overlap each other or that are
    nested inside a longer word.
    """

    __slots__ = ['_root']

    def __init__(self, words):
        """Build the automaton for ``words`` (a non-empty list of strings)."""
        self._root = AhoCorasic._build_trie(words)

    @staticmethod
    def _build_trie(words):
        """Build the trie, then wire fail links and propagate outputs.

        Args:
            words: non-empty list of keywords. Empty keywords are ignored
                (they can never be reported as a match).

        Returns:
            The root ``TrieNode`` of the finished automaton.

        Raises:
            AssertionError: if ``words`` is not a non-empty list.
        """
        # Local import keeps this block self-contained.
        from collections import deque

        assert isinstance(words, list) and words, \
            'words must be a non-empty list'

        root = TrieNode('root')
        for word in words:
            if not word:
                # An empty word would end at the root; once outputs are
                # propagated along fail links it would "match" everywhere.
                continue
            node = root
            for c in word:
                child = node.next.get(c)
                if child is None:
                    child = node.next[c] = TrieNode(c)
                node = child
            if node.emit is None:
                # A tuple keeps the report order deterministic
                # (own word first, inherited shorter words afterwards).
                node.emit = (word,)

        # Breadth-first traversal: a node's fail target is always shallower,
        # so it is fully wired (fail + emit) before the node itself is visited.
        # deque gives O(1) pops from the left, unlike list.insert(0, ...).
        queue = deque()
        for child in root.next.values():
            child.fail = root
            queue.append(child)

        while queue:
            parent = queue.popleft()
            for key, child in parent.next.items():
                fail = parent.fail
                while fail is not None and key not in fail.next:
                    fail = fail.fail
                child.fail = root if fail is None else fail.next[key]
                # Inherit the outputs of the fail target so that words which
                # are suffixes of the current path are reported too.
                if child.fail.emit:
                    child.emit = (child.emit or ()) + child.fail.emit
                queue.append(child)
        return root

    def search(self, s):
        """Find all occurrences of the configured words in ``s``.

        Args:
            s: the text to scan.

        Returns:
            A list of ``(start_index, word)`` tuples ordered by the position
            at which each match ends; matches ending at the same position
            are listed longest first.
        """
        root = self._root
        node = root
        found = []
        for i, c in enumerate(s):
            # Fall back along fail links until some state accepts ``c``;
            # the root is the last resort and has no fail link.
            while node is not root and c not in node.next:
                node = node.fail
            node = node.next.get(c, root)
            if node.emit:
                end = i + 1
                found.extend((end - len(word), word) for word in node.emit)
        return found


if __name__ == '__main__':
    # Demo: build a matcher for two keywords and print every
    # (start_index, keyword) hit found in the sample sentence.
    keywords = ['神奇', '宝贝']
    sample_text = '神奇的精灵宝可梦是个宝贝,神奇'
    matcher = AhoCorasic(keywords)
    hits = matcher.search(sample_text)
    print(hits)
运行结果:
[(0, '神奇'), (10, '宝贝'), (13, '神奇')]

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值