字典树or前缀树(Trie树)的Python模板及例题

今天刷到了字典树的知识,看了一篇文章感觉总结得挺好,链接放在下方。只不过文中的代码用的是 Java,而我平时刷题用的都是 Python,所以打算将文章中的代码都同步为 Python 版本,方便和我一样用 Python 的小伙伴学习参考~

构造字典树

经典的字典树(只包含26个小写字母)

208. 实现 Trie (前缀树)

class TrieNode:
    """One node of a trie over the 26 lowercase letters 'a'..'z'."""

    def __init__(self):
        # One slot per letter; None means that child has not been created.
        self.children = [None] * 26
        # True when the path from the root to this node spells an inserted word.
        self.isWord = False


class Trie:
    """Classic prefix tree supporting insert, exact search and prefix search."""

    def __init__(self):
        self.root = TrieNode()

    def _find(self, text):
        """Follow `text` from the root; return the node reached, or None."""
        node = self.root
        for letter in text:
            node = node.children[ord(letter) - ord('a')]
            if node is None:
                # The path breaks here: `text` is not a path in the trie.
                return None
        return node

    def insert(self, word):
        """Add `word` to the trie, creating missing nodes along its path."""
        node = self.root
        # For a suffix tree instead of a prefix tree, iterate reversed(word).
        for letter in word:
            # Key step: map the character to an index 0..25.
            slot = ord(letter) - ord('a')
            child = node.children[slot]
            if child is None:
                child = node.children[slot] = TrieNode()
            node = child
        # The node reached after the last character marks the end of a word.
        node.isWord = True

    def search(self, word):
        """Return True only if exactly `word` was inserted before."""
        node = self._find(word)
        return node is not None and node.isWord

    def startsWith(self, prefix):
        """Return True if `prefix` is a path in the trie (isWord is ignored)."""
        return self._find(prefix) is not None

例题——变式题目

变式1:利用字典树的构造过程——忽略后缀单词

820. 单词的压缩编码

class TrieNode:
    """One node of a trie over the 26 lowercase letters 'a'..'z'."""

    def __init__(self):
        self.isWord = False
        self.children = [None] * 26


class Trie:
    """Trie that stores every word back-to-front, so shared suffixes share a path."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        """Insert `word` reversed.

        Returns len(word) + 1 when at least one new node had to be created
        (the word contributes "word#" to the encoding), otherwise 0.
        """
        cur = self.root
        is_new = False
        for ch in reversed(word):
            c = ord(ch) - ord('a')
            if cur.children[c] is None:
                cur.children[c] = TrieNode()
                is_new = True
            cur = cur.children[c]
        cur.isWord = True
        return len(word) + 1 if is_new else 0


class Solution:
    def minimumLengthEncoding(self, words: List[str]) -> int:
        """Return the length of the shortest "word#word#..." encoding of `words`.

        The encoded length is the sum of (length + 1) over all words that are
        not a suffix of another word. Words are inserted longest first into a
        reversed trie: a word that is a suffix of an earlier (longer or equal)
        word creates no new node and therefore adds nothing.

        The input list is not modified; a sorted copy is used instead of
        sorting `words` in place.
        """
        trie = Trie()
        longest_first = sorted(words, key=len, reverse=True)
        return sum(trie.insert(word) for word in longest_first)

附评论中的简洁做法

class Solution:
    def minimumLengthEncoding(self, words: List[str]) -> int:
        """Build the encoding string directly and return its length.

        Words are handled longest first. A word is skipped when "word#"
        already occurs in the string built so far, i.e. when it is a suffix
        of a word that was appended earlier.
        """
        encoded = ""
        for word in sorted(words, key=len, reverse=True):
            token = word + "#"
            # "token in encoded" already implies "word in encoded".
            if token not in encoded:
                encoded += token
        return len(encoded)

变式2:利用字典树充分利用前缀(后缀)性质,优化暴力算法

面试题 17.13. 恢复空格

class TrieNode:
    """One node of a trie over the 26 lowercase letters 'a'..'z'."""

    def __init__(self):
        self.children = [None] * 26
        self.isWord = False


class Trie:
    """Trie of reversed words, used to find dictionary words ending at a position."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        """Insert `word` back-to-front."""
        node = self.root
        for letter in reversed(word):
            slot = ord(letter) - ord('a')
            if node.children[slot] is None:
                node.children[slot] = TrieNode()
            node = node.children[slot]
        node.isWord = True

    def search(self, sentence, end):
        """Return every start index i such that sentence[i:end + 1] is a stored word.

        The sentence is scanned backwards from `end`, so the indices come out
        in decreasing order.
        """
        starts = []
        node = self.root
        pos = end
        while pos >= 0:
            node = node.children[ord(sentence[pos]) - ord('a')]
            if node is None:
                # No stored word continues with this character.
                break
            if node.isWord:
                starts.append(pos)
            pos -= 1
        return starts


class Solution:
    def respace(self, dictionary, sentence):
        """Return the minimum number of characters of `sentence` left unrecognized.

        best[k] is the answer for the first k characters of `sentence`.
        """
        trie = Trie()
        for entry in dictionary:
            trie.insert(entry)

        total = len(sentence)
        best = [0] * (total + 1)
        for end in range(total):
            # Option 1: the character at `end` is unrecognized.
            candidates = [best[end] + 1]
            # Option 2: a dictionary word covers sentence[start:end + 1].
            candidates.extend(best[start] for start in trie.search(sentence, end))
            best[end + 1] = min(candidates)

        return best[total]


# Demo: run respace on a sample sentence and print the result.
dictionary = ["looked", "just", "like", "her", "brother"]
sentence = "jesslookedjustliketimherbrother"
solution = Solution()
result = solution.respace(dictionary, sentence)
print(result)

附原始动态规划代码

class Solution:
    def respace(self, dictionary, sentence):
        """Plain dynamic programming with a hash set of dictionary words.

        Returns the minimum number of characters of `sentence` that cannot be
        covered by dictionary words.
        """
        vocabulary = set(dictionary)
        length = len(sentence)
        # unmatched[k]: fewest unrecognized characters among the first k characters.
        unmatched = [0] * (length + 1)
        for end in range(1, length + 1):
            # Baseline: the character at index end - 1 is unrecognized.
            best = unmatched[end - 1] + 1
            for start in range(end):
                # sentence[start:end] excludes index `end`, which matches the
                # "first k characters" meaning of the table.
                if sentence[start:end] in vocabulary and unmatched[start] < best:
                    best = unmatched[start]
            unmatched[end] = best
        return unmatched[length]

变式3:search方法的变异——match递归

  • 经典的search方法,是通过一个cur指针(引用),根据word的字符,一条路走下去
  • 其实,它还有一个思路———每次判断一个节点是否配对 的【递归】写法 :
class TrieNode:
    """One node of a trie over the 26 lowercase letters 'a'..'z'."""

    def __init__(self):
        self.isWord = False
        self.children = [None] * 26


class Trie:
    """Prefix tree whose search is written recursively via `match`."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        """Insert `word` front-to-back.

        The insertion order must agree with `match`, which reads `word` from
        index 0 upwards; inserting the word reversed would make
        search(word) fail for every non-palindrome.
        """
        cur = self.root
        for ch in word:
            c = ord(ch) - ord('a')
            if cur.children[c] is None:
                cur.children[c] = TrieNode()
            cur = cur.children[c]
        cur.isWord = True

    def search(self, word):
        """Return True only if exactly `word` was inserted before."""
        return self.match(word, self.root, 0)

    def match(self, word, node, start):
        """Recursively check word[start:] against the subtree below `node`.

        Each call looks at the character word[start] and checks whether
        `node` has a child for it. `start` plays the role of the loop index
        of the iterative search. (word, node, start) is the template
        signature reused by the recursive variants.
        """
        if start == len(word):
            # Every character was consumed: the word is stored only if this
            # node ends a word.
            return node.isWord

        c = ord(word[start]) - ord('a')
        child = node.children[c]
        return child is not None and self.match(word, child, start + 1)


Q:我知道match递归写法很妙,但有什么用呢?cur一条路走到黑的思路不是更好理解吗?

A:恰恰是因为“cur一条路走到黑”的思路有弊端——有时我们需要走一个分叉的路去尝试更多的可能

通过下面的两道变式题目,就能理解递归型search的强大之处

变式4:含有通配符的字典树匹配——递归的search

211. 添加与搜索单词 - 数据结构设计

【笔记】该题与208. 实现 Trie (前缀树)大同小异,只需要对.单独处理就可以了。其他代码复用,只需要重写 search 辅助函数,当出现.时,需要遍历res->children_[26],该过程深入几层后,可能存在不满足情况,此时需要回溯,所以最好采用递归。(来自评论)

class TrieNode:
    """One node of a trie over the 26 lowercase letters 'a'..'z'."""

    def __init__(self):
        self.children = [None] * 26
        self.isWord = False


class WordDictionary:
    """Word store whose search pattern may contain '.' as a one-letter wildcard."""

    def __init__(self):
        self.root = TrieNode()

    def addWord(self, word):
        """Insert `word` into the trie."""
        node = self.root
        for letter in word:
            slot = ord(letter) - ord('a')
            if node.children[slot] is None:
                node.children[slot] = TrieNode()
            node = node.children[slot]
        node.isWord = True

    def search(self, word):
        """Return True if some stored word matches the pattern `word`."""
        return self.match(word, self.root, 0)

    def match(self, word, node, start):
        """Recursively match word[start:] against the subtree below `node`."""
        if start == len(word):
            return node.isWord

        letter = word[start]
        if letter == '.':
            # Wildcard: try every existing child and backtrack on failure.
            return any(
                child is not None and self.match(word, child, start + 1)
                for child in node.children
            )

        child = node.children[ord(letter) - ord('a')]
        return child is not None and self.match(word, child, start + 1)

变式5:允许且必须变化一个字符后再匹配——递归的search 

676. 实现一个魔法字典

>>> 你会遇到一个棘手的问题,就是当字典树中有"hello"和"hallo"时,search("hello")会返回false。

问题的关键在于:一般我们写search,都是根据word先算出下标————这会导致,字典树从hello这条路,一路走到头,因为没有修改任何一个字母导致返回false。
因此,千万要抛弃这个字典树的search模板,改为一次for(26)的遍历。

>>> 逻辑是:
发现这个字母可行后,再去看这个"可行的字母"是不是就是 word[start](即原文 Java 代码中的 word.charAt(start))
而不是根据 word[start],看这个字母是否"可行"  (可行的意思是:它是字典树当前节点下的一个合法子节点)

理解上面这句话,是解决第一行那个问题的关键。

class TrieNode:
    """One node of a trie over the 26 lowercase letters 'a'..'z'."""

    def __init__(self):
        self.isWord = False
        self.children = [None] * 26


class MagicDictionary:
    """Dictionary that matches a word only after changing exactly one character."""

    def __init__(self):
        self.root = TrieNode()

    def buildDict(self, dictionary: List[str]) -> None:
        """Insert every word of `dictionary` into the trie."""
        for word in dictionary:
            cur = self.root
            for ch in word:
                num = ord(ch) - ord('a')
                if cur.children[num] is None:
                    cur.children[num] = TrieNode()
                cur = cur.children[num]
            cur.isWord = True

    def search(self, word):
        """Return True if changing exactly one character of `word` yields a stored word."""
        return self.match(word, self.root, 0, False)

    def match(self, word, node, start, has_chance):
        """Recursively match word[start:] below `node`.

        `has_chance` is True once the single mandatory substitution has been
        used on the path taken so far. Every existing child is tried, not
        only the one for word[start], so that a sibling branch reached by a
        substitution is still explored when the unchanged path also exists.
        """
        if start == len(word):
            # Exactly one character must have been changed, so reaching the
            # end of a stored word only counts when the change was used.
            return node.isWord and has_chance

        target = ord(word[start]) - ord('a')
        for i, child in enumerate(node.children):
            if child is None:
                continue
            if i == target:
                # Same letter: continue without spending the substitution.
                if self.match(word, child, start + 1, has_chance):
                    return True
            elif not has_chance and self.match(word, child, start + 1, True):
                # Different letter: allowed only if the substitution is unused.
                return True
        return False

自己加的题

648. 单词替换

做法一:哈希集合

首先将 dictionary中所有词根放入哈希集合中,然后对于 sentence中的每个单词,由短至长遍历它所有的前缀,如果这个前缀出现在哈希集合中,则我们找到了当前单词的最短词根,将这个词根替换原来的单词。最后返回重新拼接的句子。

class Solution:
    def replaceWords(self, dictionary: List[str], sentence: str) -> str:
        """Replace each word of `sentence` by its shortest root from `dictionary`.

        For every word, prefixes are tried from shortest to longest; the first
        one found in the root set replaces the word. Words without a root are
        kept unchanged.
        """
        roots = set(dictionary)
        words = sentence.split(' ')
        for i, word in enumerate(words):
            # The bound is the length of the current word, not the number of
            # words in the sentence.
            for end in range(1, len(word) + 1):
                if word[:end] in roots:
                    words[i] = word[:end]
                    break
        return ' '.join(words)

作者:力扣官方题解
链接:https://leetcode.cn/problems/replace-words/solutions/1649109/dan-ci-ti-huan-by-leetcode-solution-pl6v/
来源:力扣(LeetCode)
著作权归作者所有。商业转载请联系作者获得授权,非商业转载请注明出处。

做法二:字典树

与哈希集合不同,我们用 dictionary中所有词根构建一棵字典树,并用特殊符号标记结尾。在搜索前缀时,只需在字典树上搜索出一条最短的前缀路径即可。

class Solution:
    def replaceWords(self, dictionary: List[str], sentence: str) -> str:
        """Replace each word of `sentence` by its shortest root, using a dict-based trie.

        The trie is a nest of plain dicts keyed by character; the key '#'
        marks a node where a root ends.
        """
        root = {}
        for stem in dictionary:
            node = root
            for ch in stem:
                node = node.setdefault(ch, {})
            node['#'] = {}

        def shorten(word):
            # Walk down the trie and stop at the first end-of-root marker.
            node = root
            for pos, ch in enumerate(word):
                if '#' in node:
                    return word[:pos]
                if ch not in node:
                    return word
                node = node[ch]
            return word

        return ' '.join(shorten(word) for word in sentence.split(' '))

作者:力扣官方题解
链接:https://leetcode.cn/problems/replace-words/solutions/1649109/dan-ci-ti-huan-by-leetcode-solution-pl6v/
来源:力扣(LeetCode)
著作权归作者所有。商业转载请联系作者获得授权,非商业转载请注明出处。
class Trie:
    """Trie node with dict-based children; the instance itself acts as the root."""

    def __init__(self):
        self.isEnd = False
        self.children = dict()

    def insert(self, word: str) -> None:
        """Insert `word` and mark the node of its last character as an end."""
        node = self
        for ch in word:
            nxt = node.children.get(ch)
            if nxt is None:
                nxt = node.children[ch] = Trie()
            node = nxt
        node.isEnd = True

    def search(self, word: str) -> str:
        """Return the shortest stored root that prefixes `word`, else `word` itself."""
        node = self
        for consumed, ch in enumerate(word, start=1):
            node = node.children.get(ch)
            if node is None:
                # No root lies along this path.
                return word
            if node.isEnd:
                return word[:consumed]
        return word


class Solution:
    def replaceWords(self, dictionary: List[str], sentence: str) -> str:
        """Replace each word of `sentence` by its shortest root from `dictionary`."""
        trie_tree = Trie()
        for stem in dictionary:
            trie_tree.insert(stem)
        return ' '.join(trie_tree.search(word) for word in sentence.split(" "))
# 哈希集合
class Solution:
    def replaceWords(self, dictionary: List[str], sentence: str) -> str:
        """Replace each word of `sentence` by its shortest root from `dictionary`.

        Roots are kept in a hash set. For each word the prefixes are tested
        from shortest to longest, and the word is kept when none is a root.
        """
        roots = set(dictionary)
        replaced = []
        for word in sentence.split(' '):
            # Candidate prefixes run up to the full length of this word.
            prefixes = (word[:end] for end in range(1, len(word) + 1))
            replaced.append(next((p for p in prefixes if p in roots), word))
        return ' '.join(replaced)

class Solution:
    def replaceWords(self, dict: List[str], sentence: str) -> str:
        """Replace each word of `sentence` by its shortest root from `dict`.

        All roots that prefix a given word are prefixes of one another, and a
        shorter prefix sorts before a longer one, so the first match in
        lexicographic order is the shortest root.

        The parameter name `dict` shadows the builtin; it is kept so existing
        keyword callers still work. The caller's list is not modified: a
        sorted copy is used instead of sorting in place.
        """
        roots = sorted(dict)
        words = sentence.split(' ')
        for i, word in enumerate(words):
            words[i] = next((r for r in roots if word.startswith(r)), word)
        return ' '.join(words)

# 字典树
class TrieNode:
    """One node of a trie over the 26 lowercase letters 'a'..'z'."""

    def __init__(self):
        self.isEnd = False
        self.children = [None] * 26


class Trie:
    """Array-based trie of roots."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        """Insert `word` and mark the node of its last character as an end."""
        node = self.root
        for letter in word:
            slot = ord(letter) - ord('a')
            if node.children[slot] is None:
                node.children[slot] = TrieNode()
            node = node.children[slot]
        node.isEnd = True

    def get_prefix(self, word):
        """Return the shortest stored root that is a proper prefix of `word`, else `word`."""
        node = self.root
        for pos, letter in enumerate(word):
            if node.isEnd:
                # The characters consumed so far form a root.
                return word[:pos]
            node = node.children[ord(letter) - ord('a')]
            if node is None:
                break
        return word


class Solution:
    def replace_words(self, dictionary, sentence):
        """Replace each word of `sentence` by its shortest root from `dictionary`."""
        tr = Trie()
        for stem in dictionary:
            tr.insert(stem)
        return " ".join(tr.get_prefix(word) for word in sentence.split(" "))

677. 键值映射

class Trie:
    """Trie node that also carries the value of the key ending at it."""

    def __init__(self):
        """Create an empty node with no children, no end mark and value 0."""
        self.children = dict()
        self.isEnd = False
        self.value = 0

    def insert(self, word: str, value: int) -> None:
        """Insert `word` and store `value` on its final node, overwriting any old value."""
        node = self
        for ch in word:
            nxt = node.children.get(ch)
            if nxt is None:
                nxt = node.children[ch] = Trie()
            node = nxt
        node.isEnd = True
        node.value = value

    def search(self, word: str) -> int:
        """Return the sum of the values of all stored keys that start with `word`.

        Returns 0 when no stored key has `word` as a prefix.
        """
        node = self
        for ch in word:
            node = node.children.get(ch)
            if node is None:
                return 0
        return self.dfs(node)

    def dfs(self, root) -> int:
        """Return the sum of `value` over `root` and every node below it."""
        if not root:
            return 0
        # Start from this node's own value, then add each child subtree in turn.
        return sum((self.dfs(child) for child in root.children.values()), root.value)


class MapSum:
    """Key/value map whose sum(prefix) adds up the values of all keys with that prefix."""

    def __init__(self):
        """Create an empty map backed by a trie."""
        self.trie_tree = Trie()

    def insert(self, key: str, val: int) -> None:
        """Store `val` under `key`, replacing any previous value."""
        self.trie_tree.insert(key, val)

    def sum(self, prefix: str) -> int:
        """Return the total value of all keys starting with `prefix`."""
        return self.trie_tree.search(prefix)
# 暴力
class MapSum:
    """Brute-force version: a plain dict that is scanned on every sum() call."""

    def __init__(self):
        self.map = {}

    def insert(self, key: str, val: int) -> None:
        """Store `val` under `key`, replacing any previous value."""
        self.map[key] = val

    def sum(self, prefix: str) -> int:
        """Return the total value of all keys starting with `prefix`."""
        return sum(val for key, val in self.map.items() if key.startswith(prefix))

# 前缀哈希映射
class MapSum:
    """Keeps a running total for every non-empty prefix of every inserted key."""

    def __init__(self):
        self.map = {}        # key -> current value
        self.prefixmap = {}  # prefix -> sum of values of keys with that prefix

    def insert(self, key: str, val: int) -> None:
        """Store `val` under `key` and adjust the totals of all its prefixes."""
        # On a re-insert only the difference to the old value is propagated.
        delta = val - self.map.get(key, 0)
        self.map[key] = val
        for end in range(1, len(key) + 1):
            head = key[:end]
            self.prefixmap[head] = self.prefixmap.get(head, 0) + delta

    def sum(self, prefix: str) -> int:
        """Return the stored total for `prefix`, or 0 if it was never recorded."""
        return self.prefixmap.get(prefix, 0)

# 字典树[仿照前缀哈希映射]
class TrieNode:
    """Trie node over 'a'..'z' holding the total value of all keys passing through it."""

    def __init__(self):
        self.next = [None] * 26
        self.val = 0


class MapSum:
    """Trie version of the prefix-total idea: each node stores its prefix's total."""

    def __init__(self):
        self.map = {}  # key -> current value, needed to compute deltas on re-insert
        self.root = TrieNode()

    def insert(self, key: str, val: int) -> None:
        """Store `val` under `key` and add the change to every node on its path."""
        delta = val - self.map.get(key, 0)
        self.map[key] = val
        node = self.root
        for letter in key:
            slot = ord(letter) - ord('a')
            child = node.next[slot]
            if child is None:
                child = node.next[slot] = TrieNode()
            child.val += delta
            node = child

    def sum(self, prefix: str) -> int:
        """Return the total stored on the node for `prefix`, or 0 if there is no such path."""
        node = self.root
        for letter in prefix:
            node = node.next[ord(letter) - ord('a')]
            if node is None:
                return 0
        return node.val

 

 

【图解算法】模板+变式——带你彻底搞懂字典树(Trie树)

力扣(LeetCode)官网 - 全球极客挚爱的技术成长平台

  • 19
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值