前缀树,也叫字典树,Trie
前缀树 是一种树形数据结构,用于高效地存储和检索字符串数据集中的键。这一数据结构有相当多的应用情景,例如自动补完和拼写检查。
可以简单地认为,Trie是一种26叉树,每一个节点都有26个子节点,对应a-z。
单词不存储于字典树的节点内,而是字典树从根节点到该节点的路径
在具体实现上,python使用字典性能更好,而C++则是使用数组性能更好
字典树练习题集 https://leetcode-cn.com/tag/trie/problemset/实现 Trie (前缀树)
板子
class Trie:
def __init__(self):
self.root={}
def insert(self, word: str) -> None:
cur=self.root
for w in word: # in 判断键是否在字典中
if w not in cur:
cur[w]={}
cur = cur[w]
cur['#']='#'
def search(self, word: str) -> bool:
cur = self.root
for w in word:
if w not in cur:
return False
cur = cur[w]
return '#' in cur
def startsWith(self, prefix: str) -> bool:
cur = self.root
for w in prefix:
if w not in cur:
return False
cur = cur[w]
return True
添加与搜索单词 - 数据结构设计
注意
if cur[c]=='#':continue
,不能忘了
class WordDictionary:
def __init__(self):
self.root={}
def addWord(self, word: str) -> None:
cur=self.root
for w in word:
if w not in cur:
cur[w]={}
cur = cur[w]
cur['#']='#'
def search(self, word: str) -> bool:
def dfs(cur,word):
for i,w in enumerate(word):
if w=='.':
for c in cur:
if cur[c]=='#':continue
if dfs(cur[c],word[i+1:]):return True
return False
if w not in cur:
return False
cur=cur[w]
return '#' in cur
return dfs(self.root,word)
词典中最长的单词
class Solution:
def longestWord(self, words: List[str]) -> str:
root={}
def insert(word):
cur = root
for w in word:
if w not in cur:
cur[w]={}
cur= cur[w]
cur['#'] = '#'
def helper(word):
'''判断该单词是否由words词典中其他单词逐步添加一个字母组成'''
cur=root
for w in word:
cur=cur[w]
if '#' not in cur:
return False
return True
for w in words:
insert(w)
res=0 # 最大长度
for w in words:
if helper(w):
res=max(res,len(w))
ans=[]
for w in words:
if helper(w) and len(w)==res:
ans.append(w)
ans.sort()
return "" if not ans else ans[0]
字典序排数
字典树+DFS
class Solution:
def lexicalOrder(self, n: int) -> List[int]:
root={}
def insert(word):
cur=root
for w in word:
if w not in cur:
cur[w]={}
cur=cur[w]
cur['#']='#'
for i in range(1,n+1):
insert(str(i))
def dfs(root,path):
if '#' in root:
res.append(''.join(path[:]))
if len(root)==1:
return
for r in root:
if r=='#':continue
path.append(r)
dfs(root[r],path)
path.pop()
res=[]
dfs(root,[])
return list(map(int,res))
单词替换
找到一个单词的前缀直接返回,可以保证最短
class Solution:
def replaceWords(self, dictionary: List[str], sentence: str) -> str:
root={}
def insert(word):
cur=root
for w in word:
if w not in cur:
cur[w]={}
cur=cur[w]
cur['#']='#'
def match(word):
cur=root
res=''
for w in word:
if w not in cur:
return
cur=cur[w]
res+=w
if cur.get('#'):
return res
for d in dictionary:
insert(d)
res=[]
for word in sentence.split():
tmp=match(word)
if not tmp:
res.append(word)
else:
res.append(tmp)
return ' '.join(res)
实现一个魔法字典
单词比如hello,可以构造候选单词列表:aello……zello,hallo……hzllo,然后在trie里面搜索
class MagicDictionary:
def __init__(self):
self.root={}
def buildDict(self, dictionary: List[str]) -> None:
def insert(word):
cur=self.root
for w in word:
if w not in cur:
cur[w]={}
cur=cur[w]
cur['#']='#'
for d in dictionary:
insert(d)
def search(self, searchWord: str) -> bool:
def dfs(word):
cur=self.root
for w in word:
if w not in cur:
return False
cur=cur[w]
return cur.get('#')
searchWord=list(searchWord)
words=[chr(i) for i in range(ord('a'),ord('z')+1)]
for i,w in enumerate(searchWord):
for x in words:
if x!=w:
newWord=searchWord[:i]+[x]+searchWord[i+1:]
if dfs(''.join(newWord)):
return True
return False
键值映射
找到根节点后,遍历多叉树累加val
class MapSum:
def __init__(self):
self.root={}
def insert(self, key: str, val: int) -> None:
cur=self.root
for k in key:
if k not in cur:
cur[k]={}
cur=cur[k]
cur['#']=val #!!!
def sum(self, prefix: str) -> int:
cur=self.root
res=[0]
for p in prefix:
if p not in cur:
return 0
cur=cur[p] # 找到根节点
def dfs(root):
if root.get('#'):
res[0]+=root['#']
for r in root:
if isinstance(root[r],dict):
dfs(root[r])
dfs(cur)
return res[0]
面试题 16.02. 单词频率
用cur[’#’]记录频率
class WordsFrequency:
def __init__(self, book: List[str]):
self.root={}
def insert(word):
cur=self.root
for w in word:
if w not in cur:
cur[w]={}
cur=cur[w]
if cur.get('#'):
cur['#']+=1
else:
cur['#']=1
for b in book:
insert(b)
def get(self, word: str) -> int:
cur=self.root
for w in word:
if w not in cur:
return 0
cur=cur[w]
if cur.get('#'):
return cur['#']
else:
return 0