字符串集合常用字典树存储,这是一种字符串上的树形数据结构。字典树中每条边都对应一个字,从根节点往下的路径构成一个个字符串。字典树并不直接在节点上存储字符串,而是将词语视作根节点到某节点之间的一条路径,并在终点节点上做个标记“该节点对应词语的结尾”。字符串就是一条路径,要查询一个单词,只需要顺着这条路径从根节点往下走。如果能走到特殊标记的节点,则说明该字符串在集合中,否则说明不存在。
字典树的节点实现
class Node(object):
def __init__(self, value) -> None:
self._children = {}
self._value = value
def _add_child(self, char, value, overwrite=False):
child = self._children.get(char)
if child is None:
child = Node(value)
self._children[char] = child
elif overwrite:
child._value = value
return child
字典树增删改查实现
“增删改查都是查”
class Node(object):
def __init__(self, value) -> None:
self._children = {}
self._value = value
def _add_child(self, char, value, overwrite=False):
child = self._children.get(char)
if child is None:
child = Node(value)
self._children[char] = child
elif overwrite:
child._value = value
return child
class Trie(Node):
def __init__(self) -> None:
super().__init__(None)
def __contains__(self, key):
return self[key] is not None
def __getitem__(self, key):
state = self
for char in key:
state = state._children.get(char)
if state is None:
return None
return state._value
def __setitem__(self, key, value):
state = self
for i, char in enumerate(key):
if i < len(key) - 1:
state = state._add_child(char, None, False)
else:
state = state._add_child(char, value, True)
if __name__ == '__main__':
trie = Trie()
# 增
trie['自然'] = 'nature'
trie['自然人'] = 'human'
trie['自然语言'] = 'language'
trie['自语'] = 'talk to oneself'
trie['入门'] = 'introduction'
assert '自然' in trie
# 删
trie['自然'] = None
assert '自然' not in trie
# 改
trie['自然语言'] = 'human language'
assert trie['自然语言'] == 'human language'
# 查
assert trie['入门'] == 'introduction'