class Node(object):
    """A trie node holding a value and its children keyed by character."""

    def __init__(self, value) -> None:
        # Maps one character to the child Node reached through it.
        self._children = {}
        # Payload stored at this node.
        self._value = value

    def _add_child(self, char, value, overwrite=False):
        """Return the child reached through *char*, creating it if needed.

        Args:
            char: The edge label leading to the child.
            value: Value given to a newly created child, or written onto
                an existing child when *overwrite* is true.
            overwrite: When true, replace the value of an existing child.
                An existing child is left untouched otherwise.

        Returns:
            The (possibly new) child node.
        """
        child = self._children.get(char)
        if child is None:
            child = Node(value)
            self._children[char] = child
        elif overwrite:
            child._value = value
        return child
class Trie(Node):
    """Character trie with dict-style access.

    The trie itself is the root node. A key whose stored value is ``None``
    is treated as absent, so assigning ``None`` to a key acts as deletion.
    """

    def __init__(self) -> None:
        super().__init__(None)

    def __contains__(self, key):
        """Return True when *key* maps to a value other than ``None``."""
        return self[key] is not None

    def __getitem__(self, key):
        """Return the value stored under *key*, or ``None`` if there is none."""
        state = self
        for char in key:
            state = state._children.get(char)
            if state is None:
                # The path for this key does not exist.
                return None
        return state._value

    def __setitem__(self, key, value):
        """Store *value* under *key*, creating intermediate nodes as needed.

        Nodes along the path keep whatever value they already have; only
        the node for the final character is overwritten. An empty key
        stores nothing.
        """
        state = self
        last = len(key) - 1
        for i, char in enumerate(key):
            if i < last:
                # Prefix node: create with no value, never clobber.
                state = state._add_child(char, None, False)
            else:
                state = state._add_child(char, value, True)


if __name__ == '__main__':
    trie = Trie()
    # Create
    trie['自然'] = 'nature'
    trie['自然人'] = 'human'
    trie['自然语言'] = 'language'
    trie['自语'] = 'talk to oneself'
    trie['入门'] = 'introduction'
    assert '自然' in trie
    # Delete
    trie['自然'] = None
    assert '自然' not in trie
    # Update
    trie['自然语言'] = 'human language'
    assert trie['自然语言'] == 'human language'
    # Read
    assert trie['入门'] == 'introduction'
    print(trie)
# 词典树与最大后向匹配结合 (combining the dictionary trie with backward maximum matching)
classNode(object):def__init__(self, value)->None:
self._children ={}
self._value = value
def_add_child(self, char, value, overwrite=False):
child = self._children.get(char)if child isNone:
child = Node(value)
self._children[char]= child
elif overwrite:
child._value = value
return child
classTrie(Node):def__init__(self)->None:super().__init__(None)def__contains__(self, key):return self[key]isnotNonedef__getitem__(self, key):
state = self
for char in key:
state = state._children.get(char)if state isNone:returnNonereturn state._value
def__setitem__(self, key, value):
state = self
for i, char inenumerate(key):if i <len(key)-1:
state = state._add_child(char,None,False)else:
state = state._add_child(char, value,True)defbackward_match(dic,text):
i =len(text)-1
word_list =[]while i >0:
word = text[i]for j inrange(i,-1,-1):
long_word = text[j:i+1]if long_word in dic andlen(long_word)>len(word):
word = long_word
word_list.append(word)
i -=len(word)return word_list
dic = {"效果", "研究", "口红", "中国", "进口", "红酒", "中国进口", "研究生", "起源", "生命"}
# 1. Instantiate the trie.
trie = Trie()
# 2. Load every dictionary word into the trie.
for word in dic:
    trie[word] = 1
text = "研究生命起源"
# Match against the trie built above rather than the raw set, so the
# trie is actually the structure being queried.
print(backward_match(trie, text))