搜索树包括B_树、B+树、Trie树等以及它们的各种变形。用Trie树搜索一个关键码的时间与关键码本身及其长度有关,最快是O(1),即在第一层即可判断是否搜索到,最坏的情况是O(n),n为Trie树的层数。
Trie树的缺点是占内存,因为Trie树的大多数结点分支很少。为了减少空间浪费,可以用两个数组来表示Trie树,也就是双数组Trie树。
一、构建:
首先对词表中所有出现的10个汉字进行编码:啊-1,阿-2,唉-3,根-4,胶-5,拉-6,及-7,廷-8,伯-9,人-10。对于每一个汉字,需要确定一个base值,使得所有以该汉字开头的词在双数组中都能放下。例如,现在要确定“阿”字的base值:假设以“阿”开头的词的第二个字的序列码依次为a1,a2,a3,……,an,我们必须找到一个值i,使得base[i+a1]、check[i+a1]、base[i+a2]、check[i+a2]、……、base[i+an]、check[i+an]均为0。一旦找到了这个i,“阿”的base值就确定为i。用这种方法构建双数组Trie(Double-Array Trie),经过四次遍历,将所有的词语放入双数组中;然后还要再遍历一遍词表,修改base值,因为我们用负的base值表示该位置为词语:如果状态i对应某一个词,而且Base[i]=0,那么令Base[i]=(-1)*i;如果Base[i]的值不是0,那么令Base[i]=(-1)*Base[i]。
二、查询:
# -*- coding:utf-8 -*-
import json
import sys
import codecs
class Trie:
    """Prefix tree stored as nested dictionaries.

    Every node is a ``dict`` that maps one element of a word (one character
    when words are strings) to the child node for that element. A node at
    which an inserted word ends additionally holds the marker key
    ``'is_word'`` with the value ``True``.
    """

    # Marker key meaning "a word ends at this node". It lives in the same
    # dict as the child links, so any walk over the children must skip it.
    _WORD_KEY = 'is_word'

    def __init__(self):
        """Create an empty trie whose root node has no children."""
        self.root = {}

    def _walk(self, text):
        """Follow ``text`` element by element starting at the root.

        Returns the node reached after consuming all of ``text``, or ``None``
        as soon as an element has no matching child.
        """
        node = self.root
        for s in text:
            # Membership test on the dict itself; ``.keys()`` would build a
            # list on Python 2 and turn each lookup into a linear scan.
            if s not in node:
                return None
            node = node[s]
        return node

    def insert(self, word):
        """Insert ``word`` into the trie, creating missing nodes on the way."""
        node = self.root
        for s in word:
            node = node.setdefault(s, {})
        node[self._WORD_KEY] = True

    def search(self, word):
        """Return True if exactly ``word`` was inserted, otherwise False."""
        node = self._walk(word)
        return node is not None and self._WORD_KEY in node

    def startsWith(self, prefix):
        """Return True if the path spelled by ``prefix`` exists in the trie."""
        return self._walk(prefix) is not None

    def findTips(self, prefix):
        """Return a list of all inserted words that begin with ``prefix``.

        The list is empty when nothing matches, so the result can always be
        iterated. The order of the words follows dict iteration order.
        """
        node = self._walk(prefix)
        if node is None:
            return []
        tips = []
        self.loopfindTips(node, prefix, tips)
        return tips

    def loopfindTips(self, node, prefix, tips):
        """Append to ``tips`` every word stored at or below ``node``.

        ``prefix`` is the text spelled by the path from the root to ``node``.
        """
        if node.get(self._WORD_KEY):
            tips.append(prefix)
        # Keep descending even when this node ends a word: a stored word may
        # itself be a prefix of longer stored words.
        for s, child in node.items():
            if s == self._WORD_KEY:
                # The marker maps to True, not to a child node.
                continue
            self.loopfindTips(child, prefix + s, tips)
# Your Trie object will be instantiated and called as such:
if __name__ == "__main__":
    # Demo: store three place names, then list the ones starting with "北京".
    obj = Trie()
    obj.insert(u"北京长城")
    obj.insert(u"北京故宫")
    obj.insert(u"上海")
    for i in obj.findTips(u"北京"):
        # A parenthesized single argument is valid both as a Python 2 print
        # statement and as a Python 3 print() call.
        print(i)
#include <iostream>
#include <cstdlib>
#include <string>
#include <string.h>
#include <vector>
using namespace std;
// Number of child slots per node. The trie code in this file indexes the
// slots with (character - 'a'), i.e. one slot per lowercase letter.
const int branchNum = 26;

// A single trie node: a word-end flag plus one child pointer per slot.
struct TrieNode
{
    bool isStr;                 // true when a stored word ends at this node
    TrieNode *next[branchNum];  // child for slot i, or NULL when absent

    // Builds a node that ends no word and has no children.
    TrieNode()
    {
        isStr = false;
        for (int slot = 0; slot < branchNum; ++slot)
            next[slot] = NULL;
    }
};
class Trie
{
public:
Trie();
void insert(const char *word);
bool search(const char *word);
void deleteTrie(TrieNode *root);
vector<string> findTips(const char *prefix); //here
void findTips(TrieNode *root , string track , vector<string> &tips); //here
private:
TrieNode *root;
};
// Starts the trie with a lone root node. The root stands for the empty
// prefix; letters are only represented by the child slots below it.
Trie::Trie()
    : root(new TrieNode())
{
}
void Trie::insert(const char *word)
{
TrieNode *location = root;
while(*word) {
if(location->next[*word - 'a'] == NULL) {
TrieNode *tmp = new TrieNode();
location->next[*word - 'a'] = tmp;
}
location = location->next[*word - 'a'];
word++;
}
location->isStr = true;
}
bool Trie::search(const char *word)
{
TrieNode *location = root;
while(*word && location) {
location = location->next[*word - 'a'];
word ++;
}
return (location != NULL && location->isStr);
}
// Deletes root and, recursively, every node reachable from it.
//
// The parameter shadows the member of the same name; only the node passed in
// is affected. Passing NULL is a no-op. Pointers to the deleted nodes held
// elsewhere are left dangling and must not be used afterwards.
void Trie::deleteTrie(TrieNode *root)
{
    if (root == NULL)
        return;

    // Children first, so no node is touched after it has been freed.
    for(int i = 0 ; i < branchNum ; i++) {
        deleteTrie(root->next[i]);
    }
    delete root;
}
vector<string> Trie::findTips(const char *prefix)
{
vector<string> tips;
string track(prefix);
cout<< track<<endl;
int i;
struct TrieNode *node;
for(i = 0 ,node = root ; i < strlen(prefix) && node != NULL ; i++) {
node = node->next[prefix[i] - 'a'];
}
if(node == NULL && i != strlen(prefix))
return tips; //empty
findTips(node, track , tips);
return tips;
}
// Appends to tips every word stored at or below root, in alphabetical order.
//
// track is the text spelled by the path leading to root. It grows by one
// letter per level, so it never gets longer than the longest stored word.
void Trie::findTips(TrieNode *root,string track , vector<string> &tips)
{
    if(root == NULL)
        return;
    if(root->isStr)
        tips.push_back(track);
    for(int i = 0 ; i < branchNum ; i++) {
        TrieNode *child = root->next[i];
        // Skip empty slots before building the extended string; otherwise a
        // temporary string is created for all branchNum slots of every node.
        if(child == NULL)
            continue;
        findTips(child , track + static_cast<char>('a' + i) , tips);
    }
}
int main()
{
Trie t;
t.insert("a");
t.insert("abandon");
t.insert("abandoned");
t.insert("abashed");
if(t.search("abashed"))
cout << "true" << endl;
vector<string> tips = t.findTips("ab");
cout << "ab" << " tips are :(" << tips.size() <<")" <<endl;
for(int i = 0 ; i < tips.size() ; i ++)
cout << tips[i] << endl;
}
参考:
https://wenku.baidu.com/view/2f1c8b18ed630b1c58eeb528.html
https://wenku.baidu.com/view/fcf1c56a561252d380eb6e1d.html
https://wenku.baidu.com/view/71e82437f111f18583d05a54.html?rec_flag=default&mark_pay_doc=2&mark_rec_page=1&mark_rec_position=5&mark_rec=view_r_1&clear_uda_param=1