字典树

最新推荐文章于 2020-03-15 19:58:31 发布

_小青年儿_

最新推荐文章于 2020-03-15 19:58:31 发布

阅读量1k

点赞数

分类专栏：【０８】数据结构与算法文章标签： null tree insert buffer branch struct

本文链接：https://blog.csdn.net/l09711/article/details/6366004

版权

【０８】数据结构与算法专栏收录该内容

11 篇文章 0 订阅

订阅专栏

Trie,又称字典树、单词查找树,是一种树形结构，用于保存大量的字符串。它的优点是：利用字符串的公共前缀来节约存储空间。
相对来说,Trie树是一种比较简单的数据结构.理解起来比较简单,正所谓简单的东西也得付出代价.故Trie树也有它的缺点,Trie树的内存消耗非常大.当然,或许用左儿子右兄弟的方法建树的话,可能会好点.

其基本性质可以归纳为：
1. 根节点不包含字符，除根节点外每一个节点都只包含一个字符。
2. 从根节点到某一节点，路径上经过的字符连接起来，为该节点对应的字符串。
3. 每个节点的所有子节点包含的字符都不相同。

其基本操作有:查找插入和删除,当然删除操作比较少见.我在这里只是实现了对整个树的删除操作,至于单个word的删除操作也很简单.

搜索字典项目的方法为：

(1) 从根结点开始一次搜索；

(2) 取得要查找关键词的第一个字母，并根据该字母选择对应的子树并转到该子树继续进行检索；

(3) 在相应的子树上，取得要查找关键词的第二个字母,并进一步选择对应的子树进行检索。

(4) 迭代过程……

(5) 在某个结点处，关键词的所有字母已被取出，则读取附在该结点上的信息，即完成查找。
其他操作类似处理.

给你100000个长度不超过10的单词。对于每一个单词，我们要判断它有没有出现过，如果出现了，第一次出现在第几个位置。
这题当然可以用hash来，但是我要介绍的是trie树。在某些方面它的用途更大。比如说对于某一个单词，我要询问它的前缀是否出现过。这样hash就不好搞了，而用trie还是很简单。
现在回到例子中，如果我们用最傻的方法，对于每一个单词，我们都要去查找它前面的单词中是否有它。那么这个算法的复杂度就是O(n^2)。显然对于100000的范围难以接受。现在我们换个思路想。假设我要查询的单词是abcd，那么在他前面的单词中，以b，c，d，f之类开头的我显然不必考虑。而只要找以a开头的中是否存在abcd就可以了。同样的，在以a开头中的单词中，我们只要考虑以b作为第二个字母的……这样一个树的模型就渐渐清晰了……
假设有b，abc，abd，bcd，abcd，efg，hii这7个单词，我们构建的树就是这样的。

对于每一个节点，从根遍历到他的过程就是一个单词，如果这个节点被标记为红色，就表示这个单词存在，否则不存在。
那么，对于一个单词，我只要顺着它从根走到对应的节点，再看这个节点是否被标记为红色就可以知道它是否出现过了。把这个节点标记为红色，就相当于插入了这个单词。
这样一来我们询问和插入可以一起完成，所用时间仅仅为单词长度，在这一个样例，便是10。
我们可以看到，trie树每一层的节点数是26^i级别的。所以为了节省空间。我们用动态链表，或者用数组来模拟动态。空间的花费，不会超过单词数×单词长度。

应用一：

#include <cstdio>   
#include <iostream>   
#include <cstring>   
using namespace std;   
  
  
// Number of child slots per node: one per letter of the English alphabet.
// Upper- and lower-case letters map to the same slot, so keys are
// case-insensitive.
const int num_chars = 26;


// Dictionary mapping words made of ASCII letters to C-string entries.
//
// Each node owns its child nodes and its `data` buffer. The destructor
// releases the whole tree, and copy construction / assignment produce an
// independent deep copy, so two Trie objects never share nodes.
class Trie {
public:
    Trie() : root(NULL) {}
    Trie(const Trie& tr);
    Trie& operator=(const Trie& tr);
    ~Trie();

    // Looks up `word`. Returns 1 when an entry is stored for it, 0 otherwise
    // (including when `word` is NULL or contains a non-letter). On success the
    // entry is copied into `entry`, which must be large enough to hold it;
    // pass NULL for `entry` to test for existence only.
    int search(const char* word, char* entry) const;

    // Stores a copy of `entry` under `word`. Returns 1 on success and 0 when
    // either argument is NULL, `word` contains a non-letter, or `word` already
    // has an entry (the existing entry is left untouched).
    int insert(const char* word, const char* entry);

    // Declared only; no definition is visible in this file.
    int remove(const char* word, char* entry);

private:
    struct Trie_node
    {
        char* data;                    // entry text, or NULL when no word ends here
        Trie_node* branch[num_chars];  // one child per letter, NULL when absent
        Trie_node();
    }* root;

    // Maps a letter to its slot in `branch`, or returns -1 for a non-letter.
    static int char_code(char c);
    // Returns a deep copy of the subtree rooted at `node` (NULL stays NULL).
    static Trie_node* clone(const Trie_node* node);
    // Frees the subtree rooted at `node`, including every `data` buffer.
    static void destroy(Trie_node* node);
};

Trie::Trie_node::Trie_node()
{
    data = NULL;
    for (int i = 0; i < num_chars; ++i)
        branch[i] = NULL;
}

int Trie::char_code(char c)
{
    if (c >= 'A' && c <= 'Z')
        return c - 'A';
    if (c >= 'a' && c <= 'z')
        return c - 'a';
    return -1;
}

Trie::Trie_node* Trie::clone(const Trie_node* node)
{
    if (node == NULL)
        return NULL;

    Trie_node* copy = new Trie_node;
    if (node->data != NULL) {
        copy->data = new char[std::strlen(node->data) + 1];
        std::strcpy(copy->data, node->data);
    }
    for (int i = 0; i < num_chars; ++i)
        copy->branch[i] = clone(node->branch[i]);
    return copy;
}

void Trie::destroy(Trie_node* node)
{
    if (node == NULL)
        return;

    // Recursion depth is bounded by the length of the longest stored word.
    for (int i = 0; i < num_chars; ++i)
        destroy(node->branch[i]);
    delete[] node->data;
    delete node;
}

Trie::Trie(const Trie& tr) : root(clone(tr.root)) {}

Trie& Trie::operator=(const Trie& tr)
{
    if (this != &tr) {
        // Build the copy before releasing the old tree so that a failed
        // allocation leaves this object unchanged.
        Trie_node* copy = clone(tr.root);
        destroy(root);
        root = copy;
    }
    return *this;
}

Trie::~Trie()
{
    destroy(root);
}

int Trie::search(const char* word, char* entry) const
{
    if (word == NULL)
        return 0;

    const Trie_node* location = root;
    for (; location != NULL && *word != 0; ++word) {
        const int code = char_code(*word);
        if (code < 0)
            return 0;
        location = location->branch[code];
    }

    if (location == NULL || location->data == NULL)
        return 0;

    if (entry != NULL)
        std::strcpy(entry, location->data);
    return 1;
}

int Trie::insert(const char* word, const char* entry)
{
    if (word == NULL || entry == NULL)
        return 0;

    // Validate the whole key first so that a rejected word does not leave
    // freshly allocated, unused nodes behind.
    for (const char* p = word; *p != 0; ++p) {
        if (char_code(*p) < 0)
            return 0;
    }

    if (root == NULL)
        root = new Trie_node;

    Trie_node* location = root;
    for (; *word != 0; ++word) {
        Trie_node*& next = location->branch[char_code(*word)];
        if (next == NULL)
            next = new Trie_node;
        location = next;
    }

    // An existing entry is never overwritten.
    if (location->data != NULL)
        return 0;

    location->data = new char[std::strlen(entry) + 1];
    std::strcpy(location->data, entry);
    return 1;
}
// Demo driver: fills a Trie with part-of-speech tags, then prints the result
// of a few lookups, including one word ("baby") that was never inserted.
int main()
{
    Trie t;
    char entry[100];   // receives the tag copied out by Trie::search
    t.insert("aa", "DET");
    t.insert("abacus","NOUN");
    t.insert("abalone","NOUN");
    t.insert("abandon","VERB");
    t.insert("abandoned","ADJ");
    t.insert("abashed","ADJ");
    t.insert("abate","VERB");
    t.insert("this", "PRON");
    if (t.search("this", entry))
        cout<<"'this' was found. pos: "<<entry<<endl;
    if (t.search("abate", entry))
        cout<<"'abate' is found. pos: "<<entry<<endl;
    if (t.search("baby", entry))
        cout<<"'baby' is found. pos: "<<entry<<endl;
    else
        cout<<"'baby' does not exist at all!"<<endl;

    // The closing quote after aa was missing in the original message.
    if (t.search("aa", entry))
        cout<<"'aa' was found. pos: "<<entry<<endl;
    return 0;
}

应用之二：电话本检索

#include <cstdlib>
#include <iostream>
#include <queue>
#include <set>
#include <string>
using namespace std;

// One node of the phone-book trie.
typedef struct node
{
public:
    // Value-initialising the array sets all 26 child pointers to null without
    // relying on memset (which needs <cstring> and only guarantees zero bytes,
    // not null pointers).
    node() : table()
    {
    }

public:
    std::set<std::string> name;  // names stored for the code that ends at this node
    node* table[26];             // one child per letter 'a'..'z'; null when absent
}Node;

// Phone-book trie: a root node held by value plus an integer counter that
// starts at zero.
struct trie
{
    Node root;   // starting node of the tree, stored inside the trie object
    int count;   // initialised to 0 by the constructor

    trie()
        : count(0)
    {
    }
};
typedef struct trie Trie;

// Allocates a new Trie with `new` and hands the raw pointer to the caller.
Trie* createTrie()
{
    Trie *fresh=new Trie();
    return fresh;
}

// Allocates a new Node with `new` and hands the raw pointer to the caller.
Node* createNode()
{
    Node *fresh=new Node();
    return fresh;
}

bool insert(const char *code,const char *name,Trie *tree)
{
    Node *cur=&tree->root;

while(*code!=0)
    {
        if(cur->table[*code-'a']!=NULL)
        {
            cur=cur->table[*code-'a'];
        }
        else
        {
            Node *next=new Node();
            cur->table[*code-'a']=next;
            cur=next;
        }

++code;
    }

if(cur!=&tree->root)
    {
        if(cur->name.count(name)!=0)
        {
            return false;
        }

cur->name.insert(name);
        return true;
    }
    else
    {
        return false;
    }
}

// Walks the trie along `code` and returns the node reached.
//
// Returns NULL when either argument is NULL, `code` is empty (the root itself
// is never returned), `code` contains a character outside 'a'..'z', or no
// node exists for the full key.
Node* find(const char *code,Trie *tree)
{
    if(code==NULL || tree==NULL || *code==0)
    {
        return NULL;
    }

    Node *cur=&tree->root;
    for(;*code!=0;++code)
    {
        // The character indexes a 26-slot table; anything else would be an
        // out-of-bounds read.
        if(*code<'a' || *code>'z')
        {
            return NULL;
        }

        cur=cur->table[*code-'a'];
        if(cur==NULL)
        {
            return NULL;
        }
    }

    return cur;
}

void update(const char *code,Trie *tree)
{
    Node *p=find(code,tree);

if(p)
    {
        queue<Node*> q;

q.push(p);

while(!q.empty())
        {
            Node *head=q.front();
            q.pop();

for(set<string>::iterator iter=head->name.begin();iter!=head->name.end();++iter)
            {
                cout<<*iter<<endl;
            }

for(int i=0;i<26;++i)
            {
                if(head->table[i]!=NULL)
                {
                    q.push(head->table[i]);
                }
            }
        }
    }
}

// Interactive phone-book demo: seeds the trie with two entries, then for each
// line read from standard input echoes the line and prints every name whose
// code starts with it. Stops at end of input.
int main()
{
    Trie *tree=createTrie();
    insert("liangdong","梁栋",tree);
    insert("liangshen","梁神",tree);

    // getline replaces the hand-rolled reader, which compared against the
    // multicharacter literal '/n' instead of '\n' (so the loop never stopped
    // and overran the buffer), never reset its index between lines, tested
    // EOF after narrowing to char, and called fflush on an input stream.
    std::string line;
    while(std::getline(std::cin,line))
    {
        system("cls");   // clears the console; "cls" is a Windows shell command
        std::cout<<line<<std::endl;
        update(line.c_str(),tree);
    }

    return 0;
}