字典树---trie树

最新推荐文章于 2022-10-31 17:40:50 发布

大瑞大

最新推荐文章于 2022-10-31 17:40:50 发布

阅读量157

点赞数

原文链接：http://www.cnblogs.com/zhuyuan/

版权

字典树（trie 树）

字典树(trie树)

字典树是一种在字符串查找、前缀匹配等方面应用广泛的数据结构。在字典树中查找一个字符串的耗时只与该字符串的长度 L 有关，而与已存入的字符串数量无关，即单次查找的时间复杂度为 O(L)；代价是随之而来的较大的空间开销。

一.原理分析

如图，字典树的每一个节点由一个数据域(用来标记是否有字符串在此处终止)与26个指针域(分别对应26个小写字母)组成(PS:联想链表)。每个节点表示一个字符，我们把要输入的字符串依次插入字典树中，从根节点到某一带有终止标记的节点(上图红点)所经过的字符，就构成一个已插入的字符串。上图中的字符串有：abc、abcd、abd、b、bcd、efg、hig(图片转载自百度图片)。

二.分步实现

1.trie树结构体定义

 /*
  * One node of a trie over the lowercase letters 'a'..'z'.
  *
  * A node owns every child it points to: destroying a node destroys the
  * whole subtree below it.
  */
 struct node
 {
     bool k;            // true when an inserted string ends at this node
     node *next[26];    // child for each lowercase letter, NULL when absent
                        // (resize to support a larger alphabet)

     // A new node has no children and ends no string.
     node() : k(false)
     {
         for (int i = 0; i < 26; i++)
             next[i] = NULL;
     }

     // Release all children. `delete` on a null pointer is a no-op, so the
     // empty slots need no explicit check.
     ~node()
     {
         for (int i = 0; i < 26; i++)
             delete next[i];
     }

 private:
     // Because a node owns its children, a member-wise copy would leave two
     // nodes deleting the same subtree. Declared but intentionally left
     // undefined so that copying is rejected at build time.
     node(const node &);
     node &operator=(const node &);
 };
 node *head;    // root of the trie; must point to an allocated node before
                // the trie is used

PS：结构体中的构造函数用于初始化节点，析构函数用于递归删除该节点下的所有子节点。
　　上面代码中的next[26]表示每个节点最多有26个下一层节点（对应26个小写字母），加上大写字母就是52，再加上数字就是62，以此类推。而bool型的k则表示该位置是否有终止标记，可根据题意改变为其他标记。

2.字典树的插入

 1 void insert_ch(char *ch)
 2 {
 3     int i;
 4     node *p=head;
 5     for(i=0; ch[i]; i++)
 6     {
 7         if(p->next[ch[i]-'a'] == NULL)          //判断下层节点是否存在
 8             p->next[ch[i]-'a'] = new node;  //开辟新空间
 9         p = p->next[ch[i]-'a'];                    //向下一层进行拓展
10     } 
11     p -> k = true;                                     //进行字符串结尾标记
12 }

每次从根节点进行插入，如果向下的节点已经存在，就直接读取，否则拓展一个新节点。之后将最后一个节点的k标记为true表示该位置有一个字符串结尾。

3.字符串查找

 1 bool find_ch(char *ch)
 2 {
 3     int i;
 4     node *p=head;
 5     for(i=0; ch[i]; i++)
 6     {
 7         if(p->next[ch[i]-'a']==NULL)
 8             return false;
 9         p = p -> next[ch[i]-'a'];
10     }
11     return p -> k;
12 }

基本过程与插入相同，向下查找，如果该节点不存在，直接返回false；如果存在就一直向下查找，最终返回末尾节点的标记k。

4.模板代码

1 #include<algorithm>
 2 #include<iostream>
 3 #include<cstdio>
 4 #include<cmath>
 5 #include<cstring>
 6 using namespace std;
 /*
  * One node of a trie over the lowercase letters 'a'..'z'.
  *
  * A node owns every child it points to: destroying a node destroys the
  * whole subtree below it.
  */
 struct node
 {
     bool k;            // true when an inserted string ends at this node
     node *next[26];    // child for each lowercase letter, NULL when absent

     // A new node has no children and ends no string. The flag is set once
     // here rather than on every pass of the loop.
     node() : k(false)
     {
         for (int i = 0; i < 26; i++)
             next[i] = NULL;
     }

     // Release all children. `delete` on a null pointer is a no-op, so the
     // empty slots need no explicit check.
     ~node()
     {
         for (int i = 0; i < 26; i++)
             delete next[i];
     }

 private:
     // Because a node owns its children, a member-wise copy would leave two
     // nodes deleting the same subtree. Declared but intentionally left
     // undefined so that copying is rejected at build time.
     node(const node &);
     node &operator=(const node &);
 };
 node *head;    // root of the trie; must point to an allocated node before
                // the trie is used
32 void insert_ch(char *ch)
33 {
34     int i;
35     node *p = head;
36     for(i=0;ch[i];i++)
37     {
38         if(p -> next[ch[i]-'a' ] == NULL)
39             p -> next[ch[i]-'a' ] = new node;
40         p = p -> next[ch[i]-'a' ];
41     }
42     p -> k = true;
43 }
44 bool find_ch(char *ch)
45 {
46     int i;
47     node *p = head;
48     for(i=0;ch[i];i++)
49     {
50         if(p -> next[ch[i]-'a' ] == NULL)
51             return false;
52         p = p -> next[ch[i]-'a' ];
53     }
54     return p -> k;
55 }
56 char ch[100];
57 int main()
58 {
59     head = new node;
60     while(~scanf("%s",ch))
61         insert_ch(ch);
62     while(~scanf("%s",ch))
63         printf("%s\n",find_ch(ch) ? "YES" : "NO");
64     delete head;                                     //删除节点
65     return 0;
66 }

三.另一种实现方式

动态分配内存对空间掌控很好，用一个节点就开辟一个节点，但是在做题中有时会出现大大小小的问题不好解决，所以可以选择另一种方法：直接开辟出一个很大的结构体数组用来保存节点，不用考虑开辟与删除节点的问题。

 1 #include<algorithm>
 2 #include<iostream>
 3 #include<cstdio>
 4 #include<cmath>
 5 #include<cstring>
 6 using namespace std;
 /*
  * Trie node for the array-backed variant: plain data with no constructor,
  * because nodes are taken from a static pool and reset by hand.
  */
 struct node
 {
     bool k;            // true when an inserted string ends at this node
     node *next[26];    // child for each lowercase letter, NULL when absent
 };

 // Static pool that all nodes are taken from.
 // NOTE(review): 10,000,000 nodes of 26 pointers each is a very large
 // static object (presumably around 2 GB on a 64-bit target) - confirm it
 // fits the memory limit of the target environment.
 node no[10000000];

 node *head;     // root of the trie
 int iii = 0;    // index of the next unused slot in `no`
14 node *new_node()
15 {
16     int i;
17     node *p=&no[iii++];
18     for(i=0;i<26;i++)
19     {
20         p -> next[i] = NULL;
21     }
22     p -> k = false;
23 }
24 void insert_ch(char *ch)
25 {
26     int i;
27     node *p=head,*t;
28     for(i=0; ch[i] ;i++ )
29     {
30         if(p->next[ch[i]-'a']==NULL)
31         {
32             t=new_node();
33             p->next[ch[i]-'a']=t;
34         }
35         p = p->next[ch[i]-'a'];
36     }
37     p -> k = true;
38 }
39 bool find_ch(char ch[])
40 {
41     int i;
42     node *p=head;
43     for(i=0;i<ch[i];i++)
44     {
45         if(p->next[ch[i]-'a']==NULL)
46             return false;
47         p = p -> next[ch[i]-'a'];
48     }
49     return p -> k;
50 }
51 int main()
52 {
53     char s[100];
54     int i,n;
55     head = new_node();
56     while(~scanf("%d",&n))
57     {
58         iii=0;
59         for(i=0;i<n;i++)
60         {
61             scanf("%s",s);
62             insert_ch(s);
63         }
64         scanf("%d",&n);
65         for(i=0;i<n;i++)
66         {
67             scanf("%s",s);
68             printf("%s\n",find_ch(s) ? "YES" : "NO");
69         }
70     }
71     return 0;
72 }

这种方法与上面的基本相同，只是节点不再用new开辟，而是直接从结构体数组中取用。这种方法的弊端是节点数有上限，当需要保存的数据量过大时，会数组越界；而保存的数据量小时，对空间的浪费也很严重。

四.相关基础题目

HDOJ 1251 统计难题：
http://acm.hdu.edu.cn/showproblem.php?pid=1251

HDOJ 1671 Phone List：
http://acm.hdu.edu.cn/showproblem.php?pid=1671 //用第一种方法记得释放空间，否则超内存

HDOJ 1247 Hat’s Words：

http://acm.hdu.edu.cn/showproblem.php?pid=1247