字典树算法

最新推荐文章于 2023-06-04 16:32:44 发布

Wprofessor

最新推荐文章于 2023-06-04 16:32:44 发布

阅读量527

点赞数 2

本文链接：https://blog.csdn.net/Wprofessor/article/details/86547214

版权

字典树

本文将包括以下内容：

字典树的基本概念
字典树的结构
字典树的初始化
字典树的插入
根据单词前缀来遍历全部结果

1.字典树的基本概念

Trie（字典树）主要用于存储字符串，Trie 的每个 Node 保存一个字符。用链表来描述的话，每个字符串对应一条从根结点出发的链表，拥有公共前缀的字符串共用这条链表的前半部分。每个 Node 都保存了它的所有子节点。字典树顾名思义是一个树形结构，它的创建和二叉树类似。它可以实现前缀搜索。

Trie 树利用字符串的公共前缀，逐层建立起一棵多叉树。在检索时类似于在字典中查单词，从第一个字符开始遍历，在 Trie 树中一层一层往下查找，查找效率可以达到 O(n)，n 为查找字符串的长度。

Trie 树有以下特点：

1.Trie 树的根结点上不存储字符，其余结点上存且只存一个字符。

2.从根结点出发，到某一结点上经过的字符，即是该结点对应的前缀。每个结点的孩子结点存储的字符各不相同。

3.Trie 树牺牲空间来换取时间，当数据量很大时，会占用很大空间。如果字符串均由小写字母组成，则每个结点最多会有 $26$ 个孩子结点，则最多会有 $26^n$ 个用于存储的结点，$n$ 为字符串的最大长度。（实际上这个可以优化）

4.Trie 树常用于字符串的快速检索，字符串的快速排序与去重，文本的词频统计等。

下图是一棵以词：a、to、tea、ted、ten、i、in、inn构成的字典树，其中带下划线的结点为终端结点（从根结点到终端结点的遍历过程就是 Trie 中存储的一个字符串）。注意$表示的根节点，是空的

在这里插入图片描述

2.字典树的结构

因为我们要查找公共前缀的所有单词，因此在这里加入isStr数据域用于判断是否构成完整单词

// One trie node. A child is reached through the slot (character - 'a') of
// its parent's children array.
struct tire_node {
	int count;		// incremented by tireInsert every time an inserted word passes through this node
	char word[MAXSIZE];	// the complete word, copied in by tireInsert at the node where that word ends
	bool isStr;		// true when a complete word ends at this node
	struct tire_node *children[MAXSIZE];	// child pointers; NULL where no child exists
};
typedef struct tire_node tire;

3.字典树的初始化

// Allocate and initialise an empty trie root. The root acts as a header
// node: it has no children yet and marks no word.
//
// Returns the new root, or NULL when the allocation fails.
tire *initTire() {
	tire *root = (tire *)malloc(sizeof *root);
	if (root == NULL)
	{
		return NULL;	// out of memory: report it instead of dereferencing NULL
	}
	root->count = 0;
	root->isStr = false;
	root->word[0] = '\0';	// malloc does not clear memory; keep the buffer a valid empty string
	for (int i = 0; i < MAXSIZE; i++)
	{
		root->children[i] = NULL;
	}
	return root;
}

4.字典树的插入

void tireInsert(tire **root, char *word) {
	tire *node = *root;
	int i = 0;
	int j;
	int id;
	while (word[i])
	{
		id = word[i] - 'a';
		if (!node->children[id])		//如果没找到相应的字符 
		{
			node->children[id] = new tire();	//开辟空间
			for (j = 0; j < MAXSIZE; j++)
			{
				node->children[id]->children[j] = NULL;
			}
			node->children[id]->count = 0;
			node->children[id]->isStr = false;
		}
		node = node->children[id];
		node->count++;
		i++;
	}
	node->isStr = true;
	strcpy(node->word,word);
}

注意：

插入的时候咱们这里默认按照字符从小到大的顺序插入，因此所建立的树如果按照层次遍历的话是一个递增序列

5.根据单词前缀来遍历全部结果

// Walk the trie from `root` along the characters of `word` and return the
// node reached after the last character (for an empty string this is `root`
// itself).
//
// Returns NULL when `root` or `word` is NULL, when a character does not map
// to a valid child slot (c - 'a' outside [0, MAXSIZE)), or when the path
// does not exist in the trie.
tire * tireSearch(tire *root, const char *word) {
	if (root == NULL || word == NULL)
	{
		return NULL;
	}
	tire *node = root;
	for (int i = 0; word[i]; i++)
	{
		int id = word[i] - 'a';
		// Reject characters that would index outside children[] before
		// touching the array.
		if (id < 0 || id >= MAXSIZE || !node->children[id])
		{
			return NULL;
		}
		node = node->children[id];
	}
	return node;
}

在这里用到了BFS的思想

// Print every stored word that starts with the prefix `front`, one per line,
// visiting the subtree below the prefix node breadth-first.
// Prints a "no match" message when the prefix is not present in the trie.
void printTire(tire *root,char *front) {
	tire *start = tireSearch(root, front);
	if (!start)
	{
		std::cout << "未匹配到您需要的信息，请重新输入" << std::endl;
		return;
	}

	// Growable FIFO work list: nodes are appended at the back and consumed
	// by index from the front. A fixed array of MAXSIZE entries would
	// overflow as soon as the subtree holds more than MAXSIZE nodes.
	std::vector<tire *> pending;
	pending.push_back(start);
	for (size_t head = 0; head < pending.size(); head++)
	{
		tire *p = pending[head];
		// Output the node when it marks the end of a complete word.
		if (p->count != 0 && p->isStr)
		{
			std::cout << p->word << std::endl;
		}
		// Queue every existing child.
		for (int i = 0; i < MAXSIZE; i++)
		{
			if (p->children[i])
			{
				pending.push_back(p->children[i]);
			}
		}
	}
}

完整代码：

#include<bits/stdc++.h>
#define MAXSIZE 50 
using namespace std;
// One trie node. A child is reached through the slot (character - 'a') of
// its parent's children array.
struct tire_node {
	int count;		// incremented by tireInsert every time an inserted word passes through this node
	char word[MAXSIZE];	// the complete word, copied in by tireInsert at the node where that word ends
	bool isStr;		// true when a complete word ends at this node
	struct tire_node *children[MAXSIZE];	// child pointers; NULL where no child exists
};
typedef struct tire_node tire;
// Allocate and initialise an empty trie root. The root acts as a header
// node: it has no children yet and marks no word.
//
// Returns the new root, or NULL when the allocation fails.
tire *initTire() {
	tire *root = (tire *)malloc(sizeof *root);
	if (root == NULL)
	{
		return NULL;	// out of memory: report it instead of dereferencing NULL
	}
	root->count = 0;
	root->isStr = false;
	root->word[0] = '\0';	// malloc does not clear memory; keep the buffer a valid empty string
	for (int i = 0; i < MAXSIZE; i++)
	{
		root->children[i] = NULL;
	}
	return root;
}
void tireInsert(tire **root, char *word) {
	tire *node = *root;
	int i = 0;
	int j;
	int id;
	while (word[i])
	{
		id = word[i] - 'a';
		if (!node->children[id])		//如果没找到相应的字符 
		{
			node->children[id] = new tire();	//开辟空间
			for (j = 0; j < MAXSIZE; j++)
			{
				node->children[id]->children[j] = NULL;
			}
			node->children[id]->count = 0;
			node->children[id]->isStr = false;
		}
		node = node->children[id];
		node->count++;
		i++;
	}
	node->isStr = true;
	strcpy(node->word,word);
}
// Walk the trie from `root` along the characters of `word` and return the
// node reached after the last character (for an empty string this is `root`
// itself).
//
// Returns NULL when `root` or `word` is NULL, when a character does not map
// to a valid child slot (c - 'a' outside [0, MAXSIZE)), or when the path
// does not exist in the trie.
tire * tireSearch(tire *root, const char *word) {
	if (root == NULL || word == NULL)
	{
		return NULL;
	}
	tire *node = root;
	for (int i = 0; word[i]; i++)
	{
		int id = word[i] - 'a';
		// Reject characters that would index outside children[] before
		// touching the array.
		if (id < 0 || id >= MAXSIZE || !node->children[id])
		{
			return NULL;
		}
		node = node->children[id];
	}
	return node;
}
// Print every stored word that starts with the prefix `front`, one per line,
// visiting the subtree below the prefix node breadth-first.
// Prints a "no match" message when the prefix is not present in the trie.
void printTire(tire *root,char *front) {
	tire *start = tireSearch(root, front);
	if (!start)
	{
		std::cout << "未匹配到您需要的信息，请重新输入" << std::endl;
		return;
	}

	// Growable FIFO work list: nodes are appended at the back and consumed
	// by index from the front. A fixed array of MAXSIZE entries would
	// overflow as soon as the subtree holds more than MAXSIZE nodes.
	std::vector<tire *> pending;
	pending.push_back(start);
	for (size_t head = 0; head < pending.size(); head++)
	{
		tire *p = pending[head];
		// Output the node when it marks the end of a complete word.
		if (p->count != 0 && p->isStr)
		{
			std::cout << p->word << std::endl;
		}
		// Queue every existing child.
		for (int i = 0; i < MAXSIZE; i++)
		{
			if (p->children[i])
			{
				pending.push_back(p->children[i]);
			}
		}
	}
}
int main(){
	tire *node;
	node = initTire();
	tireInsert(&node,"wangjiahao");
	tireInsert(&node,"wangli");
	tireInsert(&node,"wanggayhao");
	tireInsert(&node,"wangnima");
	tireInsert(&node,"zhangwanlin");
	tireInsert(&node,"zhangsan");
	char x[MAXSIZE];
	cout<<"请输入您查询的名字"<<endl;
	cin>>x;
	printTire(node,x);
	return 0;
}

本人菜鸟，如果有不对的地方大佬们请留言，在下一定吸取教训及时改正，谢谢！！！

Wprofessor

关注

2
点赞
踩
1

收藏

觉得还不错? 一键收藏
1
评论
字典树算法

字典树本文将包括以下内容：字典树的基本概念字典树的结构字典树的初始化字典树的插入根据单词前缀来遍历全部结果1.字典树的基本概念Trie字典树主要用于存储字符串，Trie 的每个 Node 保存一个字符。用链表来描述的话，就是一个字符串就是一个链表。每个Node都保存了它的所有子节点。字典树顾名思义是一个树形结构，它的创建和二叉树类似。它可以实现前缀搜索。T...
复制链接

扫一扫