Trie树（字典树）

最新推荐文章于 2022-06-14 17:22:22 发布

good-destiny

最新推荐文章于 2022-06-14 17:22:22 发布

阅读量4.7k

点赞数 5

分类专栏：数据结构与算法文章标签：字典树

本文链接：https://blog.csdn.net/tuwenqi2013/article/details/89389983

版权

数据结构与算法专栏收录该内容

20 篇文章 1 订阅

订阅专栏

一、概述

　　1、基本概念

　　字典树，又称为单词查找树、Trie树，是一种树形结构，是哈希树的一种变种。

2、基本性质

根节点不包含字符，除根节点外的每一个子节点都包含一个字符
从根节点到某一节点，路径上经过的字符连接起来，就是该节点对应的字符串
每个节点的所有子节点包含的字符都不相同

　3、应用场景

　典型应用是用于统计、排序和保存大量的字符串（但不仅限于字符串），因此经常被搜索引擎系统用于文本词频统计。

1)、字符串的快速查找
给出N个单词组成的熟词表，以及一篇全用小写英文书写的文章，请你按最早出现的顺序写出所有不在熟词表中的生词。在这道题中，我们可以用数组枚举，用哈希，用字典树，先把熟词建一棵树，然后读入文章进行比较，这种方法效率是比较高的。

2)、字典树在“串”排序方面的应用
给定N个互不相同的仅由一个单词构成的英文名，让你将它们按字典序从小到大输出。用字典树进行排序：采用数组的方式创建字典树，这棵树的每个节点的所有儿子很显然地按照其字母大小排序，对这棵树进行先序遍历即可。

3)、字典树在最长公共前缀问题的应用
对所有串建立字典树，两个串的最长公共前缀的长度即它们所在节点的公共祖先个数，于是问题就转化为最近公共祖先（LCA）问题。

　4、优点

　利用字符串的公共前缀来减少查询时间，最大限度的减少无谓的字符串比较，查询效率比哈希树高。

实例： 统计含有相同前缀的单词个数的实现

以属性prefixCnt来统计，经过该节点的单词一共有多少个，这样，在给定一个前缀的时候，按照查询字符串的方法，一步步往下查找，途中出现未找到的字符，则返回0。否则到达包含前缀的最后一个字符的节点，直接返回该节点的prefixCnt的值即可。然而该值是在进行插入单词时更新的，目前的处理只能针对没有重复插入同一个单词的情况。

代码1：

#include <cstring>
#include <iostream>
#include <queue>
using namespace std;
 
// Number of child slots per trie node: one for each lowercase letter 'a'..'z'.
// NOTE: because of the file-level `using namespace std;`, the unqualified name
// `size` can clash with std::size (C++17); it is therefore written as ::size here.
const int size = 26;
 
// One node of the trie.
struct TrieTreeNode
{
	char val;                     // character stored on this node ('\0' for the root built in main)
	bool isEnd;                   // true when a stored word ends exactly at this node
	int childCnt;                 // number of non-NULL entries in child[]
	int prefixCnt;                // bumped by Insert each time a word's path passes through this node
	TrieTreeNode *child[::size];  // child[c - 'a'] is the subtree for letter c, or NULL

	// Builds a childless node holding _val; the end flag and both counters start cleared.
	TrieTreeNode(char _val)
		:val(_val),isEnd(false),childCnt(0),prefixCnt(0)
	{
		// Assign every slot explicitly: this needs no <cstring>, and does not
		// pass NULL (a pointer constant) where memset expects an int fill byte.
		for (int i = 0; i < ::size; ++i)
			child[i] = NULL;
	}
};
 
// Stores `word` in the trie rooted at `root`.
//
// root: root node of the trie, passed by reference; when it is NULL a fresh
//       root node (holding '\0') is allocated so that insertion into an
//       empty trie works.
// word: NUL-terminated string. A NULL pointer, or a word containing any
//       character outside 'a'..'z', is ignored and the trie is left unchanged,
//       because such a character would index outside the 26-slot child array.
//
// Every node on the word's path gets its prefixCnt incremented, and the last
// node is flagged as the end of a word. Inserting a word that is already
// stored is a no-op, so prefixCnt counts distinct words.
void Insert(TrieTreeNode *&root, const char *word)
{
	if (word == NULL)
		return;

	// Pass 1 (read-only): validate every character and find out whether the
	// word is already stored, so that nothing is modified for a rejected or
	// duplicate word.
	bool pathExists = (root != NULL);
	const TrieTreeNode *probe = root;
	for (const char *c = word; *c != '\0'; ++c)
	{
		if (*c < 'a' || *c > 'z')
			return;
		if (pathExists)
		{
			probe = probe->child[*c - 'a'];
			pathExists = (probe != NULL);
		}
	}
	if (pathExists && probe->isEnd)
		return;

	if (root == NULL)
		root = new TrieTreeNode('\0');

	// Pass 2: create the missing nodes and count the word on every node of its path.
	// Walking to the terminator avoids calling strlen() on every iteration.
	TrieTreeNode *p = root;
	for (const char *c = word; *c != '\0'; ++c)
	{
		TrieTreeNode *&slot = p->child[*c - 'a'];
		if (slot == NULL)
		{
			slot = new TrieTreeNode(*c);
			p->childCnt++;
		}
		slot->prefixCnt++;
		p = slot;
	}
	p->isEnd = true;
}
 
// Reports whether `word` is stored in the trie rooted at `root`.
//
// root: root node of the trie; NULL is treated as an empty trie.
// word: NUL-terminated string. NULL, or any character outside 'a'..'z',
//       yields false, since such a character would index outside child[].
// Returns true only when the whole path exists and its last node is flagged
// as the end of a word; a mere prefix of a stored word does not match.
bool Find(TrieTreeNode *root, const char *word)
{
	if (root == NULL || word == NULL)
		return false;

	const TrieTreeNode *node = root;
	// Walk to the terminator instead of re-evaluating strlen() in the loop condition.
	for (const char *c = word; *c != '\0'; ++c)
	{
		if (*c < 'a' || *c > 'z')
			return false;

		node = node->child[*c - 'a'];
		if (node == NULL)
			return false;
	}
	return node->isEnd;
}
 
// Prints the trie level by level (breadth-first) to standard output.
// Each node is written as "<val>(<childCnt>) " and the listing ends with a newline.
// A NULL root prints nothing at all.
void LevelOrderTraverse(TrieTreeNode *root)
{
	if (root == NULL)
		return;

	std::queue<TrieTreeNode *> pending;
	pending.push(root);
	while (!pending.empty())
	{
		TrieTreeNode *node = pending.front();
		pending.pop();

		std::cout << node->val << "(" << node->childCnt << ") ";

		// ::size is qualified on purpose: with `using namespace std;` the bare
		// name is ambiguous with std::size when compiling as C++17.
		for (int i = 0; i < ::size; i++)
		{
			if (node->child[i] != NULL)
				pending.push(node->child[i]);
		}
	}
	std::cout << "\n";
}
 
// Prints the subtree rooted at `treeNode` in pre-order: the node itself as
// "<val>(<childCnt>) ", then each child subtree in slot order (0 .. size-1).
// A NULL node prints nothing. No trailing newline is written.
void PreOrderTraverse(TrieTreeNode *treeNode)
{
	if (treeNode == NULL)
		return;

	std::cout << treeNode->val << "(" << treeNode->childCnt << ") ";

	// ::size is qualified on purpose: with `using namespace std;` the bare
	// name is ambiguous with std::size when compiling as C++17.
	for (int i = 0; i < ::size; i++)
		PreOrderTraverse(treeNode->child[i]);
}
 
// Prints the subtree rooted at `treeNode` in post-order: each child subtree in
// slot order (0 .. size-1) first, then the node itself as "<val>(<childCnt>) ".
// A NULL node prints nothing. No trailing newline is written.
void PostOrderTraverse(TrieTreeNode *treeNode)
{
	if (treeNode == NULL)
		return;

	// ::size is qualified on purpose: with `using namespace std;` the bare
	// name is ambiguous with std::size when compiling as C++17.
	for (int i = 0; i < ::size; i++)
		PostOrderTraverse(treeNode->child[i]);

	std::cout << treeNode->val << "(" << treeNode->childCnt << ") ";
}
 
// Frees the whole subtree rooted at `treeNode` (children first, then the node)
// and resets the caller's pointer to NULL. Calling it on a NULL pointer is a no-op.
void MakeEmpty(TrieTreeNode *&treeNode)
{
	if (treeNode == NULL)
		return;

	// ::size is qualified on purpose: with `using namespace std;` the bare
	// name is ambiguous with std::size when compiling as C++17.
	for (int i = 0; i < ::size; i++)
		MakeEmpty(treeNode->child[i]);

	delete treeNode;
	treeNode = NULL;
}
 
// Inserts the first `n` entries of `words` into the trie rooted at `root`,
// in array order, by calling Insert once per entry.
void BuildTrieTree(TrieTreeNode *&root,const char *words[], int n)
{
	int next = 0;
	while (next < n)
	{
		Insert(root, words[next]);
		++next;
	}
}
 
bool Remove(TrieTreeNode *&treeNode, const char *word,int pos, int n) 
{
	if (pos == n)
	{
		treeNode->isEnd = false;//set the node not to be an end
		//if the last node contains the last char is a leaf,return true to delete it
		return treeNode->childCnt == 0;
	}
 
	//not found, not delete this node
	if (treeNode->child[word[pos]-'a'] == NULL) 
		return false;
 
	//if true, the child is a leaf, delete the child
	if ( Remove( treeNode->child[word[pos]-'a'], word, pos+1, n))
	{ 
		delete treeNode->child[word[pos]-'a'];
		treeNode->child[word[pos]-'a'] = NULL;
 
		treeNode->prefixCnt--;
		//if the node becomes a leaf and is not an end,return true to delete it
		if (--treeNode->childCnt == 0 && treeNode->isEnd == false)
			return true;
	}
 
	//other not delete
	return false;
}
 
// Counts the stored words that start with `prefix`.
//
// root:   root node of the trie; NULL is treated as an empty trie.
// prefix: NUL-terminated string. NULL, or any character outside 'a'..'z',
//         yields 0, since such a character would index outside child[].
// Returns 0 when the prefix path does not exist; otherwise the prefixCnt of
// the node reached by the last character of the prefix.
// Note: for an empty prefix this is the prefixCnt of `root` itself, which
// Insert never increments (it only counts the child nodes along a path).
int CountWordsWithPrefix(TrieTreeNode *root, const char *prefix)
{
	if (root == NULL || prefix == NULL)
		return 0;

	const TrieTreeNode *node = root;
	// Walk to the terminator instead of re-evaluating strlen() in the loop condition.
	for (const char *c = prefix; *c != '\0'; ++c)
	{
		if (*c < 'a' || *c > 'z')
			return 0;

		node = node->child[*c - 'a'];
		if (node == NULL)
			return 0;
	}
	return node->prefixCnt;
}
 
// Demo driver: builds a small trie, prints it in three traversal orders, then
// exercises Find, Insert, Remove and CountWordsWithPrefix before freeing the trie.
int main()
{
	TrieTreeNode *root = new TrieTreeNode('\0');

	// Alternative word list for exercising insertion:
	//   {"b","abc","abd","bcd","abcd","efg","hii"}
	// (sizeof(words) is the byte size of the whole pointer array,
	//  sizeof(words[0]) the byte size of a single pointer.)

	// Word list used to exercise removal.
	const char *words[] = {"abc","ad","ef"};
	const int wordCount = sizeof(words)/sizeof(words[0]);
	BuildTrieTree(root,words,wordCount);

	LevelOrderTraverse(root);
	PreOrderTraverse(root);
	cout << "\n";
	PostOrderTraverse(root);
	cout << '\n';

	cout << (Find(root,"ef") ? "ef found" : "ef not found") << endl;

	// After this insertion the node 'e' ends a word but is not a leaf, so it
	// cannot be deleted until the leaf (or leaves) below it are removed.
	Insert(root,"e");
	LevelOrderTraverse(root);

	// Removing "ef" deletes only the leaf 'f'; 'e' stays because it still ends a word.
	Remove(root,"ef",0,strlen("ef"));
	LevelOrderTraverse(root);

	// Now 'e' is a leaf, so removing "e" deletes it as well.
	Remove(root,"e",0,strlen("e"));
	LevelOrderTraverse(root);

	// Prefix count for "a" before and after removing "ad".
	const int countBefore = CountWordsWithPrefix(root,"a");
	cout << countBefore << endl;
	Remove(root,"ad",0,strlen("ad"));
	const int countAfter = CountWordsWithPrefix(root,"a");
	cout << countAfter << endl;

	MakeEmpty(root);

	return 0;
}