Trie树(字典树)

一、概述

  1、基本概念

  字典树,又称为单词查找树、Trie树,是一种树形结构,它是一种哈希树的变种。

     2、基本性质

  • 根节点不包含字符,除根节点外的每一个子节点都包含一个字符
  • 从根节点到某一节点,路径上经过的字符连接起来,就是该节点对应的字符串
  • 每个节点的所有子节点包含的字符都不相同

 3、应用场景

   典型应用是用于统计、排序和保存大量的字符串(但不仅限于字符串),经常被搜索引擎系统用于文本词频统计。

    1)、字符串的快速查找
          给出N个单词组成的熟词表,以及一篇全用小写英文书写的文章,请你按最早出现的顺序写出所有不在熟词表中的生词。在这道题中,我们可以用数组枚举、用哈希、用字典树:先把熟词建成一棵字典树,然后读入文章逐词进行比较,这种方法效率是比较高的。

    2)、字典树在“串”排序方面的应用
        给定N个互不相同的、仅由一个单词构成的英文名,让你将它们按字典序从小到大输出。用字典树进行排序:采用数组的方式创建字典树,这棵树的每个节点的所有儿子很显然地按照其字母大小排序,对这棵树进行先序遍历即可。

    3)、字典树在最长公共前缀问题的应用
        对所有串建立字典树,两个串的最长公共前缀的长度即它们所在节点的公共祖先个数(不含根节点),于是问题就转化为最近公共祖先(LCA)问题。

 4、优点

 利用字符串的公共前缀来减少查询时间,最大限度的减少无谓的字符串比较,查询效率比哈希树高。

 实例:  统计含有相同前缀的单词个数的实现

       以属性prefixCnt来统计,经过该节点的单词一共有多少个,这样,在给定一个前缀的时候,按照查询字符串的方法,一步步往下查找,途中出现未找到的字符,则返回0。否则到达包含前缀的最后一个字符的节点,直接返回该节点的prefixCnt的值即可。然而该值是在进行插入单词时更新的,目前的处理只能针对没有重复插入同一个单词的情况。

代码1:

#include <cstring>
#include <iostream>
#include <queue>
using namespace std;
 
// Number of child slots per node: one per lowercase letter 'a'..'z'.
const int size = 26;

// One node of the trie.
//
// The functions in this file index child[] with (character - 'a'), so only
// the letters 'a'..'z' map to valid slots.
struct TrieTreeNode
{
	char val;       // character stored on this node
	bool isEnd;     // true when an inserted word ends exactly at this node
	int childCnt;   // number of non-NULL entries in child[]
	int prefixCnt;  // bumped by Insert once for each word whose path reaches this node
	// `::size` is qualified on purpose: combined with the file's
	// `using namespace std;`, an unqualified `size` is ambiguous with
	// std::size when the file is compiled as C++17 or later.
	TrieTreeNode *child[::size];

	// Creates a childless, non-terminal node holding _val.
	// `child()` value-initialises the array, so every slot starts as a null
	// pointer without going through memset.
	TrieTreeNode(char _val)
		:val(_val),isEnd(false),childCnt(0),prefixCnt(0),child()
	{
	}
};
 
// Inserts `word` into the trie rooted at `root`.
//
// root : root of the trie; when it is NULL a fresh root node (val '\0') is
//        created and stored back through the reference.
// word : NUL-terminated string. Every character must map to a valid child
//        slot (character - 'a' inside the child[] array); otherwise the
//        call is a no-op and the trie is left untouched.
//
// A word that is already stored is ignored, so prefixCnt keeps meaning
// "number of distinct stored words passing through this node".
void Insert(TrieTreeNode *&root, const char *word)
{
	if (word == NULL)
		return;

	// sizeof is an unevaluated context, so this is safe even when root is NULL.
	const int slots = static_cast<int>(sizeof(root->child) / sizeof(root->child[0]));

	// Pass 1 (read-only): validate every character and follow the existing
	// path as far as it goes, so that a rejected or duplicate word never
	// leaves half-updated counters behind.
	const TrieTreeNode *probe = root;
	for (const char *c = word; *c != '\0'; ++c)
	{
		const int idx = *c - 'a';
		if (idx < 0 || idx >= slots)
			return;
		if (probe != NULL)
			probe = probe->child[idx];
	}
	if (probe != NULL && probe->isEnd)
		return; // already stored: counting it again would inflate prefixCnt

	if (root == NULL)
		root = new TrieTreeNode('\0');

	// Pass 2: create missing nodes and count the new word on every node of
	// its path (the root itself is not counted).
	TrieTreeNode *p = root;
	for (const char *c = word; *c != '\0'; ++c)
	{
		TrieTreeNode *&next = p->child[*c - 'a'];
		if (next == NULL)
		{
			next = new TrieTreeNode(*c);
			p->childCnt++;
		}
		next->prefixCnt++;
		p = next;
	}
	p->isEnd = true;
}
 
// Reports whether `word` was inserted as a complete word.
//
// root : root of the trie (NULL is treated as an empty trie).
// word : NUL-terminated string to look up.
//
// Returns true only when the whole path exists and its last node is marked
// isEnd. A NULL word, or a character that does not map to a child slot,
// yields false instead of indexing outside child[].
bool Find(TrieTreeNode *root, const char *word)
{
	if (root == NULL || word == NULL)
		return false;

	const int slots = static_cast<int>(sizeof(root->child) / sizeof(root->child[0]));

	const TrieTreeNode *p = root;
	// Walk until the terminator instead of calling strlen on every iteration.
	for (const char *c = word; *c != '\0'; ++c)
	{
		const int idx = *c - 'a';
		if (idx < 0 || idx >= slots)
			return false;
		p = p->child[idx];
		if (p == NULL)
			return false;
	}
	return p->isEnd;
}
 
// Prints the trie level by level (breadth-first) to std::cout.
//
// Each node is written as "<val>(<childCnt>) " and the whole listing is
// terminated by a single newline. A NULL root prints nothing at all.
void LevelOrderTraverse(TrieTreeNode *root)
{
	if(root == NULL)
		return;

	std::queue<TrieTreeNode *> pending;
	pending.push(root);
	while (!pending.empty())
	{
		TrieTreeNode *node = pending.front();
		pending.pop();

		std::cout << node->val << "(" << node->childCnt << ") ";

		// `::size` is qualified because an unqualified `size` clashes with
		// std::size (C++17) under the file's `using namespace std;`.
		for (int i = 0; i < ::size; i++)
		{
			if(node->child[i] != NULL)
				pending.push(node->child[i]);
		}
	}
	std::cout << "\n";
}
 
// Prints the subtree rooted at `treeNode` in pre-order to std::cout:
// the node itself as "<val>(<childCnt>) ", then its children in slot order.
// A NULL node prints nothing; no trailing newline is written.
void PreOrderTraverse(TrieTreeNode *treeNode)
{
	if (treeNode == NULL)
		return;

	std::cout << treeNode->val << "(" << treeNode->childCnt << ") ";

	// `::size` is qualified because an unqualified `size` clashes with
	// std::size (C++17) under the file's `using namespace std;`.
	for (int i = 0; i < ::size; i++)
	{
		PreOrderTraverse(treeNode->child[i]);
	}
}
 
// Prints the subtree rooted at `treeNode` in post-order to std::cout:
// all children in slot order first, then the node itself as
// "<val>(<childCnt>) ". A NULL node prints nothing; no trailing newline is
// written.
void PostOrderTraverse(TrieTreeNode *treeNode)
{
	if (treeNode == NULL)
		return;

	// `::size` is qualified because an unqualified `size` clashes with
	// std::size (C++17) under the file's `using namespace std;`.
	for (int i = 0; i < ::size; i++)
	{
		PostOrderTraverse(treeNode->child[i]);
	}

	std::cout << treeNode->val << "(" << treeNode->childCnt << ") ";
}
 
// Frees the whole subtree rooted at `treeNode` (children first, then the
// node itself) and resets the caller's pointer to NULL. Calling it on a
// NULL pointer is a no-op.
void MakeEmpty(TrieTreeNode *&treeNode)
{
	if (treeNode == NULL)
		return;

	// `::size` is qualified because an unqualified `size` clashes with
	// std::size (C++17) under the file's `using namespace std;`.
	for (int i = 0; i < ::size; i++)
	{
		MakeEmpty(treeNode->child[i]);
	}

	delete treeNode;
	treeNode = NULL;
}
 
// Inserts the first `n` strings of `words` into the trie rooted at `root`,
// in array order, by calling Insert once per entry.
void BuildTrieTree(TrieTreeNode *&root,const char *words[], int n)
{
	const char **cursor = words;
	for (int remaining = n; remaining > 0; --remaining)
	{
		Insert(root, *cursor);
		++cursor;
	}
}
 
bool Remove(TrieTreeNode *&treeNode, const char *word,int pos, int n) 
{
	if (pos == n)
	{
		treeNode->isEnd = false;//set the node not to be an end
		//if the last node contains the last char is a leaf,return true to delete it
		return treeNode->childCnt == 0;
	}
 
	//not found, not delete this node
	if (treeNode->child[word[pos]-'a'] == NULL) 
		return false;
 
	//if true, the child is a leaf, delete the child
	if ( Remove( treeNode->child[word[pos]-'a'], word, pos+1, n))
	{ 
		delete treeNode->child[word[pos]-'a'];
		treeNode->child[word[pos]-'a'] = NULL;
 
		treeNode->prefixCnt--;
		//if the node becomes a leaf and is not an end,return true to delete it
		if (--treeNode->childCnt == 0 && treeNode->isEnd == false)
			return true;
	}
 
	//other not delete
	return false;
}
 
// Counts the stored words that start with `prefix`.
//
// root   : root of the trie (NULL is treated as an empty trie).
// prefix : NUL-terminated prefix to look up.
//
// Returns the prefixCnt of the node reached by following `prefix`, or 0 when
// the path does not exist, an argument is NULL, or a character does not map
// to a child slot. For an empty prefix the result is the root's own
// prefixCnt, which Insert never increments.
int CountWordsWithPrefix(TrieTreeNode *root, const char *prefix)
{
	if (root == NULL || prefix == NULL)
		return 0;

	const int slots = static_cast<int>(sizeof(root->child) / sizeof(root->child[0]));

	const TrieTreeNode *p = root;
	// Walk until the terminator instead of calling strlen on every iteration.
	for (const char *c = prefix; *c != '\0'; ++c)
	{
		const int idx = *c - 'a';
		if (idx < 0 || idx >= slots)
			return 0;
		p = p->child[idx];
		if (p == NULL)
			return 0;
	}
	return p->prefixCnt;
}
 
// Demo driver: builds a small trie, prints it in three traversal orders,
// then exercises Find, Insert, Remove and CountWordsWithPrefix.
int main()
{
	// Data set used for the removal walkthrough below. An alternative set
	// for exercising insertion only:
	//   {"b","abc","abd","bcd","abcd","efg","hii"}
	const char *words[] = {"abc","ad","ef"};
	const int wordCount = sizeof(words)/sizeof(words[0]);

	TrieTreeNode *root = new TrieTreeNode('\0');
	BuildTrieTree(root,words,wordCount);

	LevelOrderTraverse(root);
	PreOrderTraverse(root);
	cout << '\n';
	PostOrderTraverse(root);
	cout << "\n";

	cout << (Find(root,"ef") ? "ef found" : "ef not found") << endl;

	// After this insertion node 'e' ends a word but still has a child, so
	// it cannot be deleted until that child is gone.
	Insert(root,"e");
	LevelOrderTraverse(root);

	// Removing "ef" deletes leaf 'f' and leaves 'e' in place as a word end.
	Remove(root,"ef",0,strlen("ef"));
	LevelOrderTraverse(root);

	// Now 'e' is a leaf, so removing "e" deletes the node itself.
	Remove(root,"e",0,strlen("e"));
	LevelOrderTraverse(root);

	// Prefix count for "a" before and after removing "ad".
	cout << CountWordsWithPrefix(root,"a")<<endl;
	Remove(root,"ad",0,strlen("ad"));
	cout << CountWordsWithPrefix(root,"a")<<endl;

	MakeEmpty(root);

	return 0;
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值