UVA 11732-"strcmp()" Anyone?-trie（左儿子右兄弟表示法(省空间)）

最新推荐文章于 2023-11-28 23:23:25 发布

yuhong_liu

最新推荐文章于 2023-11-28 23:23:25 发布

阅读量755

点赞数

分类专栏：数据结构字典树

本文链接：https://blog.csdn.net/viphong/article/details/48368645

版权

数据结构字典树专栏收录该内容

7 篇文章 0 订阅

订阅专栏

本题数据略大，显然不能用普通的儿子节点法建立trie，因此采用左儿子右兄弟表示法建树，其实就是二叉树表示法，本质就是把原来的26叉树转成2叉树，时间复杂度常数增大，空间复杂度尽可能减小

我们知道，在26叉树中，如果数据不那么变态，大部分节点的儿子指针都是被浪费掉的；而使用二叉树（左儿子右兄弟）建树，能充分利用空间。缺点是：原来要查询某个节点的子节点中有没有‘a’字符，只需要访问ch[u][0]即可，复杂度为O（1）；而在二叉树表示中，我们得遍历它所有的儿子节点，最差的情况是O（26）

当然，如果数据出到了最极端的情况，那么两种方法就没什么区别了。

本题按照LRJ的做法，是把单词连同最后的‘\0’一起插入字典树。其实也可以不插入‘\0’，只是统计时要对在同一节点结束的（即完全相同的）字符串做特殊处理：两个完全相同、长度为n的串，比较次数为2*（n+1）。我两种方法都写了一下，时间差不多

分别是 512ms,492ms

插入‘\0’：

// UVa11732 strcmp() Anyone?
// Yuhong Liu
#include<cstring>
#include<vector>
#include<cstdio>
#include<algorithm>
using namespace std;

// Capacity of the node arrays: one node per character of every word
// (including the terminating '\0') plus the root.
// NOTE(review): presumably sized for ~4000 words of ~1000 characters with slack.
const int maxnode = 4005 * 1005 + 10; 
int  ok,ok2; 
long long    ans; // total comparison count; Trie::cal adds to it, the caller resets it
// Trie in left-child / right-sibling form: every node stores only its first
// child and its next sibling, so memory grows with the number of nodes and
// not with nodes * alphabet size. Any char value may label a node.
struct Trie 
{
	int head[maxnode]; // head[i]: index of node i's first child, 0 if none
	int next[maxnode]; // next[i]: index of node i's next sibling, 0 if none
	
	char ch[maxnode];  // ch[i]: character stored on node i
	int tot[maxnode];  // tot[i]: number of inserted words whose path passes through node i
	int sz; // number of nodes currently in use
	
	// Resets the trie to a lone root (node 0). Other slots are re-initialised
	// lazily when insert() allocates them.
	void clear() { sz = 1; tot[0] = head[0] = next[0] = 0; }
	
	// Inserts s including its terminating '\0', incrementing tot on every
	// node along the path (root included). Equal words therefore end on the
	// same '\0' node.
	void insert(const char *s) 
	{
		const int len = static_cast<int>(strlen(s));
		int u = 0;
		tot[0]++;
		for(int i = 0; i <= len; i++) // <= len: the '\0' gets a node too
		{
			// Walk the sibling list of u's children looking for s[i].
			// Node 0 is the root and never a child, so 0 means "not found".
			int v = head[u];
			while(v != 0 && ch[v] != s[i])
				v = next[v];
			
			if(v == 0)
			{
				v = sz++; // allocate a fresh node
				tot[v] = 0;
				ch[v] = s[i];
				head[v] = 0;
				next[v] = head[u]; // push onto the front of u's child list
				head[u] = v;
			}
			
			tot[v]++;
			u = v;
		} 
	} 
	
	
	// Adds to the global `ans` the number of character comparisons spent on
	// every pair of words inside the subtree of node u, where u sits at
	// distance `depth` from the root. Call as cal(0, 0) after resetting `ans`.
	// The return value is always 0; the result is delivered through `ans`.
	long long cal(int depth,int u) 
	{
		// A '\0' node collects identical words. Because the '\0' was inserted,
		// such a pair needs no special case: it simply costs 2*depth
		// comparisons, i.e. 2*(n+1) for two equal words of length n.
		if (u != 0 && ch[u] == '\0') 
		{
			const long long same = tot[u];
			ans += same * (same - 1) / 2 * (2LL * depth); // 64-bit: overflows int
			return 0;
		}
		
		// If the first child carries every word of u, it is the only child:
		// no pair separates at this node, so just descend.
		const int first = head[u];
		if (first != 0 && tot[first] == tot[u])
		{
			cal(depth + 1, first);
			return 0;
		}
		
		// Gather the children; at most one per distinct char value.
		int kids[256];
		int cun = 0;
		long long remaining = 0;
		for(int v = head[u]; v != 0; v = next[v])
		{
			kids[cun++] = v;
			remaining += tot[v];
		}
		
		// Count pairs whose words go into different children. Such a pair
		// matches on `depth` characters and differs on the next one, which
		// costs 2*depth+1 comparisons.
		long long crossPairs = 0;
		for (int i = 0; i < cun; i++)
		{
			remaining -= tot[kids[i]];
			crossPairs += static_cast<long long>(tot[kids[i]]) * remaining;
		}
		ans += crossPairs * (2LL * depth + 1);
		
		// Pairs inside one child are handled deeper down; a child holding a
		// single word contains no pair and is skipped.
		for (int i = 0; i < cun; i++)
		{
			if (tot[kids[i]] > 1)
				cal(depth + 1, kids[i]);
		}
		return 0;
	}
	
}; 


const int maxl = 1000 + 10;   // size of the buffer that holds one input word

int n;            // number of words in the current test case
char  word[maxl]; // the word currently being read
Trie trie;        // global, so its node arrays live in static storage

// Reads test cases until a word count of 0 (or the end of input). For each
// case it builds the trie from the n words and prints the value accumulated
// in `ans` as "Case k: value".
int main()
{
	int kase = 1; 
	while(scanf("%d", &n) == 1 && n) 
	{ 
		trie.clear();
		ok=0; // only ever assigned in the visible code; kept for compatibility
		for(int i = 1; i <= n; i++) 
		{
			// The width 1009 (= maxl - 1) stops an over-long token from
			// overrunning word[]; keep it in sync with maxl.
			if (scanf("%1009s", word) != 1)
				return 0; // input ended mid-case: do not re-insert a stale buffer
			trie.insert(word); 
		}
		ans=0; 
		
		trie.cal(0,0); 
		
		printf("Case %d: %lld\n", kase++, ans);
	}
	return 0;
}

不插入‘\0’：

// UVa11732 strcmp() Anyone?
// Yuhong Liu
#include<cstring>
#include<vector>
#include<cstdio>
#include<algorithm>
using namespace std;

// Capacity of the node arrays: one node per character of every word plus the root.
// NOTE(review): presumably sized for ~4000 words of ~1000 characters with slack.
const int maxnode = 4005 * 1005 + 10; 
int  ok,ok2; 
long long       ans; // total comparison count; Trie::cal adds to it, the caller resets it
// Trie in left-child / right-sibling form: every node stores only its first
// child and its next sibling, so memory grows with the number of nodes and
// not with nodes * alphabet size. Any char value may label a node.
struct Trie 
{
	int head[maxnode]; // head[i]: index of node i's first child, 0 if none
	int next[maxnode]; // next[i]: index of node i's next sibling, 0 if none
	
	char ch[maxnode];  // ch[i]: character stored on node i
	int tot[maxnode];  // tot[i]: number of inserted words whose path passes through node i
	int sz; // number of nodes currently in use
	
	// Resets the trie to a lone root (node 0). Other slots are re-initialised
	// lazily when insert() allocates them.
	void clear() { sz = 1; tot[0] = head[0] = next[0] = 0; }
	
	// Inserts the characters of s WITHOUT its terminating '\0', incrementing
	// tot on every node along the path (root included). A word that ends at
	// node u is visible only as the difference between tot[u] and the sum of
	// tot over u's children.
	void insert(const char *s) 
	{
		const int len = static_cast<int>(strlen(s));
		int u = 0;
		tot[0]++;
		for(int i = 0; i < len; i++) 
		{
			// Walk the sibling list of u's children looking for s[i].
			// Node 0 is the root and never a child, so 0 means "not found".
			int v = head[u];
			while(v != 0 && ch[v] != s[i])
				v = next[v];
			
			if(v == 0)
			{
				v = sz++; // allocate a fresh node
				tot[v] = 0;
				ch[v] = s[i];
				head[v] = 0;
				next[v] = head[u]; // push onto the front of u's child list
				head[u] = v;
			}
			
			tot[v]++;
			u = v;
		} 
	} 
	
	
	// Adds to the global `ans` the number of character comparisons spent on
	// every pair of words inside the subtree of node u, where u sits at
	// distance `depth` from the root. Call as cal(0, 0) after resetting `ans`.
	// The return value is always 0; the result is delivered through `ans`.
	long long cal(int depth,int u) 
	{
		// If the first child carries every word of u, it is the only child and
		// no word ends at u: no pair separates here, so just descend.
		const int first = head[u];
		if (first != 0 && tot[first] == tot[u])
		{
			cal(depth + 1, first);
			return 0;
		}
		
		// Gather the children; at most one per distinct char value.
		int kids[256];
		int cun = 0;
		long long inChildren = 0;
		for(int v = head[u]; v != 0; v = next[v])
		{
			kids[cun++] = v;
			inChildren += tot[v];
		}
		
		// Words that end exactly at u. They are treated as one merged group,
		// playing the role the '\0' child would have had.
		const long long ended = tot[u] - inChildren;
		
		// Count pairs whose words fall into different groups (two different
		// children, or a child and the "ended" group). Such a pair matches on
		// `depth` characters and differs on the next one: 2*depth+1 comparisons.
		long long remaining = tot[u];
		long long crossPairs = 0;
		for (int i = 0; i < cun; i++)
		{
			remaining -= tot[kids[i]];
			crossPairs += static_cast<long long>(tot[kids[i]]) * remaining;
		}
		ans += crossPairs * (2LL * depth + 1);
		
		// Two identical words of length `depth` cost 2*(depth+1) comparisons.
		// Computed in 64 bits: the product overflows int for large groups.
		ans += ended * (ended - 1) / 2 * (2LL * depth + 2);
		
		// Pairs inside one child are handled deeper down; a child holding a
		// single word contains no pair and is skipped.
		for (int i = 0; i < cun; i++)
		{
			if (tot[kids[i]] > 1)
				cal(depth + 1, kids[i]);
		}
		return 0;
	}
	
}; 


const int maxl = 1000 + 10;   // size of the buffer that holds one input word

int n;            // number of words in the current test case
char  word[maxl]; // the word currently being read
Trie trie;        // global, so its node arrays live in static storage

// Reads test cases until a word count of 0 (or the end of input). For each
// case it builds the trie from the n words and prints the value accumulated
// in `ans` as "Case k: value".
int main()
{
	int kase = 1; 
	while(scanf("%d", &n) == 1 && n) 
	{ 
		trie.clear();
		ok=0; // only ever assigned in the visible code; kept for compatibility
		for(int i = 1; i <= n; i++) 
		{
			// The width 1009 (= maxl - 1) stops an over-long token from
			// overrunning word[]; keep it in sync with maxl.
			if (scanf("%1009s", word) != 1)
				return 0; // input ended mid-case: do not re-insert a stale buffer
			trie.insert(word); 
		}
		ans=0; 
		
		trie.cal(0,0); 
		
		printf("Case %d: %lld\n", kase++, ans);
	}
	return 0;
}

yuhong_liu

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
UVA 11732-"strcmp()" Anyone?-trie（左儿子右兄弟表示法(省空间)）

本题数据略大，显然不能用普通的儿子节点法建立trie，因此采用左儿子右兄弟表示法建树，其实就是二叉树表示法，本质就是把原来的26叉树转成2叉树，时间复杂度常数增大，空间复杂度尽可能减小我们知道26叉树中，如果数据不那么变态，应该是大部分节点都是浪费掉的，而使用二叉树建树，能充分利用空间，缺点就是原来访问某个节点的子节点如果查询子节点中有没‘a’字符，我们只需要访问ch[u][0]即可，O（
复制链接

扫一扫