trie树的几种实现 Hdu 1251 统计难题

最新推荐文章于 2020-03-04 11:18:02 发布

haikuc

最新推荐文章于 2020-03-04 11:18:02 发布

阅读量104

点赞数

分类专栏：字典树

本文链接：https://blog.csdn.net/qq_33553218/article/details/102489566

版权

字典树专栏收录该内容

1 篇文章 0 订阅

订阅专栏

Hdu 1251 统计难题
这是一个比较基础的字典树的题，通过它展示普通trie树和压缩字典树的实现。

普通trie树

#include<iostream>
#include<cstring>

using namespace std;
// One trie node. Nodes live in the global pool `r` and refer to each other by
// pool index; r[0] is the root, so index 0 doubles as the "no child" marker.
struct node {
	int chd[26]; // chd[c]: pool index of the child for letter 'a' + c, 0 if absent
	int cnt;     // number of inserted words that pass through this node
	node() {
		std::memset(chd, 0, sizeof(chd));
		cnt = 0;
	}
}r[500000];

// Prefix-counting trie over lowercase ASCII words, stored in the pool `r`.
// All trie objects share that single pool, so only one instance should be used.
struct trie {
	int sz; // number of pool slots handed out so far (the root r[0] is not counted)
	trie() { sz = 0; }

	// Maps a character to its child slot, or -1 if it is not in 'a'..'z'.
	static int slot(char c) {
		return (c >= 'a' && c <= 'z') ? c - 'a' : -1;
	}

	// Inserts the word s, bumping the counter of every node along its path.
	// The word is considered to end at the first character outside 'a'..'z'
	// (e.g. a stray '\r'), which previously indexed chd[] out of bounds.
	// If the node pool is exhausted the rest of the word is dropped instead of
	// writing past the end of `r`.
	void add(const char *s) {
		const int capacity = static_cast<int>(sizeof(r) / sizeof(r[0]));
		int now = 0;
		for (; *s != '\0'; ++s) {
			const int p = slot(*s);
			if (p < 0) break;
			if (!r[now].chd[p]) {
				if (sz + 1 >= capacity) break; // no free slot left
				r[now].chd[p] = ++sz;
			}
			now = r[now].chd[p];
			r[now].cnt++;
		}
	}

	// Returns how many inserted words start with s.
	// A query containing a character outside 'a'..'z' matches nothing.
	int query(const char *s) const {
		int now = 0;
		for (; *s != '\0'; ++s) {
			const int p = slot(*s);
			if (p < 0 || !r[now].chd[p]) return 0;
			now = r[now].chd[p];
		}
		return r[now].cnt;
	}
};
char s[25]; // shared input buffer for dictionary words and for queries

trie myt;
// Reads dictionary words, one per line, up to the first blank line, then
// prints for every following whitespace-separated query the number of
// dictionary words that start with it.
int main()
{
	// Stop on a blank line, on end of input, or on a line too long for s.
	while (cin.getline(s, sizeof(s))) {
		size_t n = strlen(s);
		if (n > 0 && s[n - 1] == '\r') s[--n] = '\0'; // tolerate CRLF line endings
		if (n == 0) break; // blank line separates the dictionary from the queries
		myt.add(s);
	}
	// The field width keeps scanf from writing past the 25-byte buffer.
	while (scanf("%24s", s) == 1) {
		printf("%d\n", myt.query(s));
	}
	return 0;
}

也可以改用孩子兄弟表示法，或者在 node 里用 vector 存放孩子节点，这样可以节约空间，但查找孩子节点的时间可能会变长。

压缩字典树

#include<iostream>
#include<cstring>
using namespace std;
// A node of the compressed trie. Its edge label is the slice [st, ed) of the
// global character buffer `list`.
struct node {
	int st;       // index in list where this node's label starts
	int ed;       // index in list one past the end of the label
	int cnt;      // number of inserted words whose path goes through this node
	node *ch[26]; // child for each letter 'a'..'z', or nullptr
};
// Backing storage for all edge labels: every newly created node appends the
// unmatched tail of the inserted word here. Inserting banana, band, bee,
// absolute, acm in that order grows it as
//   banana -> bananad -> bananadee -> bananadeeabsolute -> bananadeeabsolutecm
char list[10000000];
char s[20]; // the word currently being inserted or queried

node *T = new node; // root of the trie
node *tnode;        // scratch pointer insert() uses for a freshly created node
int len;            // length of the word currently held in s
int sz;             // number of characters of list already in use
// Resets *p to an empty node: no children, a zero count, and an empty label
// positioned at the current end of list (sz).
void clear(node *p) {
	for (node *&child : p->ch) child = nullptr;
	p->cnt = 0;
	p->ed = sz;
	p->st = sz;
}
// Returns how many leading characters of p's label agree with the current
// word s starting at position st (bounded by both the label and len).
int same(node *p, int st) {
	int matched = 0;
	while (p->st + matched < p->ed &&
	       st + matched < len &&
	       list[p->st + matched] == s[st + matched]) {
		++matched;
	}
	return matched;
}
// Splits node p after the first ll characters of its label. Used when the
// current word shares only part of the label, e.g. label "abab" against the
// word "abc" or "aba".
// A new front node takes the first ll characters, inherits p's count and
// replaces p in the parent (p is a reference to the parent's child slot).
// The old node keeps the remaining characters and its children, and becomes
// the front node's only child.
void diliver(node *&p, int ll) {
	node *front = new node;
	clear(front);
	front->st = p->st;
	front->ed = front->st + ll;
	front->cnt = p->cnt;
	front->ch[list[front->ed] - 'a'] = p;
	p->st = front->ed;
	p = front;
}
// Inserts the suffix of the current word s that starts at position st into
// the subtree rooted at p. p is a reference so that a split can replace the
// node in its parent.
void insert(node * &p, int st) {
	const int matched = same(p, st);
	const int label_len = p->ed - p->st;
	// Only part of the label matches: cut the node in two and continue from
	// the front half.
	if (matched < label_len)
		diliver(p, matched);
	++p->cnt;
	st += matched;
	// The word ends here (it equals, or is a prefix of, what the tree holds).
	if (st == len) return;

	node *&slot = p->ch[s[st] - 'a'];
	if (slot != nullptr) {
		// A child already starts with the next letter: keep descending.
		insert(slot, st);
		return;
	}

	// No such child: create a node holding the rest of the word and append
	// that rest to list.
	tnode = new node;
	clear(tnode);
	tnode->cnt = 1;
	int pos = st;
	while (pos < len) {
		list[tnode->ed] = s[pos];
		++tnode->ed;
		++pos;
	}
	slot = tnode;
	sz = tnode->ed;
}
// Returns the number of inserted words that start with the current word s
// (length len), matching from position st below node p.
// Returns 0 as soon as the word cannot be matched.
int query(node* p, int st) {
	// Nothing left to match: every word counted at p has this prefix.
	if (st >= len) return p->cnt;
	while (true) {
		const int c = s[st] - 'a';
		if (c < 0 || c >= 26) return 0; // not a lowercase letter: cannot be in the tree
		p = p->ch[c];
		if (!p) return 0; // no child starts with the next letter
		const int ll = same(p, st);
		st += ll;
		// The query ends inside or exactly at the end of this label.
		if (st == len) return p->cnt;
		// The query diverges in the middle of the label. Descending from here
		// would wrongly match a child keyed by the mismatching letter
		// (e.g. words "abc", "abcd" and query "abd").
		if (ll < p->ed - p->st) return 0;
	}
}
// Reads dictionary words, one per line, up to the first blank line, then
// prints for every following whitespace-separated query the number of
// dictionary words that start with it.
int main()
{
	clear(T);
	// Stop on a blank line, on end of input, or on a line too long for s.
	while (cin.getline(s, sizeof(s))) {
		len = strlen(s);
		if (len > 0 && s[len - 1] == '\r') s[--len] = '\0'; // tolerate CRLF line endings
		if (len == 0) break; // blank line separates the dictionary from the queries
		insert(T, 0);
	}
	// s holds 20 bytes, so limit each token to 19 characters plus the terminator.
	while (scanf("%19s", s) == 1) {
		len = strlen(s);
		printf("%d\n", query(T, 0));
	}
	return 0;
}