字典树基本概念与代码演示

非英杰不图

已于 2023-04-08 22:38:16 修改

阅读量186

点赞数

文章标签：算法

于 2023-04-08 20:48:00 首次发布

本文链接：https://blog.csdn.net/qq_34603425/article/details/129915242

版权

文章介绍了Trie树（字典树）的基本概念，作为用于快速查找和字符串排序的数据结构。它通过边表示字符，节点表示字符串集合，能以O(n)时间复杂度进行排序。文章提供了C++代码示例，展示了如何使用指针实现Trie树的插入和搜索功能，并探讨了用数组替代指针实现的版本，两者在逻辑上保持一致。

摘要由CSDN通过智能技术生成

基本概念

名称：Trie
其他名称：字典树，单词查找树
作用：
单词查找
字符串排序
解释：字典树是存储字符串集合的一种数据结构，可以看成一种N叉树。每一条边表示一个字符。不同颜色的点表示当前截止到这个字符的字符串是否存在于集合中。例如对于如下字典树：

黄色的点表示当前截止的字符串存在，白色表示不存在，那么上面的字典树就表示如下字符串的集合：

a,
aae,
ac,
b,
gz,
gzf,
gzh

可以很明显看到从字典树的根节点入手可以进行字符串的查找。

此外构造字典树时还规定同一个节点出发的从左到右的边必须是有顺序的。

因此如果对字典树做深度优先遍历（先输出当前节点对应的字符串，再按从左到右的顺序依次访问各个子节点），那么遍历的结果就是升序（字典序）的。

因此字典树也能做字符串的排序，而且排序的时间复杂度是O(n)的，其中 n 是所有待排序字符串的总字符数（字符集大小视为常数）。

字典树的升华：
树的节点代表集合。树的边代表关系。

在字典树中，特定的节点和边也代表了特定的集合和关系：

在这里插入图片描述

代码演示

插入与搜索操作

#include <iostream>
#include <algorithm>
#include <string>
#include <map>
#include <set>
#include <vector>
using namespace std;
#define BASE 26

// A single trie node. `flag` is true when the path from the root to this node
// spells a word that has been inserted; `next[i]` points to the child reached
// through the letter 'a' + i, or is nullptr when that child does not exist.
// A node does not free its children; Trie::clearTrie does that.
class node {
	public:
		node () : flag(false) {
			for (int i = 0; i < BASE; i++) next[i] = nullptr;
		}
		~node () {}
		bool flag;
		node *next[BASE];

};

// Pointer-based trie over the BASE letters starting at 'a'.
// Supports inserting a word and testing whether a word was inserted.
class Trie {
	public:
		Trie () {
			root = new node();
		}
		// The trie owns every node reachable from `root` and deletes them in
		// its destructor. A member-wise copy would make two objects delete the
		// same nodes, so copying is disabled.
		Trie (const Trie &) = delete;
		Trie &operator=(const Trie &) = delete;

		// Recursively deletes the subtree rooted at `root` (nullptr is allowed).
		static void clearTrie(node *root) {
			if (root == nullptr) return;
			for (int i = 0; i < BASE; i++) clearTrie(root->next[i]);
			delete root;
			return;
		}

		// Inserts `word`.
		// Returns true when the word was not stored before, false when it was
		// already present or when it contains a character outside
		// 'a' .. 'a' + BASE - 1 (such a word is rejected and nothing is added).
		bool insert(const std::string &word) {
			// Validate first so a rejected word leaves no partial path behind.
			if (!isIndexable(word)) return false;
			node *p = root;
			for (auto x : word) {
				int ind = x - 'a';
				if (p->next[ind] == nullptr) {
					p->next[ind] = new node();
				}
				p = p->next[ind];
			}
			if (p->flag) return false;
			p->flag = true;
			return true;
		}

		// Returns true when `word` was inserted earlier. Prefixes of stored
		// words and words with characters outside the alphabet yield false.
		bool search(const std::string &word) const {
			const node *p = root;
			for (auto x : word) {
				int ind = x - 'a';
				// Without this check a character such as 'A' or '1' would
				// index outside next[].
				if (ind < 0 || ind >= BASE) return false;
				p = p->next[ind];
				if (p == nullptr) return false;
			}
			return p->flag;
		}

		~Trie () {
			clearTrie(root);
		}

	private:
		// True when every character of `word` maps to a valid child index.
		static bool isIndexable(const std::string &word) {
			for (auto x : word) {
				int ind = x - 'a';
				if (ind < 0 || ind >= BASE) return false;
			}
			return true;
		}

		node * root;

};

int main() {
	//   字典树插入和查找代码
	Trie t;
	int op;
	string s;
	while (cin >> op >> s) {
		switch (op) {
			case 1: t.insert(s); break;
			case 2: cout << "search word = " << s << " result : "  << t.search(s) << endl; break;
		}
	}

代码运行结果：

1 aaa
1 hello
1 abc
1 world
1 bca
1 acc
2 hello
search word = hello result : 1
2 abc
search word = abc result : 1
2 a
search word = a result : 0
2 hell
search word = hell result : 0
2 ac
search word = ac result : 0
2 acc
search word = acc result : 1

排序操作

#include <iostream>
#include <algorithm>
#include <string>
#include <map>
#include <set>
#include <vector>
using namespace std;
#define BASE 26

// A single trie node. `flag` is true when the path from the root to this node
// spells a word that has been inserted; `next[i]` points to the child reached
// through the letter 'a' + i, or is nullptr when that child does not exist.
// A node does not free its children; Trie::clearTrie does that.
class node {
	public:
		node () : flag(false) {
			for (int i = 0; i < BASE; i++) next[i] = nullptr;
		}
		~node () {}
		bool flag;
		node *next[BASE];

};

// Pointer-based trie over the BASE letters starting at 'a'.
// Besides insert/search it can print the stored words in ascending order,
// because children are always visited from index 0 to BASE - 1.
class Trie {
	public:
		Trie () {
			root = new node();
		}
		// The trie owns every node reachable from `root` and deletes them in
		// its destructor. A member-wise copy would make two objects delete the
		// same nodes, so copying is disabled.
		Trie (const Trie &) = delete;
		Trie &operator=(const Trie &) = delete;

		// Recursively deletes the subtree rooted at `root` (nullptr is allowed).
		static void clearTrie(node *root) {
			if (root == nullptr) return;
			for (int i = 0; i < BASE; i++) clearTrie(root->next[i]);
			delete root;
			return;
		}

		// Inserts `word`.
		// Returns true when the word was not stored before, false when it was
		// already present or when it contains a character outside
		// 'a' .. 'a' + BASE - 1 (such a word is rejected and nothing is added).
		bool insert(const std::string &word) {
			// Validate first so a rejected word leaves no partial path behind.
			if (!isIndexable(word)) return false;
			node *p = root;
			for (auto x : word) {
				int ind = x - 'a';
				if (p->next[ind] == nullptr) {
					p->next[ind] = new node();
				}
				p = p->next[ind];
			}
			if (p->flag) return false;
			p->flag = true;
			return true;
		}

		// Returns true when `word` was inserted earlier. Prefixes of stored
		// words and words with characters outside the alphabet yield false.
		bool search(const std::string &word) const {
			const node *p = root;
			for (auto x : word) {
				int ind = x - 'a';
				// Without this check a character such as 'A' or '1' would
				// index outside next[].
				if (ind < 0 || ind >= BASE) return false;
				p = p->next[ind];
				if (p == nullptr) return false;
			}
			return p->flag;
		}

		// Prints every stored word in the subtree rooted at `Node`, each
		// prefixed with `prePath` and followed by a single space, in ascending
		// order. Does nothing when `Node` is nullptr.
		void dfs(node * Node, std::string prePath) const {
			if (Node == nullptr) return;
			walk(Node, prePath);
		}

		// Prints all stored words in ascending order on one line.
		void output() const {
			dfs(root, "");
			std::cout << std::endl;
		}

		~Trie () {
			clearTrie(root);
		}

	private:
		// True when every character of `word` maps to a valid child index.
		static bool isIndexable(const std::string &word) {
			for (auto x : word) {
				int ind = x - 'a';
				if (ind < 0 || ind >= BASE) return false;
			}
			return true;
		}

		// Pre-order walk that reuses one path buffer: a letter is appended
		// before descending into an existing child and removed afterwards, so
		// no string is built for children that do not exist.
		static void walk(const node *cur, std::string &path) {
			if (cur->flag) std::cout << path << " ";
			for (int ind = 0; ind < BASE; ind++) {
				if (cur->next[ind] == nullptr) continue;
				path.push_back(static_cast<char>('a' + ind));
				walk(cur->next[ind], path);
				path.pop_back();
			}
		}

		node * root;

};

int main() {
	// Uses the trie as a string sorter: reads a count n followed by n words,
	// inserts them, then prints the distinct words in ascending order.

	Trie t;
	int n = 0;
	// A missing or malformed count is treated as zero words.
	if (!(std::cin >> n)) n = 0;
	std::string s;
	// Stop as soon as a read fails so that a stale or empty `s` is never
	// inserted when fewer than n words are supplied.
	for (int i = 0; i < n && (std::cin >> s); i++) {
		t.insert(s);
	}
	t.output();
	return 0;
}

程序运行结果：

input: 
4
aaa
abc
bca
acc

output:
aaa abc acc bca

用数组代替指针实现字典树

#include <iostream>
#include <algorithm>
#include <string>
#include <map>
#include <set>
#include <vector>
using namespace std;
#define BASE 26

// Array-backed trie node. Children are referenced by their index in the
// global `trie` pool rather than by pointer; index 0 means "no child".
class node {
	public:
		int flag;       // non-zero when a stored word ends at this node
		int next[BASE]; // pool index of the child for letter 'a' + i, 0 if none
		// Resets the node to "no word ends here, no children".
		void clear() {
			flag = 0;
			for (int i = 0; i < BASE; i++) {
				next[i] = 0;
			}
		}
} trie[10000];

// `cnt` is the first pool index that has not been handed out yet;
// `root` is the pool index of the root node.
int cnt = 2, root = 1;

// Resets the trie to empty: the allocator starts again right after the root
// and the root loses all children. Nodes handed out earlier are cleared again
// by getNewNode() when they are reused.
void clearTie() {
	// Assign the globals. Declaring locals with the same names here would
	// shadow them and leave the allocator state untouched.
	cnt = 2;
	root = 1;
	trie[root].clear();
	return;
}

// Hands out the next unused node, cleared, and returns its pool index.
// Returns 0 (the "no child" index) when the pool is exhausted.
int getNewNode() {
	const int capacity = static_cast<int>(sizeof(trie) / sizeof(trie[0]));
	if (cnt >= capacity) return 0;
	trie[cnt].clear();
	return cnt++;
}

// Inserts `s` into the trie, printing one trace line per character.
// A word containing a character outside 'a' .. 'a' + BASE - 1 is rejected
// before anything is modified. If the node pool runs out part-way, the word
// is not marked as stored. Both failures are reported on standard error.
void insert(const std::string &s) {
	for (auto x : s) {
		int ind = x - 'a';
		if (ind < 0 || ind >= BASE) {
			std::cerr << "insert rejected: unsupported character in word" << std::endl;
			return;
		}
	}
	int p = root;
	for (auto x : s) {
		int ind = x - 'a';
		if (trie[p].next[ind] == 0) {
			int fresh = getNewNode();
			if (fresh == 0) {
				std::cerr << "insert failed: node pool exhausted" << std::endl;
				return;
			}
			trie[p].next[ind] = fresh;
		}
		p = trie[p].next[ind];
		printf("insert ind %d, p %d\n", ind, p);
	}
	trie[p].flag = 1;
	return;
}

// Returns true when `s` was inserted earlier, printing one trace line per
// character followed. Words with characters outside the alphabet yield false.
bool search (const std::string &s) {
	int p = root;
	for (auto x : s){
		int ind = x - 'a';
		// Without this check a character such as 'A' would index outside next[].
		if (ind < 0 || ind >= BASE) {
			return false;
		}
		p = trie[p].next[ind];
		printf("search ind %d, p %d\n", ind, p);
		if (p == 0) {
			return false;
		}

	}
	return trie[p].flag != 0;
}

int main() {
	// Driver for the array-backed trie. After printing a banner and the
	// allocator state, it reads "<op> <word>" pairs until input ends:
	// op 1 inserts the word, op 2 reports whether the word is stored.
	std::cout << "trie version 2 : " << std::endl;
	clearTie();
	printf("cnt : %d, root : %d\n", cnt, root);

	int command;
	std::string word;
	while (std::cin >> command >> word) {
		if (command == 1) {
			insert(word);
		} else if (command == 2) {
			// search() is called inside the output expression on purpose: it
			// prints its own trace, so hoisting the call would reorder output.
			std::cout << "search word = " << word << " result : " << search(word) << std::endl;
		}
	}
	return 0;
}