Trie树-Java实现

最新推荐文章于 2024-04-10 15:17:12 发布

zhuguorong11

最新推荐文章于 2024-04-10 15:17:12 发布

阅读量363

点赞数

分类专栏：算法 java

本文链接：https://blog.csdn.net/zhuguorong11/article/details/78162651

版权

算法同时被 2 个专栏收录

127 篇文章 0 订阅

订阅专栏

java

41 篇文章 0 订阅

订阅专栏

这篇博客探讨了如何使用Trie树解决数据存储和查找问题，特别是在处理大量词汇数据时。通过Java代码展示了Trie树的插入、查找、遍历和实现单词联想功能，以应对如查找高频词等挑战。

摘要由CSDN通过智能技术生成

刷 LeetCode 的时候，经常会碰到使用 Trie 的题目。Trie 即字典树（也叫前缀树）。

一组单词，inn, int, at, age, adv, ant, 我们可以得到下面的Trie：

当遇到这样一种问题，比如“有一个 1G 大小的文件，里面每一行是一个词，词的大小不超过 16 字节，内存限制大小是 1M，返回频数最高的 100 个词”，该如何解决？有一种方案就是使用 Trie 树加排序实现。

以下代码用 Java 实现插入、查找、遍历、单词联想（找公共前缀）：

package jdkproxy;

import java.util.HashMap;

/**
 * A case-insensitive trie (prefix tree) over the 26 letters a-z that also
 * counts how many times each word was inserted.
 *
 * <p>Input is lower-cased with {@code Locale.ROOT} before use, so "China" and
 * "china" are the same word. Characters outside a-z are not supported:
 * {@link #insert(String)} rejects them, and the lookup methods treat them as
 * "not found".
 */
public class Trie_Tree {
	/** Root of the trie; it stands for the empty prefix and never holds a word. */
	public Node root;

	/** Creates an empty trie. */
	public Trie_Tree() {
		root = new Node();
	}

	/**
	 * Inserts a word, or increments its repeat count if it is already present.
	 * The empty string is ignored.
	 *
	 * @param words the word to insert (case-insensitive, letters a-z only)
	 * @throws IllegalArgumentException if the word contains a character outside
	 *         a-z after lower-casing; the trie is left unmodified in that case
	 */
	public void insert(String words) {
		String normalized = normalize(words);

		// Validate up front so a rejected word cannot leave half-updated counters.
		for (int i = 0; i < normalized.length(); i++) {
			char ch = normalized.charAt(i);
			if (childIndex(ch) < 0) {
				throw new IllegalArgumentException(
						"Only letters a-z are supported, found '" + ch + "' in \"" + words + "\"");
			}
		}
		if (normalized.length() == 0) {
			// Nothing to store; never mark the root itself as a word.
			return;
		}

		Node current = root;
		for (int i = 0; i < normalized.length(); i++) {
			int index = childIndex(normalized.charAt(i));
			if (current.childs[index] == null) {
				current.childs[index] = new Node();
			}
			current = current.childs[index];
			// One more inserted word passes through (or ends at) this node.
			current.prefix_num++;
		}
		current.isLeaf = true;
		current.dumpli_num++;
	}

	/**
	 * Collects every word stored in the trie together with its insert count.
	 *
	 * @return a new map from word (lower case) to number of insertions; empty
	 *         if nothing has been inserted
	 */
	public HashMap<String, Integer> getAllWords() {
		HashMap<String, Integer> words = new HashMap<String, Integer>();
		collectWords(root, new StringBuilder(), words);
		return words;
	}

	/**
	 * Tells whether the given string was inserted as a complete word.
	 * A string that is only a prefix of stored words does not count.
	 *
	 * @param word the word to look up (case-insensitive)
	 * @return {@code true} if the word was inserted at least once, otherwise
	 *         {@code false} (including for strings with characters outside a-z)
	 */
	public boolean isExist(String word) {
		Node node = findNode(normalize(word));
		return node != null && node.isLeaf;
	}

	/**
	 * Returns all stored words that start with the given prefix, including the
	 * prefix itself if it is a stored word. This is the "auto-complete" query.
	 *
	 * @param prefix the prefix to search for (case-insensitive)
	 * @return a new map from word (lower case) to number of insertions, or
	 *         {@code null} if no stored word starts with the prefix
	 */
	public HashMap<String, Integer> getWordsForPrefix(String prefix) {
		String normalized = normalize(prefix);
		Node start = findNode(normalized);
		if (start == null) {
			return null;
		}
		HashMap<String, Integer> words = new HashMap<String, Integer>();
		// Seed the path with the normalized prefix so keys are consistently lower case.
		collectWords(start, new StringBuilder(normalized), words);
		return words;
	}

	/** Lower-cases input independently of the JVM's default locale. */
	private static String normalize(String text) {
		return text.toLowerCase(java.util.Locale.ROOT);
	}

	/** Maps 'a'..'z' to 0..25 and every other character to -1. */
	private static int childIndex(char ch) {
		return (ch >= 'a' && ch <= 'z') ? ch - 'a' : -1;
	}

	/** Walks down from the root along an already-normalized path; null if the path is absent. */
	private Node findNode(String normalized) {
		Node current = root;
		for (int i = 0; i < normalized.length(); i++) {
			int index = childIndex(normalized.charAt(i));
			if (index < 0 || current.childs[index] == null) {
				return null;
			}
			current = current.childs[index];
		}
		return current;
	}

	/**
	 * Pre-order traversal that records every word at or below {@code node}.
	 * {@code path} holds the characters leading to {@code node} and is restored
	 * to its incoming content before the method returns.
	 */
	private void collectWords(Node node, StringBuilder path, HashMap<String, Integer> out) {
		if (node == null) {
			return;
		}
		if (node.isLeaf) {
			out.put(path.toString(), node.dumpli_num);
		}
		for (int i = 0; i < node.childs.length; i++) {
			Node child = node.childs[i];
			if (child != null) {
				path.append((char) ('a' + i));
				collectWords(child, path, out);
				path.setLength(path.length() - 1);
			}
		}
	}

	/** Prints each word of the map with its count; does nothing for a null map. */
	private static void printCounts(HashMap<String, Integer> counts) {
		if (counts == null) {
			return;
		}
		for (String key : counts.keySet()) {
			System.out.println(key + " 出现: " + counts.get(key) + "次");
		}
	}

	/** Small demo: inserts a few words and prints the query results. */
	public static void main(String[] args) {
		Trie_Tree trie = new Trie_Tree();
		trie.insert("I");
		trie.insert("Love");
		trie.insert("China");
		trie.insert("China");
		trie.insert("China");
		trie.insert("China");
		trie.insert("China");
		trie.insert("xiaoliang");
		trie.insert("xiaoliang");
		trie.insert("man");
		trie.insert("handsome");
		trie.insert("love");
		trie.insert("chinaha");
		trie.insert("her");
		trie.insert("know");

		printCounts(trie.getAllWords());

		HashMap<String, Integer> map = trie.getWordsForPrefix("chin");

		System.out.println("\n\n包含chin（包括本身）前缀的单词及出现次数：");
		printCounts(map);

		if (!trie.isExist("xiaoming")) {
			System.out.println("\n\n字典树中不存在：xiaoming ");
		}
	}
}

/**
 * A single trie node. The letter a node stands for is not stored in the node;
 * it is implied by the node's index in its parent's {@link #childs} array.
 */
class Node {
	/** Number of child slots, one per letter a-z. */
	static final int ALPHABET_SIZE = 26;

	/** How many times the word ending at this node was inserted (0 if none ends here). */
	public int dumpli_num;
	/** How many inserted words pass through or end at this node, duplicates included. */
	public int prefix_num;
	/** Child nodes indexed by {@code letter - 'a'}; a null slot means no such child. */
	public Node[] childs;
	/** True if some inserted word ends at this node (it may still have children). */
	public boolean isLeaf;

	/** Creates a node with zeroed counters and no children. */
	public Node() {
		dumpli_num = 0;
		prefix_num = 0;
		isLeaf = false;
		childs = new Node[ALPHABET_SIZE];
	}
}