数据结构之字典树

最新推荐文章于 2023-05-05 18:00:00 发布

leo_fengj

最新推荐文章于 2023-05-05 18:00:00 发布

阅读量309

点赞数

分类专栏： Java 数据结构

本文链接：https://blog.csdn.net/lionel_fengj/article/details/74147575

版权

Java 同时被 2 个专栏收录

13 篇文章 0 订阅

订阅专栏

数据结构

3 篇文章 0 订阅

订阅专栏

Trie Tree，又名字典树（前缀树），是一种树形结构。常用于统计、排序和保存大量的字符串，比如在自然语言处理中，常用来统计词频。它的优点是查询效率高：查找一个字符串的时间只与该字符串的长度有关，而与已保存的字符串数量无关。

Trie Tree 具有以下三个性质：
1. 根节点不包含字符，除根节点以外每个节点只包含一个字符；
2. 从根节点到某一个节点，路径上经过的字符连接起来，为该节点对应的字符串；
3. 每个节点的所有子节点包含的字符都不相同。

Trie Tree 的基本实现

package com.feng.nlp.changedalgorithm;

import lombok.Data;
import org.apache.commons.lang3.StringUtils;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * TrieTree: a trie (prefix tree) that stores words case-insensitively and counts how many
 * times each word has been inserted.
 * <p/>
 * Every word, lookup key and prefix is lower-cased with {@link Locale#ROOT} before use, so
 * results do not depend on the JVM's default locale. Each node keeps its children in a map
 * keyed by character, so any {@code char} value may appear in a word and a node only pays
 * for the children it actually has. Instances perform no synchronization.
 * <p/>
 * Created by lionel on 17/4/15.
 */
public class TrieTree {
    private final TrieNode root;

    /** Creates an empty trie. */
    public TrieTree() {
        // The root carries no real character; '~' is only a placeholder and is never read.
        root = new TrieNode('~');
    }

    /**
     * Inserts a word into the trie, incrementing its occurrence count if already present.
     * {@code null}, empty and whitespace-only words are ignored.
     *
     * @param word the word to insert; stored in lower case
     */
    public void insert(String word) {
        if (isBlank(word)) {
            return;
        }
        TrieNode node = root;
        for (char ch : normalize(word).toCharArray()) {
            TrieNode child = node.children.get(ch);
            if (child == null) {
                child = new TrieNode(ch);
                node.children.put(ch, child);
            }
            // Every node on the path is a prefix of (or equal to) the inserted word.
            child.prefixCount++;
            node = child;
        }
        node.endOfWord = true;
        node.wordCount++;
    }

    /**
     * Collects every word stored in the trie together with its occurrence count.
     *
     * @return a new map from each stored (lower-cased) word to the number of times it was
     *         inserted; empty if nothing has been inserted
     */
    public HashMap<String, Integer> getAllWords() {
        HashMap<String, Integer> words = new HashMap<String, Integer>();
        collectWords(root, new StringBuilder(), words);
        return words;
    }

    /**
     * Checks whether a word has been inserted into the trie. A string that is merely a
     * prefix of a stored word (e.g. "app" when only "apple" was inserted) is not a match.
     *
     * @param word the word to look up; compared case-insensitively
     * @return {@code true} if the word was inserted at least once; {@code false} otherwise,
     *         including for {@code null} or blank input
     */
    public boolean isExist(String word) {
        if (isBlank(word)) {
            return false;
        }
        TrieNode node = findNode(normalize(word));
        return node != null && node.endOfWord;
    }

    /**
     * Returns all stored words that start with the given prefix, including the prefix
     * itself when it is a stored word (similar to input-method word completion).
     *
     * @param prefix the prefix to search for; compared case-insensitively
     * @return a new map from each matching (lower-cased) word to its occurrence count; empty
     *         (never {@code null}) if the prefix is {@code null}, blank or matches nothing
     */
    public Map<String, Integer> getWordsForPrefix(String prefix) {
        Map<String, Integer> words = new HashMap<String, Integer>();
        if (isBlank(prefix)) {
            return words;
        }
        // Use the lower-cased prefix for the result keys too, so they match the stored words.
        String normalized = normalize(prefix);
        TrieNode start = findNode(normalized);
        if (start != null) {
            collectWords(start, new StringBuilder(normalized), words);
        }
        return words;
    }

    /**
     * Misspelled alias of {@link #getWordsForPrefix(String)}, kept so existing callers keep
     * compiling. Behaves identically.
     *
     * @param prefix the prefix to search for; compared case-insensitively
     * @return a new map from each matching (lower-cased) word to its occurrence count; empty
     *         (never {@code null}) if the prefix is {@code null}, blank or matches nothing
     */
    public Map<String, Integer> getWordsFroPrefix(String prefix) {
        return getWordsForPrefix(prefix);
    }

    /** Walks down from the root along {@code key}; returns the node reached, or null. */
    private TrieNode findNode(String key) {
        TrieNode node = root;
        for (int i = 0; i < key.length() && node != null; i++) {
            node = node.children.get(key.charAt(i));
        }
        return node;
    }

    /**
     * Depth-first walk below {@code node} that records every complete word into {@code out}.
     * {@code path} holds the characters from the root to {@code node} and is restored to its
     * original content before returning.
     */
    private static void collectWords(TrieNode node, StringBuilder path, Map<String, Integer> out) {
        if (node.endOfWord) {
            out.put(path.toString(), node.wordCount);
        }
        for (TrieNode child : node.children.values()) {
            path.append(child.character);
            collectWords(child, path, out);
            path.setLength(path.length() - 1);
        }
    }

    /** Lower-cases text independently of the default locale. */
    private static String normalize(String text) {
        return text.toLowerCase(Locale.ROOT);
    }

    /** Returns true if text is null, empty, or consists only of whitespace characters. */
    private static boolean isBlank(String text) {
        if (text == null) {
            return true;
        }
        for (int i = 0; i < text.length(); i++) {
            if (!Character.isWhitespace(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /** A single trie node. Static so it holds no hidden reference to the enclosing trie. */
    private static final class TrieNode {
        /** Character stored at this node. */
        private final char character;
        /** Child nodes keyed by their character. */
        private final Map<Character, TrieNode> children = new HashMap<Character, TrieNode>();
        /** Number of times the word ending at this node was inserted. */
        private int wordCount;
        /** Number of insertions whose path passes through or ends at this node. */
        private int prefixCount;
        /** Whether a complete word ends at this node. */
        private boolean endOfWord;

        private TrieNode(char character) {
            this.character = character;
        }
    }
}