字典树的实现与应用

最新推荐文章于 2023-01-06 17:27:26 发布

砥砺-前行

最新推荐文章于 2023-01-06 17:27:26 发布

阅读量1.3k

点赞数

分类专栏： java

本文链接：https://blog.csdn.net/qq_27081015/article/details/117112510

版权

字典树 TrieTree Java实现词频统计数据结构

关键词由CSDN通过智能技术生成

java 专栏收录该内容

30 篇文章 0 订阅

订阅专栏

字典树概念

字典树(Trie)，又称单词查找树或键树。字典树的基本特性：根节点不包含字符信息；从根节点到某个节点的路径上的字符依次连接起来，就是该节点对应的字符串；每个节点的各个子节点所包含的字符互不相同。
字典树的结构图

2.字典树java实现
先定义每个节点

package com.jvm;


import lombok.Data;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

@Data
public class DictTree {
    @Data
    private class Node {
        /**
         * 前缀是当前字符串的数量
         */
        int prefix_num;
        /**
         * 全词匹配的数量
         */
        int full_num;
        /**
         * 当前对象
         */
        char obj;
        /**
         * 当前节点的子节点，现在用的是英文字母所以当期节点数量为26，如果我们操作其他对象我们可以用list代替数组
         */
        Node[] childs = new Node[5];
        /**
         * 判断当前内容是否完结
         */
        boolean end = false;

        @Override
        public String toString() {
            return "\nNode{" +
                    "prefix_num=" + prefix_num +
                    ", full_num=" + full_num +
                    ", obj=" + obj +
                    ", childs=" + Arrays.toString(childs) +
                    ", end=" + end +
                    '}';
        }
    }

    private Node root;

    public DictTree() {
        this.root = new Node();
    }

    public static void main(String[] args) {
        DictTree dict = new DictTree();
        dict.insert("abce");
        dict.insert("abce");
        dict.insert("bace");
        dict.insert("cbae");
        dict.insert("abcc");
        System.out.println(dict.toString());

        System.out.println(dict.search("abc"));

        System.out.println(dict.wordFrequency("abc"));

        System.out.println(dict.wordFrequencyForPrefix("abc"));

        System.out.println(dict.wordsFrequency());
    }

    /**
     * 所有词的词频
     *
     * @return {@link Map<String, Integer>}
     */
    private Map<String, Integer> wordsFrequency() {
        Map<String, Integer> map = new HashMap<>();
        StringBuffer buffer = new StringBuffer();
        buffer.append(root.obj);
        if (root.end) {
            map.put(buffer.toString(), root.full_num);
        }
        Node[] childs = root.getChilds();
        for (Node node : childs) {
            if (node != null) {
                recursion(map, node, buffer);
            }
        }
        return map;
    }

    /**
     * 递归获取该树所有节点
     *
     * @param map    地图
     * @param node   节点
     * @param buffer 缓冲
     */
    private void recursion(Map<String, Integer> map, Node node, StringBuffer buffer) {
        buffer.append(node.obj);
        if (node.end) {
            map.put(buffer.toString(), node.full_num);
        }
        System.out.println(map);
        Node[] childs = node.getChilds();
        for (Node child : childs) {
            StringBuffer buffer1 = new StringBuffer();
            buffer1.append(buffer);
            if (child != null) {
                recursion(map, child, buffer1);
            }
        }
    }

    /**
     * 当前词出现的次数
     *
     * @param s 年代
     * @return {@link Map<String, Integer>}
     */
    private Map<String, Integer> wordFrequency(String s) {
        return wordFrequency(root, s.toCharArray());
    }

    /**
     * 当前词为前缀出现的次数
     *
     * @param s 年代
     * @return {@link Map<String, Integer>}
     */
    private Map<String, Integer> wordFrequencyForPrefix(String s) {
        return wordFrequencyForPrefix(root, s.toCharArray());
    }

    private Map<String, Integer> wordFrequencyForPrefix(Node root, char[] array) {
        Map<String, Integer> map = new HashMap<>();
        StringBuffer buffer = new StringBuffer();
        if (array.length != 0) {
            for (char chr : array) {
                int index = chr - 'a';
                if (root.childs[index] == null && chr != root.obj) {
                    break;
                }
                buffer.append(chr);
                root = root.childs[index];
            }
        }
        map.put(buffer.toString(), root.prefix_num);
        return map;
    }

    private Map<String, Integer> wordFrequency(Node root, char[] array) {
        Map<String, Integer> map = new HashMap<>();
        StringBuffer buffer = new StringBuffer();
        if (array.length != 0) {
            for (char chr : array) {
                int index = chr - 'a';
                if (root.childs[index] == null && chr != root.obj) {
                    break;
                }
                buffer.append(chr);
                root = root.childs[index];
            }
        }
        map.put(buffer.toString(), root.full_num);
        return map;
    }

    /**
     * 查看树中是否存在该字符串
     *
     * @param s 年代
     * @return boolean
     */
    private boolean search(String s) {
        char[] array = s.toCharArray();
        return search(this.root, array);
    }

    private boolean search(Node root, char[] array) {
        for (char chr : array) {
            int index = chr - 'a';
            if (root.childs[index] == null && chr != root.obj) {
                return false;
            }
            root = root.childs[index];
        }
        return root.end;
    }

    /**
     * 插入一个新的字符串
     *
     * @param s 年代
     */
    private void insert(String s) {
        char[] array = s.toCharArray();
        insert(this.root, array);
    }

    private void insert(Node root, char[] array) {
        int x = 0;
        for (char chr : array) {
            int index = chr - 'a';
            if (root.childs[index] == null) {
                root.childs[index] = new Node();
                root.childs[index].obj = chr;
            }
            if (x == array.length - 1) {
                root.childs[index].end = true;
                root.childs[index].full_num++;
            }
            root.childs[index].prefix_num++;
            root = root.childs[index];
            x++;
        }
    }
}

3.字典树应用场景
词频统计。从我的个人理解来看，在大文本中统计每条信息的重复次数时，字典树也很有用：可以把大文本转化为这种结构，但不需要把全部信息都放进来统计，只需将每条信息的关键信息放进来做统计就可以了。

砥砺-前行

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
字典树的实现与应用

字典树概念字典树(TrieTree)，又称单词查找树或键树。字典树的基本特性，根节点是不包含信息的，根节点到叶子节点之间的信息连接起来就是数据的所有信息，每个节点子节点的信息时不一样的。2.字典树java实现先定义每个节点package com.jvm;import lombok.Data;import java.util.Arrays;import java.util.HashMap;import java.util.Map;@Datapublic class DictT.
复制链接

扫一扫

专栏目录