字典树(Trie)

最新推荐文章于 2023-02-07 21:33:32 发布

小小绿豆

最新推荐文章于 2023-02-07 21:33:32 发布

阅读量211

点赞数 1

分类专栏：数据结构和算法　文章标签：数据结构、算法

本文链接：https://blog.csdn.net/ltx1472580369/article/details/115918694

版权

数据结构和算法专栏收录该内容

20 篇文章 0 订阅

订阅专栏

字典树(Trie)是一种以空间换时间的数据结构，很适合用于字典类数据的查找和词频统计；

此demo算法，包含字典树Trie的单词添加、删除、遍历、查找；

一个简单字典树(Trie)的结构：

demo中测试数据：

注：此 demo 仅支持由小写字母(a-z)组成的单词，因此测试文本在加入字典树之前已先统一转换为小写；

节点类 Node.java

/**
 * @Author: ltx
 * @Description:
 */
public class Node {
    //字符值
    public char value;
    //单词个数
    public int count;
    //下一个字符集
    public Node[] next;

    public Node() {
    }

    public Node(char value) {
        this.value = value;
    }
}

字典树实现类 Trie.java

/**
 * Trie (prefix tree) over lowercase ASCII words that records how many times each word was added.
 *
 * <p>Only the letters {@code 'a'..'z'} are supported: every node branches through a 26-slot
 * child array indexed by {@code c - 'a'}.
 *
 * @author ltx
 */
public class Trie {
    /** Number of child slots per node, one for each letter 'a'..'z'. */
    private static final int ALPHABET_SIZE = 26;

    /** Root node; it holds no character (its {@code value} stays 0). */
    Node root;

    /** Creates an empty trie. */
    public Trie() {
        root = new Node();
    }

    /**
     * Adds one occurrence of a word.
     *
     * <p>The word is validated before any node is created, so a rejected word leaves the trie
     * untouched.
     *
     * @param word word consisting only of the letters 'a'..'z'; must not be null
     * @throws IllegalArgumentException if the word contains any other character
     */
    public void add(String word) {
        for (int i = 0; i < word.length(); i++) {
            if (slotOf(word.charAt(i)) < 0) {
                throw new IllegalArgumentException(
                        "only lowercase letters a-z are supported: \"" + word + "\"");
            }
        }
        Node node = root;
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            int slot = slotOf(c);
            if (node.next == null) {
                // Child arrays are allocated lazily so leaves stay small.
                node.next = new Node[ALPHABET_SIZE];
            }
            if (node.next[slot] == null) {
                node.next[slot] = new Node(c);
            }
            node = node.next[slot];
        }
        // The last node on the path marks the end of the word.
        node.count++;
    }

    /**
     * Removes one occurrence of a word.
     *
     * <p>When the last occurrence is removed, the nodes that existed only for this word are
     * unlinked bottom-up. Pruning stops at the first node that still ends another word or still
     * has children, so shorter words sharing the path (e.g. "the" when removing "then") survive.
     *
     * @param word word to remove; must not be null
     * @return {@code true} if an occurrence was removed, {@code false} if the word is not stored
     */
    public Boolean remove(String word) {
        // Exact lookup first; it also rejects words with unsupported characters.
        if (search(word, false) == null) {
            System.out.println("删除失败, 字典里面不存在该单词!");
            return false;
        }
        int length = word.length();
        // path[i] is the node reached after consuming i characters; path[0] is the root.
        Node[] path = new Node[length + 1];
        path[0] = root;
        for (int i = 0; i < length; i++) {
            path[i + 1] = path[i].next[slotOf(word.charAt(i))];
        }
        path[length].count--;
        for (int i = length; i > 0; i--) {
            Node node = path[i];
            if (node.count > 0 || hasChildren(node)) {
                // Still needed by this or another word; every ancestor is needed as well.
                break;
            }
            path[i - 1].next[slotOf(word.charAt(i - 1))] = null;
        }
        return true;
    }

    /**
     * Looks a word up, either exactly or as a prefix.
     *
     * <p>Every match found by a prefix query is also printed to standard output.
     *
     * @param word  word (exact query) or prefix (fuzzy query); must not be null
     * @param fuzzy {@code true} for a prefix query returning every stored word that starts with
     *              {@code word} (including {@code word} itself when it is stored),
     *              {@code false} for an exact query
     * @return word-to-count map of the matches. Returns {@code null} when no path for
     *         {@code word} exists (including when it contains a character outside 'a'..'z'),
     *         or when an exact query finds no stored word; a prefix query on an existing path
     *         without stored words returns an empty map
     */
    public Map<String, Integer> search(String word, boolean fuzzy) {
        Node node = root;
        for (int i = 0; i < word.length(); i++) {
            int slot = slotOf(word.charAt(i));
            if (slot < 0 || node.next == null || node.next[slot] == null) {
                return null;
            }
            node = node.next[slot];
        }
        if (!fuzzy) {
            if (node.count <= 0) {
                return null;
            }
            Map<String, Integer> exact = new HashMap<>();
            exact.put(word, node.count);
            return exact;
        }
        Map<String, Integer> matches = new HashMap<>();
        if (node.count > 0) {
            // The prefix is itself a stored word.
            record(word, node.count, matches);
        }
        if (node.next != null) {
            for (Node child : node.next) {
                if (child != null) {
                    // Fresh builder per child: traverse appends the child's own character.
                    traverse(child, new StringBuilder(word), matches);
                }
            }
        }
        return matches;
    }

    /**
     * Depth-first walk that collects and prints every word stored at or below a node.
     *
     * @param n     node to start from
     * @param sb    characters on the path leading to {@code n} (excluding {@code n} itself);
     *              restored to its incoming content before returning
     * @param words word-to-count map receiving the results
     * @return {@code words}
     */
    private Map<String, Integer> traverse(Node n, StringBuilder sb, Map<String, Integer> words) {
        // The root carries no character (value 0) and contributes nothing to the word.
        boolean appended = n.value != 0;
        if (appended) {
            sb.append(n.value);
            if (n.count > 0) {
                record(sb.toString(), n.count, words);
            }
        }
        if (n.next != null) {
            for (Node child : n.next) {
                if (child != null) {
                    traverse(child, sb, words);
                }
            }
        }
        if (appended) {
            // Backtrack: drop only the character this call appended.
            sb.setLength(sb.length() - 1);
        }
        return words;
    }

    /**
     * Collects and prints every stored word with its count.
     *
     * @return word-to-count map of the whole trie
     */
    public Map<String, Integer> show() {
        return traverse(root, new StringBuilder(), new HashMap<>());
    }

    /** Stores a match in the result map and prints it. */
    private static void record(String word, int count, Map<String, Integer> words) {
        words.put(word, count);
        System.out.printf("单词: %s\t数量: %d\n", word, count);
    }

    /** Returns the child-array slot for a character, or -1 if it is not in 'a'..'z'. */
    private static int slotOf(char c) {
        return (c >= 'a' && c <= 'z') ? c - 'a' : -1;
    }

    /** Returns whether a node has at least one child attached. */
    private static boolean hasChildren(Node node) {
        if (node.next == null) {
            return false;
        }
        for (Node child : node.next) {
            if (child != null) {
                return true;
            }
        }
        return false;
    }

    /**
     * Splits a text into words, where a word is a maximal run of the letters 'a'..'z'.
     * Every other character (digits, punctuation, whitespace, uppercase letters) is a separator.
     *
     * @param str English text
     * @return the words in order of appearance
     */
    private static List<String> getWords(String str) {
        List<String> wordList = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        for (char c : str.toCharArray()) {
            if (c >= 'a' && c <= 'z') {
                current.append(c);
            } else if (current.length() > 0) {
                wordList.add(current.toString());
                current.setLength(0);
            }
        }
        if (current.length() > 0) {
            // The text ended in the middle of a word.
            wordList.add(current.toString());
        }
        return wordList;
    }

    /**
     * Builds a trie from all words of a text.
     *
     * @param str text to index
     * @return trie holding every word of {@code str} with its number of occurrences
     */
    private static Trie initTrie(String str) {
        Trie trie = new Trie();
        for (String word : getWords(str)) {
            trie.add(word);
        }
        return trie;
    }

    public static void main(String[] args) {
        // Build the trie from the sample text.
        String str = "java is the #1 programming language and development platform. it reduces costs, shortens development timeframes, drives innovation, and improves application services. with millions of developers running more than 51 billion java virtual machines worldwide, java continues to be the development platform of choice for enterprises and developers.";
        Trie trie = Trie.initTrie(str);
        System.out.println("------遍历字典树------");
        trie.show();
        System.out.println("------后缀模糊查询------");
        // Prefix query.
        System.out.println(trie.search("th", true));
        System.out.println("------精确查询------");
        // Exact query.
        System.out.println(trie.search("th", false));
        System.out.println("------删除单词------");
        // Remove one occurrence of each word.
        trie.remove("and");
        trie.remove("than");
        System.out.println("------遍历字典树------");
        trie.show();
    }
}

测试main输出：

------遍历字典树------
单词: and	数量: 3
单词: application	数量: 1
单词: be	数量: 1
单词: billion	数量: 1
单词: choice	数量: 1
单词: continues	数量: 1
单词: costs	数量: 1
单词: developers	数量: 2
单词: development	数量: 3
单词: drives	数量: 1
单词: enterprises	数量: 1
单词: for	数量: 1
单词: improves	数量: 1
单词: innovation	数量: 1
单词: is	数量: 1
单词: it	数量: 1
单词: java	数量: 3
单词: language	数量: 1
单词: machines	数量: 1
单词: millions	数量: 1
单词: more	数量: 1
单词: of	数量: 2
单词: platform	数量: 2
单词: programming	数量: 1
单词: reduces	数量: 1
单词: running	数量: 1
单词: services	数量: 1
单词: shortens	数量: 1
单词: than	数量: 1
单词: the	数量: 2
单词: timeframes	数量: 1
单词: to	数量: 1
单词: virtual	数量: 1
单词: with	数量: 1
单词: worldwide	数量: 1
------后缀模糊查询------
单词: than	数量: 1
单词: the	数量: 2
{the=2, than=1}
------精确查询------
null
------删除单词------
------遍历字典树------
单词: and	数量: 2
单词: application	数量: 1
单词: be	数量: 1
单词: billion	数量: 1
单词: choice	数量: 1
单词: continues	数量: 1
单词: costs	数量: 1
单词: developers	数量: 2
单词: development	数量: 3
单词: drives	数量: 1
单词: enterprises	数量: 1
单词: for	数量: 1
单词: improves	数量: 1
单词: innovation	数量: 1
单词: is	数量: 1
单词: it	数量: 1
单词: java	数量: 3
单词: language	数量: 1
单词: machines	数量: 1
单词: millions	数量: 1
单词: more	数量: 1
单词: of	数量: 2
单词: platform	数量: 2
单词: programming	数量: 1
单词: reduces	数量: 1
单词: running	数量: 1
单词: services	数量: 1
单词: shortens	数量: 1
单词: the	数量: 2
单词: timeframes	数量: 1
单词: to	数量: 1
单词: virtual	数量: 1
单词: with	数量: 1
单词: worldwide	数量: 1

Process finished with exit code 0

小小绿豆

关注

1
点赞
踩
1

收藏

觉得还不错? 一键收藏
打赏
1
评论
字典树(Trie)

字典树(Trie)是一种空间换时间的字典搜索算法，很适合做字典类数据的搜索和统计；此demo算法，包含字典树Trie的单词添加、删除、遍历、查找；demo中测试数据：注：此demo测试数据仅限于小写字母表的单词，遂，这段话入字典表前做了大写转换小写；节点类 Node.java/** * @Author: ltx * @Description: */public class Node { //字符值 public char value; //单词..
复制链接

扫一扫