数据结构之Trie(前缀树或字典树)

最新推荐文章于 2024-07-18 17:57:16 发布

一点星Cloud

最新推荐文章于 2024-07-18 17:57:16 发布

阅读量77

点赞数

分类专栏：数据结构文章标签：数据结构 java 算法

本文链接：https://blog.csdn.net/qq_44129924/article/details/128772287

版权

数据结构专栏收录该内容

13 篇文章 0 订阅

订阅专栏

Trie

Trie(前缀树或字典树)

特点：与二叉查找树不同，键(字符串)不是直接保存在结点中，而是由结点在树中的位置决定；查找和插入的时间复杂度为O(W)，W为字符串(单词)的长度。
一个结点的所有子孙都有相同的前缀，也就是这个结点对应的字符串，而根结点对应空字符串。

应用：常用于统计，利用字符串的公共前缀来减少查询时间，最大限度地减少无谓的字符串比较。

自定义trie的结点结构

package com.company.trie;

import java.util.Map;
import java.util.TreeMap;

/**
 * A single trie node: a flag telling whether the path ending here spells a stored word,
 * plus the child nodes keyed by the next character.
 *
 * @Author: wenhua
 * @CreateTime: 2023-01-12  22:46
 */
public class Node {

    // True when the characters on the path leading to this node form a stored word.
    boolean isWord = false;
    // Children of this node, keyed by character (TreeMap keeps the keys sorted).
    Map<Character, Node> next = new TreeMap<>();

    /**
     * Creates an empty node: not a word, no children.
     */
    public Node() {
        // Fields are set up by their initializers above.
    }

    /**
     * Renders this node and, recursively, all of its children.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Node{");
        text.append("isWord=").append(isWord);
        text.append(", next=").append(next);
        text.append('}');
        return text.toString();
    }
}

自定义trie结构

package com.company.trie;

import java.util.Map;

/**
 * A trie (prefix tree / dictionary tree) holding a set of non-empty strings.
 *
 * <p>A word is stored as the path of its characters starting at the root; the node reached by
 * the last character has {@code isWord} set. The root itself corresponds to the empty string.
 *
 * @param <T> unused type parameter, kept only so existing {@code Trie<String>} declarations
 *            continue to compile
 * @Author: wenhua
 * @CreateTime: 2023-01-12  22:55
 */
public class Trie<T> {

    private final Node root;
    private int size;

    /**
     * Creates an empty trie.
     */
    public Trie() {
        root = new Node();
        size = 0;
    }

    /**
     * Returns the number of distinct words currently stored.
     *
     * @return the word count
     */
    public int getSize() {
        return size;
    }

    /**
     * Tells whether the given string is stored in the trie as a complete word.
     *
     * <p>A {@code null} or empty string is reported on standard output and is never contained,
     * because {@link #add(String)} refuses to store such strings.
     *
     * @param ele the string to look up
     * @return {@code true} if {@code ele} was added as a word and not removed since
     */
    public boolean isContain(String ele) {
        // The null test must come first; otherwise isEmpty() would throw on null.
        if (ele == null || ele.isEmpty()) {
            System.out.println("The " + ele + " is empty.");
            return false;
        }
        Node end = findNode(ele);
        return end != null && end.isWord;
    }

    /**
     * Adds a word to the trie. Adding a word that is already present changes nothing.
     *
     * <p>A {@code null} or empty string is reported on standard output and ignored.
     *
     * @param ele the word to add
     */
    public void add(String ele) {
        if (ele == null || ele.isEmpty()) {
            System.out.println("The " + ele + " is empty.");
            return;
        }
        Node cur = root;
        for (int i = 0; i < ele.length(); i++) {
            // Follow the child for this character, creating it on first use.
            cur = cur.next.computeIfAbsent(ele.charAt(i), key -> new Node());
        }
        // Only count the word the first time its end node is marked.
        if (!cur.isWord) {
            size++;
            cur.isWord = true;
        }
    }

    /**
     * Tells whether the given string is a prefix of at least one stored word
     * (a stored word counts as a prefix of itself).
     *
     * <p>A {@code null} or empty string is reported on standard output and treated as a
     * prefix of everything, so {@code true} is returned.
     *
     * @param ele the candidate prefix
     * @return {@code true} if a path for {@code ele} exists in the trie
     */
    public boolean isPrefix(String ele) {
        if (ele == null || ele.isEmpty()) {
            System.out.println("The " + ele + " is empty.");
            return true;
        }
        return findNode(ele) != null;
    }

    /**
     * Simple wildcard match, e.g. {@code p.n}: every {@code '.'} in the pattern matches exactly
     * one arbitrary character, every other character matches itself.
     *
     * <p>A {@code null} or empty pattern is reported on standard output and never matches.
     *
     * @param word the pattern to match
     * @return {@code true} if at least one stored word matches the whole pattern
     */
    public boolean likeWord(String word) {
        if (word == null || word.length() == 0) {
            System.out.println("The " + word + " is empty.");
            return false;
        }
        return likeWord(root, word, 0);
    }

    /**
     * Recursive step of the wildcard match.
     *
     * @param node  node reached after matching the first {@code index} pattern characters
     * @param word  the pattern
     * @param index position of the next pattern character to match
     * @return {@code true} if the remainder of the pattern matches some word below {@code node}
     */
    private boolean likeWord(Node node, String word, int index) {
        // Whole pattern consumed: it is a match only if a word ends exactly here.
        if (index == word.length()) {
            return node.isWord;
        }
        char c = word.charAt(index);
        if (c != '.') {
            Node child = node.next.get(c);
            return child != null && likeWord(child, word, index + 1);
        }
        // Wildcard: try every child and succeed as soon as one subtree matches.
        for (Node child : node.next.values()) {
            if (likeWord(child, word, index + 1)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Removes a word from the trie; does nothing if the word is not stored.
     *
     * <p>Cases, once the word has been found:
     * <ol>
     *   <li>The word's last node still has children (the word is a prefix of other words):
     *       only its {@code isWord} flag is cleared.</li>
     *   <li>No node on the word's path branches or ends another word: the whole chain is
     *       dropped by removing the word's first character from the root.</li>
     *   <li>Otherwise the chain is cut just below the deepest node on the path that either
     *       branches or ends another word.</li>
     * </ol>
     *
     * <p>A {@code null} or empty string is reported on standard output and ignored.
     *
     * @param word the word to remove
     */
    public void remove(String word) {
        if (word == null || word.isEmpty()) {
            System.out.println(word + "单词为空");
            return;
        }
        Node cur = root;
        // Deepest node on the path that must survive the removal, and its character index.
        Node branchNode = null;
        int branchIndex = -1;
        for (int i = 0; i < word.length(); i++) {
            Node child = cur.next.get(word.charAt(i));
            if (child == null) {
                // The path does not exist, so the word is not stored.
                return;
            }
            // A node must be kept if it has several children, or if it has one child
            // and is itself the end of a word.
            if (child.next.size() > 1 || (child.next.size() == 1 && child.isWord)) {
                branchNode = child;
                branchIndex = i;
            }
            cur = child;
        }
        if (!cur.isWord) {
            // The path exists only as a prefix of other words.
            return;
        }
        if (!cur.next.isEmpty()) {
            // Other words continue below this node: just unmark it.
            cur.isWord = false;
        } else if (branchNode == null) {
            // Nothing else shares this chain: drop it entirely from the root.
            root.next.remove(word.charAt(0));
        } else {
            // Cut the chain right below the deepest node that has to stay.
            branchNode.next.remove(word.charAt(branchIndex + 1));
        }
        size--;
    }

    /**
     * Walks from the root along the characters of {@code path}.
     *
     * @param path the characters to follow
     * @return the node reached by the last character, or {@code null} if the path does not exist
     */
    private Node findNode(String path) {
        Node cur = root;
        for (int i = 0; i < path.length() && cur != null; i++) {
            cur = cur.next.get(path.charAt(i));
        }
        return cur;
    }

    /**
     * Renders the trie by rendering its root node recursively.
     */
    @Override
    public String toString() {
        return "Trie{" +
                "root=" + root
                + '}';
    }
}

测试

package com.company.trie;

import java.util.stream.Stream;

/**
 * Small console demo: fills a trie with sample words, then prints the results of word lookup,
 * prefix lookup, wildcard matching and removal.
 *
 * @Author: wenhua
 * @CreateTime: 2023-01-12  23:06
 */
public class Main {

    public static void main(String[] args) {
        String[] words = {"wenhua", "Jack", "Jim", "ban", "band", "bank", "banner"};
        Trie<String> trie = new Trie<>();
        for (String word : words) {
            trie.add(word);
        }
        System.out.println("当前trie树中单词个数为:" + trie.getSize());

        // Check a batch of strings first as whole words, then as prefixes.
        String[] probes = {"wenhua", "ck", "Ja"};
        Stream.of(probes)
                .map(probe -> "字符串：" + probe + ",是否为单词" + trie.isContain(probe))
                .forEach(System.out::println);
        Stream.of(probes)
                .map(probe -> "字符串：" + probe + ",是否为单词前缀" + trie.isPrefix(probe))
                .forEach(System.out::println);

        // Wildcard lookup: '.' stands for any single character.
        boolean similar = trie.likeWord("wen.ua");
        System.out.println("当前trie树中是否有与wen.ua相似的单词:" + similar);

        // Show the word count before and after removing one word.
        System.out.println("当前trie树中单词个数为:" + trie.getSize());
        trie.remove("wenhua");
        System.out.println("当前trie树中单词个数为:" + trie.getSize());
    }
}

添加单词后，构成的trie树结构如下：
在这里插入图片描述

测试结果

当前trie树中单词个数为:7
字符串：wenhua,是否为单词true
字符串：ck,是否为单词false
字符串：Ja,是否为单词false
字符串：wenhua,是否为单词前缀true
字符串：ck,是否为单词前缀false
字符串：Ja,是否为单词前缀true
当前trie树中是否有与wen.ua相似的单词:true
当前trie树中单词个数为:7
当前trie树中单词个数为:6

一点星Cloud

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
数据结构之Trie(前缀树或字典树)

Trie(前缀树或字典树)特点：与二叉查找树不同，键(字符串)不是直接保存在结点中，而是由结点在树中的位置决定；查找和插入的时间复杂度为O(W)，W为字符串(单词)的长度。一个结点的所有子孙都有相同的前缀，也就是这个结点对应的字符串，而根结点对应空字符串。应用：常用于统计，利用字符串的公共前缀来减少查询时间，最大限度地减少无谓的字符串比较。
复制链接

扫一扫