前缀树笔记

最新推荐文章于 2023-09-10 10:27:23 发布

L__ear

最新推荐文章于 2023-09-10 10:27:23 发布

阅读量228

点赞数

分类专栏：算法文章标签：前缀树字典树数据结构

本文链接：https://blog.csdn.net/l__ear/article/details/108884267

版权

算法专栏收录该内容

52 篇文章 4 订阅

订阅专栏

1. 前言

发现笔试题经常爱考前缀树。

每次慢慢回忆，再慢悠悠的写代码，时间根本来不及。

需要总结一下记忆方法，目标是以后碰到前缀树的题能上手就来。

记忆点一：图
前缀树
记忆点二：

边表示字符，结点对应 isEnd 和 count 信息。
从结点到根只有一条路径，表示一个字符串前缀。

记忆点三：

class Trie {
	final int max = 1000;
	int[][] tree = new tree[max][26]; // 一行表示一个结点；孩子表示法
	int[] count = new int[max];
	boolean[] isEnd = new boolean[max];
	int last;

	public void insert(String word) {
		int p = 0;
		for(int i = 0; i < word.length(); ++i) {
			int c = word.charAt(i)-'a';
			if(tree[p][c] == 0) tree[p][c] = ++last; // 分配结点
			p = tree[p][c];
			count[p]++; // 更新结点对应的计数信息
		}
		isEnd[p] = true; // 更新 isEnd 信息
	}
}

原来总结知识点的时候，总想总结的很全，没有纰漏。
结果就是总结的很复杂，抓不住重点，华而不实。
大脑抗拒复杂的东西，总结的再全再复杂，只能在纸上，不能进脑子里，都是白搭。

所以别看前缀树那么多操作，记住插入即可！

2. 阿里笔试题

题目描述：
输入法里预存了一些词汇。在我们输入一个单词时，我们有两种操作可选：

花费 1 个单位的时间输入一个字符。
在我们输入时，输入法会以你的输入作为前缀去匹配它的预存词汇，并给出匹配的 m 个单词，你可以花费 m 个单位的时间选择任一单词代替现有输入。

问输入一个单词所花费的最短时间是多少。

输入描述：
第一行两个整数 n，p，后面每一行表示一个单词，前 n 行为输入法的预存词汇，后 p 行表示 p 个单词输入。

输出描述：
输出 p 行，表示 p 个单词输入对应的最短时间。

例如：
输入，
3 1
nt
nowcoder
nc
nowcoderhhh

输出，
6

输出解释：输入 no 花费 2 个单位时间，只匹配一个词 nowcoder，花费 1 个单位时间用 nowcoder 替换 no，最后花 3 个单位时间输入后面的 hhh。共花费 6 个单位时间。

代码求解：

import java.util.*;

public class Main{
	public static void main(String[] args) {
		Scanner sc = new Scanner(System.in);
		int n = sc.nextInt(), p = sc.nextInt();
		
		Trie trie = new Trie();
		for(int i = 0; i < n; ++i) {
			trie.insert(sc.next());
		}
		
		for(int i = 0; i < p; ++i) {
			char[] word = sc.next().toCharArray();
			// 首先判断前缀树里是否有 word 的前缀，有的话最长前缀有多长
			int len = trie.longestPrefix(word);
			if(len < 0) {
				System.out.println(word.length);
				continue;
			}
			int min = len+1;
			// 得到最长前缀的最短时间
			for(int j = 0; j <= len; ++j) {
				int temp = j+1+trie.countPrefix(word, j);
				if(temp < min) min = temp;
			}
			System.out.println(min+(word.length-1-len));
		}
	}
}


class Trie{
	final int max = 1000;
	int[][] tree = new int[max][26];
	int[] count = new int[max];
	boolean[] isEnd = new boolean[max];
	int last;
	
	public void insert(String word) {
		int p = 0;
		for(int i = 0; i < word.length(); ++i) {
			int c = word.charAt(i) - 'a';
			if(tree[p][c] == 0) tree[p][c] = ++last;
			p = tree[p][c];
			count[p]++;
		}
		isEnd[p] = true;
	}
	
	public int countPrefix(char[] prefix, int j) {
		int p = 0;
		for(int i = 0; i <= j; ++i) {
			int c = prefix[i] - 'a';
			if(tree[p][c] == 0) return 0;
			p = tree[p][c];
		}
		return count[p];
	}
	
	public int longestPrefix(char[] word) {
		int p = 0;
		for(int i = 0; i < word.length; ++i) {
			int c = word[i] - 'a';
			if(tree[p][c] == 0 && isEnd[p]) return i-1;
			p = tree[p][c];
		}
		return -1;
	}
}

3. 附录：前缀树的更常规实现

class Trie {
    class TrieNode{
        TrieNode[] links = new TrieNode[26];
        boolean isEnd;
    }

    TrieNode root;

    /** Initialize your data structure here. */
    public Trie() {
        root = new TrieNode();
    }
    
    /** Inserts a word into the trie. */
    public void insert(String word) {
        TrieNode p = root;
        for(int i = 0; i < word.length(); ++i){
            char c = word.charAt(i);
            if(p.links[c-'a'] == null) p.links[c-'a'] = new TrieNode();
            p = p.links[c-'a'];
        }
        p.isEnd = true;
    }

    private TrieNode searchPrefix(String prefix){
        TrieNode p = root;
        for(int i = 0; i < prefix.length(); ++i){
            char c = prefix.charAt(i);
            if(p.links[c-'a'] == null) return null;
            p = p.links[c-'a'];
        }
        return p;
    }
    
    /** Returns if the word is in the trie. */
    public boolean search(String word) {
        TrieNode p = searchPrefix(word);
        return p == null ? false : p.isEnd;
    }
    
    /** Returns if there is any word in the trie that starts with the given prefix. */
    public boolean startsWith(String prefix) {
        TrieNode p = searchPrefix(prefix);
        return p != null;
    }
}