字典树(Trie) : 又称前缀树,是一种树形结构,是一种哈希树的变种。典型应用是用于统计,排序和保存大量的字符串(但不仅限于字符串),所以经常被搜索引擎系统用于文本词频统计。它的优点是:利用字符串的公共前缀来减少查询时间,最大限度地减少无谓的字符串比较,查询效率比哈希树高。
如果有n个条目,Trie查询每个条目的时间复杂度和字典中一共有多少条目无关,时间复杂度为O(w),w为查询单词的长度!而大多数单词的长度小于10。
使用Java中的TreeMap实现Trie:
下面先给出只支持添加与查询的版本,随后给出增加了删除功能的完整版本。
import java.util.TreeMap;
/**
 * A trie (prefix tree) that stores a set of strings, one {@code char} per edge.
 *
 * <p>Each node keeps its outgoing edges in a {@link TreeMap} keyed by the next character.
 * Insertion and lookup walk exactly one node per character of the argument.
 */
public class Trie {

    /**
     * One node of the trie: a flag telling whether a stored word ends here, plus the child
     * nodes keyed by character.
     *
     * <p>Declared {@code static} so a node does not carry a hidden reference to the
     * enclosing {@code Trie} instance.
     */
    private static final class Node {
        boolean isWord;
        final TreeMap<Character, Node> next;

        Node(boolean isWord) {
            this.isWord = isWord;
            this.next = new TreeMap<>();
        }

        Node() {
            this(false);
        }
    }

    private final Node root;
    private int size;

    /** Creates an empty trie. */
    public Trie() {
        root = new Node();
        size = 0;
    }

    /**
     * Returns the number of distinct words stored in the trie.
     *
     * @return the word count
     */
    public int getSize() {
        return size;
    }

    /**
     * Adds {@code word} to the trie. Adding a word that is already present changes nothing.
     *
     * @param word the word to add; the empty string is accepted and is stored at the root
     */
    public void add(String word) {
        Node cur = root;
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            // Single lookup per character; create the child only when it is missing.
            Node child = cur.next.get(c);
            if (child == null) {
                child = new Node();
                cur.next.put(c, child);
            }
            cur = child;
        }
        // Only count the word the first time it is marked.
        if (!cur.isWord) {
            cur.isWord = true;
            size++;
        }
    }

    /**
     * Tells whether {@code word} has been added to the trie as a complete word.
     *
     * @param word the word to look up
     * @return {@code true} if the exact word is stored, {@code false} otherwise
     */
    public boolean contains(String word) {
        Node end = find(word);
        return end != null && end.isWord;
    }

    /**
     * Tells whether at least one stored word starts with {@code prefix}.
     *
     * <p>The empty prefix matches only when the trie holds at least one word.
     *
     * @param prefix the prefix to test
     * @return {@code true} if some stored word begins with {@code prefix}
     */
    public boolean isPrefix(String prefix) {
        Node end = find(prefix);
        if (end == null) {
            return false;
        }
        // A reached node represents a real prefix only if a word ends here or continues
        // below it; this is what makes an empty trie answer false for "".
        return end.isWord || !end.next.isEmpty();
    }

    /** Walks the trie along {@code s} and returns the node reached, or {@code null} if the path breaks. */
    private Node find(String s) {
        Node cur = root;
        for (int i = 0; i < s.length(); i++) {
            cur = cur.next.get(s.charAt(i));
            if (cur == null) {
                return null;
            }
        }
        return cur;
    }
}
import java.util.TreeMap;
/**
 * A trie (prefix tree) that stores a set of strings, one {@code char} per edge, with
 * support for adding, querying and removing words.
 *
 * <p>Each node keeps its outgoing edges in a {@link TreeMap} keyed by the next character.
 * Nodes that no longer lead to any word are pruned when a word is removed.
 *
 * @author ymn
 * @version 1.0
 * @date 2020\5\22 0022 15:39
 */
public class Trie {

    /**
     * One node of the trie: a flag telling whether a stored word ends here, plus the child
     * nodes keyed by character.
     *
     * <p>Declared {@code static} so a node does not carry a hidden reference to the
     * enclosing {@code Trie} instance.
     */
    private static final class Node {
        boolean isWord;
        final TreeMap<Character, Node> next;

        Node(boolean isWord) {
            this.isWord = isWord;
            this.next = new TreeMap<>();
        }

        Node() {
            this(false);
        }
    }

    private final Node root;
    private int size;

    /** Creates an empty trie. */
    public Trie() {
        root = new Node();
        size = 0;
    }

    /**
     * Returns the number of distinct words stored in the trie.
     *
     * @return the word count
     */
    public int getSize() {
        return size;
    }

    /**
     * Adds {@code word} to the trie. Adding a word that is already present changes nothing.
     *
     * @param word the word to add; the empty string is accepted and is stored at the root
     */
    public void add(String word) {
        Node cur = root;
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            // Single lookup per character; create the child only when it is missing.
            Node child = cur.next.get(c);
            if (child == null) {
                child = new Node();
                cur.next.put(c, child);
            }
            cur = child;
        }
        // Only count the word the first time it is marked.
        if (!cur.isWord) {
            cur.isWord = true;
            size++;
        }
    }

    /**
     * Tells whether {@code word} is currently stored in the trie as a complete word.
     *
     * @param word the word to look up
     * @return {@code true} if the exact word is stored, {@code false} otherwise
     */
    public boolean contains(String word) {
        Node end = find(word);
        return end != null && end.isWord;
    }

    /**
     * Tells whether at least one stored word starts with {@code prefix}.
     *
     * <p>The empty prefix matches only when the trie holds at least one word.
     *
     * @param prefix the prefix to test
     * @return {@code true} if some stored word begins with {@code prefix}
     */
    public boolean isPrefix(String prefix) {
        Node end = find(prefix);
        if (end == null) {
            return false;
        }
        // A reached node represents a real prefix only if a word ends here or continues
        // below it; this is what makes an empty trie answer false for "".
        return end.isWord || !end.next.isEmpty();
    }

    /**
     * Removes {@code word} from the trie.
     *
     * <p>The empty string is handled like any other word: it is removed if it was
     * previously added.
     *
     * @param word the word to remove
     * @return {@code true} if the word was stored and has been removed, {@code false} if it
     *     was not present
     */
    public boolean remove(String word) {
        return remove(root, word, 0);
    }

    /**
     * Recursively removes the suffix {@code word[index..end)} from the subtree rooted at
     * {@code node}.
     *
     * @param node the root of the subtree being searched
     * @param word the full word being removed
     * @param index the position in {@code word} that {@code node}'s outgoing edge must match
     * @return {@code true} if the word was found and unmarked
     */
    private boolean remove(Node node, String word, int index) {
        if (index == word.length()) {
            if (!node.isWord) {
                return false;
            }
            node.isWord = false;
            size--;
            return true;
        }

        char c = word.charAt(index);
        Node child = node.next.get(c);
        if (child == null) {
            return false;
        }

        boolean removed = remove(child, word, index + 1);
        // On the way back up, drop a child that neither ends a word nor has children left.
        if (removed && !child.isWord && child.next.isEmpty()) {
            node.next.remove(c);
        }
        return removed;
    }

    /** Walks the trie along {@code s} and returns the node reached, or {@code null} if the path breaks. */
    private Node find(String s) {
        Node cur = root;
        for (int i = 0; i < s.length(); i++) {
            cur = cur.next.get(s.charAt(i));
            if (cur == null) {
                return null;
            }
        }
        return cur;
    }
}
除了Trie外,还有压缩字典树(Compressed Trie),三分搜索树(Ternary Search Trie)等。