Patricia前缀树(Patricia Trie)及其基本操作
Trie是一种字典树,用于存储文本字符串,并利用了单词之间共享前缀的特点,所以叫做前缀树。不像平衡BST,Trie的高度只与最长的文本串的长度s有关系,而与单词的数量n无关。Trie的结点分两种:内部结点和叶子结点,内部结点用来存储单词key的成分字母,如果设字母表大小为d,那么每个内部结点最多有d个孩子,叶子结点存储该单词作为key的数据内容(data)。注意内部结点和叶子结点并不是互斥的,一个内部结点本身可以有儿子结点,同时它也可以是一个叶子结点。例如:
这里ab结点本身就是叶子结点,因为它以#结束符标记了,同时它有两个儿子结点a和c,c是叶子结点。
如果一棵Trie中有很多结点只有一个儿子结点,可以用Patricia Trie(Linux内核中叫做Radix Tree)压缩存储。由于#结束符标记被看作是一个叶子结点,那么一棵Patricia Trie的任何内部结点都有2个或以上的孩子结点。
Patricia Trie的基本操作包括:插入、删除和查询。插入操作可能会涉及到split(拆分),删除操作可能会涉及到merge(合并)。基于Patricia Trie的基本性质,split和merge操作都是局部的,这样实现起来比较简单。详细内容请参考以下代码:
实现:
import java.util.LinkedList;
import java.util.List;
/**
 * Patricia Trie
 *
 * Copyright (c) 2011 ljs (http://blog.csdn.net/ljsspace/)
 * Licensed under GPL (http://www.opensource.org/licenses/gpl-license.php)
 *
 * <p>A compressed prefix tree mapping non-empty {@code String} keys to
 * {@code Integer} values. Every edge label is stored in the child node's
 * {@code key}; a node whose {@code terminal} flag is set ends a stored key
 * (drawn as a trailing {@code #} in the diagrams below) and carries that
 * key's value in {@code data}.
 *
 * <p>Invariants maintained by {@link #insert} and {@link #delete}:
 * <ul>
 *   <li>the root is a sentinel with an empty key; it is never terminal and is
 *       never merged with a child;</li>
 *   <li>every non-root node has a non-empty key;</li>
 *   <li>the children of a node start with pairwise distinct characters and are
 *       kept sorted by that first character;</li>
 *   <li>every non-root, non-terminal node has at least two children.</li>
 * </ul>
 *
 * @author ljs
 * 2011-06-27
 *
 */
public class PatriciaTrie {

    private static final String KEY_NOT_FOUND = "No such key is found for removal!";
    private static final String DUPLICATE_KEY = "Duplicate Key is found when insertion!";

    /**
     * One trie node. Declared static because it never touches the enclosing
     * trie instance.
     */
    private static class PatriciaTrieNode {
        /** Edge label leading to this node ("" only for the root sentinel). */
        private String key;
        /** Value of the stored key ending here; meaningful only if {@link #terminal}. */
        private Integer data;
        /** Children, sorted by the first character of their keys. */
        private List<PatriciaTrieNode> children = new LinkedList<PatriciaTrieNode>();
        /** True when a stored key ends at this node (the "#" marker). */
        private boolean terminal;

        public PatriciaTrieNode() {
            this.key = "";
        }

        public PatriciaTrieNode(String key) {
            this.key = key;
        }

        @Override
        public String toString() {
            return this.key + (this.terminal ? "#" : "") + "(" + children.size() + ")";
        }
    }

    /** Sentinel root; created lazily by the first successful call to insert. */
    private PatriciaTrieNode root;

    /** Returns the length of the longest common prefix of {@code a} and {@code b}. */
    private static int commonPrefixLength(String a, String b) {
        int limit = Math.min(a.length(), b.length());
        int n = 0;
        while (n < limit && a.charAt(n) == b.charAt(n)) {
            n++;
        }
        return n;
    }

    /** Creates a terminal node labelled {@code key} that carries {@code value}. */
    private static PatriciaTrieNode newLeaf(String key, Integer value) {
        PatriciaTrieNode leaf = new PatriciaTrieNode(key);
        leaf.terminal = true;
        leaf.data = value;
        return leaf;
    }

    /**
     * Folds the single child of {@code node} into {@code node}: labels are
     * concatenated and the child's value, terminal flag and children move up.
     * The caller guarantees that {@code node} has exactly one child.
     */
    private static void mergeWithOnlyChild(PatriciaTrieNode node) {
        PatriciaTrieNode only = node.children.get(0);
        node.key += only.key;
        node.data = only.data;
        node.terminal = only.terminal; // the child need not be a leaf
        node.children = only.children;
    }

    /**
     * Looks up {@code key}.
     *
     * @param key the key to search for
     * @return the value stored for {@code key}, or {@code null} if the key is
     *         null, empty or not present
     */
    public Integer find(String key) {
        if (key == null || key.length() == 0 || root == null) {
            return null;
        }
        return find(root, key);
    }

    /** Searches the children of {@code currNode} for the remaining {@code key}. */
    private Integer find(PatriciaTrieNode currNode, String key) {
        for (PatriciaTrieNode child : currNode.children) {
            int matched = commonPrefixLength(child.key, key);
            if (matched == 0) {
                // Children are sorted, so once a child starts with a larger
                // character no later sibling can match either.
                if (key.charAt(0) < child.key.charAt(0)) {
                    return null;
                }
                continue;
            }
            if (matched < child.key.length()) {
                // The key ends or diverges inside this label,
                // e.g. child="abc" with key="ab" or key="abd".
                return null;
            }
            if (matched == key.length()) {
                // Exact match; it only counts if a stored key ends here.
                return child.terminal ? child.data : null;
            }
            // The label is a proper prefix of the key: descend with the rest.
            return find(child, key.substring(matched));
        }
        return null;
    }

    /**
     * Removes {@code key} from the trie. A null or empty key, or a trie that
     * has never been inserted into, is silently ignored.
     *
     * @param key the key to remove
     * @throws Exception if the trie has been populated and {@code key} is not stored in it
     */
    public void delete(String key) throws Exception {
        if (key == null || key.length() == 0) {
            return;
        }
        if (root == null) {
            return;
        }
        delete(root, key);
    }

    /** Removes the remaining {@code key} from the subtree below {@code currNode}. */
    private void delete(PatriciaTrieNode currNode, String key) throws Exception {
        for (int i = 0; i < currNode.children.size(); i++) {
            PatriciaTrieNode child = currNode.children.get(i);
            int matched = commonPrefixLength(child.key, key);
            if (matched == 0) {
                if (key.charAt(0) < child.key.charAt(0)) {
                    break; // sorted children: the key cannot be further right
                }
                continue;
            }
            if (matched < child.key.length()) {
                break; // the key ends or diverges inside this label: not stored
            }
            if (matched < key.length()) {
                // e.g. child="ab#", key="abc": continue below child with "c"
                delete(child, key.substring(matched));
                return;
            }
            // The label equals the remaining key.
            if (!child.terminal) {
                break; // a purely internal node, no stored key ends here
            }
            if (child.children.isEmpty()) {
                // e.g. child="ab#", currNode="a"
                //      a
                //     / \
                //    d   ab#
                currNode.children.remove(i);
                // Restore the "at least two children" invariant for currNode.
                // The root sentinel must never absorb a child: lookups only
                // scan root.children, so a merged key would become unreachable.
                if (currNode != root && !currNode.terminal
                        && currNode.children.size() == 1) {
                    mergeWithOnlyChild(currNode);
                }
            } else {
                // e.g. child="ab#" with descendants: keep the node, drop the marker
                //      ab#
                //        \
                //         f#
                child.terminal = false;
                child.data = null; // do not keep the removed value around
                if (child.children.size() == 1) {
                    mergeWithOnlyChild(child);
                }
            }
            return;
        }
        throw new Exception(KEY_NOT_FOUND);
    }

    /**
     * Inserts the remaining {@code key} below {@code currNode}. Values are
     * only ever stored on terminal nodes.
     */
    private void insert(PatriciaTrieNode currNode, String key, Integer value) throws Exception {
        for (int i = 0; i < currNode.children.size(); i++) {
            PatriciaTrieNode child = currNode.children.get(i);
            int matched = commonPrefixLength(child.key, key);
            if (matched == 0) {
                if (key.charAt(0) < child.key.charAt(0)) {
                    // e.g. child="e" (currNode="abc")
                    //     abc                       abc
                    //    /   \     =========>      / | \
                    //   e     f    insert "c"    c#  e  f
                    currNode.children.add(i, newLeaf(key, value));
                    return;
                }
                continue; // the largest key is appended after the loop
            }
            if (matched == child.key.length()) {
                if (matched == key.length()) {
                    // The label equals the key: mark the existing node.
                    if (child.terminal) {
                        throw new Exception(DUPLICATE_KEY);
                    }
                    child.terminal = true;
                    child.data = value;
                } else {
                    // e.g. child="ab#", insert "abc": descend with "c"
                    insert(child, key.substring(matched), value);
                }
                return;
            }

            // The key ends or diverges inside the label: split child at
            // `matched`. `tail` takes over everything child used to represent.
            PatriciaTrieNode tail = new PatriciaTrieNode(child.key.substring(matched));
            tail.terminal = child.terminal;
            tail.data = child.data;
            tail.children = child.children;

            child.key = child.key.substring(0, matched);
            child.children = new LinkedList<PatriciaTrieNode>();
            if (matched == key.length()) {
                // e.g. child="abc#", insert "ab"
                //     abc#                     ab#
                //    /   \     =========>      /
                //   e     f    insert "ab"    c#
                //                            /  \
                //                           e    f
                child.terminal = true;
                child.data = value;
                child.children.add(tail);
            } else {
                // e.g. child="abc#", insert "abd"
                //     abc#                       ab
                //    /   \     ==========>      /  \
                //   e     f    insert "abd"    c#   d#
                //                             /  \
                //                            e    f
                child.terminal = false; // the old marker moved to tail
                child.data = null;
                PatriciaTrieNode leaf = newLeaf(key.substring(matched), value);
                // Keep the two children in lexicographic order.
                if (leaf.key.charAt(0) < tail.key.charAt(0)) {
                    child.children.add(leaf);
                    child.children.add(tail);
                } else {
                    child.children.add(tail);
                    child.children.add(leaf);
                }
            }
            return;
        }
        // No child shares a prefix and none sorts after the key: append.
        currNode.children.add(newLeaf(key, value));
    }

    /**
     * Associates {@code value} with {@code key}. A null or empty key is
     * silently ignored.
     *
     * @param key   the key to insert
     * @param value the value to store; note that {@link #find} cannot tell a
     *              stored {@code null} from a missing key
     * @throws Exception if {@code key} is already stored
     */
    public void insert(String key, Integer value) throws Exception {
        if (key == null || key.length() == 0) {
            return;
        }
        if (root == null) {
            root = new PatriciaTrieNode();
        }
        insert(root, key, value);
    }

    /** Returns the sentinel root, or {@code null} if nothing was ever inserted. */
    public PatriciaTrieNode getRoot() {
        return root;
    }

    /** Dumps the trie to standard output; for test purposes only. */
    public void printTree() {
        if (root == null) {
            // Same output as a trie that has been emptied again.
            System.out.format("|%n");
            return;
        }
        this.print(0, this.root);
    }

    /** Prints {@code node} at depth {@code level}, then its subtree in pre-order. */
    private void print(int level, PatriciaTrieNode node) {
        for (int i = 0; i < level; i++) {
            System.out.format(" ");
        }
        System.out.format("|");
        for (int i = 0; i < level; i++) {
            System.out.format("-");
        }
        if (node.terminal) {
            System.out.format("%s[%s]#%n", node.key, node.data);
        } else {
            System.out.format("%s%n", node.key);
        }
        for (PatriciaTrieNode child : node.children) {
            print(level + 1, child);
        }
    }

    /** Looks up {@code key} and reports the outcome on standard output. */
    public void testFind(String key) {
        Integer val = this.find(key);
        if (val != null) {
            System.out.format("Found key \"%s\" at: %s%n", key, val);
        } else {
            System.out.format("Found no such key: \"%s\"%n", key);
        }
    }

    /** Demo driver: builds four sample tries and exercises find and delete. */
    public static void main(String[] args) throws Exception {
        //test insertion
        PatriciaTrie ptrie = new PatriciaTrie();
        ptrie.insert("ab", 1);
        ptrie.insert("abc", 2);
        ptrie.insert("abde", 3);
        ptrie.insert("abd", 4);
        // "dc" itself is not inserted, so the lookup of "dc" below misses.
        ptrie.insert("dce", 5);
        ptrie.insert("dceh", 6);
        ptrie.insert("dceg", 7);
        ptrie.insert("dca", 8);
        ptrie.insert("dcf", 9);
        ptrie.insert("ghk", 10);
        ptrie.insert("gh", 11);
        ptrie.insert("mns", 12);
        ptrie.insert("mnt", 13);
        ptrie.insert("mn", 14);
        ptrie.insert("mg", 15);
        ptrie.printTree();

        String key = "dc";
        ptrie.testFind(key);
        key = "d";
        ptrie.testFind(key);
        key = "ab";
        ptrie.testFind(key);
        key = "ef";
        ptrie.testFind(key);
        key = "zz";
        ptrie.testFind(key);
        key = "dk";
        ptrie.testFind(key);
        key = "dcf";
        ptrie.testFind(key);
        key = "dck";
        ptrie.testFind(key);

        key = "abd";
        ptrie.delete(key);
        System.out.format("After delete key: %s%n", key);
        ptrie.printTree();

        System.out.println("****************************");
        ptrie = new PatriciaTrie();
        ptrie.insert("bear", 1);
        ptrie.insert("bell", 2);
        ptrie.insert("bid", 3);
        ptrie.insert("bull", 4);
        ptrie.insert("buy", 5);
        ptrie.insert("sell", 6);
        ptrie.insert("stock", 7);
        ptrie.insert("stop", 8);
        ptrie.printTree();

        System.out.println("****************************");
        ptrie = new PatriciaTrie();
        ptrie.insert("allot", 1);
        ptrie.insert("alloy", 2);
        ptrie.insert("all", 3);
        ptrie.insert("aloe", 4);
        ptrie.insert("ant", 5);
        ptrie.insert("an", 6);
        ptrie.insert("are", 7);
        ptrie.insert("ate", 8);
        ptrie.insert("be", 9);
        ptrie.printTree();

        System.out.println("****************************");
        ptrie = new PatriciaTrie();
        ptrie.insert("minimize", 0);
        ptrie.insert("mize", 4);
        ptrie.insert("ze", 6);
        ptrie.insert("nimize", 2);
        ptrie.insert("ize", 5);
        ptrie.insert("inimize", 1);
        ptrie.insert("imize", 3);
        ptrie.insert("e", 7);
        ptrie.printTree();

        key = "ize";
        ptrie.testFind(key);
        key = "zee";
        ptrie.testFind(key);
        key = "mize";
        ptrie.testFind(key);

        key = "mize";
        ptrie.delete(key);
        System.out.format("After delete key: %s%n", key);
        ptrie.printTree();
    }
}
测试输出:
|
|-ab[1]#
|--c[2]#
|--d[4]#
|---e[3]#
|-dc
|--a[8]#
|--e[5]#
|---g[7]#
|---h[6]#
|--f[9]#
|-gh[11]#
|--k[10]#
|-m
|--g[15]#
|--n[14]#
|---s[12]#
|---t[13]#
Found no such key: "dc"
Found no such key: "d"
Found key "ab" at: 1
Found no such key: "ef"
Found no such key: "zz"
Found no such key: "dk"
Found key "dcf" at: 9
Found no such key: "dck"
After delete key: abd
|
|-ab[1]#
|--c[2]#
|--de[3]#
|-dc
|--a[8]#
|--e[5]#
|---g[7]#
|---h[6]#
|--f[9]#
|-gh[11]#
|--k[10]#
|-m
|--g[15]#
|--n[14]#
|---s[12]#
|---t[13]#
****************************
|
|-b
|--e
|---ar[1]#
|---ll[2]#
|--id[3]#
|--u
|---ll[4]#
|---y[5]#
|-s
|--ell[6]#
|--to
|---ck[7]#
|---p[8]#
****************************
|
|-a
|--l
|---l[3]#
|----o
|-----t[1]#
|-----y[2]#
|---oe[4]#
|--n[6]#
|---t[5]#
|--re[7]#
|--te[8]#
|-be[9]#
****************************
|
|-e[7]#
|-i
|--mize[3]#
|--nimize[1]#
|--ze[5]#
|-mi
|--nimize[0]#
|--ze[4]#
|-nimize[2]#
|-ze[6]#
Found key "ize" at: 5
Found no such key: "zee"
Found key "mize" at: 4
After delete key: mize
|
|-e[7]#
|-i
|--mize[3]#
|--nimize[1]#
|--ze[5]#
|-minimize[0]#
|-nimize[2]#
|-ze[6]#