关于Trie树请参见另一篇博客
Jetty-util包里提供了Trie树的实现。 先来看Trie接口
/* ------------------------------------------------------------ */
/** A Trie String lookup data structure.
* @param <V> the Trie entry type
*/
public interface Trie<V>
{
/* ------------------------------------------------------------ */
/** Put and entry into the Trie
* @param s The key for the entry
* @param v The value of the entry
* @return True if the Trie had capacity to add the field.
*/
public boolean put(String s, V v);
/* ------------------------------------------------------------ */
/** Put a value as both a key and a value.
* @param v The value and key
* @return True if the Trie had capacity to add the field.
*/
public boolean put(V v);
/* ------------------------------------------------------------ */
public V remove(String s);
/* ------------------------------------------------------------ */
/** Get and exact match from a String key
* @param s The key
* @return the value for the string key
*/
public V get(String s);
/* ------------------------------------------------------------ */
/** Get and exact match from a String key
* @param s The key
* @param offset The offset within the string of the key
* @param len the length of the key
* @return the value for the string / offset / length
*/
public V get(String s,int offset,int len);
/* ------------------------------------------------------------ */
/** Get and exact match from a segment of a ByteBuufer as key
* @param b The buffer
* @return The value or null if not found
*/
public V get(ByteBuffer b);
/* ------------------------------------------------------------ */
/** Get and exact match from a segment of a ByteBuufer as key
* @param b The buffer
* @param offset The offset within the buffer of the key
* @param len the length of the key
* @return The value or null if not found
*/
public V get(ByteBuffer b,int offset,int len);
/* ------------------------------------------------------------ */
/** Get the best match from key in a String.
* @param s The string
* @return The value or null if not found
*/
public V getBest(String s);
/* ------------------------------------------------------------ */
/** Get the best match from key in a String.
* @param s The string
* @param offset The offset within the string of the key
* @param len the length of the key
* @return The value or null if not found
*/
public V getBest(String s,int offset,int len);
/* ------------------------------------------------------------ */
/** Get the best match from key in a byte array.
* The key is assumed to by ISO_8859_1 characters.
* @param b The buffer
* @param offset The offset within the array of the key
* @param len the length of the key
* @return The value or null if not found
*/
public V getBest(byte[] b,int offset,int len);
/* ------------------------------------------------------------ */
/** Get the best match from key in a byte buffer.
* The key is assumed to by ISO_8859_1 characters.
* @param b The buffer
* @param offset The offset within the buffer of the key
* @param len the length of the key
* @return The value or null if not found
*/
public V getBest(ByteBuffer b,int offset,int len);
/* ------------------------------------------------------------ */
public Set<String> keySet();
/* ------------------------------------------------------------ */
public boolean isFull();
/* ------------------------------------------------------------ */
public boolean isCaseInsensitive();
/* ------------------------------------------------------------ */
public void clear();
}
Trie树的方法定义基本可以类比为Map。 接着来看关于Trie的抽象类,AbstractTrie类。
/**
* Abstract Trie implementation.
* <p>Provides some common implementations, which may not be the most
* efficient. For byte operations, the assumption is made that the charset
* is ISO-8859-1</p>
*
* @param <V> the type of object that the Trie holds
*/
public abstract class AbstractTrie<V> implements Trie<V>
{
final boolean _caseInsensitive;
protected AbstractTrie(boolean insensitive)
{
_caseInsensitive=insensitive;
}
@Override
public boolean put(V v)
{
return put(v.toString(),v);
}
@Override
public V remove(String s)
{
V o=get(s);
put(s,null);
return o;
}
@Override
public V get(String s)
{
return get(s,0,s.length());
}
@Override
public V get(ByteBuffer b)
{
return get(b,0,b.remaining());
}
@Override
public V getBest(String s)
{
return getBest(s,0,s.length());
}
@Override
public V getBest(byte[] b, int offset, int len)
{
return getBest(new String(b,offset,len,StandardCharsets.ISO_8859_1));
}
@Override
public boolean isCaseInsensitive()
{
return _caseInsensitive;
}
}
AbstractTrie基本定义了几个重写方法的指向。
再接着看具体的实现类,最常见的ArrayTrie。
public class ArrayTrie<V> extends AbstractTrie<V>
{
/**
* The Size of a Trie row is how many characters can be looked
* up directly without going to a big index. This is set at
* 32 to cover case insensitive alphabet and a few other common
* characters.
*/
private static final int ROW_SIZE = 32;
/**
* The index lookup table, this maps a character as a byte
* (ISO-8859-1 or UTF8) to an index within a Trie row
*/
private static final int[] __lookup =
{ // 0 1 2 3 4 5 6 7 8 9 A B C D E F
/*0*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/*1*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/*2*/31, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, -1, 27, 30, -1,
/*3*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 28, 29, -1, -1, -1, -1,
/*4*/-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
/*5*/15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
/*6*/-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
/*7*/15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
};
/**
* The Trie rows in a single array which allows a lookup of row,character
* to the next row in the Trie. This is actually a 2 dimensional
* array that has been flattened to achieve locality of reference.
* The first ROW_SIZE entries are for row 0, then next ROW_SIZE
* entries are for row 1 etc. So in general instead of using
* _rows[row][index], we use _rows[row*ROW_SIZE+index] to look up
* the next row for a given character.
*
* The array is of characters rather than integers to save space.
*/
private final char[] _rowIndex;
/**
* The key (if any) for a Trie row.
* A row may be a leaf, a node or both in the Trie tree.
*/
private final String[] _key;
/**
* The value (if any) for a Trie row.
* A row may be a leaf, a node or both in the Trie tree.
*/
private final V[] _value;
/**
* A big index for each row.
* If a character outside of the lookup map is needed,
* then a big index will be created for the row, with
* 256 entries, one for each possible byte.
*/
private char[][] _bigIndex;
/**
* The number of rows allocated
*/
private char _rows;
public ArrayTrie()
{
this(128);
}
/* ------------------------------------------------------------ */
/**
* @param capacity The capacity of the trie, which at the worst case
* is the total number of characters of all keys stored in the Trie.
* The capacity needed is dependent of the shared prefixes of the keys.
* For example, a capacity of 6 nodes is required to store keys "foo"
* and "bar", but a capacity of only 4 is required to
* store "bar" and "bat".
*/
@SuppressWarnings("unchecked")
public ArrayTrie(int capacity)
{
super(true);
_value=(V[])new Object[capacity];
_rowIndex=new char[capacity*32];
_key=new String[capacity];
}
/* ------------------------------------------------------------ */
@Override
public void clear()
{
_rows=0;
Arrays.fill(_value,null);
Arrays.fill(_rowIndex,(char)0);
Arrays.fill(_key,null);
}
/* ------------------------------------------------------------ */
@Override
public boolean put(String s, V v)
{
int t=0;
int k;
int limit = s.length();
for(k=0; k < limit; k++)
{
char c=s.charAt(k);
int index=__lookup[c&0x7f];
if (index>=0)
{
int idx=t*ROW_SIZE+index;
t=_rowIndex[idx];
if (t==0)
{
if (++_rows>=_value.length)
return false;
t=_rowIndex[idx]=_rows;
}
}
else if (c>127)
throw new IllegalArgumentException("non ascii character");
else
{
if (_bigIndex==null)
_bigIndex=new char[_value.length][];
if (t>=_bigIndex.length)
return false;
char[] big=_bigIndex[t];
if (big==null)
big=_bigIndex[t]=new char[128];
t=big[c];
if (t==0)
{
if (_rows==_value.length)
return false;
t=big[c]=++_rows;
}
}
}
if (t>=_key.length)
{
_rows=(char)_key.length;
return false;
}
_key[t]=v==null?null:s;
_value[t] = v;
return true;
}
/* ------------------------------------------------------------ */
@Override
public V get(String s, int offset, int len)
{
int t = 0;
for(int i=0; i < len; i++)
{
char c=s.charAt(offset+i);
int index=__lookup[c&0x7f];
if (index>=0)
{
int idx=t*ROW_SIZE+index;
t=_rowIndex[idx];
if (t==0)
return null;
}
else
{
char[] big = _bigIndex==null?null:_bigIndex[t];
if (big==null)
return null;
t=big[c];
if (t==0)
return null;
}
}
return _value[t];
}
/* ------------------------------------------------------------ */
@Override
public V get(ByteBuffer b,int offset,int len)
{
int t = 0;
for(int i=0; i < len; i++)
{
byte c=b.get(offset+i);
int index=__lookup[c&0x7f];
if (index>=0)
{
int idx=t*ROW_SIZE+index;
t=_rowIndex[idx];
if (t==0)
return null;
}
else
{
char[] big = _bigIndex==null?null:_bigIndex[t];
if (big==null)
return null;
t=big[c];
if (t==0)
return null;
}
}
return (V)_value[t];
}
/* ------------------------------------------------------------ */
@Override
public V getBest(byte[] b,int offset,int len)
{
return getBest(0,b,offset,len);
}
/* ------------------------------------------------------------ */
@Override
public V getBest(ByteBuffer b,int offset,int len)
{
if (b.hasArray())
return getBest(0,b.array(),b.arrayOffset()+b.position()+offset,len);
return getBest(0,b,offset,len);
}
/* ------------------------------------------------------------ */
@Override
public V getBest(String s, int offset, int len)
{
return getBest(0,s,offset,len);
}
/* ------------------------------------------------------------ */
private V getBest(int t, String s, int offset, int len)
{
int pos=offset;
for(int i=0; i < len; i++)
{
char c=s.charAt(pos++);
int index=__lookup[c&0x7f];
if (index>=0)
{
int idx=t*ROW_SIZE+index;
int nt=_rowIndex[idx];
if (nt==0)
break;
t=nt;
}
else
{
char[] big = _bigIndex==null?null:_bigIndex[t];
if (big==null)
return null;
int nt=big[c];
if (nt==0)
break;
t=nt;
}
// Is the next Trie is a match
if (_key[t]!=null)
{
// Recurse so we can remember this possibility
V best=getBest(t,s,offset+i+1,len-i-1);
if (best!=null)
return best;
return (V)_value[t];
}
}
return (V)_value[t];
}
/* ------------------------------------------------------------ */
private V getBest(int t,byte[] b,int offset,int len)
{
for(int i=0; i < len; i++)
{
byte c=b[offset+i];
int index=__lookup[c&0x7f];
if (index>=0)
{
int idx=t*ROW_SIZE+index;
int nt=_rowIndex[idx];
if (nt==0)
break;
t=nt;
}
else
{
char[] big = _bigIndex==null?null:_bigIndex[t];
if (big==null)
return null;
int nt=big[c];
if (nt==0)
break;
t=nt;
}
// Is the next Trie is a match
if (_key[t]!=null)
{
// Recurse so we can remember this possibility
V best=getBest(t,b,offset+i+1,len-i-1);
if (best!=null)
return best;
break;
}
}
return (V)_value[t];
}
private V getBest(int t,ByteBuffer b,int offset,int len)
{
int pos=b.position()+offset;
for(int i=0; i < len; i++)
{
byte c=b.get(pos++);
int index=__lookup[c&0x7f];
if (index>=0)
{
int idx=t*ROW_SIZE+index;
int nt=_rowIndex[idx];
if (nt==0)
break;
t=nt;
}
else
{
char[] big = _bigIndex==null?null:_bigIndex[t];
if (big==null)
return null;
int nt=big[c];
if (nt==0)
break;
t=nt;
}
// Is the next Trie is a match
if (_key[t]!=null)
{
// Recurse so we can remember this possibility
V best=getBest(t,b,offset+i+1,len-i-1);
if (best!=null)
return best;
break;
}
}
return (V)_value[t];
}
@Override
public String toString()
{
StringBuilder buf = new StringBuilder();
toString(buf,0);
if (buf.length()==0)
return "{}";
buf.setCharAt(0,'{');
buf.append('}');
return buf.toString();
}
private void toString(Appendable out, int t)
{
if (_value[t]!=null)
{
try
{
out.append(',');
out.append(_key[t]);
out.append('=');
out.append(_value[t].toString());
}
catch (IOException e)
{
throw new RuntimeException(e);
}
}
for(int i=0; i < ROW_SIZE; i++)
{
int idx=t*ROW_SIZE+i;
if (_rowIndex[idx] != 0)
toString(out,_rowIndex[idx]);
}
char[] big = _bigIndex==null?null:_bigIndex[t];
if (big!=null)
{
for (int i:big)
if (i!=0)
toString(out,i);
}
}
@Override
public Set<String> keySet()
{
Set<String> keys = new HashSet<>();
keySet(keys,0);
return keys;
}
private void keySet(Set<String> set, int t)
{
if (t<_value.length&&_value[t]!=null)
set.add(_key[t]);
for(int i=0; i < ROW_SIZE; i++)
{
int idx=t*ROW_SIZE+i;
if (idx<_rowIndex.length && _rowIndex[idx] != 0)
keySet(set,_rowIndex[idx]);
}
char[] big = _bigIndex==null||t>=_bigIndex.length?null:_bigIndex[t];
if (big!=null)
{
for (int i:big)
if (i!=0)
keySet(set,i);
}
}
@Override
public boolean isFull()
{
return _rows+1>=_key.length;
}
}
先看几个参数: 1 int ROW_SIZE=32,代表了26个字母表和其他6个常用字符,依次为字母a-z(不区分大小写)以及符号(+-:;.space)。不需要大索引即可在Trie树中查找的字符数。 2 int[] __lookup,索引查找表,字符映射为Trie行的一个索引byte。看索引表里>=0的位置,位于20、2B、2D、2E、3A、3B以及41至5A和61至7A的位置,查找ASCII编码表,可知分别为空格、加号、减号、句号、冒号、分号以及A-Z和a-z这32个字符。 3 char[] _rowIndex,一个允许查找的array,第一行的ROW_SIZE为0,下一行ROW_SIZE则为1。所以我们一般不使用_rows[row][index],而用_rows[row*ROW_SIZE+index]来找到给定字符的行。 4 String[] _key,Trie行的标记,可以是leaf、node或同时存在。 V[] _value,key对应的值 5 char[][] _bigIndex,当一个字符在索引查找表外时,一个大索引就建立了,标示了256项可能的byte。 6 char _rows,分配的行数,指针。
然后看构造方法,默认为ArrayTrie(128)。
@SuppressWarnings("unchecked")
public ArrayTrie(int capacity)
{
super(true); //不区分大小写
_value=(V[])new Object[capacity]; //128个V的数组,用于存放值
_rowIndex=new char[capacity*32]; //128*32个字符,用于存放行索引
_key=new String[capacity]; //128个字符串数组,用于存放key
}
然后再看主要方法put、get和getBest。
@Override
public boolean put(String s, V v)
{
int t=0; //行数
int k;
int limit = s.length();
for(k=0; k < limit; k++)
{
char c=s.charAt(k); //遍历字符串s的字符
int index=__lookup[c&0x7f]; //0x7f=127,8位相当于首位清零
if (index>=0) //是32个字符之一
{
int idx=t*ROW_SIZE+index;
t=_rowIndex[idx]; //第一行时t=0
if (t==0)
{
if (++_rows>=_value.length)//行数指针下移,_value.length默认128
return false;
t=_rowIndex[idx]=_rows;//将行数指针放入行索引
}
}
else if (c>127)
throw new IllegalArgumentException("non ascii character");
else
{
if (_bigIndex==null)
_bigIndex=new char[_value.length][];
if (t>=_bigIndex.length)
return false;
char[] big=_bigIndex[t];
if (big==null)
big=_bigIndex[t]=new char[128];
t=big[c];
if (t==0)
{
if (_rows==_value.length)
return false;
t=big[c]=++_rows;
}
}
}
if (t>=_key.length)
{
_rows=(char)_key.length;
return false;
}
_key[t]=v==null?null:s;
_value[t] = v;
return true;
}