Jetty之Trie树

关于Trie树请参见另一篇博客

Jetty-util包里提供了Trie树的实现。 先来看Trie接口

/* ------------------------------------------------------------ */
/** A Trie String lookup data structure.
 * @param <V> the Trie entry type
 */
public interface Trie<V>
{
    /* ------------------------------------------------------------ */
    /** Put and entry into the Trie
     * @param s The key for the entry
     * @param v The value of the entry
     * @return True if the Trie had capacity to add the field.
     */
    public boolean put(String s, V v);
    
    /* ------------------------------------------------------------ */
    /** Put a value as both a key and a value.
     * @param v The value and key
     * @return True if the Trie had capacity to add the field.
     */
    public boolean put(V v);

    /* ------------------------------------------------------------ */
    public V remove(String s);

    /* ------------------------------------------------------------ */
    /** Get and exact match from a String key
     * @param s The key
     * @return the value for the string key
     */
    public V get(String s);

    /* ------------------------------------------------------------ */
    /** Get and exact match from a String key
     * @param s The key
     * @param offset The offset within the string of the key
     * @param len the length of the key
     * @return the value for the string / offset / length
     */
    public V get(String s,int offset,int len);

    /* ------------------------------------------------------------ */
    /** Get and exact match from a segment of a ByteBuufer as key
     * @param b The buffer
     * @return The value or null if not found
     */
    public V get(ByteBuffer b);

    /* ------------------------------------------------------------ */
    /** Get and exact match from a segment of a ByteBuufer as key
     * @param b The buffer
     * @param offset The offset within the buffer of the key
     * @param len the length of the key
     * @return The value or null if not found
     */
    public V get(ByteBuffer b,int offset,int len);
    
    /* ------------------------------------------------------------ */
    /** Get the best match from key in a String.
     * @param s The string
     * @return The value or null if not found
     */
    public V getBest(String s);
    
    /* ------------------------------------------------------------ */
    /** Get the best match from key in a String.
     * @param s The string
     * @param offset The offset within the string of the key
     * @param len the length of the key
     * @return The value or null if not found
     */
    public V getBest(String s,int offset,int len); 

    /* ------------------------------------------------------------ */
    /** Get the best match from key in a byte array.
     * The key is assumed to by ISO_8859_1 characters.
     * @param b The buffer
     * @param offset The offset within the array of the key
     * @param len the length of the key
     * @return The value or null if not found
     */
    public V getBest(byte[] b,int offset,int len);

    /* ------------------------------------------------------------ */
    /** Get the best match from key in a byte buffer.
     * The key is assumed to by ISO_8859_1 characters.
     * @param b The buffer
     * @param offset The offset within the buffer of the key
     * @param len the length of the key
     * @return The value or null if not found
     */
    public V getBest(ByteBuffer b,int offset,int len);
    
    /* ------------------------------------------------------------ */
    public Set<String> keySet();

    /* ------------------------------------------------------------ */
    public boolean isFull();

    /* ------------------------------------------------------------ */
    public boolean isCaseInsensitive();

    /* ------------------------------------------------------------ */
    public void clear();

}

Trie树的方法定义基本可以类比为Map。 接着来看关于Trie的抽象类,AbstractTrie类。

/** 
 * Abstract Trie implementation.
 * <p>Provides some common implementations, which may not be the most
 * efficient. For byte operations, the assumption is made that the charset
 * is ISO-8859-1</p>
 * 
 * @param <V> the type of object that the Trie holds
 */
public abstract class AbstractTrie<V> implements Trie<V>
{
    final boolean _caseInsensitive;
    
    protected AbstractTrie(boolean insensitive)
    {
        _caseInsensitive=insensitive;
    }

    @Override
    public boolean put(V v)
    {
        return put(v.toString(),v);
    }

    @Override
    public V remove(String s)
    {
        V o=get(s);
        put(s,null);
        return o;
    }

    @Override
    public V get(String s)
    {
        return get(s,0,s.length());
    }

    @Override
    public V get(ByteBuffer b)
    {
        return get(b,0,b.remaining());
    }

    @Override
    public V getBest(String s)
    {
        return getBest(s,0,s.length());
    }
    
    @Override
    public V getBest(byte[] b, int offset, int len)
    {
        return getBest(new String(b,offset,len,StandardCharsets.ISO_8859_1));
    }

    @Override
    public boolean isCaseInsensitive()
    {
        return _caseInsensitive;
    }

}

AbstractTrie基本定义了几个重写方法的指向。
再接着看具体的实现类,最常见的ArrayTrie。

public class ArrayTrie<V> extends AbstractTrie<V>
{
    /**
     * The Size of a Trie row is how many characters can be looked
     * up directly without going to a big index.  This is set at 
     * 32 to cover case insensitive alphabet and a few other common
     * characters. 
     */
    private static final int ROW_SIZE = 32;
    
    /**
     * The index lookup table, this maps a character as a byte 
     * (ISO-8859-1 or UTF8) to an index within a Trie row
     */
    private static final int[] __lookup = 
    { // 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
   /*0*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
   /*1*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
   /*2*/31, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, -1, 27, 30, -1,
   /*3*/-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 28, 29, -1, -1, -1, -1,
   /*4*/-1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
   /*5*/15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
   /*6*/-1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
   /*7*/15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
    };
    
    /**
     * The Trie rows in a single array which allows a lookup of row,character
     * to the next row in the Trie.  This is actually a 2 dimensional
     * array that has been flattened to achieve locality of reference.
     * The first ROW_SIZE entries are for row 0, then next ROW_SIZE 
     * entries are for row 1 etc.   So in general instead of using
     * _rows[row][index], we use _rows[row*ROW_SIZE+index] to look up
     * the next row for a given character.
     * 
     * The array is of characters rather than integers to save space. 
     */
    private final char[] _rowIndex;
    
    /**
     * The key (if any) for a Trie row. 
     * A row may be a leaf, a node or both in the Trie tree.
     */
    private final String[] _key;
    
    /**
     * The value (if any) for a Trie row. 
     * A row may be a leaf, a node or both in the Trie tree.
     */
    private final V[] _value;
    
    /**
     * A big index for each row.
     * If a character outside of the lookup map is needed,
     * then a big index will be created for the row, with
     * 256 entries, one for each possible byte.
     */
    private char[][] _bigIndex;
    
    /**
     * The number of rows allocated
     */
    private char _rows;

    public ArrayTrie()
    {
        this(128);
    }
    
    /* ------------------------------------------------------------ */
    /**
     * @param capacity  The capacity of the trie, which at the worst case
     * is the total number of characters of all keys stored in the Trie.
     * The capacity needed is dependent of the shared prefixes of the keys.
     * For example, a capacity of 6 nodes is required to store keys "foo" 
     * and "bar", but a capacity of only 4 is required to
     * store "bar" and "bat".
     */
    @SuppressWarnings("unchecked")
    public ArrayTrie(int capacity)
    {
        super(true);
        _value=(V[])new Object[capacity];
        _rowIndex=new char[capacity*32];
        _key=new String[capacity];
    }

    /* ------------------------------------------------------------ */
    @Override
    public void clear()
    {
        _rows=0;
        Arrays.fill(_value,null);
        Arrays.fill(_rowIndex,(char)0);
        Arrays.fill(_key,null);
    }
    
    /* ------------------------------------------------------------ */
    @Override
    public boolean put(String s, V v)
    {
        int t=0;
        int k;
        int limit = s.length();
        for(k=0; k < limit; k++)
        {
            char c=s.charAt(k);
            
            int index=__lookup[c&0x7f];
            if (index>=0)
            {
                int idx=t*ROW_SIZE+index;
                t=_rowIndex[idx];
                if (t==0)
                {
                    if (++_rows>=_value.length)
                        return false;
                    t=_rowIndex[idx]=_rows;
                }
            }
            else if (c>127)
                throw new IllegalArgumentException("non ascii character");
            else
            {
                if (_bigIndex==null)
                    _bigIndex=new char[_value.length][];
                if (t>=_bigIndex.length)
                    return false;
                char[] big=_bigIndex[t];
                if (big==null)
                    big=_bigIndex[t]=new char[128];
                t=big[c];
                if (t==0)
                {
                    if (_rows==_value.length)
                        return false;
                    t=big[c]=++_rows;
                }
            }
        }
        
        if (t>=_key.length)
        {
            _rows=(char)_key.length;
            return false;
        }
        
        _key[t]=v==null?null:s;
        _value[t] = v;
        return true;
    }

    /* ------------------------------------------------------------ */
    @Override
    public V get(String s, int offset, int len)
    {
        int t = 0;
        for(int i=0; i < len; i++)
        {
            char c=s.charAt(offset+i);
            int index=__lookup[c&0x7f];
            if (index>=0)
            {
                int idx=t*ROW_SIZE+index;
                t=_rowIndex[idx];
                if (t==0)
                    return null;
            }
            else
            {
                char[] big = _bigIndex==null?null:_bigIndex[t];
                if (big==null)
                    return null;
                t=big[c];
                if (t==0)
                    return null;
            }
        }
        return _value[t];
    }

    /* ------------------------------------------------------------ */
    @Override
    public V get(ByteBuffer b,int offset,int len)
    {
        int t = 0;
        for(int i=0; i < len; i++)
        {
            byte c=b.get(offset+i);
            int index=__lookup[c&0x7f];
            if (index>=0)
            {
                int idx=t*ROW_SIZE+index;
                t=_rowIndex[idx];
                if (t==0)
                    return null;
            }
            else
            {
                char[] big = _bigIndex==null?null:_bigIndex[t];
                if (big==null)
                    return null;
                t=big[c];
                if (t==0)
                    return null;
            }
        }
        return (V)_value[t];
    }

    /* ------------------------------------------------------------ */
    @Override
    public V getBest(byte[] b,int offset,int len)
    {
        return getBest(0,b,offset,len);
    }

    /* ------------------------------------------------------------ */
    @Override
    public V getBest(ByteBuffer b,int offset,int len)
    {
        if (b.hasArray())
            return getBest(0,b.array(),b.arrayOffset()+b.position()+offset,len);
        return getBest(0,b,offset,len);
    }

    /* ------------------------------------------------------------ */
    @Override
    public V getBest(String s, int offset, int len)
    {
        return getBest(0,s,offset,len);
    }
    
    /* ------------------------------------------------------------ */
    private V getBest(int t, String s, int offset, int len)
    {
        int pos=offset;
        for(int i=0; i < len; i++)
        {
            char c=s.charAt(pos++);
            int index=__lookup[c&0x7f];
            if (index>=0)
            {
                int idx=t*ROW_SIZE+index;
                int nt=_rowIndex[idx];
                if (nt==0)
                    break;
                t=nt;
            }
            else
            {
                char[] big = _bigIndex==null?null:_bigIndex[t];
                if (big==null)
                    return null;
                int nt=big[c];
                if (nt==0)
                    break;
                t=nt;
            }
            
            // Is the next Trie is a match
            if (_key[t]!=null)
            {
                // Recurse so we can remember this possibility
                V best=getBest(t,s,offset+i+1,len-i-1);
                if (best!=null)
                    return best;
                return (V)_value[t];
            }
        }
        return (V)_value[t];
    }

    /* ------------------------------------------------------------ */
    private V getBest(int t,byte[] b,int offset,int len)
    {
        for(int i=0; i < len; i++)
        {
            byte c=b[offset+i];
            int index=__lookup[c&0x7f];
            if (index>=0)
            {
                int idx=t*ROW_SIZE+index;
                int nt=_rowIndex[idx];
                if (nt==0)
                    break;
                t=nt;
            }
            else
            {
                char[] big = _bigIndex==null?null:_bigIndex[t];
                if (big==null)
                    return null;
                int nt=big[c];
                if (nt==0)
                    break;
                t=nt;
            }
            
            // Is the next Trie is a match
            if (_key[t]!=null)
            {
                // Recurse so we can remember this possibility
                V best=getBest(t,b,offset+i+1,len-i-1);
                if (best!=null)
                    return best;
                break;
            }
        }
        return (V)_value[t];
    }
    
    private V getBest(int t,ByteBuffer b,int offset,int len)
    {
        int pos=b.position()+offset;
        for(int i=0; i < len; i++)
        {
            byte c=b.get(pos++);
            int index=__lookup[c&0x7f];
            if (index>=0)
            {
                int idx=t*ROW_SIZE+index;
                int nt=_rowIndex[idx];
                if (nt==0)
                    break;
                t=nt;
            }
            else
            {
                char[] big = _bigIndex==null?null:_bigIndex[t];
                if (big==null)
                    return null;
                int nt=big[c];
                if (nt==0)
                    break;
                t=nt;
            }
            
            // Is the next Trie is a match
            if (_key[t]!=null)
            {
                // Recurse so we can remember this possibility
                V best=getBest(t,b,offset+i+1,len-i-1);
                if (best!=null)
                    return best;
                break;
            }
        }
        return (V)_value[t];
    }
    
    
    

    @Override
    public String toString()
    {
        StringBuilder buf = new StringBuilder();
        toString(buf,0);
        
        if (buf.length()==0)
            return "{}";
        
        buf.setCharAt(0,'{');
        buf.append('}');
        return buf.toString();
    }


    private void toString(Appendable out, int t)
    {
        if (_value[t]!=null)
        {
            try
            {
                out.append(',');
                out.append(_key[t]);
                out.append('=');
                out.append(_value[t].toString());
            }
            catch (IOException e)
            {
                throw new RuntimeException(e);
            }
        }

        for(int i=0; i < ROW_SIZE; i++)
        {
            int idx=t*ROW_SIZE+i;
            if (_rowIndex[idx] != 0)
                toString(out,_rowIndex[idx]);
        }

        char[] big = _bigIndex==null?null:_bigIndex[t];
        if (big!=null)
        {
            for (int i:big)
                if (i!=0)
                    toString(out,i);
        }

    }

    @Override
    public Set<String> keySet()
    {
        Set<String> keys = new HashSet<>();
        keySet(keys,0);
        return keys;
    }
    
    private void keySet(Set<String> set, int t)
    {
        if (t<_value.length&&_value[t]!=null)
            set.add(_key[t]);

        for(int i=0; i < ROW_SIZE; i++)
        {
            int idx=t*ROW_SIZE+i;
            if (idx<_rowIndex.length && _rowIndex[idx] != 0)
                keySet(set,_rowIndex[idx]);
        }
        
        char[] big = _bigIndex==null||t>=_bigIndex.length?null:_bigIndex[t];
        if (big!=null)
        {
            for (int i:big)
                if (i!=0)
                    keySet(set,i);
        }
    }
    
    @Override
    public boolean isFull()
    {
        return _rows+1>=_key.length;
    }
}

先看几个参数: 1 int ROW_SIZE=32,代表了26个字母表和其他6个常用字符,依次为字母a-z(不区分大小写)以及符号(+-:;.space)。不需要大索引即可在Trie树中查找的字符数。 2 int[] __lookup,索引查找表,字符映射为Trie行的一个索引byte。看索引表里>=0的位置,位于20、2B、2D、2E、3A、3B以及41至5A和61至7A的位置,查找ASCII编码表,可知分别为空格、加号、减号、句号、冒号、分号以及A-Z和a-z这32个字符。 3 char[] _rowIndex,一个允许查找的array,第一行的ROW_SIZE为0,下一行ROW_SIZE则为1。所以我们一般不使用_rows[row][index],而用_rows[row*ROW_SIZE+index]来找到给定字符的行。 4 String[] _key,Trie行的标记,可以是leaf、node或同时存在。 V[] _value,key对应的值 5 char[][] _bigIndex,当一个字符在索引查找表外时,一个大索引就建立了,标示了256项可能的byte。 6 char _rows,分配的行数,指针。
然后看构造方法,默认为ArrayTrie(128)。

    @SuppressWarnings("unchecked")
    public ArrayTrie(int capacity)
    {
        super(true);                      //不区分大小写
        _value=(V[])new Object[capacity]; //128个V的数组,用于存放值
        _rowIndex=new char[capacity*32];  //128*32个字符,用于存放行索引
        _key=new String[capacity];        //128个字符串数组,用于存放key
    }

然后再看主要方法put、get和getBest。

    @Override
    public boolean put(String s, V v)
    {
        int t=0; //行数
        int k;
        int limit = s.length();
        for(k=0; k < limit; k++)
        {
            char c=s.charAt(k); //遍历字符串s的字符
            
            int index=__lookup[c&0x7f]; //0x7f=127,8位相当于首位清零
            if (index>=0) //是32个字符之一
            {
                int idx=t*ROW_SIZE+index; 
                t=_rowIndex[idx]; //第一行时t=0
                if (t==0)
                {
                    if (++_rows>=_value.length)//行数指针下移,_value.length默认128
                        return false;
                    t=_rowIndex[idx]=_rows;//将行数指针放入行索引
                }
            }
            else if (c>127)
                throw new IllegalArgumentException("non ascii character");
            else
            {
                if (_bigIndex==null)
                    _bigIndex=new char[_value.length][];
                if (t>=_bigIndex.length)
                    return false;
                char[] big=_bigIndex[t];
                if (big==null)
                    big=_bigIndex[t]=new char[128];
                t=big[c];
                if (t==0)
                {
                    if (_rows==_value.length)
                        return false;
                    t=big[c]=++_rows;
                }
            }
        }
        
        if (t>=_key.length)
        {
            _rows=(char)_key.length;
            return false;
        }
        
        _key[t]=v==null?null:s;
        _value[t] = v;
        return true;
    }

转载于:https://my.oschina.net/daidetian/blog/704707

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值