R way Trie最大问题是占空间,R = 256,插入百万数量级的key就 OutOfMemory了。在key的数目比较小的情况下,正如理论指出的一样,性能高于HashMap;但到了占内存比较多的时候由于影响系统整体运行,性能不如HashMap。
用HashMap而不是大小为256的数组作为link的时候,占内存少得多,几千万级的都不会OutOfMemory。256-way Trie的一个Node大小至少为256 * 8字节指针大小,也就是2K。
结论就是普通256-way Trie实用价值不大,除非确定key总数很小,空间换时间追求极致的性能。比较实用的是:
1)26-way Trie
2)用HashMap作为link容器的Trie
3)TST(Ternary Search Trie) 排序树和Trie结合起来,每个节点只有3个指针域,实现较复杂。
/**
 * A map from {@code String} keys to values, stored as an R-way trie with R = 256.
 *
 * <p>Every node owns an array of {@code R} child links indexed directly by the
 * character code, so only keys whose characters are all below {@code R} can be
 * stored. A {@code null} value means "no mapping": a node whose value is
 * {@code null} is only an interior node on the path to longer keys.
 *
 * @param <Value> the type of the mapped values
 */
public class TrieMap<Value> {
    /** Alphabet size: number of child links per node. */
    private static final int R = 256;

    /** One trie node; the character leading to it is implied by its slot in the parent. */
    private static class Node {
        /** Value stored for the key ending at this node, or {@code null} if none. */
        Object value;
        /** Number of non-null entries in {@link #next}. */
        int branches = 0;
        /** Child links, indexed by character code. */
        Node[] next = new Node[R];
    }

    private Node root = new Node();

    /**
     * Inserts {@code v} under {@code key} in the subtrie rooted at {@code x},
     * where the first {@code i} characters of the key have already been consumed.
     *
     * @return the (possibly newly created) root of the subtrie
     */
    private Node put(Node x, String key, int i, Value v) {
        if (x == null) {
            x = new Node();
        }
        if (i == key.length()) {
            x.value = v;
            return x;
        }
        char c = key.charAt(i);
        Node link = x.next[c];
        x.next[c] = put(link, key, i + 1, v);
        // Count the link only when it went from absent to present.
        if (link == null && x.next[c] != null) {
            x.branches += 1;
        }
        return x;
    }

    /**
     * Associates {@code value} with {@code key}, replacing any previous value.
     * Passing a {@code null} value removes the key, because {@code null} is
     * how an absent mapping is represented internally.
     *
     * @param key   the key; every character must be below 256
     * @param value the value to store, or {@code null} to remove the key
     * @throws ArrayIndexOutOfBoundsException if a non-null value is stored under
     *         a key containing a character code of 256 or above
     */
    public void put(String key, Value value) {
        if (value == null) {
            // Storing null would only leave valueless nodes behind; prune instead.
            remove(key);
            return;
        }
        root = put(root, key, 0, value);
    }

    /**
     * Follows {@code key} from position {@code i} downwards starting at {@code x}.
     *
     * @return the node reached after consuming the whole key, or {@code null}
     *         if the path does not exist
     */
    private Node get(Node x, String key, int i) {
        Node node = x;
        for (int d = i; node != null && d < key.length(); d++) {
            char c = key.charAt(d);
            if (c >= R) {
                // Characters outside the alphabet can never have been inserted.
                return null;
            }
            node = node.next[c];
        }
        return node;
    }

    /**
     * Returns the value stored under {@code key}.
     *
     * @param key the key to look up
     * @return the stored value, or {@code null} if the key is not present
     */
    public Value get(String key) {
        Node x = get(root, key, 0);
        if (x == null) {
            return null;
        }
        // Safe: value is only ever assigned from a Value argument in put.
        @SuppressWarnings("unchecked")
        Value v = (Value) x.value;
        return v;
    }

    /**
     * Removes {@code key} from the subtrie rooted at {@code x} and prunes
     * nodes that end up with neither a value nor children.
     *
     * @return {@code x}, or {@code null} if {@code x} itself became prunable
     */
    private Node remove(Node x, String key, int i) {
        if (x == null) {
            return null;
        }
        if (i == key.length()) {
            x.value = null;
        } else {
            char c = key.charAt(i);
            if (c >= R) {
                // Key cannot be stored in this trie; nothing to remove.
                return x;
            }
            Node link = x.next[c];
            x.next[c] = remove(link, key, i + 1);
            // Uncount the link only when it went from present to absent.
            if (link != null && x.next[c] == null) {
                x.branches -= 1;
            }
        }
        if (x.value == null && x.branches == 0) {
            return null;
        }
        return x;
    }

    /**
     * Removes {@code key} if it is present; otherwise does nothing.
     *
     * @param key the key to remove
     */
    public void remove(String key) {
        // The result is deliberately ignored so that root is never set to null.
        remove(root, key, 0);
    }

    /**
     * Appends to {@code list} every key in the subtrie rooted at {@code x},
     * in ascending character order; {@code prefix} is the string that leads to {@code x}.
     */
    private void collect(Node x, String prefix, List<String> list) {
        if (x == null) {
            return;
        }
        if (x.value != null) {
            list.add(prefix);
        }
        for (int c = 0; c < R; ++c) {
            Node child = x.next[c];
            if (child != null) {
                collect(child, prefix + (char) c, list);
            }
        }
    }

    /**
     * Returns all keys in ascending character order.
     *
     * @return a new list of every key in the map
     */
    public List<String> keys() {
        return keysWithPrefix("");
    }

    /**
     * Returns all keys that start with {@code prefix}, in ascending character order.
     *
     * @param prefix the required prefix
     * @return a new, possibly empty, list of matching keys
     */
    public List<String> keysWithPrefix(String prefix) {
        Node x = get(root, prefix, 0);
        List<String> list = new ArrayList<String>();
        collect(x, prefix, list);
        return list;
    }

    /**
     * Walks {@code query} from position {@code i} starting at {@code x} and
     * tracks the length of the longest key seen on the way.
     *
     * @param length the best length found so far
     * @return the length of the longest key that is a prefix of {@code query}
     */
    int search(Node x, String query, int i, int length) {
        if (x == null) {
            return length;
        }
        if (x.value != null) {
            length = i;
        }
        if (i == query.length()) {
            return length;
        }
        char c = query.charAt(i);
        if (c >= R) {
            // No stored key can continue through a character outside the alphabet.
            return length;
        }
        return search(x.next[c], query, i + 1, length);
    }

    /**
     * Returns the longest key that is a prefix of {@code query}.
     *
     * @param query the string to match against
     * @return the longest matching key, or the empty string if there is none
     */
    public String longestPrefixOf(String query) {
        int len = search(root, query, 0, 0);
        return query.substring(0, len);
    }

    /**
     * Finds the greatest key in the subtrie rooted at {@code x} by trying the
     * highest character first; {@code prefix} is the string that leads to {@code x}.
     *
     * @return the greatest key, or {@code null} if the subtrie holds no key
     */
    private String maxKey(Node x, String prefix) {
        if (x == null) {
            return null;
        }
        // The index must be an int: a char is unsigned, so "c >= 0" would never fail.
        for (int c = R - 1; c >= 0; --c) {
            Node child = x.next[c];
            if (child == null) {
                continue;
            }
            String s = maxKey(child, prefix + (char) c);
            if (s != null) {
                return s;
            }
        }
        if (x.value != null) {
            return prefix;
        }
        return null;
    }

    /**
     * Returns the greatest key in character order.
     *
     * @return the greatest key, or {@code null} if the map is empty
     */
    public String maxKey() {
        return maxKey(root, "");
    }
}