In-Memory Trie
Tries are specialized tree data structures in which a word is stored as a sequence of characters, one character per node. Reading a word involves traversing down a branch of the tree. At each node, the possible completions of the partial word can be found by traversing all paths from that node down to the leaf level. This seemed ideal for modeling auto-completions, which is why I chose it. A Trie is modeled as a collection of TrieNode objects. A TrieNode is basically the current character and a Map from each possible next character to its child TrieNode. Here is the code:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
// Trie.java
/**
 * In-memory trie of phrases, used to look up prefixes and enumerate completions.
 *
 * <p>Every loaded phrase is stored character by character below a root node and
 * is terminated by an extra {@code '$'} node, so that a phrase which is a prefix
 * of another phrase (for example "ann" and "anne") still ends on its own branch.
 *
 * <p>NOTE: because {@code '$'} doubles as the end marker, a phrase that itself
 * contains {@code '$'} is reported by {@link #findCompletions(String)} only up
 * to (and excluding) that character.
 */
public class Trie {

    /** Marker appended to every loaded phrase to flag where the phrase ends. */
    private static final char END_OF_PHRASE = '$';

    private TrieNode rootNode;

    /** Creates an empty trie whose root node carries a placeholder space character. */
    public Trie() {
        rootNode = new TrieNode(' ');
    }

    /**
     * Adds a phrase to the trie.
     *
     * @param phrase the phrase to store; {@code null} is ignored (it is not
     *               stored as the literal text "null")
     */
    public void load(String phrase) {
        if (phrase == null) {
            return;
        }
        String terminated = phrase + END_OF_PHRASE;
        TrieNode node = rootNode;
        // Walk down one level per character, creating missing children on the way.
        for (int i = 0; i < terminated.length() && node != null; i++) {
            char c = terminated.charAt(i);
            node.add(c);
            node = node.getChildNode(c);
        }
    }

    /**
     * Tells whether some path from the root spells out the given prefix.
     *
     * <p>Every character of the prefix is significant, including spaces and
     * other whitespace.
     *
     * @param prefix the prefix to look for; {@code null} or empty matches the root
     * @return {@code true} if the whole prefix could be followed from the root
     */
    public boolean matchPrefix(String prefix) {
        return findNode(prefix) != null;
    }

    /**
     * Lists every loaded phrase reachable below the given prefix.
     *
     * @param prefix the prefix to complete; {@code null} is treated as empty
     * @return the completions, without the end marker; empty if the prefix does
     *         not match. The order follows the iteration order of each node's
     *         children.
     */
    public List<String> findCompletions(String prefix) {
        List<String> completions = new ArrayList<String>();
        TrieNode matchedNode = findNode(prefix);
        if (matchedNode != null) {
            StringBuilder path = new StringBuilder(prefix == null ? "" : prefix);
            collectCompletions(matchedNode, path, completions);
        }
        return completions;
    }

    /**
     * Follows the prefix from the root, one character per level.
     *
     * @return the node reached after the last character, the root for a
     *         {@code null} or empty prefix, or {@code null} if some character
     *         has no matching child
     */
    private TrieNode findNode(String prefix) {
        TrieNode node = rootNode;
        if (prefix == null) {
            return node;
        }
        // Stop only when the prefix is exhausted. Whitespace is a real character
        // here and has to be matched like any other.
        for (int i = 0; i < prefix.length(); i++) {
            node = node.getChildNode(prefix.charAt(i));
            if (node == null) {
                return null;
            }
        }
        return node;
    }

    /**
     * Depth-first walk below {@code node}. {@code path} holds the characters
     * from the root down to and including {@code node}; it is extended before
     * descending into a child and restored afterwards.
     */
    private void collectCompletions(TrieNode node, StringBuilder path, List<String> completions) {
        if (node.getNodeValue() == END_OF_PHRASE) {
            // End of a phrase: record the path without the trailing marker.
            completions.add(path.substring(0, path.length() - 1));
            return;
        }
        for (TrieNode childNode : node.getChildren()) {
            path.append(childNode.getNodeValue());
            collectCompletions(childNode, path, completions);
            path.setLength(path.length() - 1);
        }
    }

    /** Returns "Trie:" followed by the string form of the root node. */
    @Override
    public String toString() {
        return "Trie:" + rootNode.toString();
    }
}
// TrieNode.java
/**
 * One node of a trie: a single character plus the child nodes that may follow
 * it, keyed by their character.
 *
 * <p>{@link #equals(Object)} and {@link #hashCode()} look at the node's own
 * character only; the children are not compared.
 */
public class TrieNode {

    // Field names and types are kept stable because toString() prints this
    // object through reflection.
    private final Character character;
    private final HashMap<Character,TrieNode> children;

    /**
     * Creates a node with no children.
     *
     * @param c the character this node stands for
     */
    public TrieNode(char c) {
        // Character.valueOf replaces the deprecated Character(char) constructor.
        this.character = Character.valueOf(c);
        this.children = new HashMap<Character,TrieNode>();
    }

    /** @return the character this node stands for */
    public char getNodeValue() {
        return character.charValue();
    }

    /**
     * @return the child nodes, as the value view of the internal map (not a copy)
     */
    public Collection<TrieNode> getChildren() {
        return children.values();
    }

    /**
     * @return the characters of the child nodes, as the key view of the
     *         internal map (not a copy)
     */
    public Set<Character> getChildrenNodeValues() {
        return children.keySet();
    }

    /**
     * Adds a child node for the given character unless one already exists; an
     * existing child (and its subtree) is left untouched.
     *
     * @param c the character of the child to add
     */
    public void add(char c) {
        Character key = Character.valueOf(c);
        if (!children.containsKey(key)) {
            children.put(key, new TrieNode(c));
        }
    }

    /**
     * @param c the character to look up
     * @return the child node for {@code c}, or {@code null} if there is none
     */
    public TrieNode getChildNode(char c) {
        return children.get(Character.valueOf(c));
    }

    /**
     * @param c the character to look up
     * @return {@code true} if this node has a child for {@code c}
     */
    public boolean contains(char c) {
        return children.containsKey(Character.valueOf(c));
    }

    /** Hash code of the node's character, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return character.hashCode();
    }

    /**
     * Two nodes are equal when they carry the same character, regardless of
     * their children.
     */
    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof TrieNode)) {
            return false;
        }
        TrieNode that = (TrieNode) obj;
        return this.getNodeValue() == that.getNodeValue();
    }

    /** Reflection-based dump of this node's fields in the default style. */
    @Override
    public String toString() {
        return ReflectionToStringBuilder.reflectionToString(this, ToStringStyle.DEFAULT_STYLE);
    }
}
|