TrieTree（字典树）与其说是一种算法，还不如说是一种解题的思路。我对海量数据的处理一直都保持着积极的学习态度，而TrieTree可以说是处理海量数据的诸多方法中比较典型的一种。下面我提供了两种TrieTree的实现：使用动态数组和使用HashMap。
1、动态数组的实现大多适用于字符集范围确定且较小、字符重复率较高的场景。
2、HashMap的实现大多适用于字符集范围较大、字符重复率不是很高的场景。
Trie接口类:
package com.au.algorithm;
import java.util.List;
/**
 * A collection of strings that can be queried by exact item and by prefix.
 */
public interface Trie {

    /**
     * Records one occurrence of the given string.
     *
     * @param item the string to store
     */
    void add(String item);

    /**
     * Looks up stored strings that begin with the given prefix.
     *
     * @param prefix the leading characters to match
     * @return the matching strings
     */
    List<String> getItemsWithPrefix(String prefix);

    /**
     * Reports how many times the given string has been stored.
     *
     * @param item the string to look up
     * @return the number of recorded occurrences
     */
    int getCount(String item);

    /**
     * Lists the stored strings.
     *
     * @return the stored strings
     */
    List<String> getAllItems();

    /**
     * NOTE(review): the contract is not specified in this file; the name
     * suggests "strings stored more than once" - confirm with the author.
     *
     * @return a list of strings
     */
    List<String> getMultiItems();

    /**
     * NOTE(review): the contract is not specified in this file; the name
     * suggests "strings stored exactly once" - confirm with the author.
     *
     * @return a list of strings
     */
    List<String> getSingleItems();
}
使用HashMap实现的TrieTree:
package com.au.algorithm;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
*
* @author fuyouming
*
*/
public class HashMapTrie implements Trie {
private final Item root = new Item();
/**
* 字符节点的内部类
*
* @author fuyouming
*
*/
private class Item {
private int endCount = 0;// 多少个字符串以该字符做为结束符
private Map<Integer, Item> nextItems = new HashMap<Integer, Item>();
}
public void add(String str) {
add(root, str);
}
public List<String> getItemsWithPrefix(String prefix) {
List<String> list = new LinkedList<String>();
Item current = walk2FixedItem(prefix);
if (current != null) {
walk2NextItem(list, current, new StringBuffer(prefix));
}
return list;
}
public int getCount(String str) {
Item current = walk2FixedItem(str);
if (current != null) {
return current.endCount;
}
return 0;
}
public List<String> getAllItems() {
List<String> list = new LinkedList<String>();
Iterator<Integer> iterator = root.nextItems.keySet().iterator();
while (iterator.hasNext()) {
Integer i = iterator.next();
Item current = (Item) root.nextItems.get(i);
StringBuffer buf = new StringBuffer().append((char) i.intValue());
walk2NextItem(list, current, buf);
}
return list;
}
public List<String> getMultiItems() {
return null;
}
public List<String> getSingleItems() {
return null;
}
private void add(Item item, String str) {
if (str.length() == 0) {
item.endCount++;
} else {
int index = str.charAt(0);
Item nextItem = null;
if (!item.nextItems.containsKey(index)) {
nextItem = new Item();
item.nextItems.put(index, nextItem);
} else {
nextItem = (Item) item.nextItems.get(index);
}
add(nextItem, str.substring(1));
}
}
private Item walk2FixedItem(String prefix) {
if (prefix.length() > 0) {
Item current = root;
for (int i = 0; i < prefix.length(); i++) {
int index = prefix.charAt(i);
current = current.nextItems.get(index);
if (current == null) {
break;
}
}
return current;
} else {
return null;
}
}
private void walk2NextItem(List<String> list, Item previous,
StringBuffer buf) {
for (int i = 0; i < previous.endCount; i++) {
list.add(buf.toString());
}
Iterator<Integer> iterator = previous.nextItems.keySet().iterator();
while (iterator.hasNext()) {
Integer i = iterator.next();
Item current = (Item) previous.nextItems.get(i);
StringBuffer newBuf = new StringBuffer(buf).append((char) i
.intValue());
walk2NextItem(list, current, newBuf);
}
}
public static void main(String args[]) {
Trie trie = new HashMapTrie();
trie.add("ab");
trie.add("abc");
trie.add("abcd");
trie.add("abd");
trie.add("abd");
trie.add("a cbd");
trie.add("acrtfg");
List<String> list = trie.getAllItems();
for (String s : list) {
System.out.println(s);
}
System.out.println("=================================");
list = trie.getItemsWithPrefix("ac");
for (String s : list) {
System.out.println(s);
}
}
}
使用动态数组实现的TrieTree:
package com.au.algorithm;
import java.util.LinkedList;
import java.util.List;
/**
*
* @author fuyouming
*
*/
public class DynamicArrayTrie implements Trie {
private final Item root = new Item();
private int startASCII = 32;// 空格的ASCII码,排在它之前的字符都没有图形表示,所以不做处理
private int endASCII = 255;// 最后一个字符的ASCII码
/**
* 字符节点的内部类
*
* @author fuyouming
*
*/
private class Item {
private int endCount = 0;// 多少个字符串以该字符做为结束符
private Item[] nextItems = new Item[32];
}
public void add(String str) {
add(root, str);
}
public List<String> getItemsWithPrefix(String prefix) {
List<String> list = new LinkedList<String>();
Item current = walk2FixedItem(prefix);
if (current != null) {
walk2NextItem(list, current, new StringBuffer(prefix));
}
return list;
}
public int getCount(String str) {
Item current = walk2FixedItem(str);
if (current != null) {
return current.endCount;
}
return 0;
}
public List<String> getAllItems() {
List<String> list = new LinkedList<String>();
for (int i = 0; i < root.nextItems.length; i++) {
Item current = root.nextItems[i];
if (current != null) {
StringBuffer buf = new StringBuffer().append(idx2Chr(i));
walk2NextItem(list, current, buf);
}
}
return list;
}
public List<String> getMultiItems() {
return null;
}
public List<String> getSingleItems() {
return null;
}
private void add(Item item, String str) {
if (str.length() == 0) {
item.endCount++;
} else {
int index = chr2Idx(str.charAt(0));
ensureCapacity(item, index);
if (item.nextItems[index] == null) {
item.nextItems[index] = new Item();
}
add(item.nextItems[index], str.substring(1));
}
}
private int chr2Idx(char c) {
return c - startASCII;
}
private char idx2Chr(int i) {
return (char) (i + startASCII);
}
private void ensureCapacity(Item item, int index) {
if (index + 1 > item.nextItems.length) {
Item[] oldItems = item.nextItems;
item.nextItems = new Item[Math.min(endASCII - startASCII,
oldItems.length + Math.max(10, index - oldItems.length)) + 1];
System.arraycopy(oldItems, 0, item.nextItems, 0, oldItems.length);
}
}
private Item walk2FixedItem(String prefix) {
if (prefix.length() > 0) {
Item current = root;
for (int i = 0; i < prefix.length(); i++) {
int index = chr2Idx(prefix.charAt(i));
current = current.nextItems[index];
if (current == null) {
break;
}
}
return current;
} else {
return null;
}
}
private void walk2NextItem(List<String> list, Item previous,
StringBuffer buf) {
for (int i = 0; i < previous.endCount; i++) {
list.add(buf.toString());
}
for (int i = 0; i < previous.nextItems.length; i++) {
Item current = previous.nextItems[i];
if (current != null) {
StringBuffer newBuf = new StringBuffer(buf).append(idx2Chr(i));
walk2NextItem(list, current, newBuf);
}
}
}
public static void main(String args[]) {
Trie trie = new DynamicArrayTrie();
trie.add("ab");
trie.add("abc");
trie.add("abcd");
trie.add("abd");
trie.add("abd");
trie.add("a cbd");
trie.add("acrtfg");
List<String> list = trie.getAllItems();
for (String s : list) {
System.out.println(s);
}
System.out.println("=================================");
list = trie.getItemsWithPrefix("ac");
for (String s : list) {
System.out.println(s);
}
}
}