如何使用Java实现AC自动机全文检索实例
导语:如何使用Java实现AC自动机全文检索,下面是小编给大家推荐的代码实现过程,大家可以参考阅读,更多详情请关注应届毕业生考试网。
第一步,构建Trie树,定义Node类型:
/**
* Created by zhaoyy on 2017/2/7.
*/
interface Node {
char value();
boolean exists();
boolean isRoot();
Node parent();
Node childOf(char c);
Node fail();
void setFail(Node node);
void setExists(boolean exists);
void add(Node child);
List children();
}
第二步,实现两种Node,如果词汇全是可打印的ASCII字符,就采用AsciiNode,否则(比如包含汉字),使用基于hash表的.MapNode;这两种Node均集成自AbstractNode:
/**
* Created by zhaoyy on 2017/2/8.
*/
abstract class AbstractNode implements Node {
private static final char EMPTY = '\0';
private final char value;
private final Node parent;
private boolean exists;
private Node fail;
public AbstractNode(Node parent, char value) {
this.parent = parent;
this.value = value;
this.exists = false;
this.fail = null;
}
public AbstractNode() {
this(null, EMPTY);
}
private static String tab(int n) {
StringBuilder builder = new StringBuilder();
for (int i = 0; i < n; i++) {
builder.append('\t');
}
return builder.toString();
}
private static String toString(Node node, int depth) {
StringBuilder builder = new StringBuilder();
String tab = tab(depth);
Node fail = node.fail();
Node parent = node.parent();
builder
.append(tab)
.append('
.append(node.value())
.append(" exists=\"")
.append(node.exists())
.append('"')
.append(" parent=\"")
.append(parent == null ? "null" : parent.value())
.append('"')
.append(" fail=\"")
.append(fail == null ? "null" : fail.value())
.append("\">\r\n");
for (Node child : node.children())
builder.append(toString(child, depth + 1));
builder
.append(tab)
.append("")
.append(node.value())
.append(">\r\n");
return builder.toString();
}
@Override
public char value() {
return value;
}
@Override
public boolean exists() {
return exists;
}
@Override
public boolean isRoot() {
return value == EMPTY;
}
@Override
public Node parent() {
return parent;
}
@Override
public Node fail() {
return fail;
}
@Override
public void setFail(Node node) {
this.fail = node;
}
@Override
public void setExists(boolean exists) {
this.exists = exists;
}
@Override
public String toString() {
return toString(this, 0);
}
}
/
/**
* Created by zhaoyy on 2017/2/8.
*/
final class AsciiNode extends AbstractNode implements Node {
private static final char FROM = 32;
private static final char TO = 126;
private final Node[] children;
public AsciiNode() {
super();
this.children = new Node[TO - FROM + 1];
}
public AsciiNode(Node parent, char value) {
super(parent, value);
this.children = new Node[TO - FROM + 1];
}
@Override
public Node childOf(char c) {
if (c >= FROM && c <= TO)
return children[(int) c - FROM];
else return null;
}
@Override
public void add(Node child) {
int index = (int) child.value();
children[index - FROM] = child;
}
@Override
public List children() {
List nodes = new ArrayList();
for (Node child : children)
if (child != null)
nodes.add(child);
return nodes;
}
}
//
/**
* Created by zhaoyy on 2017/2/8.
*/
final class MapNode extends AbstractNode implements Node {
private final Map children;
public MapNode() {
super();
this.children = new HashMap();
}
public MapNode(Node parent, char value) {
super(parent, value);
this.children = new HashMap();
}
@Override
public Node childOf(char c) {
return children.get(c);
}
@Override
public void add(Node child) {
children.put(child.value(), child);
}
@Override
public List children() {
List nodes = new ArrayList();
nodes.addAll(children.values());
return nodes;
}
}
第三步,
首先定义一个Node构造器:
/**
* Created by zhaoyy on 2017/2/8.
*/
public interface NodeMaker {
Node make(Node parent, char value);
Node makeRoot();
}
然后构建AC自动机,实现创建及查找方法:
/**
* Created by zhaoyy on 2017/2/7.
*/
public final class WordTable {
private final Node root;
private WordTable(Collection extends CharSequence> words, NodeMaker maker) {
Node root = buildTrie(words, maker);
setFailNode(root);
this.root = root;
}
public static WordTable compile(Collection extends CharSequence> words) {
if (words == null || words.isEmpty())
throw new IllegalArgumentException();
final NodeMaker maker;
if (isAscii(words))
maker = new NodeMaker() {
@Override
public Node make(Node parent, char value) {