java ac自动机_Java实现AC自动机全文检索示例

第一步,构建Trie树,定义Node类型:

/**

* Created by zhaoyy on 2017/2/7.

*/

interface Node {

char value();

boolean exists();

boolean isRoot();

Node parent();

Node childOf(char c);

Node fail();

void setFail(Node node);

void setExists(boolean exists);

void add(Node child);

List children();

}

第二步,实现两种Node,如果词汇全是可打印的ASCII字符,就采用AsciiNode,否则(比如包含汉字),使用基于hash表的MapNode;这两种Node均集成自AbstractNode:

/**

* Created by zhaoyy on 2017/2/8.

*/

abstract class AbstractNode implements Node {

private static final char EMPTY = '\0';

private final char value;

private final Node parent;

private boolean exists;

private Node fail;

public AbstractNode(Node parent, char value) {

this.parent = parent;

this.value = value;

this.exists = false;

this.fail = null;

}

public AbstractNode() {

this(null, EMPTY);

}

private static String tab(int n) {

StringBuilder builder = new StringBuilder();

for (int i = 0; i < n; i++) {

builder.append('\t');

}

return builder.toString();

}

private static String toString(Node node, int depth) {

StringBuilder builder = new StringBuilder();

String tab = tab(depth);

Node fail = node.fail();

Node parent = node.parent();

builder

.append(tab)

.append('

.append(node.value())

.append(" exists=\"")

.append(node.exists())

.append('"')

.append(" parent=\"")

.append(parent == null ? "null" : parent.value())

.append('"')

.append(" fail=\"")

.append(fail == null ? "null" : fail.value())

.append("\">\r\n");

for (Node child : node.children())

builder.append(toString(child, depth + 1));

builder

.append(tab)

.append("")

.append(node.value())

.append(">\r\n");

return builder.toString();

}

@Override

public char value() {

return value;

}

@Override

public boolean exists() {

return exists;

}

@Override

public boolean isRoot() {

return value == EMPTY;

}

@Override

public Node parent() {

return parent;

}

@Override

public Node fail() {

return fail;

}

@Override

public void setFail(Node node) {

this.fail = node;

}

@Override

public void setExists(boolean exists) {

this.exists = exists;

}

@Override

public String toString() {

return toString(this, 0);

}

}

/

/**

* Created by zhaoyy on 2017/2/8.

*/

final class AsciiNode extends AbstractNode implements Node {

private static final char FROM = 32;

private static final char TO = 126;

private final Node[] children;

public AsciiNode() {

super();

this.children = new Node[TO - FROM + 1];

}

public AsciiNode(Node parent, char value) {

super(parent, value);

this.children = new Node[TO - FROM + 1];

}

@Override

public Node childOf(char c) {

if (c >= FROM && c <= TO)

return children[(int) c - FROM];

else return null;

}

@Override

public void add(Node child) {

int index = (int) child.value();

children[index - FROM] = child;

}

@Override

public List children() {

List nodes = new ArrayList();

for (Node child : children)

if (child != null)

nodes.add(child);

return nodes;

}

}

//

/**

* Created by zhaoyy on 2017/2/8.

*/

final class MapNode extends AbstractNode implements Node {

private final Map children;

public MapNode() {

super();

this.children = new HashMap();

}

public MapNode(Node parent, char value) {

super(parent, value);

this.children = new HashMap();

}

@Override

public Node childOf(char c) {

return children.get(c);

}

@Override

public void add(Node child) {

children.put(child.value(), child);

}

@Override

public List children() {

List nodes = new ArrayList();

nodes.addAll(children.values());

return nodes;

}

}

第三步,

首先定义一个Node构造器:

/**

* Created by zhaoyy on 2017/2/8.

*/

public interface NodeMaker {

Node make(Node parent, char value);

Node makeRoot();

}

然后构建AC自动机,实现创建及查找方法:

/**

* Created by zhaoyy on 2017/2/7.

*/

public final class WordTable {

private final Node root;

private WordTable(Collection extends CharSequence> words, NodeMaker maker) {

Node root = buildTrie(words, maker);

setFailNode(root);

this.root = root;

}

public static WordTable compile(Collection extends CharSequence> words) {

if (words == null || words.isEmpty())

throw new IllegalArgumentException();

final NodeMaker maker;

if (isAscii(words))

maker = new NodeMaker() {

@Override

public Node make(Node parent, char value) {

return new AsciiNode(parent, value);

}

@Override

public Node makeRoot() {

return new AsciiNode();

}

};

else maker = new NodeMaker() {

@Override

public Node make(Node parent, char value) {

return new MapNode(parent, value);

}

@Override

public Node makeRoot() {

return new MapNode();

}

};

return new WordTable(words, maker);

}

private static boolean isAscii(Collection extends CharSequence> words) {

for (CharSequence word : words) {

int len = word.length();

for (int i = 0; i < len; i++) {

int c = (int) word.charAt(i);

if (c < 32 || c > 126)

return false;

}

}

return true;

}

private static Node buildTrie(Collection extends CharSequence> sequences, NodeMaker maker) {

Node root = maker.makeRoot();

for (CharSequence sequence : sequences) {

int len = sequence.length();

Node current = root;

for (int i = 0; i < len; i++) {

char c = sequence.charAt(i);

Node node = current.childOf(c);

if (node == null) {

node = maker.make(current, c);

current.add(node);

}

current = node;

if (i == len - 1)

node.setExists(true);

}

}

return root;

}

private static void setFailNode(final Node root) {

root.setFail(null);

Queue queue = new LinkedList();

queue.add(root);

while (!queue.isEmpty()) {

Node parent = queue.poll();

Node temp;

for (Node child : parent.children()) {

if (parent.isRoot())

child.setFail(root);

else {

temp = parent.fail();

while (temp != null) {

Node node = temp.childOf(child.value());

if (node != null) {

child.setFail(node);

break;

}

temp = temp.fail();

}

if (temp == null)

child.setFail(root);

}

queue.add(child);

}

}

}

public boolean findAnyIn(CharSequence cs) {

int len = cs.length();

Node node = root;

for (int i = 0; i < len; i++) {

Node next = node.childOf(cs.charAt(i));

if (next == null) {

next = node.fail();

if (next == null) {

node = root;

continue;

}

}

if (next.exists())

return true;

}

return false;

}

public List search(CharSequence cs) {

if (cs == null || cs.length() == 0)

return Collections.emptyList();

List result = new ArrayList();

int len = cs.length();

Node node = root;

for (int i = 0; i < len; i++) {

Node next = node.childOf(cs.charAt(i));

if (next == null) {

next = node.fail();

if (next == null) {

node = root;

continue;

}

}

if (next.exists()) {

MatchInfo info = new MatchInfo(i, next);

result.add(info);

node = root;

continue;

}

node = next;

}

return result;

}

@Override

public String toString() {

return root.toString();

}

}

定义一个保存查找结果的实体:

/**

* Created by zhaoyy on 2017/2/7.

*/

public final class MatchInfo {

private final int index;

private final String word;

public MatchInfo(int index, String word) {

this.index = index;

this.word = word;

}

public MatchInfo(int index, Node node) {

StringBuilder builder = new StringBuilder();

while (node != null) {

if (!node.isRoot())

builder.append(node.value());

node = node.parent();

}

String word = builder.reverse().toString();

this.index = index + 1 - word.length();

this.word = word;

}

public int getIndex() {

return index;

}

public String getWord() {

return word;

}

@Override

public String toString() {

return index + ":" + word;

}

}

第四步,调用Demo:

public static void main(String[] args) {

List list = Arrays.asList("say", "her", "he", "she", "shr", "alone");

WordTable table = WordTable.compile(list);

System.out.println(table);

System.out.println(table.search("1shesaynothingabouthislivinghimalone"));

}

以下是输出结果:

< exists="false" parent="null" fail="null">

>

[1:she, 4:say, 31:alone]

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持脚本之家。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值