Java AC自动机:如何使用Java实现AC自动机全文检索实例

如何使用Java实现AC自动机全文检索实例

return new AsciiNode(parent, value);

}

@Override

public Node makeRoot() {

return new AsciiNode();

}

};

else maker = new NodeMaker() {

@Override

public Node make(Node parent, char value) {

return new MapNode(parent, value);

}

@Override

public Node makeRoot() {

return new MapNode();

}

};

return new WordTable(words, maker);

}

private static boolean isAscii(Collection extends CharSequence> words) {

for (CharSequence word : words) {

int len = word.length();

for (int i = 0; i < len; i++) {

int c = (int) word.charAt(i);

if (c < 32 || c > 126)

return false;

}

}

return true;

}

private static Node buildTrie(Collection extends CharSequence> sequences, NodeMaker maker) {

Node root = maker.makeRoot();

for (CharSequence sequence : sequences) {

int len = sequence.length();

Node current = root;

for (int i = 0; i < len; i++) {

char c = sequence.charAt(i);

Node node = current.childOf(c);

if (node == null) {

node = maker.make(current, c);

current.add(node);

}

current = node;

if (i == len - 1)

node.setExists(true);

}

}

return root;

}

/**
 * Assigns the fail link of every node in the trie rooted at {@code root}.
 *
 * <p>Nodes are visited level by level using a FIFO queue. The root's fail link
 * is {@code null}; direct children of the root fail to the root. For any other
 * node, the parent's fail chain is followed until a node with a child for the
 * same character is found, and that child becomes the fail target. If the
 * chain runs out, the node fails to the root.
 *
 * @param root the root of an already built trie
 */
private static void setFailNode(final Node root) {
    root.setFail(null);
    // The element type must be declared: a raw Queue hands back Object from poll().
    Queue<Node> queue = new LinkedList<Node>();
    queue.add(root);
    while (!queue.isEmpty()) {
        Node parent = queue.poll();
        for (Node child : parent.children()) {
            if (parent.isRoot()) {
                child.setFail(root);
            } else {
                Node temp = parent.fail();
                while (temp != null) {
                    Node node = temp.childOf(child.value());
                    if (node != null) {
                        child.setFail(node);
                        break;
                    }
                    temp = temp.fail();
                }
                // Fail chain exhausted without finding the character: fall back to the root.
                if (temp == null) {
                    child.setFail(root);
                }
            }
            queue.add(child);
        }
    }
}

/**
 * Tells whether the given text contains at least one word of this table.
 *
 * <p>The text is scanned once. On a mismatch the fail links are followed and
 * the same character is retried from each fail target, so no character is
 * dropped. After each step the reached node and the nodes on its fail chain
 * are checked for the word flag, which also detects words that end inside a
 * longer trie path.
 *
 * @param cs the text to scan; {@code null} is treated like empty text
 * @return {@code true} if any word occurs in {@code cs}, {@code false} otherwise
 */
public boolean findAnyIn(CharSequence cs) {
    if (cs == null) {
        return false;
    }
    int len = cs.length();
    Node node = root;
    for (int i = 0; i < len; i++) {
        char c = cs.charAt(i);
        Node next = node.childOf(c);
        // Retry the current character from each fail target until one accepts it.
        while (next == null && node.fail() != null) {
            node = node.fail();
            next = node.childOf(c);
        }
        if (next == null) {
            // No node on the fail chain accepts this character: restart at the root.
            node = root;
            continue;
        }
        // A word ends here if this node, or any node on its fail chain, is flagged.
        for (Node suffix = next; suffix != null; suffix = suffix.fail()) {
            if (suffix.exists()) {
                return true;
            }
        }
        node = next;
    }
    return false;
}

/**
 * Scans the text and collects the words of this table found in it.
 *
 * <p>After a match is recorded, scanning restarts from the root at the next
 * character, so reported matches do not overlap. On a mismatch the fail links
 * are followed and the same character is retried from each fail target, so
 * no character is dropped.
 *
 * <p>NOTE(review): only the node reached directly is checked for the word
 * flag; a word that ends inside a longer trie path (reachable only through a
 * fail link) is not reported. Confirm whether that is intended.
 *
 * @param cs the text to scan
 * @return the matches in order of appearance; an empty list when {@code cs}
 *         is {@code null} or empty
 */
public List<MatchInfo> search(CharSequence cs) {
    if (cs == null || cs.length() == 0) {
        return Collections.emptyList();
    }
    List<MatchInfo> result = new ArrayList<MatchInfo>();
    int len = cs.length();
    Node node = root;
    for (int i = 0; i < len; i++) {
        char c = cs.charAt(i);
        Node next = node.childOf(c);
        // Retry the current character from each fail target until one accepts it.
        while (next == null && node.fail() != null) {
            node = node.fail();
            next = node.childOf(c);
        }
        if (next == null) {
            // No node on the fail chain accepts this character: restart at the root.
            node = root;
            continue;
        }
        if (next.exists()) {
            // i is the index of the last matched character.
            result.add(new MatchInfo(i, next));
            node = root;
            continue;
        }
        node = next;
    }
    return result;
}

/**
 * Renders this table by delegating to the root node's own string form.
 *
 * @return whatever {@code root.toString()} produces
 */
@Override
public String toString() {
    final String rendered = root.toString();
    return rendered;
}

}

定义一个保存查找结果的实体:

/**
 * Immutable record of one match: the matched word and the index stored with it.
 *
 * <p>Created by zhaoyy on 2017/2/7.
 */
public final class MatchInfo {

    private final int index;
    private final String word;

    /**
     * Creates a match from an explicit index and word.
     *
     * @param index the index to store as-is
     * @param word  the matched word
     */
    public MatchInfo(int index, String word) {
        this.word = word;
        this.index = index;
    }

    /**
     * Creates a match from the trie node where the word ended.
     *
     * <p>The word is rebuilt by walking the parent links from {@code node}
     * upward, skipping any node that reports itself as root. The stored index
     * is {@code index + 1 - word.length()}.
     *
     * @param index the index of the last matched character
     * @param node  the node reached by the last character; {@code null} yields an empty word
     */
    public MatchInfo(int index, Node node) {
        final String matched = spell(node);
        this.word = matched;
        this.index = index - matched.length() + 1;
    }

    /** Collects the node values from {@code tail} up to the root and returns them in root-to-tail order. */
    private static String spell(Node tail) {
        final StringBuilder reversed = new StringBuilder();
        for (Node cursor = tail; cursor != null; cursor = cursor.parent()) {
            if (cursor.isRoot()) {
                continue;
            }
            reversed.append(cursor.value());
        }
        // Values were gathered leaf-first, so flip them into reading order.
        return reversed.reverse().toString();
    }

    /** @return the stored index */
    public int getIndex() {
        return this.index;
    }

    /** @return the matched word */
    public String getWord() {
        return this.word;
    }

    /** @return the index and the word joined by a colon, e.g. {@code 1:she} */
    @Override
    public String toString() {
        return new StringBuilder().append(index).append(":").append(word).toString();
    }

}

第四步,调用Demo:

/**
 * Demo entry point: compiles a small word list into a {@code WordTable},
 * prints the table, then prints the matches found in a sample sentence.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // Declare the element type rather than using a raw List.
    List<String> list = Arrays.asList("say", "her", "he", "she", "shr", "alone");
    WordTable table = WordTable.compile(list);
    System.out.println(table);
    System.out.println(table.search("1shesaynothingabouthislivinghimalone"));
}

以下是输出结果:

< exists="false" parent="null" fail="null">

<l exists="false" parent="a" fail=" ">

</l>

>

[1:she, 4:say, 31:alone]

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值