后缀树建立 java_后缀树源码Java版O(n) Suffix Tree In Java_2

/******************************************************************************/

/*

trace_single_edge :

Traces a string along a given node's outgoing edge. It searches only in the

given edge and not other ones. Search stops when either whole string was

found in the given edge, a part of the string was found but the edge ended

(and the next edge must be searched too - performed by function trace_string)

or one non-matching character was found.

Input : The string to be searched, given in indices of the main string.

Output: (by value) the node where tracing has stopped.

(by reference) the edge position where the last match occurred, the

number of characters found on this edge, and a flag signaling whether

the search is done (1) or must continue in the next edge (0).

*/

NODE trace_single_edge(

SUFFIX_TREE    tree,

/* Node to start from */

NODE           node,

/* String to trace */

PATH            str,

/* Last matching position in edge */

MyInteger       edge_pos,

/* Last matching position in tree source string */

MyInteger       chars_found,

/* Skip or no_skip*/

SKIP_TYPE       type,

/* 1 if search is done, 0 if not */

MyInteger            search_done)

{

NODE      cont_node;

int   length,str_len;

/* Set default return values */

search_done.intValue = 1;

edge_pos.intValue    = 0;

/* Search for the first character of the string in the outcoming edge of

node */

cont_node = find_son(tree, node, tree.tree_string.charAt(str.begin));

if(cont_node == null)

{

/* Search is done, string not found */

edge_pos.intValue = get_node_label_length(tree,node)-1;

chars_found.intValue = 0;

return node;

}

/* Found first character - prepare for continuing the search */

node    = cont_node;

length  = get_node_label_length(tree,node);

str_len = str.end - str.begin + 1;

/* Compare edge length and string length. */

/* If edge is shorter then the string being searched and skipping is

enabled - skip edge */

if(type == SKIP_TYPE.skip)

{

if(length <= str_len)

{

(chars_found.intValue)   = length;

(edge_pos.intValue)      = length-1;

if(length < str_len)

search_done.intValue  = 0;

}

else

{

(chars_found.intValue)   = str_len;

(edge_pos.intValue)      = str_len-1;

}

counter++;

return node;

}

else

{

/* Find minimum out of edge length and string length, and scan it */

if(str_len < length)

length = str_len;

for(edge_pos.intValue=1, chars_found.intValue=1; edge_pos.intValue

{

counter++;

/* Compare current characters of the string and the edge. If equal -

continue */

if(tree.tree_string.charAt(node.edge_label_start+edge_pos.intValue) != tree.tree_string.charAt(str.begin+edge_pos.intValue))

{

(edge_pos.intValue)--;

return node;

}

}

}

/* The loop has advanced edge_pos.intValue one too much */

(edge_pos.intValue)--;

if((chars_found.intValue) < str_len)

/* Search is not done yet */

search_done.intValue = 0;

return node;

}

/******************************************************************************/

/*

trace_string :

Traces for a string in the tree. This function is used in construction

process only, and not for after-construction search of substrings. It is

tailored to enable skipping (when we know a suffix is in the tree (when

following a suffix link) we can avoid comparing all symbols of the edge by

skipping its length immediately and thus save atomic operations - see

Ukkonen's algorithm, skip trick).

This function, in contradiction to the function trace_single_edge, 'sees' the

whole picture, meaning it searches a string in the whole tree and not just in

a specific edge.

Input : The string, given in indices of the main string.

Output: (by value) the node where tracing has stopped.

(by reference) the edge position where the last match occurred and the

total number of characters found.

*/

NODE trace_string(

SUFFIX_TREE    tree,

/* Node to start from */

NODE           node,

/* String to trace */

PATH            str,

/* Last matching position in edge */

MyInteger       edge_pos,

/* Last matching position in tree string */

MyInteger       chars_found,

/* skip or not */

SKIP_TYPE       type)

{

/* This variable will be 1 when search is done.

It is a return value from function trace_single_edge */

MyInteger      search_done = new MyInteger(0);

/* This variable will hold the number of matching characters found in the

current edge. It is a return value from function trace_single_edge */

MyInteger edge_chars_found = new MyInteger(0);

chars_found.intValue = 0;

while(search_done.intValue == 0)

{

edge_pos.intValue        = 0;

edge_chars_found.intValue = 0;

// str is changed in trace_single_edge? not changed. safe.

// clone str

PATH path = new PATH();

path.begin = str.begin;

path.end = str.end;

node = trace_single_edge(tree, node, path, edge_pos, edge_chars_found, type, search_done);

str.begin       += edge_chars_found.intValue;

chars_found.intValue    += edge_chars_found.intValue;

}

return node;

}

/******************************************************************************/

/*

follow_suffix_link :

Follows the suffix link of the source node according to Ukkonen's rules.

Input : The tree, and pos. pos is a combination of the source node and the

position in its incoming edge where suffix ends.

Output: None by value. pos is updated in place so that it refers to the

node that represents the longest proper suffix of the original path.

Example: if the path was "abcde", pos ends up at the node for "bcde".

*/

void follow_suffix_link(SUFFIX_TREE tree, POS pos)

{

/* gama is the string between node and its father, in case node doesn't have

a suffix link */

PATH      gama = new PATH();

/* dummy argument for trace_string function */

MyInteger  chars_found = new MyInteger(0);

if(pos.node == tree.root)

{

return;

}

/* If node has no suffix link yet or in the middle of an edge - remember the

edge between the node and its father (gama) and follow its father's suffix

link (it must have one by Ukkonen's lemma). After following, trace down

gama - it must exist in the tree (and thus can use the skip trick - see

trace_string function description) */

if(pos.node.suffix_link == null || is_last_char_in_edge(tree,pos.node,pos.edge_pos.intValue) == 0)

{

/* If the node's father is the root, than no use following it's link (it

is linked to itself). Tracing from the root (like in the naive

algorithm) is required and is done by the calling function SEA uppon

recieving a return value of tree.root from this function */

if(pos.node.father == tree.root)

{

pos.node = tree.root;

return;

}

/* Store gama - the indices of node's incoming edge */

gama.begin      = pos.node.edge_label_start;

gama.end      = pos.node.edge_label_start + pos.edge_pos.intValue;

/* Follow father's suffix link */

pos.node      = pos.node.father.suffix_link;

/* Down-walk gama back to suffix_link's son */

// clone the PATH

PATH tmpGama = new PATH();

tmpGama.begin = gama.begin;

tmpGama.end = gama.end;

pos.node      = trace_string(tree, pos.node, tmpGama, (pos.edge_pos), chars_found, SKIP_TYPE.skip);

}

else

{

/* If a suffix link exists - just follow it */

pos.node      = pos.node.suffix_link;

pos.edge_pos.intValue   = get_node_label_length(tree,pos.node)-1;

}

}

/******************************************************************************/

/*

create_suffix_link :

Creates a suffix link between node and the node 'link' which represents its

largest suffix. The function could be avoided but is needed to monitor the

creation of suffix links when debugging or changing the tree.

Input : The node to link from, the node to link to.

Output: None.

*/

/**
 * Stores {@code link} as the suffix link of {@code node}.
 *
 * @param node the node to link from
 * @param link the node to link to
 */
void create_suffix_link(NODE node, NODE link) {
    node.suffix_link = link;
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值