学习后缀树,朴素的建立容易理解也容易实现,但时间复杂度高,而 Ukkonen算法实现了以O(n)时间复杂度建树。
感谢该博主详尽的学习笔记
Author: If
转载请注明:来自 IF's
http://www.cnblogs.com/snowberg/archive/2011/10/21/2468588.html
朴素算法
1、在str[0]~str[n-1]上逐步建立str[0,i](i∈[0,n-1])的后缀树
2、每一步依次找到str[j,i-1],j∈[0,i](到i是因为把空串也算上),再在尾部加上str[i]来更新树
Ukkonen算法通过三步加速,使第二步限制在均摊常数时间内
下面注释是自己的理解,容纳两个字符串的后缀树(为了最长回文)
<pre name="code" class="cpp">#include<iostream>
#include<algorithm>
#include<cmath>
#include<cstdio>
#include<cstring>
#include<fstream>
#include<map>
#include<queue>
#include <string>
#include<vector>
using namespace std;
// Shorthand macros and file-level globals.
// lch(i) expands to i shifted left by one bit (2*i); rch(i) additionally sets the low bit (2*i+1).
// Presumably left/right child indices of an array-backed binary tree; neither is used in the visible code.
#define lch(i) ((i)<<1)
#define rch(i) ((i)<<1|1)
// Square of i. The argument is evaluated twice, so avoid side effects in it.
#define sqr(i) ((i)*(i))
// Abbreviations for pair<int,int> and make_pair.
#define pii pair<int,int>
#define mp make_pair
// Inclusive counting loops: FOR runs i = b..e upwards, FORE runs i = b..e downwards.
// `b` and `e` are pasted unparenthesised and `e` is re-evaluated on every iteration.
#define FOR(i,b,e) for(int i=b;i<=e;i++)
#define FORE(i,b,e) for(int i=b;i>=e;i--)
// Zero-fills `a`; relies on sizeof(a), so `a` must be an array or object, not a pointer.
#define ms(a) memset(a,0,sizeof(a))
// Not referenced in the visible code.
const int maxnum =20007;
// Not referenced in the visible code.
const int key = 177;
// `len` is shared state: suffixtree::addstring() stores the length of the string being
// inserted (including the '$' terminator) here and suffixtree::createLeaf() reads it.
// `n` is not referenced in the visible code.
int n,len;
// One node of the suffix tree together with the label leading into it.
// `pnode` is an alias for `node*`.
typedef struct node
{
    // Length of the string spelled from the root to the end of this node's label.
    int depth;
    // Length of this node's own label.
    int len;
    // Offset inside the label used to flag an implicit position; -1 when none is flagged.
    int pos;
    // First character of the label. The destructor does not free it.
    char* str;
    // Suffix link to follow next.
    node* next;
    // Parent node.
    node* father;
    // Child nodes.
    std::vector<node*> son;

    typedef std::vector<node*>::iterator soniterator;

    // Creates an empty, unlinked node with no flagged position.
    node()
        : depth(0)
        , len(0)
        , pos(-1)
        , str(NULL)
        , next(NULL)
        , father(NULL)
        , son()
    {}

    // Member-wise (shallow) copy: pointers are shared with `other`, nothing is duplicated.
    node(node const& other)
        : depth(other.depth)
        , len(other.len)
        , pos(other.pos)
        , str(other.str)
        , next(other.next)
        , father(other.father)
        , son(other.son)
    {}

    // Releases nothing; children and label storage are managed by the tree.
    ~node(){}
}*pnode;
class suffixtree{
public:
node* ST,*LEND,*root;//需要chushih
int num;
char* strs[3];
suffixtree(){
root = new node;
root->depth=root->len=0;
root->next=root;
root->pos=-1;
num=0;
}
void destroy(node* p){
int size = p->son.size();
FOR(i,0,size-1)
destroy(p->son[i]);
delete p;
}
void clear(){
destroy(root);
root = new node;
delete strs[0];
root->depth=root->len=0;
root->next=root;
root->pos=-1;
num=0;
}
~suffixtree(){
destroy(root);
FOR(i,0,num-1){
delete strs[i];
}
}
void bfs(){
node* ptr;
queue<node*> que;
queue<int> deep;
que.push(root);
deep.push(0);
int nowdeep=0,tmpd;
while(!que.empty()){
ptr = que.front();
que.pop();
tmpd = deep.front();
deep.pop();
node::soniterator si = ptr->son.begin();
for(;si!=ptr->son.end();si++){
que.push(*si);
deep.push(tmpd+1);
}
if(tmpd>nowdeep){nowdeep++; printf("\n");}
if(ptr==root)continue;
FOR(i,0,ptr->len-1){
printf("%c",ptr->str[i]);
}
printf(" ");
}
}
node* matchstr(node *tmp,char si){
if(tmp->son.empty()) return 0;
int size = tmp->son.size();
FOR(i,0,size-1){
if(tmp->son[i]->str[0]==si){
return tmp->son[i];
}
}
return 0;
}
node* FindLastPos(node* p,int len,char *str){
node* tmpP = p;
while(tmpP->depth<len){
tmpP=matchstr(tmpP,str[tmpP->depth]);
}
if(tmpP->depth>len){//隐式节点,标记位置
tmpP->pos=len-tmpP->father->depth;
}
return tmpP;
}
node* createLeaf(node *tmp,int pos,char *s){
node *explicitnode =new node;
explicitnode->len=len-pos;
explicitnode->depth=tmp->depth+explicitnode->len;
explicitnode->father=tmp;
explicitnode->str = s;
explicitnode->next=root;
tmp->son.push_back(explicitnode);
return explicitnode;
}
node* splitnode(node *tmp,int pos,char *s){//s为s[i,n-1],TMP只有一个父结点而有多个子结点,故将新结点放在前面方便更新;
//分裂为s[j-1-k,i-1-k]即s[j-1,i-1],s[j-k,i-1-k+l](之前的后缀)前后两个结点,而之前的后缀连接均指向两结点之前,无须更新
node *explicitnode = new node;
explicitnode->str=tmp->str;
explicitnode->len=tmp->pos;
explicitnode->depth=tmp->father->depth+explicitnode->len;
explicitnode->father=tmp->father;
explicitnode->son.push_back(tmp);
explicitnode->next=root;
int si = tmp->father->son.size();
FOR(i,0,si-1){
if(tmp->father->son[i]->str[0]==tmp->str[0]){
tmp->father->son[i]=explicitnode;
break;
}
}
tmp->len-=explicitnode->len;
tmp->str+=explicitnode->len;
tmp->father=explicitnode;
tmp->pos=-1;
createLeaf(explicitnode,pos,s);
return explicitnode;
}
void addstring(char *s){
len = strlen(s);
char *str = strs[num];
num++;
str=new char[len+1];
strcpy(str,s);
str[len]='$';
str[len+1]='\0';
LEND= root;
int st_=0;
len++;
node *p=root,*tmp,*matchson;
ST=0;
FOR(i,0,len-1){
LEND=root;//截断连接,防止干扰上一轮形成的连接
FOR(j,st_,i){
//用tmp找到str[j,i-1]
tmp = FindLastPos(p,i-j,str+j);
//
if(tmp->pos==-1){//显式节点->s[j,i]均在TMP子结点;p=tmp->father
if(LEND!=root)//建立(s[j-1,i]或上一轮结尾)->s[j,i]
LEND->next=tmp;
LEND=tmp;
matchson = matchstr(tmp,str[i]);//返回以str[i]开头的子结点
if(matchson!=0){//s[j,i]已存在,存在指向s[j+1,i]的next,否则指向0,之前的s[j-1,i]没有break,一定创建了叶子节点,所以下一轮必定从此开始
//下一轮从S[J,I]开始,可以直接从p搜索到s[j,i],为了缩短搜索距离
p=tmp;//跳转到s[j,i]前一结点
break;
}else{//创建叶子节点
createLeaf(tmp,i,str+i);//s[j,i]成为叶子节点,下一轮p应从s[j+1,i]更新为s[j+1,i+1];tmp=s[j,i-1];
st_++;
p=tmp;
//
}
}else{//隐式结点->s[j,i],s[j,i-1]同处一结点,p=s[j,i-1]前
if(tmp->str[tmp->pos]==str[i]){//tmp指向s[j,i],跳转,p指向s[j,i-1]前内部结点,因s[j,i],s[j,i-1]同处一结点,其后缀连接相同
//p由s[j-1,i]前找到s[j,i-1]为了缩短搜索距离
//p=tmp->father;//跳转到s[j,i]前一结点
tmp->pos=-1;
break;
}
else{
tmp=splitnode(tmp,i,str+i);//s[j,i]成为叶子结点,下一轮不再更新,tmp->s[j,i]father,tmp = s[j,i-1];
if(LEND!=0&&LEND!=root)//建立s[j-1,i]前内部结点->s[j,i]前面内部结点,p指向s[j,i-1],找s[j-1,i-1]前,找s[j,i-1]前;
LEND->next=tmp;
LEND=tmp;
p=tmp->father;
st_++;
}
}
p=p->next;
}
}
}
};
// Demo: builds one suffix tree over three sample strings and prints it level by level.
int main()
{
    // addstring() is declared with a mutable char* parameter; binding a string literal
    // to it is ill-formed since C++11, so the samples live in writable arrays.
    char first[]  = "AACATCATCATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT";
    char second[] = "ACATCATCATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
    char third[]  = "CATCATCATCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC";

    suffixtree stree;
    stree.addstring(first);
    stree.addstring(second);
    stree.addstring(third);
    stree.bfs();
    return 0;
}