建立O（n）的后缀树实现（修正版3）

最新推荐文章于 2021-07-20 22:07:42 发布

yeqm_

最新推荐文章于 2021-07-20 22:07:42 发布

阅读量357

点赞数

分类专栏：后缀树文章标签： poj 后缀树

本文链接：https://blog.csdn.net/yeqm_/article/details/49455021

版权

后缀树专栏收录该内容

2 篇文章 0 订阅

订阅专栏

学习后缀树，朴素的建立容易理解也容易实现，但时间复杂度高，而 Ukkonen算法实现了以O(n)时间复杂度建树。

感谢该博主详尽的学习笔记

Author: If

转载请注明：来自 IF's

http://www.cnblogs.com/snowberg/archive/2011/10/21/2468588.html

朴素算法

1、在str[0]~str[n-1]上逐步建立str[0,i]（i∈[0,n-1]）的后缀树

2、每一步依次找到str[j,i-1]，j∈[0,i]（j取到i是因为把空串也算上），再在其尾部加上str[i]来更新树

Ukkonen算法通过三步加速使第二步限制在常数时间内

下面注释是自己的理解，容纳两个字符串的后缀树（为了最长回文）

<pre name="code" class="cpp">#include<iostream>  
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <map>
#include <queue>
#include <string>
#include <vector>
using namespace std;  
// 2*i and 2*i+1 (presumably the left/right child indices of an implicit
// binary tree -- confirm against callers).
#define lch(i) ((i)<<1)
#define rch(i) ((i)<<1|1)
// Square of i. The argument is evaluated twice, so avoid side effects in it.
#define sqr(i) ((i)*(i))
// Shorthands for pair<int,int> and make_pair (rely on `using namespace std`).
#define pii pair<int,int>
#define mp make_pair
// Inclusive ascending (FOR) and descending (FORE) integer loops over [b,e].
// Both bounds are parenthesized so that arguments containing operators of
// lower precedence than <= / >= (e.g. `a & b`, `c ? x : y`) are evaluated as
// a whole instead of being re-associated with the comparison.
#define FOR(i,b,e) for(int i=(b);i<=(e);i++)
#define FORE(i,b,e) for(int i=(b);i>=(e);i--)
// Zero-fills an object. `a` must be the array/object itself, not a pointer to
// it, because the byte count comes from sizeof(a).
#define ms(a)   memset((a),0,sizeof(a))
const int maxnum =20007;
const int key = 177;
// len: length of the string currently being inserted into the suffix tree,
// terminator included; written by suffixtree::addstring and read by
// suffixtree::createLeaf. n is not used in the code visible here.
int n,len;

/**
 * One node of the suffix tree together with the edge label that leads into it.
 *
 * Nodes never own the characters they point at: `str` refers into a buffer
 * kept alive by the tree. Child nodes are likewise not freed by ~node(); the
 * tree releases them.
 */
struct node
{
	/// Iterator over the child list.
	typedef std::vector<node*>::iterator soniterator;

	int                 depth;   ///< Length of the string spelled from the root to the end of this node.
	int                 len;     ///< Length of this node's own label.
	int                 pos;     ///< Offset inside the label marking an implicit position; -1 when none is marked.
	char*               str;     ///< First character of the label (non-owning).
	node*               next;    ///< Suffix link.
	node*               father;  ///< Parent node; NULL until the node is attached.
	std::vector<node*>  son;     ///< Children.

	/// Creates a detached node with an empty label and no marked position.
	node()
		: depth(0),
		  len(0),
		  pos(-1),
		  str(NULL),
		  next(NULL),
		  father(NULL),
		  son()
	{
	}

	/// Member-wise copy; the pointers (and the child pointers) are shared, not cloned.
	node(node const& other)
		: depth(other.depth),
		  len(other.len),
		  pos(other.pos),
		  str(other.str),
		  next(other.next),
		  father(other.father),
		  son(other.son)
	{
	}

	~node() {}
};

/// Pointer-to-node alias.
typedef node* pnode;

class suffixtree{
public:
	node* ST,*LEND,*root;//需要chushih
	int num;
	char* strs[3];
	suffixtree(){
		root = new node;
		root->depth=root->len=0;
		root->next=root;
		root->pos=-1;
		num=0;
	}

	void destroy(node* p){
		int size = p->son.size();
		FOR(i,0,size-1)
			destroy(p->son[i]);
		delete p;
	}
	void clear(){
		destroy(root);
		root = new node;
		delete strs[0];
		root->depth=root->len=0;
		root->next=root;
		root->pos=-1;
		num=0;
	}
	~suffixtree(){
		destroy(root);
		FOR(i,0,num-1){
			delete strs[i];
		}
	}

	void  bfs(){
		node* ptr;
		queue<node*> que;
		queue<int> deep;
		que.push(root);
		deep.push(0);
		int nowdeep=0,tmpd;
		while(!que.empty()){
			ptr = que.front();
			que.pop();
			tmpd = deep.front();
			deep.pop();
			node::soniterator si = ptr->son.begin();
			for(;si!=ptr->son.end();si++){
				que.push(*si);
				deep.push(tmpd+1);
			}
			if(tmpd>nowdeep){nowdeep++; printf("\n");}
			if(ptr==root)continue;
			FOR(i,0,ptr->len-1){
				printf("%c",ptr->str[i]);
			}
			printf(" ");
			
		}
	}


	node* matchstr(node *tmp,char si){
		if(tmp->son.empty()) return 0;
		int size = tmp->son.size();
		FOR(i,0,size-1){
			if(tmp->son[i]->str[0]==si){
				return tmp->son[i];
			}
		}
		return 0;
	}

	node* FindLastPos(node* p,int len,char *str){
		node* tmpP = p;
		while(tmpP->depth<len){
			tmpP=matchstr(tmpP,str[tmpP->depth]);
		}
		if(tmpP->depth>len){//隐式节点，标记位置
			tmpP->pos=len-tmpP->father->depth;
		}
		return tmpP;
	}


	
	node* createLeaf(node *tmp,int pos,char *s){
		node *explicitnode =new node;
		explicitnode->len=len-pos;
		explicitnode->depth=tmp->depth+explicitnode->len;
		explicitnode->father=tmp;
		explicitnode->str = s;
		explicitnode->next=root;
		tmp->son.push_back(explicitnode);
		return explicitnode;
	}

	node* splitnode(node *tmp,int pos,char *s){//s为s[i,n-1],TMP只有一个父结点而有多个子结点，故将新结点放在前面方便更新;
											   //分裂为s[j-1-k,i-1-k]即s[j-1,i-1],s[j-k,i-1-k+l](之前的后缀)前后两个结点,而之前的后缀连接均指向两结点之前，无须更新
		node *explicitnode = new node;
		explicitnode->str=tmp->str;
		explicitnode->len=tmp->pos;
		explicitnode->depth=tmp->father->depth+explicitnode->len;
		explicitnode->father=tmp->father;
		explicitnode->son.push_back(tmp);
		explicitnode->next=root;
		int si = tmp->father->son.size();
		FOR(i,0,si-1){
			if(tmp->father->son[i]->str[0]==tmp->str[0]){
				tmp->father->son[i]=explicitnode;
				break;
			}
		}
		tmp->len-=explicitnode->len;
		tmp->str+=explicitnode->len;
		tmp->father=explicitnode;
		tmp->pos=-1;
		createLeaf(explicitnode,pos,s);
		return explicitnode;
	}

	void addstring(char *s){
		len = strlen(s);
		char *str = strs[num];
		num++;
		str=new char[len+1];
		strcpy(str,s);
		str[len]='$';
		str[len+1]='\0';
		LEND= root;
		int st_=0;
		len++;
		node *p=root,*tmp,*matchson;
		ST=0;
		FOR(i,0,len-1){
			LEND=root;//截断连接，防止干扰上一轮形成的连接
			FOR(j,st_,i){
				//用tmp找到str[j,i-1]
				tmp = FindLastPos(p,i-j,str+j);
				//
				if(tmp->pos==-1){//显式节点->s[j,i]均在TMP子结点;p=tmp->father
					if(LEND!=root)//建立（s[j-1,i]或上一轮结尾）->s[j,i]
						LEND->next=tmp;
					LEND=tmp;
					matchson = matchstr(tmp,str[i]);//返回以str[i]开头的子结点
					if(matchson!=0){//s[j,i]已存在,存在指向s[j+1,i]的next，否则指向0，之前的s[j-1,i]没有break，一定创建了叶子节点，所以下一轮必定从此开始
						//下一轮从S[J,I]开始，可以直接从p搜索到s[j,i]，为了缩短搜索距离

						p=tmp;//跳转到s[j,i]前一结点
						break;
					}else{//创建叶子节点
						createLeaf(tmp,i,str+i);//s[j,i]成为叶子节点，下一轮p应从s[j+1,i]更新为s[j+1,i+1];tmp=s[j,i-1];
						st_++;
						p=tmp;
						//
					}

				}else{//隐式结点->s[j,i],s[j,i-1]同处一结点,p=s[j,i-1]前
					if(tmp->str[tmp->pos]==str[i]){//tmp指向s[j,i],跳转，p指向s[j,i-1]前内部结点,因s[j,i],s[j,i-1]同处一结点，其后缀连接相同
												   //p由s[j-1,i]前找到s[j,i-1]为了缩短搜索距离

						//p=tmp->father;//跳转到s[j,i]前一结点
						tmp->pos=-1;
						break;
					}
					else{
						tmp=splitnode(tmp,i,str+i);//s[j,i]成为叶子结点，下一轮不再更新,tmp->s[j,i]father,tmp = s[j,i-1];
						if(LEND!=0&&LEND!=root)//建立s[j-1,i]前内部结点->s[j,i]前面内部结点，p指向s[j,i-1],找s[j-1,i-1]前,找s[j,i-1]前;
							LEND->next=tmp;
						LEND=tmp;
						p=tmp->father;
						st_++;
					}
				}
				p=p->next;
			}
		}
	}

};

/**
 * Demo: builds one suffix tree from three sample strings and prints it level
 * by level.
 */
int main()
{
	// addstring is called with mutable arrays rather than string literals:
	// converting a string literal to char* is not valid since C++11.
	char first[]  = "AACATCATCATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT";
	char second[] = "ACATCATCATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
	char third[]  = "CATCATCATCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC";

	suffixtree stree;
	stree.addstring(first);
	stree.addstring(second);
	stree.addstring(third);
	stree.bfs();
	return 0;
}

yeqm_

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
建立O（n）的后缀树实现（修正版3）

学习后缀树，朴素的建立容易理解也容易实现，但时间复杂度高，而 Ukkonen算法实现了以O(n)时间复杂度建树。感谢该博主详尽的学习笔记 http://www.cnblogs.com/snowberg/archive/2011/10/21/2468588.html 朴素算法 1、在str[0]~str[n-1]逐步建立str[0,i]，i∈[0,n-1]的后缀树 2、每一步依次找到str[
复制链接

扫一扫

专栏目录