一些朋友提醒,这次一次放出实现了Ukkonen 的paper的三个优化的完整SuffixTree的代码。看了之前博客的只实现SuffixLink优化的代码,再看这个应该就很简单了。
下面是SuffixTree的头文件SuffixTree.h
#pragma once
#include <vector>
#include <string>
using namespace std;
class SuffixNode
{
public:
vector<SuffixNode*> m_pSons;
SuffixNode* m_pFarther;
SuffixNode* m_pSuffixLink;
int m_iPathPos;
int m_iEdgeStart;
int m_iEdgeEnd;
};
class SuffixTree
{
public:
int m_iE;//The virtual end of all leaves.
SuffixNode* m_pRoot;//The root of the tree.
string m_czTreeStr;//the string that the tree represent.
};
//Means a sub string of the suffix tree (string[beging],string[end]).
class TreePath
{
public:
int m_iBegin;
int m_iEnd;
};
//Represent the char in a node's incoming edge.
class TreePos
{
public:
TreePos()
{
m_iEdgePos = 0;
m_pNode = NULL;
}
TreePos(int edgePos,SuffixNode* pNode)
{
m_iEdgePos = edgePos;
m_pNode = pNode;
}
int m_iEdgePos;//The ith char of the incoming edge.
SuffixNode* m_pNode;//The node we are going to search.
};
//=====================================Class Declarations==============================
void SingleCharExtesion(SuffixTree* pTree,TreePos*& pPos ,TreePath extendStrPath,int* firstExtensionFlag,bool * rule3Applied,int* iLeafNum);
/*
Add s[0....i+1],s[1...i+1].... to the suffix tree
Input:
SuffixNode* pNode : When we only use trick 1,pNode is the pointer to the longest leaf,s[0........i].
phase : Equals i+1 in the paper.
*/
void SinglePhaseExtend(SuffixTree* pTree,TreePos* & pPos,int phase,int* iExtension,int* ruleApplied,bool* lastRule3Applied,int *iLeafNum);
SuffixNode* CreateTreeNode(SuffixNode* pFarther,int iedgeStart,int iedgeEnd,int pathPos);
/*
FollowSuffixLink :
Follows the suffix link of the source node according to Ukkonen's rules(jump from s[j-1...i] to s[j....i]).
Input : The tree, and node. The node is the last internal node we visited.
Output: The destination node that represents the longest suffix of node's
path. Example: if node represents the path "abcde" then it returns
the node that represents "bcde".
*/
void FollowSuffixLink(SuffixTree* pTree,TreePos*& pPos, TreePath strji);
int GetNodeLabelLength(SuffixTree* pTree, SuffixNode* pNode);
int GetNodeLabelEnd(SuffixTree* pTree,SuffixNode* pNode);
/*
Find the son node which starts with the char,ch.
*/
SuffixNode* Find_Son(SuffixTree* pTree,SuffixNode* pFarNode, char ch);
bool IsTheLastCharInEdge(SuffixTree* pTree, SuffixNode* pNode, int edge_pos);
SuffixNode* ApplyExtensionRule2(SuffixNode* pNode,int edgeLabelBeg,int edgeLabelEnd,int edgePos,int pathPos,bool newLeafFlag);
/*
Trace the sub string(TreePath str) from the node(SuffixNode* pNode).
Input:
int* edgePos : where the last char is found at that edge
int* charsFound : how many chars of str have been found.
bool skipFlag : Use skip trick or not.
*/
SuffixNode* TraceString(SuffixTree* pTree,SuffixNode* pNode,TreePath str,int* edgePos,int* charsFound,bool skipFlag);
/*
Trace the substring(TreePath strPath) in one single edge out of pNode.
*/
SuffixNode* TraceSingleEdge(SuffixTree* pTree,SuffixNode* pNode,TreePath strPath,int* charsFound,int* edgePos,bool* searchDone,bool skipFlag);
SuffixNode* CreateFirstCharacter(SuffixTree* pTree);//Add the first character to the suffix tree.
SuffixTree* CreateSuffixTree(string tStr);
/*
For Debug:
See if the sub string (from root to pPos) equals pTree->string[subPath.m_iBegin,subPath.m_iEnd]
*/
bool TestPosSubStringEqualPath(SuffixTree* pTree,TreePos *pPos, TreePath subPath);
/*
For debugging, We want to see if the suffix link from linkFrom to linkTo is right.
*/
bool TestSuffixLinkMatch(SuffixTree *pTree, SuffixNode* linkFrom,SuffixNode *linkTo);
int FindSubString(SuffixTree* pTree,string subStr);
//=====================================================================================
下面是SuffixTree的实现文件SuffixTree.cpp
#pragma once
#include "SuffixTree.h"
#include <iostream>
using namespace std;
SuffixNode* pNodeNoSuffixLink=NULL;
//=====================================Class Definitions==============================
/*
Trace the substring(TreePath strPath) in one single edge going out of pNode.
Input:
int* edgeCharsFound : how many characters we find matched in the outgoing edge of pNode.
*/
SuffixNode* TraceSingleEdge(SuffixTree* pTree,SuffixNode* pNode,TreePath strPath,int* edgeCharsFound,int* edgePos,bool* searchDone,bool skipFlag)
{
//Find outgoing edge of pNode with our first character.
SuffixNode* nextNode = Find_Son(pTree,pNode,pTree->m_czTreeStr[strPath.m_iBegin]);
*searchDone = true;
if(nextNode == NULL)
{//There is no match in pNode's sons,so we can only return to pNode.
*edgePos = GetNodeLabelLength(pTree,pNode);
*edgeCharsFound = 0;
return pNode;
}
int edgeLen = GetNodeLabelLength(pTree,nextNode);
int strLen = strPath.m_iEnd - strPath.m_iBegin + 1;
if(skipFlag == true)//Use the trick1 : skip
{
if(edgeLen < strLen)
{
*searchDone = false;
*edgeCharsFound = edgeLen;
*edgePos = edgeLen - 1;
}
else if(edgeLen == strLen)
{
*edgeCharsFound = edgeLen;
*edgePos = edgeLen - 1;
}
else
{
*edgeCharsFound = strLen;
*edgePos = strLen - 1;
}
return nextNode;
}
else//No skip,match each char one after another
{
*edgePos = 0;
*edgeCharsFound = 0;
//Find out the min length
if(strLen < edgeLen)
edgeLen = strLen;
for(*edgeCharsFound=1,*edgePos=1;(*edgePos)<edgeLen ;(*edgePos)++,(*edgeCharsFound)++)
{
if( pTree->m_czTreeStr[ nextNode->m_iEdgeStart + *edgePos ] != pTree->m_czTreeStr[strPath.m_iBegin + *edgePos ])
{
(*edgePos)--;
return nextNode;
}
}
}
//When it comes here, (*edgePos) is one more;
(*edgePos)--;
if(*edgeCharsFound < strLen)
{
*searchDone = false;
}
return nextNode;
}
/*
Trace the sub string(TreePath str) from the node(SuffixNode* pNode).
Input:
int* edgePos :For output , where the last char is found at that edge
int* charsFound : How many chars of str have been found.
bool skipFlag : Use skip trick or not.
*/
SuffixNode* TraceString(SuffixTree* pTree,SuffixNode* pNode,TreePath str,int* edgePos,int* charsFound,bool skipFlag)
{
bool searchDone=false;
*charsFound = 0;
*edgePos=0 ;
int edgeCharsFound=0;
while(searchDone==false)
{
edgeCharsFound = 0;
*edgePos=0;
pNode = TraceSingleEdge(pTree,pNode,str,&edgeCharsFound,edgePos,&searchDone,skipFlag);
str.m_iBegin += edgeCharsFound;
*charsFound += edgeCharsFound;
}
//if(*charsFound == 0)
// return NULL;
return pNode;
}
/*
Input:
(1) pNode : the node who is going to add a new son or whose edge is going to be split.
(2) edgeLabelBeg : when newleafFlag==true,it's the edge begin label of the new leaf. when when newleafFlag==false, it's the edge begin label of the new new leaf( the leaf of s[i+1], not s[i]).
(3) like above : just the end
(4 )int edgePos : where split is done to pNode if newLeafFlag==false (the 0th position or 1th position or...)
*/
SuffixNode* ApplyExtensionRule2(SuffixNode* pNode,int edgeLabelBeg,int edgeLabelEnd,int edgePos,int pathPos,bool newLeafFlag)
{
if(newLeafFlag==true)
{
//Add an new leaf
SuffixNode* newLeaf = CreateTreeNode(pNode,edgeLabelBeg,edgeLabelEnd,pathPos);
return newLeaf;
}
else
{
//Add an new internal node and an new leaf
//First create the new internal node.
SuffixNode* nInternalNode = CreateTreeNode(pNode->m_pFarther,pNode->m_iEdgeStart,pNode->m_iEdgeStart + edgePos,pNode->m_iPathPos);
//Remove pNode from its farther's sons
for(vector<SuffixNode*>::iterator pNodeIter=pNode->m_pFarther->m_pSons.begin();
pNodeIter!=pNode->m_pFarther->m_pSons.end();pNodeIter++)
{
if( pNode == *pNodeIter )
{
pNode->m_pFarther->m_pSons.erase(pNodeIter);
break;
}
}
//Adjust pNode's information.
pNode->m_iEdgeStart += (edgePos + 1);
pNode->m_pFarther = nInternalNode;
nInternalNode->m_pSons.push_back(pNode);
//Create the new leaf for s[i+1]
SuffixNode* nLeafNode = CreateTreeNode(nInternalNode,edgeLabelBeg,edgeLabelEnd,pathPos);
return nInternalNode;
}
}
bool IsTheLastCharInEdge(SuffixTree* pTree, SuffixNode* pNode, int edge_pos)
{
if( edge_pos == GetNodeLabelLength(pTree,pNode) - 1 )
return true;
return false;
}
int GetNodeLabelEnd(SuffixTree* pTree,SuffixNode* pNode)
{
if(pNode->m_pSons.size() == NULL)
{
return pTree->m_iE;
}
return pNode->m_iEdgeEnd;
}
int GetNodeLabelLength(SuffixTree* pTree, SuffixNode* pNode)
{
int length = GetNodeLabelEnd(pTree,pNode) - pNode->m_iEdgeStart + 1;
return length;
}
SuffixNode* CreateTreeNode(SuffixNode* pFarther,int iedgeStart,int iedgeEnd,int pathPos)
{
SuffixNode* pNode=new SuffixNode();
pNode->m_iEdgeStart = iedgeStart;
pNode->m_iEdgeEnd = iedgeEnd;
pNode->m_pFarther = pFarther;
pNode->m_iPathPos = pathPos;
pNode->m_pSuffixLink = NULL;
if(pFarther!=NULL)
pFarther->m_pSons.push_back(pNode);
return pNode;
}
//Find the son node which starts with the ch
SuffixNode* Find_Son(SuffixTree* pTree,SuffixNode* pFarNode, char ch)
{
for(vector<SuffixNode*>::iterator nodeIter=pFarNode->m_pSons.begin();
nodeIter!=pFarNode->m_pSons.end();nodeIter++)
{
if(pTree->m_czTreeStr[(*nodeIter) -> m_iEdgeStart] == ch )
{
return *nodeIter;
}
}
return NULL;
}
/*
FollowSuffixLink :
Follows the suffix link of the source node according to Ukkonen's rules(jump from s[j-1...i] to s[j....i]).
Input : The tree, and node. The node is the last internal node we visited.
Output: The destination node that represents the longest suffix of node's
path. Example: if node represents the path "abcde" then it returns
the node that represents "bcde".
*/
void FollowSuffixLink(SuffixTree* pTree,TreePos* & pPos,TreePath strji)
{
/*gama : the string(r in Gusfield's paper) between node and its father.
If the node doesn't have suffix link , we need to go up to its farther*/
TreePath gama;
if(pPos->m_pNode == pTree->m_pRoot)
{
pPos->m_iEdgePos = 0;
return;
}
// No suffix link,walk up at most one step(if it is not the root).
if( pPos->m_pNode->m_pSuffixLink == NULL )
{
if(pPos->m_pNode->m_pFarther == pTree->m_pRoot)
{//its farther is the root
pPos->m_pNode = pTree->m_pRoot;
pPos->m_iEdgePos = 0;
return;
}
else
{
// Find the gamma (the substring between pPos's parent's and pPos's)
gama.m_iBegin = pPos->m_pNode->m_iEdgeStart;
gama.m_iEnd = pPos->m_pNode->m_iEdgeStart + pPos->m_iEdgePos;// the end of s[j..i]
//Follow farther's suffix link
pPos->m_pNode = pPos->m_pNode->m_pFarther->m_pSuffixLink;
//Down-walk gamma (until we found s[i],the character we add last extension)
int charsFound=0;
pPos->m_pNode = TraceString(pTree,pPos->m_pNode,gama,&pPos->m_iEdgePos,&charsFound,true);
}
}
else
{
//A suffix link exists - just follow it.
pPos->m_pNode = pPos->m_pNode->m_pSuffixLink;
pPos->m_iEdgePos = GetNodeLabelLength(pTree,pPos->m_pNode) - 1; //The last char of pPos's suffix link represents s[i] (the character we add last extension).
}
return;
}
/*
For Debug:
See if the sub string (from root to pPos) equals pTree->string[subPath.m_iBegin,subPath.m_iEnd]
*/
bool TestPosSubStringEqualPath(SuffixTree* pTree,TreePos *pPos, TreePath subPath)
{
if(pTree->m_pRoot == pPos->m_pNode )
{
return true;
}
int strRevIndex = subPath.m_iEnd;
SuffixNode* tmpNode = pPos->m_pNode;
int edgeRevIndex = tmpNode->m_iEdgeStart + pPos->m_iEdgePos;
while(tmpNode != pTree->m_pRoot)
{
while( edgeRevIndex >= tmpNode->m_iEdgeStart && strRevIndex >= subPath.m_iBegin)
{
if( pTree->m_czTreeStr[edgeRevIndex] != pTree->m_czTreeStr[strRevIndex] )
{
return false;
}
edgeRevIndex--;
strRevIndex--;
}
tmpNode = tmpNode->m_pFarther;
edgeRevIndex = tmpNode->m_iEdgeEnd;
}
if(strRevIndex != subPath.m_iBegin-1)
return false;
return true;
}
/*
Input:
(1) SuffixTree* pTree : The suffix tree
(2) TreePos* pPos : The last internal node we visited , then we are going to jump to its suffix link in this extension.
(3) TreePath extendStrPath : The suffix (s[j...i+1]) we are goint to add to the tree.
*/
void SingleCharExtesion(SuffixTree* pTree,TreePos*& pPos ,TreePath extendStrPath,int* ruleApplied,bool* lastRule3Applied,int *iLeafNum)
{
TreePath sji;
sji.m_iBegin = extendStrPath.m_iBegin;
sji.m_iEnd = extendStrPath.m_iEnd - 1;
int pathPos = extendStrPath.m_iBegin;
if(*lastRule3Applied == false)
{
//Ready to jump from suffix link at or above s[j-1...i] that either has a suffix link to s[j...i] or the root .
FollowSuffixLink(pTree,pPos,sji);
}
else
{
*lastRule3Applied = false;
}
//For Debug//
if(sji.m_iEnd >= sji.m_iBegin)
{
if(TestPosSubStringEqualPath(pTree,pPos, sji) == false && pPos->m_pNode != pTree->m_pRoot)
{
cout<<"FollowSuffixLink doesn't go to the right s[j..i]:"<<extendStrPath.m_iBegin<<":"<<extendStrPath.m_iEnd-1<<endl;
}
}
///
int chars_found=0;
if(pPos->m_pNode == pTree->m_pRoot && sji.m_iEnd >= sji.m_iBegin)
{
pPos->m_pNode = TraceString(pTree,pTree->m_pRoot,sji,&pPos->m_iEdgePos,&chars_found,false);
if(pPos->m_pNode == NULL)
{
pPos->m_iEdgePos = 0;
pPos->m_pNode =pTree->m_pRoot;
//For Debug//
if(sji.m_iBegin != sji.m_iEnd)
{
cout<<"There is s[j-1..i](not empty) doesn't exist!"<<endl;
return;
}
//For Debug//
}
//For Debug//
if(sji.m_iEnd != sji.m_iBegin && chars_found != sji.m_iEnd - sji.m_iBegin + 1)
{
cout<<"s[j...i] doesn't exit from root:["<<sji.m_iBegin<<","<<sji.m_iEnd<<"]"<<endl;
return;
}
}
//Now we are going to found out which rule to use for extension,rule1?rule2?rule3?
//First test rule3 and rule1.
{
/*
We only need to extend the last character(s[i+1]) since
we use suffix link to jump from s[j-1..i] to s[j..i],
and extendStrPath.m_iEnd is s[i+1].
*/
chars_found = 0;
/*
If the last character(s[i]) is the last of its edge,
try to find s[i+1] in the next edge.
*/
if(IsTheLastCharInEdge(pTree,pPos->m_pNode,pPos->m_iEdgePos))
{
SuffixNode* pTmp = Find_Son(pTree,pPos->m_pNode,pTree->m_czTreeStr[extendStrPath.m_iEnd]);
if(pTmp != 0)
{ //s[i+1] exits already.
chars_found = 1;
*ruleApplied = 3; //We found s[i+1] which is already in the tree.
pPos->m_iEdgePos = 0;
pPos->m_pNode = pTmp;
if(pNodeNoSuffixLink != NULL)
{
pNodeNoSuffixLink->m_pSuffixLink = pPos->m_pNode->m_pFarther;//Let s[j-1..i] jump to s[j...i]
///For Debug//
if(!TestSuffixLinkMatch(pTree,pNodeNoSuffixLink ,pNodeNoSuffixLink->m_pSuffixLink))
{
cout<<"Suffix Tree Build Error"<<endl;
}
//
pNodeNoSuffixLink = NULL;
}
}
}
//Else see if can find extendStrPath.m_iEnd in the current edge
else
{
if( pTree->m_czTreeStr[ pPos->m_pNode->m_iEdgeStart + pPos->m_iEdgePos + 1]
== pTree->m_czTreeStr[extendStrPath.m_iEnd]
)//Notice that " + 1 " means the next char of s[j...i] : yes, s[i+1]
{//s[i+1] exits already.
chars_found = 1;
if(pPos->m_pNode->m_pSons.size() == 0)//We found s[i+1] in the leaf
{
if( pPos->m_iEdgePos + 1 == GetNodeLabelLength(pTree,pPos->m_pNode) - 1) //Rule1 applied, we just add s[i+1] to the leaf.
{
*ruleApplied = 1;
}
else
{
*ruleApplied = 3;
pPos->m_iEdgePos++;//Go to the s[i+1]
}
}
else//We found s[i+1] in the internal node
{
*ruleApplied = 3;
pPos->m_iEdgePos++;//Go to s[i+1]
if(pNodeNoSuffixLink != NULL)
{
pNodeNoSuffixLink->m_pSuffixLink = pPos->m_pNode;
///For Debug//
if(! TestSuffixLinkMatch(pTree,pNodeNoSuffixLink ,pNodeNoSuffixLink->m_pSuffixLink))
{
cout<<"Suffix Tree Build Error"<<endl;
}
//
pNodeNoSuffixLink = NULL;
}
}
}
}
}
if(chars_found == 1)
{
return;
}
/*Since rule3 and rule1 doesn't fit ( that s[j...i+1] is not in the tree),
we are going to see rule2.
*/
/* If last char s[j...i] found is the last char of an edge - create an new leaf
,apply rule2(add a new leaf) or rule1 */
if(IsTheLastCharInEdge(pTree,pPos->m_pNode,pPos->m_iEdgePos) || pPos->m_pNode==pTree->m_pRoot)
{
if(pPos->m_pNode->m_pSons.size() != 0)
{
//Internal node or root,apply rule2 that add a new leaf
ApplyExtensionRule2(pPos->m_pNode, extendStrPath.m_iEnd, extendStrPath.m_iEnd, 0, pathPos, true);
//Suffix Link
if(pNodeNoSuffixLink != NULL)
{
pNodeNoSuffixLink->m_pSuffixLink = pPos->m_pNode;
///For Debug//
if(! TestSuffixLinkMatch(pTree,pNodeNoSuffixLink ,pNodeNoSuffixLink->m_pSuffixLink))
{
cout<<"Suffix Tree Build Error"<<endl;
}
//
pNodeNoSuffixLink = NULL;
}
*ruleApplied = 2;
(*iLeafNum)++;
}
//else in a leaf, We do nothing (implicitly add a char to the end of a leaf).
}
//Else apply rule2 that adds an new intern node
else
{
SuffixNode* nInternalNode = ApplyExtensionRule2(pPos->m_pNode,extendStrPath.m_iEnd,extendStrPath.m_iEnd,pPos->m_iEdgePos,pathPos,false);
if(pNodeNoSuffixLink != NULL)
{
pNodeNoSuffixLink->m_pSuffixLink = nInternalNode;
///For Debug//
if(! TestSuffixLinkMatch(pTree,pNodeNoSuffixLink ,pNodeNoSuffixLink->m_pSuffixLink))
{
cout<<"Suffix Tree Build Error"<<endl;
}
//
pNodeNoSuffixLink = NULL;
}
//See the new internal node's suffix link.
if( GetNodeLabelLength(pTree,nInternalNode)==1 && nInternalNode->m_pFarther == pTree->m_pRoot)
{
nInternalNode->m_pSuffixLink = pTree->m_pRoot;
pNodeNoSuffixLink = NULL;
}
else
{
pNodeNoSuffixLink = nInternalNode;
}
//Adjust the node for the next extension
pPos->m_pNode = nInternalNode;
(*iLeafNum)++;
*ruleApplied = 2;
}
return ;
}
/*
Add s[0....i+1],s[1...i+1].... to the suffix tree
Input:
SuffixNode* pNode: When we only use trick 1,pNode is the pointer to the longest leaf,s[0........i].
*/
void SinglePhaseExtend(SuffixTree* pTree,TreePos* & pPos,int phase,int* iExtension,int* ruleApplied,bool *lastRule3Applied,int *iLeafNum)
{
//pTree->m_iE = phase-1;
//int ruleApplied=-1;
while(*iExtension <= phase )
{
TreePath extendPath;
extendPath.m_iBegin = *iExtension;
extendPath.m_iEnd = phase;
SingleCharExtesion(pTree,pPos,extendPath,ruleApplied,lastRule3Applied,iLeafNum);
if(*ruleApplied == 3)//We already found s[j...i+1] which means s[j+1...i+1] and the rest are already in the tree too.
{
*lastRule3Applied = true;
break;
}
(*iExtension)++;
}
return;
}
SuffixNode* CreateFirstCharacter(SuffixTree* pTree)
{
pTree->m_iE = 0;
SuffixNode* firstLeaf = CreateTreeNode(pTree->m_pRoot,0,0,0);
return firstLeaf;
}
SuffixTree* CreateSuffixTree(string tStr)
{
SuffixTree* psTree=new SuffixTree();
psTree->m_czTreeStr = tStr+"$";
psTree->m_pRoot = CreateTreeNode(NULL,0,0,0);
//Add the first char into it.
SuffixNode* firstLeaf = CreateFirstCharacter(psTree);
TreePos* lastLeafPos = new TreePos(0,firstLeaf);
int ruleApplied = 2;
int iExtension = 1;
int iLeafNum = 1;
bool lastRule3Applied = false;
for(int phase = 1 ; phase<psTree->m_czTreeStr.length() ; phase++)
{
psTree->m_iE = phase;
iExtension = iLeafNum;
//lastLeafPos->m_iEdgePos = lastLeafPos->m_pNode->m_iEdgeEnd - lastLeafPos->m_pNode->m_iEdgeStart; //start from s[j..i]
SinglePhaseExtend(psTree,lastLeafPos,phase,&iExtension,&ruleApplied,&lastRule3Applied,&iLeafNum);
}
return psTree;
}
int FindSubString(SuffixTree* pTree,string subStr)
{
SuffixNode* node = Find_Son(pTree,pTree->m_pRoot,subStr[0]);
if(node == NULL)
{
return -1;
}
int startIndex = node->m_iEdgeStart;
int strIndex=0;
int edgeIndex;
while(node != NULL)
{
edgeIndex = node->m_iEdgeStart;
int edgeLabelEnd = GetNodeLabelEnd(pTree,node);
while(strIndex < subStr.length() && edgeIndex <= edgeLabelEnd && pTree->m_czTreeStr[edgeIndex] == subStr[strIndex])
{
strIndex++;
edgeIndex++;
}
if(strIndex == subStr.length())
{
//we found it
return node->m_iPathPos;
}
else if(edgeIndex > node->m_iEdgeEnd)
{
node = Find_Son(pTree,node,subStr[strIndex]);
}
else
{
return -1;
}
}
return -1;
}
/*
For debugging, We want to see if the suffix link from linkFrom to linkTo is right.
*/
bool TestSuffixLinkMatch(SuffixTree *pTree, SuffixNode* linkFrom,SuffixNode *linkTo)
{
char ch1,ch2;
while(linkFrom->m_pFarther->m_pFarther!=pTree->m_pRoot && linkFrom->m_pFarther != pTree->m_pRoot)
{
linkFrom = linkFrom->m_pFarther;
}
if(linkFrom->m_pFarther == pTree->m_pRoot)
{
if(linkFrom->m_iEdgeEnd == linkFrom->m_iEdgeStart)
{
if(linkTo != pTree->m_pRoot)
{
return false;
}
else
{
return true;
}
}
else
{
ch1 = pTree->m_czTreeStr[linkFrom->m_iEdgeStart + 1];
}
}
else
{
if(linkFrom->m_pFarther->m_iEdgeStart == linkFrom->m_pFarther->m_iEdgeEnd)
{
ch1 = pTree->m_czTreeStr[linkFrom->m_iEdgeStart];
}
else
{
ch1 = pTree->m_czTreeStr[linkFrom->m_pFarther->m_iEdgeStart + 1];
}
}
while(linkTo->m_pFarther != pTree->m_pRoot)
{
linkTo = linkTo->m_pFarther;
}
ch2 = pTree->m_czTreeStr[linkTo->m_iEdgeStart];
if(ch1 == ch2)
return true;
else
return false;
}
最后是调用的主函数,一个简单的判断子串是否在字符串中,如果子串subStr存在于字符创str中,输出它的起点,否则输出不存在。
#include "SuffixTree.h"
#include <string>
#include <iostream>
using namespace std;
int main()
{
string str="avnaoihvnjovsdvaeveavsfvwevfwa vafjoajv aoja aojfoaj afowajop afoajv afjoajo csvweavefvs fjwojvwoajv haovpjvowj ajovwjavo aojvowajv ajvoajv vnaojv vfdvdfvavaewvavaisadnvioenvoehnvPIavnaoihvnjovsdvaeveavsfvwevfwa vafjoajv aoja aojfoaj afowajop afoajv afjoajo csvweavefvs fjwojvwoajv haovpjvowj ajovwjavo aojvowajv ajvoajv vnaojv vfdvdfvavaewvavaisadnvioenvoehnvPI";
string subStr=" vafjoajv aoja aojfoaj afowajop afoajv afjoajo csvweavefvs fjwojvwoajv haov";
SuffixTree* pTree = CreateSuffixTree(str);
int existFlag = FindSubString(pTree,subStr);
if(existFlag>=0)
cout<<"Sub string starts from "<<existFlag<<endl;
else
cout<<"sub string doesn't exist"<<endl;
return 0;
}