给定一个目标串T和若干个模式串P,设计一个算法,判断每一个模式串是否在T中出现。
思路:
多模式串匹配问题(设m为目标串的长度,k为模式串的个数,n为模式串的平均长度)。可以用后缀trie树,时间复杂度为O(m^2 + kn)。利用AC自动机的时间复杂度为O(m + kn + z)(其中z为模式串在T中出现的总次数)。还可以用后缀树,后缀树的方法比较复杂,这里不做介绍。
下面是后缀trie树的代码
数组形式:
#include <iostream>
#include <string>
#include <vector>
using namespace std;
// Suffix trie over the lowercase alphabet 'a'..'z', stored as a flat table.
//
// Node 0 is the root. The children of node `k` live in
// trie[k * CLD .. k * CLD + CLD - 1]; an entry of -1 means "no child".
// The table grows by one row per created node, so it always holds exactly
// (size + 1) * CLD entries and every stored child index is addressable.
class Trie
{
public:
    // Number of child slots per node: one per letter 'a'..'z'.
    static const int CLD = 26;
    // Index of the most recently created node (== number of non-root nodes).
    int size;
    // Flat transition table, see the class comment for the layout.
    std::vector<int> trie;

    // Builds the trie containing every suffix of `s`, so that afterwards
    // Search(p) answers "is p a substring of s?".
    Trie(const std::string& s)
        : size(0), trie(CLD, -1)   // start with the root row only
    {
        for (std::string::size_type start = 0; start < s.size(); ++start)
            Insert(s.substr(start));   // the full suffix, including the last character
    }

    // Adds the path spelled by `s` below the root, creating missing nodes.
    //
    // Only 'a'..'z' have a slot in the table. Insertion stops at the first
    // character outside that range, which keeps every substring made purely
    // of supported letters searchable when all suffixes are inserted.
    // An empty string is a no-op.
    void Insert(const std::string& s)
    {
        int index = 0;
        for (std::string::size_type i = 0; i < s.size(); ++i)
        {
            const int j = s[i] - 'a';
            if (j < 0 || j >= CLD)
                return;
            const std::vector<int>::size_type slot =
                static_cast<std::vector<int>::size_type>(index) * CLD + j;
            if (trie[slot] == -1)
            {
                trie[slot] = ++size;
                // Append the (empty) row of the node that was just created.
                trie.resize(trie.size() + CLD, -1);
            }
            index = trie[slot];
        }
    }

    // Returns true when `s` labels a path starting at the root, i.e. when `s`
    // is a prefix of some inserted string. Returns false for the empty string
    // and for any string containing a character outside 'a'..'z'.
    bool Search(const std::string& s) const
    {
        if (s.empty())
            return false;
        int index = 0;
        for (std::string::size_type i = 0; i < s.size(); ++i)
        {
            const int j = s[i] - 'a';
            if (j < 0 || j >= CLD)
                return false;
            const int next = trie[static_cast<std::vector<int>::size_type>(index) * CLD + j];
            if (next == -1)
                return false;
            index = next;
        }
        return true;
    }
};
// Demo driver: prints the length of a sample text, builds its suffix trie and
// then prints, one per line, the result of Search for each sample pattern
// (1 = found, 0 = not found).
int main()
{
    std::string s("mississipi");
    std::cout << s.size() << '\n';
    Trie trie(s);

    std::vector<std::string> svec;
    svec.push_back("is");
    svec.push_back("sip");
    svec.push_back("hi");
    svec.push_back("sis");
    svec.push_back("mississippa");

    // Iterate over what is actually stored rather than a hard-coded count.
    for (std::size_t i = 0; i < svec.size(); ++i)
        std::cout << trie.Search(svec[i]) << '\n';
    return 0;
}
树的形式:
#include <iostream>
#include <string>
#include <vector>
#include <assert.h>
using namespace std;
// Number of child slots per trie node.
const int CLD = 26;

// Trie node: a table of CLD child pointers, all NULL until a child is linked.
struct TNode
{
    std::vector<TNode*> pcld;

    TNode() : pcld(CLD, static_cast<TNode*>(NULL)) {}
};
// Adds the path spelled by `s` below `root`, allocating any missing nodes.
//
// Preconditions (asserted): `root` is non-NULL and `s` is non-empty.
// Child slots are indexed by `s[i] - 'a'`. Insertion stops at the first
// character whose index falls outside the node's child table, so such a
// character can never cause an out-of-range access; the prefix before it
// stays in the trie.
void Insert(TNode*& root, const std::string& s)
{
    assert(root != NULL && s.size() > 0);
    TNode* temp = root;
    for (std::string::size_type i = 0; i < s.size(); ++i)
    {
        const int j = s[i] - 'a';
        if (j < 0 || static_cast<std::size_t>(j) >= temp->pcld.size())
            return;
        if (temp->pcld[j] == NULL)
            temp->pcld[j] = new TNode();
        temp = temp->pcld[j];
    }
}
// Returns true when `s` labels a path starting at `root`, i.e. when `s` is a
// prefix of some inserted string.
//
// Preconditions (asserted): `root` is non-NULL and `s` is non-empty.
// A character whose index `s[i] - 'a'` falls outside the node's child table
// cannot be on any path, so the search reports false instead of indexing out
// of range.
bool Search(TNode* root, const std::string& s)
{
    assert(root != NULL && s.size() > 0);
    TNode* temp = root;
    for (std::string::size_type i = 0; i < s.size(); ++i)
    {
        const int j = s[i] - 'a';
        if (j < 0 || static_cast<std::size_t>(j) >= temp->pcld.size())
            return false;
        if (temp->pcld[j] == NULL)
            return false;
        temp = temp->pcld[j];
    }
    return true;
}
// Demo driver: inserts every suffix of a sample text into a pointer-based
// trie, then prints, one per line, the result of Search for each sample
// pattern (1 = found, 0 = not found).
int main()
{
    std::string s("mississipi");
    // The nodes are never freed here; the process exits right after the
    // queries, at which point the operating system reclaims the memory.
    TNode* root = new TNode();
    for (std::string::size_type i = 0; i < s.size(); ++i)
    {
        std::string sub(s, i);   // suffix starting at position i
        Insert(root, sub);
    }

    std::vector<std::string> svec;
    svec.push_back("is");
    svec.push_back("sip");
    svec.push_back("hi");
    svec.push_back("sis");
    svec.push_back("mississippa");

    // Iterate over what is actually stored rather than a hard-coded count.
    for (std::size_t i = 0; i < svec.size(); ++i)
        std::cout << Search(root, svec[i]) << '\n';
    return 0;
}
以下是AC自动机代码:
#include <iostream>
#include <string>
#include <vector>
#include <queue>
#include <assert.h>
using namespace std;
// Number of child slots per automaton node.
const int CLD = 26;

// Node of the Aho-Corasick automaton.
struct TNode
{
    // Child pointers; NULL means the transition does not exist.
    std::vector<TNode*> pcld;
    // Failure link. Starts as NULL so that it never holds an indeterminate
    // value before the links are computed.
    TNode* fail;
    // True when a pattern ends at this node.
    bool tag;

    TNode()
        : pcld(CLD, static_cast<TNode*>(NULL)), fail(NULL), tag(false)
    {
    }
};
// Adds pattern `s` to the trie rooted at `root` and marks its final node with
// `tag = true`.
//
// Preconditions (asserted): `root` is non-NULL and `s` is non-empty.
// Child slots are indexed by `s[i] - 'a'`. The whole pattern is validated
// before any node is created: if some character has no slot in the child
// table, the pattern is ignored. Stopping halfway instead would tag a
// truncated prefix as if it were a complete pattern.
void Insert(TNode*& root, const std::string& s)
{
    assert(root != NULL && s.size() > 0);

    const std::size_t slots = root->pcld.size();
    for (std::string::size_type i = 0; i < s.size(); ++i)
    {
        const int j = s[i] - 'a';
        if (j < 0 || static_cast<std::size_t>(j) >= slots)
            return;
    }

    TNode* temp = root;
    for (std::string::size_type i = 0; i < s.size(); ++i)
    {
        const int j = s[i] - 'a';
        if (temp->pcld[j] == NULL)
            temp->pcld[j] = new TNode();
        temp = temp->pcld[j];
    }
    temp->tag = true;
}
// Computes the failure link of every node reachable from `root`, visiting the
// nodes breadth-first.
//
// The root's link is NULL. For a child reached over letter `c`, the link is
// found by walking the parent's failure chain until a node with a `c` child
// appears; that child becomes the link. If the chain runs out, the link is
// the root.
void Build(TNode*& root)
{
    assert(root != NULL);
    root->fail = NULL;

    std::queue<TNode*> pending;
    for (pending.push(root); !pending.empty(); pending.pop())
    {
        TNode* parent = pending.front();
        for (int c = 0; c < CLD; ++c)
        {
            TNode* child = parent->pcld[c];
            if (child != NULL)
            {
                TNode* fallback = parent->fail;
                while (fallback != NULL && fallback->pcld[c] == NULL)
                    fallback = fallback->fail;
                child->fail = (fallback != NULL) ? fallback->pcld[c] : root;
                pending.push(child);
            }
        }
    }
}
// Scans text `s` through the automaton rooted at `root` and returns the total
// number of tagged nodes encountered, i.e. the number of pattern occurrences
// (overlapping occurrences are counted separately).
//
// Preconditions (asserted): `root` is non-NULL and `s` is non-empty.
// The function follows `fail` links, so they must have been set on every
// node beforehand.
// A text character whose index `s[i] - 'a'` has no slot in the child table
// cannot be part of any stored pattern: matching restarts from the root
// instead of indexing out of range.
int Search(TNode* root, const std::string& s)
{
    assert(root != NULL && s.size() > 0);
    const std::size_t slots = root->pcld.size();
    TNode* temp = root;
    int res = 0;
    for (std::string::size_type i = 0; i < s.size(); ++i)
    {
        const int j = s[i] - 'a';
        if (j < 0 || static_cast<std::size_t>(j) >= slots)
        {
            temp = root;
            continue;
        }

        // Fall back along failure links until a `j` transition exists.
        while (temp != root && temp->pcld[j] == NULL)
            temp = temp->fail;
        temp = temp->pcld[j];
        if (temp == NULL)
            temp = root;

        // Every tagged node on the failure chain is a pattern ending here.
        for (TNode* p = temp; p != root; p = p->fail)
        {
            if (p->tag)
                ++res;
        }
    }
    return res;
}
// Demo driver: loads a set of sample patterns into the automaton, computes
// the failure links and prints the value returned by Search for a sample
// text.
int main()
{
    std::string s("missisip");
    // The nodes are never freed here; the process exits right after the
    // query, at which point the operating system reclaims the memory.
    TNode* root = new TNode();

    std::vector<std::string> svec;
    svec.push_back("is");
    svec.push_back("sip");
    svec.push_back("ssis");
    svec.push_back("sis");
    svec.push_back("missisip");
    svec.push_back("ip");

    for (std::size_t i = 0; i < svec.size(); ++i)
        Insert(root, svec[i]);
    Build(root);
    std::cout << Search(root, s) << '\n';
    return 0;
}