// 自动机实现正则表达式匹配,https://leetcode.com/problems/regular-expression-matching/
// 这题有更简单的做法,自动机则更加通用
// 自动机配合hash实现NFA到DFA的转化,Compiler课里面看到很多自动机自动生成的工具,所以就写了一个简单的。
#include <bits/stdc++.h>
using namespace std;
// Compiles a regular expression made of lowercase letters, '.' and '*'
// (no consecutive '*', pattern length <= 30) into a DFA and matches strings
// against it.

// Capacity of the per-pattern-element arrays (mark, valchar, nxtState, vst).
const static int bign = 33;
// Number of buckets in hashList; also the modulus used by hashCode.
const static int hashNum = 100003;
// Width of one DFA transition row (one column per lowercase letter).
const static int charNum = 26;
// Capacity of the per-DFA-state arrays (stateAuto, mq, isend).
const static int maxStateNum = 1033;
// mark[i] is set to 1 when pattern element i is followed by '*'.
int mark[bign];
// valchar[i] is the character of pattern element i (a letter or '.').
char valchar[bign];
// Number of pattern elements stored in valchar (a '*' is not an element).
int n = 0;
// Id of the most recently created DFA state; ids start at 1.
int stateNum = 1;
// n minus the number of trailing starred elements; a DFA state whose last
// position is >= firstend is marked accepting.
int firstend;
// stateAuto[s][c] is the DFA state reached from state s on letter index c;
// 0 means there is no transition.
int stateAuto[maxStateNum][charNum];
// Queue of DFA state ids that still have to be expanded.
int mq[maxStateNum];
// Read and write cursors into mq.
int mfront, mtail;
// hashList[h] holds the ids of DFA states whose position list hashes to h.
vector<int> hashList[hashNum];
// stateToIndex[id - 1] is the list of pattern positions that make up DFA state id.
vector<vector<int>> stateToIndex;
// nxtState[i] lists the (letter index, target position) moves available from
// pattern position i.
vector<pair<int, int>> nxtState[bign];
// isend[s] is true when DFA state s is accepting.
bool isend[maxStateNum];
// Stamp table indexed by (letter index, position), used to avoid inserting the
// same move twice.
int vst[26][bign];
inline int hashCode(vector<int> &vec)
{
int ret = 0;
for (int v : vec)
ret = (ret * 31 + v) % hashNum;
return ret;
}
/**
 * Tells whether two position lists hold the same values in the same order.
 *
 * @param vec1 first list (read only).
 * @param vec2 second list (read only).
 * @return true when both lists have equal length and equal elements.
 */
inline bool equalVec(const std::vector<int>& vec1, const std::vector<int>& vec2)
{
    // std::vector's operator== compares the sizes first and then the elements.
    return vec1 == vec2;
}
void AutoCons(const char* RegExp)
{
stateNum = 1;
int slen = strlen(RegExp);
for (int i = 0; i < slen; i++)
{
if (RegExp[i] == '*')
{
mark[n - 1] = 1;
}
else
{
valchar[n++] = RegExp[i];
}
}
firstend = n;
for (int i = n - 1; i >= 0; i--)
{
if (mark[i] == 0)
{
//firstend = i + 1;
break;
}
else
{
firstend--;
}
}
for (int i = n - 1; i >= 0; i--)
{
if (valchar[i] == '.')
{
for (int j = 0; j < 26; j++)
{
if (1 == mark[i])
{
nxtState[i].push_back(make_pair(j, i));
vst[j][i] = i + 1;
}
nxtState[i].push_back(make_pair(j, i + 1));
vst[j][i + 1] = i + 1;
}
}
else
{
if (1 == mark[i])
{
vst[valchar[i] - 'a'][i] = i + 1;
nxtState[i].push_back(make_pair(valchar[i] - 'a', i));
}
vst[valchar[i] - 'a'][i + 1] = i + 1;
nxtState[i].push_back(make_pair(valchar[i] - 'a', i + 1));
}
if (1 == mark[i])
{
for (auto it : nxtState[i + 1])
{
if(vst[it.first][it.second] != i + 1)
nxtState[i].push_back(it);
}
}
//sort(nxtState[i].begin(), nxtState[i].end());
//unique(nxtState[i].begin(), nxtState[i].end());
}
vector<int> state1(1, 0);
if (firstend == 0)
isend[1] = true;
for (int i = 0; i < n; i++)
{
if (1 == mark[i])
{
state1.push_back(i + 1);
if(firstend <= i + 1)
isend[1] = true;
}
else
break;
}
stateToIndex.push_back(state1);
hashList[hashCode(state1)].push_back(1);
mq[mtail++] = stateNum;
memset(vst, 0, sizeof(vst));
while (mfront < mtail)
{
int u = mq[mfront++];
//u--;
vector<int> tvec[26];
for(int i = 0; i < 26; i++)
tvec[i].clear();
for (int i = 0; i < stateToIndex[u - 1].size(); i++)
{
for (pair<int, int> tautomata : nxtState[stateToIndex[u - 1][i]])
{
int x = tautomata.first;
int y = tautomata.second;
if (vst[x][y] != u)
vst[x][y] = u;
else
continue;
tvec[x].push_back(y);
//if (tautomata.second >= firstend)
// endflag = true;
}
}
for (int i = 0; i < 26; i++)
{
if (tvec[i].empty())
continue;
// sort(tvec[i].begin(), tvec[i].end());
// unique(tvec[i].begin(), tvec[i].end());
int hashnum = hashCode(tvec[i]);
bool flag = false;
for (int stateid : hashList[hashnum])
{
if (equalVec(stateToIndex[stateid - 1], tvec[i]))
{
flag = true;
stateAuto[u][i] = stateid;
break;
}
}
if (!flag)
{
stateNum++;
stateAuto[u][i] = stateNum;
hashList[hashnum].push_back(stateNum);
stateToIndex.push_back(tvec[i]);
mq[mtail++] = stateNum;
if (tvec[i][tvec[i].size() - 1] >= firstend)
isend[stateNum] = true;
}
}
}
cout << "stateNum of DFA = " << stateNum << endl;
}
/**
 * Runs a string through the DFA stored in stateAuto / isend.
 *
 * @param matchstr NUL-terminated text to test.
 * @return true when the whole text is consumed and the final DFA state is
 *         accepting; false when a transition is missing or the text contains
 *         a character outside the DFA's letter range.
 */
bool mMatch(const char* matchstr)
{
    // Width of one transition row, i.e. the number of letters the DFA knows.
    const int letters = static_cast<int>(sizeof(stateAuto[0]) / sizeof(stateAuto[0][0]));

    int nowstate = 1;  // state 1 is the start state
    for (const char* p = matchstr; *p != '\0'; ++p)
    {
        const int tc = *p - 'a';
        // A character with no column in the table can never match; rejecting
        // it here also keeps the index inside stateAuto.
        if (tc < 0 || tc >= letters)
            return false;
        nowstate = stateAuto[nowstate][tc];
        if (0 == nowstate)
            return false;  // 0 means "no transition"
    }
    return isend[nowstate];
}
// Demo: builds the DFA for the pattern ".*a*a" and reports whether the text
// "bbbba" matches it.
int main()
{
    const std::string pattern = ".*a*a";
    const std::string text = "bbbba";

    AutoCons(pattern.c_str());

    const bool matched = mMatch(text.c_str());
    std::cout << (matched ? "matching successfully" : "matching failed") << std::endl;
}