一道标准的NFA题
题目描述
Given an input string (s) and a pattern (p), implement regular expression matching with support for ‘.’ and ‘*’.
‘.’ Matches any single character.
‘*’ Matches zero or more of the preceding element.
The matching should cover the entire input string (not partial).
Note:
s could be empty and contains only lowercase letters a-z.
p could be empty and contains only lowercase letters a-z, and characters like . or *.
Example 1:
Input:
s = “aa”
p = “a”
Output: false
Explanation: “a” does not match the entire string “aa”.
Example 2:
Input:
s = “aa”
p = “a*”
Output: true
Explanation: ‘*’ means zero or more of the preceding element, ‘a’. Therefore, by repeating ‘a’ once, it becomes “aa”.
Example 3:
Input:
s = “ab”
p = “.*”
Output: true
Explanation: “.*” means “zero or more (*) of any character (.)”.
Example 4:
Input:
s = “aab”
p = “c*a*b”
Output: true
Explanation: c can be repeated 0 times, a can be repeated 1 time. Therefore it matches “aab”.
Example 5:
Input:
s = “mississippi”
p = “mis*is*p*.”
Output: false
算法分析
- 为了测试系统正确性,先直接构造了一个NFA。为了方便实现,直接用递归。然后时间就炸了
直接到了倒数5%内了,sad。如果不是递归的缘故,应该会很快。
理论上时间复杂度应该为O(mn),m为s的长度,n为p的长度。
a.构造NFA需要O(n),NFA最多有n+2个状态
b.验证s,即把s在NFA上跑最多需要跑m个step,每个step最多包含2*n种转移(忽略初始状态和结束状态),从状态i到状态i或从状态i到状态i+1,i∈[1,n]。需要O(nm)
综上所述,
时间复杂度为O(nm)
空间复杂度为O(n), 与状态个数有关
/**
 * Regular-expression matcher for patterns made of 'a'-'z', '.' and '*'
 * (LeetCode 10), implemented as a recursive walk over a linear NFA.
 *
 * NFA layout (built by init):
 *   - state 0 is the start state and has an epsilon edge to state 1;
 *   - every pattern symbol (letter or '.') owns one state, in pattern order;
 *   - the last state (end_state) is the only accepting state.
 *
 * Edge encoding per state i:
 *   - trans0[i]: symbol on the self loop i -> i, or '#' when there is none
 *                (only symbols followed by '*' in the pattern get a loop);
 *   - trans1[i]: symbol on the edge i -> i+1; '*' marks an epsilon edge and
 *                '#' marks "no edge" (accepting state only).
 *
 * The input string is assumed to contain only 'a'-'z', so the marker
 * characters '#' and '*' can never be confused with input characters.
 */
class Solution {
public:
    char space[1010];            // character space: letter c of the pattern is stored at index c - 'a'
    std::vector<int> states;     // NFA state ids: states[i] == i for i in [0, state_length)
    std::vector<char> trans0;    // self-loop symbol per state (see class comment)
    std::vector<char> trans1;    // forward-edge symbol per state (see class comment)
    int space_length;            // number of distinct letters used by the pattern
    int state_length;            // total number of NFA states
    int init_state;              // id of the start state (always 0)
    int end_state;               // id of the accepting state (state_length - 1)

    Solution() : space(), space_length(0), state_length(0), init_state(0), end_state(0) {}

    /**
     * Builds the NFA for pattern p.
     *
     * @param chac 26 flags; chac[i] is true when letter 'a' + i occurs in p.
     * @param len  number of non-'*' symbols in p (one NFA state per symbol).
     * @param p    the pattern.
     */
    void init(bool *chac, int len, std::string p) {
        space_length = 0;
        for (int i = 0; i < 26; ++i) {
            if (chac && chac[i]) {
                space[i] = static_cast<char>('a' + i);
                ++space_length;
            }
        }

        if (len < 0) {
            len = 0;
        }
        state_length = len + 2;  // start state + one per symbol + accepting state
        init_state = 0;
        end_state = state_length - 1;

        // Storage is sized from the pattern, so long patterns cannot overflow.
        states.resize(state_length);
        for (int i = 0; i < state_length; ++i) {
            states[i] = i;
        }
        trans0.assign(state_length, '#');
        trans1.assign(state_length, '*');
        // The accepting state must not have an outgoing edge; otherwise the
        // matcher would step past the last state.
        trans1[end_state] = '#';

        const int length = static_cast<int>(p.length());
        int current_state = 1;  // state 0 keeps its epsilon edge to state 1
        for (int i = 0; i < length && current_state < end_state; ++i) {
            const char symbol = p[i];
            const bool is_symbol = (symbol >= 'a' && symbol <= 'z') || symbol == '.';
            if (!is_symbol) {
                continue;  // '*' is consumed together with the symbol before it
            }
            if (i + 1 < length && p[i + 1] == '*') {
                // "x*": loop on x, leave the state for free.
                trans0[current_state] = symbol;
                trans1[current_state] = '*';
            } else {
                // plain "x": must consume exactly one x to move on.
                trans1[current_state] = symbol;
            }
            ++current_state;
        }
    }

    /**
     * Reports whether the suffix s[input..] is accepted when the NFA is in
     * state current_state. init must have been called first; out-of-range
     * arguments (or a missing init) yield false.
     */
    bool pass(std::string s, int input, int current_state) {
        const int m = static_cast<int>(s.length());
        if (input < 0 || input > m || current_state < 0 || current_state >= state_length) {
            return false;
        }
        // dead[input * state_length + state] != 0  <=>  that configuration is
        // already known to fail. This bounds the work by O(m * n).
        std::vector<char> dead(static_cast<Index>(m + 1) * static_cast<Index>(state_length), 0);
        return run(s, input, current_state, dead);
    }

    /**
     * Returns true when the whole string s matches the whole pattern p.
     */
    bool isMatch(std::string s, std::string p) {
        bool chac[26] = {};
        int symbols = 0;
        const int len = static_cast<int>(p.length());
        for (int i = 0; i < len; ++i) {
            if (p[i] >= 'a' && p[i] <= 'z') {
                chac[p[i] - 'a'] = true;
                ++symbols;
            } else if (p[i] == '.') {
                ++symbols;
            }
        }
        init(chac, symbols, p);
        return pass(s, 0, 0);
    }

private:
    typedef std::vector<char>::size_type Index;

    // Memoised recursive walk: tries the self loop, the consuming forward
    // edge and the epsilon forward edge of `state`, in that order.
    bool run(const std::string &s, int input, int state, std::vector<char> &dead) const {
        const int m = static_cast<int>(s.length());
        if (state == end_state) {
            return input == m;  // accepting state has no outgoing edges
        }
        const Index key = static_cast<Index>(input) * static_cast<Index>(state_length)
                          + static_cast<Index>(state);
        if (dead[key]) {
            return false;
        }

        const char loop_symbol = trans0[state];
        const char next_symbol = trans1[state];
        bool matched = false;
        if (input < m) {
            const char c = s[input];
            if (loop_symbol == c || loop_symbol == '.') {
                matched = run(s, input + 1, state, dead);
            }
            if (!matched && (next_symbol == c || next_symbol == '.')) {
                matched = run(s, input + 1, state + 1, dead);
            }
        }
        if (!matched && next_symbol == '*') {
            matched = run(s, input, state + 1, dead);
        }
        if (!matched) {
            dead[key] = 1;
        }
        return matched;
    }
};
- 果然,把递归的部分换掉,一条过
/**
 * Regular-expression matcher for patterns made of 'a'-'z', '.' and '*'
 * (LeetCode 10), implemented by simulating a linear NFA on a set of active
 * states (no recursion, no backtracking).
 *
 * NFA layout (built by init):
 *   - state 0 is the start state and has an epsilon edge to state 1;
 *   - every pattern symbol (letter or '.') owns one state, in pattern order;
 *   - the last state (end_state) is the only accepting state.
 *
 * Edge encoding per state i:
 *   - trans0[i]: symbol on the self loop i -> i, or '#' when there is none
 *                (only symbols followed by '*' in the pattern get a loop);
 *   - trans1[i]: symbol on the edge i -> i+1; '*' marks an epsilon edge and
 *                '#' marks "no edge" (accepting state only).
 *
 * The input string is assumed to contain only 'a'-'z', so the marker
 * characters '#' and '*' can never be confused with input characters.
 */
class Solution {
public:
    std::vector<int> states;     // NFA state ids: states[i] == i for i in [0, state_length)
    std::vector<char> trans0;    // self-loop symbol per state (see class comment)
    std::vector<char> trans1;    // forward-edge symbol per state (see class comment)
    int state_length;            // total number of NFA states
    int init_state;              // id of the start state (always 0)
    int end_state;               // id of the accepting state (state_length - 1)

    Solution() : state_length(0), init_state(0), end_state(0) {}

    /**
     * Builds the NFA for pattern p. Storage is owned by vectors, so calling
     * init repeatedly does not leak.
     *
     * @param chac unused here; kept so the signature stays unchanged.
     * @param len  number of non-'*' symbols in p (one NFA state per symbol).
     * @param p    the pattern.
     */
    void init(bool *chac, int len, std::string p) {
        (void)chac;
        if (len < 0) {
            len = 0;
        }
        state_length = len + 2;  // start state + one per symbol + accepting state
        init_state = 0;
        end_state = state_length - 1;

        states.resize(state_length);
        for (int i = 0; i < state_length; ++i) {
            states[i] = i;
        }
        trans0.assign(state_length, '#');
        trans1.assign(state_length, '*');
        trans1[end_state] = '#';  // accepting state has no outgoing edge

        const int length = static_cast<int>(p.length());
        int current_state = 1;  // state 0 keeps its epsilon edge to state 1
        for (int i = 0; i < length && current_state < end_state; ++i) {
            const char symbol = p[i];
            const bool is_symbol = (symbol >= 'a' && symbol <= 'z') || symbol == '.';
            if (!is_symbol) {
                continue;  // '*' is consumed together with the symbol before it
            }
            if (i + 1 < length && p[i + 1] == '*') {
                // "x*": loop on x, leave the state for free.
                trans0[current_state] = symbol;
                trans1[current_state] = '*';
            } else {
                // plain "x": must consume exactly one x to move on.
                trans1[current_state] = symbol;
            }
            ++current_state;
        }
    }

    /**
     * Runs s through the NFA built by init and reports whether the accepting
     * state is active once all of s has been consumed. Returns false when
     * init has not been called.
     */
    bool pass(std::string s) {
        if (state_length <= 0 || trans1.empty()) {
            return false;
        }
        std::vector<char> active(state_length, 0);
        std::vector<char> upcoming(state_length, 0);
        mark_closure(active, init_state);

        const int length = static_cast<int>(s.length());
        for (int i = 0; i < length; ++i) {
            const char c = s[i];
            upcoming.assign(state_length, 0);
            bool any_alive = false;
            // end_state has no outgoing edges, so it never produces successors.
            for (int state = 0; state < end_state; ++state) {
                if (!active[state]) {
                    continue;
                }
                if (trans0[state] == c || trans0[state] == '.') {
                    mark_closure(upcoming, state);
                    any_alive = true;
                }
                if (trans1[state] == c || trans1[state] == '.') {
                    mark_closure(upcoming, state + 1);
                    any_alive = true;
                }
            }
            if (!any_alive) {
                return false;  // every thread died; no suffix can revive the match
            }
            active.swap(upcoming);
        }
        return active[end_state] != 0;
    }

    /**
     * Returns true when the whole string s matches the whole pattern p.
     */
    bool isMatch(std::string s, std::string p) {
        bool chac[26] = {};
        int symbols = 0;
        const int len = static_cast<int>(p.length());
        for (int i = 0; i < len; ++i) {
            if (p[i] >= 'a' && p[i] <= 'z') {
                chac[p[i] - 'a'] = true;
                ++symbols;
            } else if (p[i] == '.') {
                ++symbols;
            }
        }
        init(chac, symbols, p);
        return pass(s);
    }

private:
    // Marks `state` and everything reachable from it through epsilon edges.
    // A state is only ever marked by this walk, which always continues along
    // the epsilon chain, so hitting an already-marked state means the rest of
    // the chain is marked too and the walk can stop.
    void mark_closure(std::vector<char> &flags, int state) const {
        while (state < state_length && !flags[state]) {
            flags[state] = 1;
            if (trans1[state] != '*') {
                break;
            }
            ++state;
        }
    }
};