Given an input string (s) and a pattern (p), implement regular expression matching with support for '.' and '*'.
'.' Matches any single character.
'*' Matches zero or more of the preceding element.
The matching should cover the entire input string (not partial).
Note:
s could be empty and contains only lowercase letters a-z.
p could be empty and contains only lowercase letters a-z, and characters like . or *.
Example 1:
Input:
s = "aa"
p = "a"
Output: false
Explanation: "a" does not match the entire string "aa".
Example 2:
Input:
s = "aa"
p = "a*"
Output: true
Explanation: '*' means zero or more of the preceding element, 'a'. Therefore, by repeating 'a' once, it becomes "aa".
Example 3:
Input:
s = "ab"
p = ".*"
Output: true
Explanation: ".*" means "zero or more (*) of any character (.)".
Example 4:
Input:
s = "aab"
p = "c*a*b"
Output: true
Explanation: c can be repeated 0 times, a can be repeated 1 time. Therefore it matches "aab".
Example 5:
Input:
s = "mississippi"
p = "mis*is*p*."
Output: false
题目大意:正则表达式的匹配
方法一:dfs
这个解法的复杂度没有什么优势,但是是最近看编译原理的有限状态机理论一点启发,对p进行处理,将p改为类似于图的结构,然后运用dfs判断最后是否能到达末状态。
以c*a*b为例:
遍历c:为字母,则初始状态(0)可以到达c(1)
遍历*:说明前面的c可以有0到无穷个,则有c状态(1)可以到c状态(1);同时c也可以出现0次,所以c的前驱状态(0)可以直接到达*之后的位置(3)
遍历a:上一个字母状态c(1)可以到a(3)
遍历*:a状态(3)可以到a状态(3),a状态自己能到达下一个字母状态b(5);同时a也可以出现0次,所以a的前驱状态(0)和(1)也可以直接到达b(5)
遍历b:上一个字母状态a(3)可以到达b(5)
结尾:上一个字母状态(5)可以到达末位置(6);
// Regular-expression matcher ('.' and '*') built on a small state graph.
//
// The pattern is wrapped as '#' + pattern + '@'. Index 0 ('#') is the start
// state, the last index ('@') is the accepting state, and every other
// non-'*' index is a state that consumes one character of the input.
// graph[u] lists the states that may follow u. A depth-first search walks
// the input and the graph in lock-step and sets `flag` once the whole input
// has been consumed and the accepting state is reached.
class Solution {
public:
    vector<vector<int>> graph;     // graph[u]: states reachable right after state u
    bool flag = false;             // set to true once a full match has been found
    int slen = 0, plen = 0;        // input length / wrapped-pattern length
    vector<vector<char>> visited;  // visited[cur][index]: pair already explored without success

    // Returns true when pattern `p` matches the whole of `s`.
    // All per-call state is rebuilt here, so one Solution object can be reused.
    bool isMatch(string s, string p) {
        initize(p);
        p = '#' + p + '@';
        slen = static_cast<int>(s.length());
        plen = static_cast<int>(p.length());

        flag = false;
        graph.assign(plen, vector<int>());
        visited.assign(slen + 1, vector<char>(plen, 0));

        // pre[v]: states that have an edge into v. Needed so that, when the
        // element at v turns out to be starred, its predecessors can skip it.
        vector<vector<int>> pre(plen);
        int last = 0;  // most recent character-consuming state
        for (int i = 1; i < plen; ++i) {
            if (p[i] == '*') {
                // One or more repetitions: the starred state loops on itself.
                graph[last].push_back(last);
                // Zero repetitions: every predecessor may jump past the star.
                // i + 1 is always valid because '@' terminates the pattern.
                for (int from : pre[last]) {
                    graph[from].push_back(i + 1);
                    pre[i + 1].push_back(from);
                }
            } else {
                pre[i].push_back(last);
                graph[last].push_back(i);
                last = i;
            }
        }

        for (int next : graph[0])
            dfs(0, next, s, p);
        return flag;
    }

    // Normalises the pattern in place: drops leading '*' (nothing precedes
    // them) and collapses runs of consecutive '*' into a single one.
    // Safe on an empty pattern.
    void initize(string &p) {
        string cleaned;
        cleaned.reserve(p.size());
        for (char c : p) {
            if (c == '*' && (cleaned.empty() || cleaned.back() == '*'))
                continue;
            cleaned.push_back(c);
        }
        p.swap(cleaned);
    }

    // Tries to match s[cur] against pattern state `index`, then continues
    // through every successor of that state. Sets `flag` on success.
    // Expects to be called after isMatch has built `graph` and `visited`.
    void dfs(int cur, int index, const string &s, const string &p) {
        if (flag)
            return;
        if (cur == slen) {
            // Input exhausted: success only when sitting on the accepting state.
            if (index == plen - 1)
                flag = true;
            return;
        }
        // Accepting state reached with input left over: dead end.
        if (index >= plen - 1)
            return;
        // Each (cur, index) pair is explored at most once; without this the
        // search is exponential on patterns such as "a*a*a*...c".
        if (visited[cur][index])
            return;
        visited[cur][index] = 1;
        if (s[cur] != p[index] && p[index] != '.')
            return;
        for (int next : graph[index])
            dfs(cur + 1, next, s, p);
    }
};
方法二:动态规划
dp[i][j]记录s从下标 i 开始的后缀能否与p从下标 j 开始的后缀匹配。同样用递归对dp进行更新(从字符串的头部开始),每次遇到p串中某个字母后面跟着 ‘*’ 时,需要留意,下次递归的时候,分为两种情况:可能‘*’前的一个字母直接丢弃(匹配0次),也可能当前字符匹配成功后s前进一位、p保持不动(继续匹配)。
C++
// Top-down (memoised) regular-expression matcher supporting '.' and '*'.
class Solution {
public:
    vector<vector<int>> dp;  // dp[x][y]: -1 = unknown, otherwise 0/1 for "s[x..] matches p[y..]"
    int m, n;                // lengths of the input string and of the pattern

    // Returns true when pattern `p` matches the whole of `s`.
    bool isMatch(string s, string p) {
        m = s.length();
        n = p.length();
        dp.assign(m + 1, vector<int>(n + 1, -1));
        return dfs(0, 0, s, p);
    }

    // Returns 1 when the suffix of `s` starting at x matches the suffix of
    // `p` starting at y, and 0 otherwise; results are cached in dp.
    int dfs(int x, int y, string &s, string &p) {
        int &memo = dp[x][y];
        if (memo != -1)
            return memo;

        // Pattern used up: a match requires the input to be used up as well.
        if (y == n) {
            memo = (x == m) ? 1 : 0;
            return memo;
        }

        const bool headMatches = x < m && (p[y] == '.' || p[y] == s[x]);
        const bool starFollows = y + 1 < n && p[y + 1] == '*';

        bool matched;
        if (starFollows) {
            // Either drop the "x*" pair entirely, or consume one character
            // and stay on the same pattern position.
            matched = dfs(x, y + 2, s, p) != 0;
            if (!matched && headMatches)
                matched = dfs(x + 1, y, s, p) != 0;
        } else {
            matched = headMatches && dfs(x + 1, y + 1, s, p) != 0;
        }

        memo = matched ? 1 : 0;
        return memo;
    }
};
Python
class Solution(object):
    def isMatch(self, s, p):
        """Return True if pattern ``p`` matches the whole of string ``s``.

        Supports '.' (any single character) and '*' (zero or more of the
        preceding element). Implemented as a memoised top-down recursion.

        :type s: str
        :type p: str
        :rtype: bool
        """
        m, n = len(s), len(p)
        # dp[x][y] caches whether s[x:] matches p[y:]; None means "not computed yet".
        dp = [[None for _ in range(n + 1)] for _ in range(m + 1)]

        def dfs(x, y):
            """Return whether s[x:] matches p[y:]."""
            if dp[x][y] is not None:
                return dp[x][y]
            if y == n:
                # Pattern exhausted: a match requires the string to be exhausted too.
                ans = x == m
            else:
                cur_match = x < m and p[y] in {s[x], '.'}
                if y + 1 < n and p[y + 1] == '*':
                    # Either skip the "x*" pair, or consume one character and
                    # stay on the same pattern position.
                    ans = dfs(x, y + 2) or (cur_match and dfs(x + 1, y))
                else:
                    ans = cur_match and dfs(x + 1, y + 1)
            dp[x][y] = ans
            return ans

        return dfs(0, 0)
官方题解中提供了一种非递归的更新方法。注意用非递归的话,由于‘*’的影响,某个位置的dp值依赖于其后面的dp值,因此从dp的末尾开始更新更合适:
class Solution(object):
    def isMatch(self, s, p):
        """Return True if pattern ``p`` matches the whole of string ``s``.

        Supports '.' (any single character) and '*' (zero or more of the
        preceding element). Implemented as an iterative bottom-up table fill.

        :type s: str
        :type p: str
        :rtype: bool
        """
        m, n = len(s), len(p)
        # dp[i][j] is True when s[i:] matches p[j:].
        dp = [[False for _ in range(n + 1)] for _ in range(m + 1)]
        # Empty string against empty pattern.
        dp[-1][-1] = True

        # Every cell depends only on cells with a larger i or a larger j, so
        # the table is filled from the bottom-right corner backwards.
        for i in range(m, -1, -1):
            for j in range(n - 1, -1, -1):
                cur_match = i < m and p[j] in {s[i], '.'}
                if j + 1 < n and p[j + 1] == '*':
                    # Either skip the "x*" pair, or consume one character and
                    # stay on the same pattern position.
                    dp[i][j] = dp[i][j + 2] or (cur_match and dp[i + 1][j])
                else:
                    dp[i][j] = cur_match and dp[i + 1][j + 1]
        return dp[0][0]