LeetCode10. Regular Expression Matching （C++/Python）

最新推荐文章于 2021-11-04 12:10:43 发布

Brielleqqqqqqjie

最新推荐文章于 2021-11-04 12:10:43 发布

阅读量285

点赞数

分类专栏： Leetcode

本文链接：https://blog.csdn.net/qq_41562704/article/details/86744131

版权

Leetcode 专栏收录该内容

110 篇文章 2 订阅

订阅专栏

Given an input string (s) and a pattern (p), implement regular expression matching with support for '.' and '*'.

'.' Matches any single character.
'*' Matches zero or more of the preceding element.

The matching should cover the entire input string (not partial).

Note:

s could be empty and contains only lowercase letters a-z.
p could be empty and contains only lowercase letters a-z, and characters like . or *.

Example 1:

Input:
s = "aa"
p = "a"
Output: false
Explanation: "a" does not match the entire string "aa".

Example 2:

Input:
s = "aa"
p = "a*"
Output: true
Explanation: '*' means zero or more of the preceding element, 'a'. Therefore, by repeating 'a' once, it becomes "aa".

Example 3:

Input:
s = "ab"
p = ".*"
Output: true
Explanation: ".*" means "zero or more (*) of any character (.)".

Example 4:

Input:
s = "aab"
p = "c*a*b"
Output: true
Explanation: c can be repeated 0 times, a can be repeated 1 time. Therefore it matches "aab".

Example 5:

Input:
s = "mississippi"
p = "mis*is*p*."
Output: false

题目大意：正则表达式的匹配

方法一：dfs

这个解法的复杂度没有什么优势，但是是最近看编译原理的有限状态机理论一点启发，对p进行处理，将p改为类似于图的结构，然后运用dfs判断最后是否能到达末状态。

以c*a*b为例：

遍历c：为字母，则初始状态（0）可以到达c（1）

遍历*：说明前面的c可以有0到无穷个，则有c状态（1）可以到c状态（1）；同时c的前驱状态（0）可以跳过c，直接到达下一个字母状态a（3）

遍历a：上一个字母状态c（1）可以到a（3）

遍历*：a状态（3）可以到a状态（3），a的前驱状态（0和1）可以跳过a，直接到达下一个字母状态b（5）

遍历b：上一个字母状态a（3）可以到达b（5）

结尾：上一个字母状态（5）可以到达末位置（6）；

// Regular-expression matcher supporting '.' and '*'.
// The pattern is converted into a transition graph over pattern positions
// and the input string is walked through that graph depth-first.
class Solution {
public:
    // graph[u] lists every pattern position that may be consumed right after u.
    std::vector<std::vector<int>> graph;
    // seen[cur][index] is non-zero once dfs(cur, index) has been expanded.
    std::vector<std::vector<char>> seen;
    // Becomes true as soon as one accepting path has been found.
    bool flag = false;
    // Length of s and length of the sentinel-wrapped pattern.
    int slen = 0, plen = 0;

    // Returns true when the whole of s is matched by pattern p.
    // All per-call state is rebuilt here, so one object can be reused.
    bool isMatch(std::string s, std::string p) {
        initize(p);
        // '#' is the start state (position 0), '@' the accepting state (last position).
        p = '#' + p + '@';
        slen = static_cast<int>(s.length());
        plen = static_cast<int>(p.length());

        flag = false;
        graph.assign(plen, std::vector<int>());
        seen.assign(slen + 1, std::vector<char>(plen, 0));

        // pre[v] holds the states that have an edge into position v.
        std::vector<std::vector<int>> pre(plen);
        int last = 0;  // most recent non-'*' position
        for (int i = 1; i < plen; ++i) {
            if (p[i] == '*') {
                // The starred element may repeat: self loop.
                graph[last].push_back(last);
                // It may also be skipped: everything that reaches `last`
                // can go straight to the position after the '*'.
                // i + 1 is in range because the final position is always '@'.
                for (int from : pre[last]) {
                    graph[from].push_back(i + 1);
                    pre[i + 1].push_back(from);
                }
            } else {
                pre[i].push_back(last);
                graph[last].push_back(i);
                last = i;
            }
        }

        for (int next : graph[0])
            dfs(0, next, s, p);
        return flag;
    }

    // Normalises the pattern in place: drops leading '*' characters and
    // collapses runs of consecutive '*' into a single one.
    void initize(std::string &p) {
        // Check for emptiness first so an empty pattern is never dereferenced.
        while (!p.empty() && p.front() == '*')
            p.erase(p.begin());
        int cnt = 0;
        const int len = static_cast<int>(p.length());
        for (int i = 0; i < len; ++i) {
            if (i != 0 && p[i] == '*' && p[i - 1] == '*')
                continue;
            p[cnt++] = p[i];
        }
        p.resize(cnt);
    }

    // Tries to consume s[cur] with pattern position `index`, then follows the
    // outgoing edges of `index`. Sets flag when the accepting position is
    // reached with all of s consumed. Expects isMatch to have built graph/seen.
    void dfs(int cur, int index, const std::string &s, const std::string &p) {
        if (cur == slen && index == plen - 1) {
            flag = true;
            return;
        }
        if (flag || cur == slen || index == plen)
            return;
        if (s[cur] != p[index] && p[index] != '.')
            return;
        // The outcome depends only on (cur, index); a state that was already
        // expanded without setting flag cannot succeed on a second visit.
        if (seen[cur][index])
            return;
        seen[cur][index] = 1;
        for (int next : graph[index])
            dfs(cur + 1, next, s, p);
    }
};

方法二：动态规划

dp[i][j]记录s从下标 i 开始的后缀能否与p从下标 j 开始的后缀匹配。同样用递归对dp进行更新（从字符串的头部开始）。每次遇到p串中某个字符后面跟着 ‘*’ 时需要留意，下次递归分为两种情况：一是把该字符连同 ‘*’ 直接丢弃（匹配0次）；二是当前字符匹配时消耗s的一个字符，p的位置保持不变。

C++

// Regular-expression matcher ('.' and '*') using top-down recursion with a memo table.
class Solution {
public:
    // dp[x][y]: -1 = not computed yet, otherwise 1/0 for "s from x matches p from y".
    vector<vector<int>>dp;
    int m, n;

    // Returns true when the whole of s is matched by the whole of p.
    bool isMatch(string s, string p) {
        m = s.length();
        n = p.length();
        dp.assign(m + 1, vector<int>(n + 1, -1));
        return dfs(0, 0, s, p);
    }

    // Returns 1 when the suffix of s starting at x matches the suffix of p
    // starting at y, otherwise 0. Results are cached in dp.
    int dfs(int x, int y, string &s, string &p){
        // The table is never resized during recursion, so this reference stays valid.
        int &memo = dp[x][y];
        if (memo != -1) {
            return memo;
        }

        // Pattern exhausted: a match only if the string is exhausted as well.
        if (y == n) {
            memo = (x == m) ? 1 : 0;
            return memo;
        }

        const bool headMatches = x < m && (p[y] == '.' || p[y] == s[x]);
        const bool starred = y + 1 < n && p[y + 1] == '*';

        bool result;
        if (starred) {
            // Either drop "c*" entirely, or consume one character and stay on "c*".
            result = dfs(x, y + 2, s, p) != 0;
            if (!result && headMatches) {
                result = dfs(x + 1, y, s, p) != 0;
            }
        } else {
            result = headMatches && dfs(x + 1, y + 1, s, p) != 0;
        }

        memo = result ? 1 : 0;
        return memo;
    }
};

Python

class Solution(object):
    def isMatch(self, s, p):
        """
        Decide whether pattern p ('.' and '*' supported) matches all of s.

        :type s: str
        :type p: str
        :rtype: bool
        """
        s_len, p_len = len(s), len(p)
        # memo[(i, j)] caches whether s[i:] is matched by p[j:].
        memo = {}

        def matches_from(i, j):
            key = (i, j)
            if key in memo:
                return memo[key]
            # Pattern exhausted: success only if the string is exhausted too.
            if j == p_len:
                return i == s_len

            head_ok = i < s_len and (p[j] == '.' or p[j] == s[i])
            if j + 1 < p_len and p[j + 1] == '*':
                # First try dropping "c*"; otherwise consume one char and stay on "c*".
                result = matches_from(i, j + 2)
                if not result and head_ok:
                    result = matches_from(i + 1, j)
            else:
                result = head_ok and matches_from(i + 1, j + 1)

            memo[key] = result
            return result

        return matches_from(0, 0)

官方题解中提供了一种非递归的更新方法。注意用非递归的话，由于‘*’的影响，某个位置的dp值依赖于其后面位置的dp值，因此从dp的末尾开始更新更合适：

class Solution(object):
    def isMatch(self, s, p):
        """
        Decide whether pattern p ('.' and '*' supported) matches all of s,
        filling the table iteratively from the end of both strings.

        :type s: str
        :type p: str
        :rtype: bool
        """
        rows, cols = len(s), len(p)
        # table[i][j] tells whether s[i:] is matched by p[j:].
        table = [[False] * (cols + 1) for _ in range(rows + 1)]
        # Empty remainder of s against empty remainder of p.
        table[rows][cols] = True

        for i in reversed(range(rows + 1)):
            for j in reversed(range(cols)):
                head_ok = i < rows and (p[j] == '.' or p[j] == s[i])
                if j + 1 < cols and p[j + 1] == '*':
                    # Drop "c*", or consume one character and stay on "c*".
                    table[i][j] = table[i][j + 2] or (head_ok and table[i + 1][j])
                else:
                    table[i][j] = head_ok and table[i + 1][j + 1]

        return table[0][0]