数据结构和算法笔记6:KMP算法

KMP(Knuth-Morris-Pratt)算法是一种字符串匹配算法,用于在一个文本串中查找一个模式串的出现位置。它的主要优势在于在匹配过程中避免了不必要的回溯,提高了匹配效率。

KMP算法的核心思想是利用已经匹配过的信息,避免在模式串与文本串匹配过程中进行不必要的回溯。它通过构建一个部分匹配表(也称为失配函数,即下文代码中的next数组),记录模式串中以每个位置结尾的前缀子串的最长相等真前缀和真后缀的长度,从而在失配时通过查表来决定模式串下一次开始比较的位置。相关原理建议看文后的参考链接,讲得非常细致,可以先看3,再看1和2。

下面给出KMP算法的C++代码实现,用于查找pattern串在text串中第一次出现的索引位置:

#include <iostream>
#include <vector>

using namespace std;

// Builds the KMP partial-match table for `pattern`.
//
// Entry k of the returned vector is the length of the longest proper prefix
// of pattern[0..k] that is also a suffix of it. An empty pattern yields an
// empty table.
vector<int> buildPartialMatchTable(const string& pattern) {
    const int length = pattern.length();
    vector<int> table(length, 0);

    int pos = 1;      // index whose table entry is being computed
    int matched = 0;  // length of the prefix currently being extended

    while (pos < length) {
        if (pattern[pos] != pattern[matched]) {
            if (matched == 0) {
                // Nothing left to fall back to: no border ends at `pos`.
                table[pos] = 0;
                ++pos;
            } else {
                // Retry with the next shorter candidate prefix.
                matched = table[matched - 1];
            }
            continue;
        }

        // The current prefix grows by one character.
        ++matched;
        table[pos] = matched;
        ++pos;
    }
    return table;
}

// Returns the index of the first occurrence of `pattern` in `text`.
//
// An empty pattern matches at index 0. Returns -1 when the text is empty
// (and the pattern is not) or when the pattern does not occur.
int kmpSearch(const string& text, const string& pattern) {
    const int textLen = text.length();
    const int patternLen = pattern.length();
    if (patternLen == 0) {
        return 0;
    }
    if (textLen == 0) {
        return -1;
    }

    const vector<int> fallback = buildPartialMatchTable(pattern);

    int textPos = 0;  // next text character to examine
    int matched = 0;  // number of pattern characters matched so far
    while (textPos < textLen) {
        if (text[textPos] != pattern[matched]) {
            if (matched == 0) {
                // No partial match to reuse; move on in the text.
                ++textPos;
            } else {
                // Keep textPos and shrink the match using the table.
                matched = fallback[matched - 1];
            }
            continue;
        }

        ++textPos;
        ++matched;
        if (matched == patternLen) {
            // The match ends just before textPos.
            return textPos - matched;
        }
    }
    return -1;
}

// Demo driver: searches a fixed text for a fixed pattern and prints the result.
int main() {
    const string text = "sadbutsad";
    const string pattern = "sad";

    const int index = kmpSearch(text, pattern);
    if (index == -1) {
        cout << "Pattern not found" << endl;
        return 0;
    }

    cout << "Pattern found at index " << index << endl;
    return 0;
}

上面是用while循环写的逻辑,改用for循环可以写得更简洁一些:

#include <iostream>
#include <vector>

using namespace std;

// Builds the KMP partial-match table for `pattern` (for-loop formulation).
//
// Entry i of the returned vector is the length of the longest proper prefix
// of pattern[0..i] that is also a suffix of it. An empty pattern yields an
// empty table.
vector<int> buildPartialMatchTable(const string& pattern) {
    const int m = static_cast<int>(pattern.length());
    vector<int> next(m, 0);
    int j = 0;  // length of the prefix currently being extended
    for (int i = 1; i < m; ++i) {
        // On a mismatch fall back to the next shorter candidate prefix.
        while (j > 0 && pattern[i] != pattern[j])
            j = next[j - 1];
        if (pattern[i] == pattern[j])
            ++j;
        next[i] = j;
    }
    return next;
}

// Returns the index of the first occurrence of `pattern` in `text`.
//
// An empty pattern matches at index 0. Returns -1 when the text is empty
// (and the pattern is not) or when the pattern does not occur.
int kmpSearch(const string& text, const string& pattern) {
    const int n = static_cast<int>(text.length());
    const int m = static_cast<int>(pattern.length());
    if (m == 0) {
        return 0;
    }
    if (n == 0) {
        return -1;
    }

    const vector<int> next = buildPartialMatchTable(pattern);
    int j = 0;  // number of pattern characters matched so far
    for (int i = 0; i < n; ++i) {
        // The guard must test j, not i: with j == 0 there is no partial
        // match to shrink, and next[j - 1] would index out of bounds.
        while (j > 0 && text[i] != pattern[j])
            j = next[j - 1];
        if (text[i] == pattern[j])
            ++j;
        if (j == m)
            return i - m + 1;  // the match ends at i
    }
    return -1;
}

// Demo driver: searches a fixed text for a fixed pattern and prints the result.
int main() {
    const string text = "sadbutsad";
    const string pattern = "sad";

    const int index = kmpSearch(text, pattern);
    if (index == -1) {
        cout << "Pattern not found" << endl;
        return 0;
    }

    cout << "Pattern found at index " << index << endl;
    return 0;
}

输出:
Pattern found at index 0

KMP算法力扣相关的题目:

28. 找出字符串中第一个匹配项的下标

在这里插入图片描述

class Solution
{
public:
  // LeetCode 28: returns the index of the first occurrence of `needle` in
  // `haystack`, or -1 if it does not occur. An empty needle matches at 0.
  int strStr(string haystack, string needle)
  {
    const int n = static_cast<int>(haystack.size());
    const int m = static_cast<int>(needle.size());
    if (m == 0)
        return 0;

    // next[i]: length of the longest proper prefix of needle[0..i] that is
    // also a suffix of it. A vector is used because a runtime-sized C array
    // is not standard C++.
    vector<int> next(m, 0);
    for (int i = 1, j = 0; i < m; ++i)
    {
        while (j > 0 && needle[i] != needle[j])
            j = next[j - 1];
        if (needle[i] == needle[j])
            ++j;
        next[i] = j;
    }

    // Scan the haystack once; j is the number of needle characters matched.
    for (int i = 0, j = 0; i < n; ++i)
    {
        while (j > 0 && haystack[i] != needle[j])
            j = next[j - 1];
        if (haystack[i] == needle[j])
            ++j;
        if (j == m)
            return i - m + 1;  // the match ends at i
    }
    return -1;
  }
};

459. 重复的子字符串

在这里插入图片描述

class Solution {
public:
    // LeetCode 459: returns true when `s` can be built by repeating one of
    // its proper substrings two or more times.
    bool repeatedSubstringPattern(string s) {
        const int len = static_cast<int>(s.size());
        if (len == 0)
            return false;  // nothing to repeat; also keeps next[len - 1] valid

        // next[j]: length of the longest proper prefix of s[0..j] that is
        // also a suffix of it. A vector is used because a runtime-sized C
        // array is not standard C++.
        vector<int> next(len, 0);
        for (int j = 1, i = 0; j < len; ++j)
        {
            while (i > 0 && s[i] != s[j])
                i = next[i - 1];
            if (s[i] == s[j])
                ++i;
            next[j] = i;
        }

        // len - border is the candidate period; s is periodic when a border
        // exists and the candidate period divides the length.
        const int border = next[len - 1];
        return border != 0 && len % (len - border) == 0;
    }
};

686. 重复叠加字符串匹配

在这里插入图片描述

对于"重复叠加",可以想到用取余操作(a[i % a.size()])循环访问a,这样不必真的把a拼接多次。观察可知,最多需要重复的次数是b.size() / a.size() + 2,超过这个次数仍未匹配就说明无解。

class Solution {
public:
    // Returns the minimum number of copies of `a` that must be concatenated
    // so that `b` occurs as a substring, or -1 if no number of copies works.
    //
    // The repeated text is never materialised: position i of the virtual
    // text a+a+a+... is read as a[i % a.size()].
    int kmpSearch(string a, string b)
    {
        if (b.empty())
            return 0;   // zero copies give "", which already contains ""
        if (a.empty())
            return -1;  // repeating "" can never produce a non-empty b

        const int n = static_cast<int>(a.size());
        const int m = static_cast<int>(b.size());

        // next[i]: length of the longest proper prefix of b[0..i] that is
        // also a suffix of it.
        vector<int> next(m, 0);
        for (int i = 1, j = 0; i < m; ++i)
        {
            while (j > 0 && b[i] != b[j])
                j = next[j - 1];
            if (b[i] == b[j])
                ++j;
            next[i] = j;
        }

        // Scan at most (m / n + 2) copies of a; this is the bound used for
        // the maximum number of repetitions that can be needed.
        const int limit = (m / n + 2) * n;
        for (int i = 0, j = 0; i < limit; ++i)
        {
            while (j > 0 && a[i % n] != b[j])
                j = next[j - 1];
            if (a[i % n] == b[j])
                ++j;
            if (j == m)
                return i / n + 1;  // copies needed to cover position i
        }
        return -1;
    }

    // LeetCode 686 entry point; delegates to kmpSearch.
    int repeatedStringMatch(string a, string b) {
        return kmpSearch(a, b);
    }
};

面试题 17.17. 多次搜索

在这里插入图片描述

class Solution {

public:

    // Returns every start index at which `pattern` occurs in `big`, in
    // increasing order and including overlapping occurrences. An empty
    // pattern produces an empty result.
    vector<int> kmpsearch(string& big, string& pattern)
    {
        vector<int> hits;
        if (pattern.empty())
            return hits;

        const int textLen = big.size();
        const int patLen = pattern.size();

        // fail[k]: length of the longest proper prefix of pattern[0..k]
        // that is also a suffix of it.
        vector<int> fail(patLen, 0);
        int border = 0;
        for (int k = 1; k < patLen; ++k)
        {
            while (border > 0 && pattern[k] != pattern[border])
                border = fail[border - 1];
            if (pattern[k] == pattern[border])
                ++border;
            fail[k] = border;
        }

        int matched = 0;  // number of pattern characters matched so far
        for (int pos = 0; pos < textLen; ++pos)
        {
            while (matched > 0 && big[pos] != pattern[matched])
                matched = fail[matched - 1];
            if (big[pos] == pattern[matched])
                ++matched;
            if (matched != patLen)
                continue;

            hits.push_back(pos - patLen + 1);
            // Continue from the longest border so overlaps are also found.
            matched = fail[matched - 1];
        }
        return hits;
    }

    // Runs kmpsearch for each entry of `smalls` against `big`; result[i]
    // holds the occurrence positions of smalls[i].
    vector<vector<int>> multiSearch(string big, vector<string>& smalls) {
        vector<vector<int>> result;
        for (string& word : smalls)
            result.push_back(kmpsearch(big, word));
        return result;
    }
};

796. 旋转字符串

在这里插入图片描述

class Solution {
public:
    // LeetCode 796: returns true when `goal` can be obtained from `s` by
    // rotating it, i.e. moving some number of leading characters to the end.
    //
    // `goal` is searched for in the virtual string s+s, which is read as
    // s[i % n] instead of being built.
    bool rotateString(string s, string goal) {
        if (s.size() != goal.size())
            return false;

        const int n = static_cast<int>(s.size());
        if (n == 0)
            return true;  // "" is a rotation of ""

        // next[i]: length of the longest proper prefix of goal[0..i] that
        // is also a suffix of it.
        vector<int> next(n, 0);
        for (int i = 1, j = 0; i < n; ++i)
        {
            while (j > 0 && goal[i] != goal[j])
                j = next[j - 1];
            if (goal[i] == goal[j])
                ++j;
            next[i] = j;
        }

        // j is the number of goal characters matched so far.
        for (int i = 0, j = 0; i < 2 * n; ++i)
        {
            while (j > 0 && s[i % n] != goal[j])
                j = next[j - 1];
            if (s[i % n] == goal[j])
                ++j;
            if (j == n)
                return true;
        }
        return false;
    }
};

214. 最短回文串

在这里插入图片描述

class Solution {
public:
    // LeetCode 214: returns the shortest palindrome obtainable by adding
    // characters in front of `s`.
    //
    // The reverse of s is matched against s as the pattern; the match
    // length left at the end is the length of the longest prefix of s that
    // is a palindrome. The remaining suffix, reversed, is what gets
    // prepended.
    string shortestPalindrome(string s) {
        const int n = static_cast<int>(s.size());

        // next[i]: length of the longest proper prefix of s[0..i] that is
        // also a suffix of it.
        vector<int> next(n, 0);
        for (int i = 1, j = 0; i < n; ++i)
        {
            while (j > 0 && s[i] != s[j])
                j = next[j - 1];
            if (s[i] == s[j])
                ++j;
            next[i] = j;
        }

        // Feed s from back to front (i.e. reversed) into the matcher.
        int j = 0;
        for (int i = n - 1; i >= 0; --i)
        {
            while (j > 0 && s[i] != s[j])
                j = next[j - 1];
            if (s[i] == s[j])
                ++j;
        }

        // s[j..n) is not covered by the palindromic prefix; its reverse is
        // taken directly from the reverse iterators of s.
        const string add(s.rbegin(), s.rend() - j);
        return add + s;
    }
};

相关参考:

  1. 代码随想录-28. 实现 strStr()
  2. 【宫水三叶】简单题学 KMP 算法
  3. 灵茶山艾府的知乎回答:如何更好地理解和掌握 KMP 算法?
  • 9
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值