算法Day09 | KMP，28. 实现 strStr，459.重复的子字符串

雨后的放线君

已于 2024-07-18 16:11:51 修改

阅读量589

点赞数

分类专栏：刷题日志文章标签：算法数据结构

于 2023-05-20 16:55:30 首次发布

本文链接：https://blog.csdn.net/sakura745/article/details/130754289

版权

刷题日志专栏收录该内容

52 篇文章 0 订阅

订阅专栏

Day09

KMP

KMP是三位提出者（Knuth、Morris、Pratt）姓氏首字母的缩写，用于在文本字符串text中搜索pattern字符串，返回pattern在text中第一次出现的位置。
算法做法就是在暴力匹配的基础上加速匹配。通过对pattern字符串求next数组(该数组也称为前缀表)，在失配时跳过text中已经匹配过的部分。next数组就是为了实现跳过部分字符串的功能而设计的。暴力匹配的时间复杂度为 $O(m\times n)$ ，KMP的时间复杂度为 $O(m + n)$ 。

如何求出next数组？对pattern的每个前缀子串求出最长相等前、后缀的长度，这些长度构成next数组。具体实现可能存在其他加工，如对next数组整体减一、整体右移一位等。

假设有abbabbk，对于字符k有一个信息：k之前的字符串abbabb中，前、后缀匹配的最大长度为3（即abb）。其中前、后缀必须是k之前字符串的真子字符串，即不包含整体abbabb字符串。为什么不包含？因为next值表示失配后要回跳到的下标，它不能等于当前下标本身：k的下标为6，所以next[6] != 6；而且整体字符串与自身一定相等，无意义。

int next[] = {-1/*人为规定*/, 0/*根据定义*/, 0/1, ...};

求next数组是往前跳的过程，最后用到前三个元素来得到最终数组。

d4779b1360e9149fe4265c7afc4f631
0b8fd333c19d3f228d872827b21f74c
b7ef04f09a89f04411565a5cbf1b608
0e1a3cf49c881dba57b070df93deaff

010f77584f1ff064d7ac05c0b700a6e

// Builds the KMP "next" table for `pattern` by repeatedly jumping back through
// the entries that have already been computed.
//
// For i >= 1, next[i] is the length of the longest proper prefix of
// pattern[0..i-1] that is also a suffix of it, which is the pattern index to
// resume from after a mismatch at pattern[i]. next[0] is -1 by convention.
//
// `next` must be pre-sized by the caller to at least pattern.size(); an empty
// `next` is left untouched.
void getNext(const std::string& pattern, std::vector<int>& next) {
    // Empty table (empty pattern): writing next[0] would be out of bounds.
    if (next.empty()) return;
    next[0] = -1;  // by convention
    // A one-element table has no slot for next[1].
    if (next.size() == 1) return;
    next[1] = 0;  // a single character has no proper prefix/suffix
    const int patternLen = static_cast<int>(pattern.size());
    int idxN = 2;
    int nextValue = 0;  // border length carried over from next[idxN - 1]
    while (idxN < patternLen) {
        if (pattern[idxN - 1] == pattern[nextValue]) {
            // The border of the previous prefix extends by one character.
            next[idxN++] = ++nextValue;
        } else if (nextValue > 0) {
            // Mismatch, but a shorter border is still available: jump back.
            nextValue = next[nextValue];
        } else {
            // Mismatch and nothing left to jump back to: no border at all.
            next[idxN++] = 0;
        }
    }
}
// Returns the index of the first occurrence of `pattern` in `text`, or -1 if
// `pattern` does not occur. An empty `pattern` matches at index 0.
int KMP(const std::string& text, const std::string& pattern) {
    // Guard the empty pattern explicitly: the next table would have zero
    // elements, so there is nothing to build and nothing to search for.
    if (pattern.empty()) return 0;

    std::vector<int> next(pattern.size());
    getNext(pattern, next);

    const int textLen = static_cast<int>(text.size());
    const int patternLen = static_cast<int>(pattern.size());
    int idxT = 0;
    int idxP = 0;

    // idxT never moves backwards. On a mismatch only idxP is moved (through
    // the next table) until the pair matches or idxP reaches 0, and only then
    // does idxT advance past the unmatched character.
    while (idxT < textLen && idxP < patternLen) {
        if (text[idxT] == pattern[idxP]) {  // matched: compare the next pair
            idxT++;
            idxP++;
        } else if (idxP > 0) {  // mismatch: jump back using the next table
            idxP = next[idxP];
        } else {  // mismatch at pattern[0]: move on to the next text character
            idxT++;
        }
    }

    // If the loop ended because idxP ran off the pattern, the whole pattern
    // was matched and the match starts idxP characters before idxT.
    // Otherwise the text was exhausted first and there is no match.
    return idxP == patternLen ? idxT - idxP : -1;
}

对代码 if (next.size() == 1) return; 这一句：当字符串只有一个字符时，next数组只有一个元素，应该直接得到 next = {-1} 并返回；如果没有这一句而不跳出，后面的 next[1] = 0 就会越界写入，造成dynamic-stack-buffer-overflow。
越界写入属于未定义行为：对本地编译器来说，不加这一句也能通过，只是因为没有开启越界检测、碰巧没有崩溃，并不是被优化掉了。
LeetCode开启了AddressSanitizer检测，所以会报错。无论如何都应该加上这一句，保证代码的稳定。

28. 实现 strStr

题目链接：28. 实现 strStr()
KMP代码同上

// LeetCode 28: find the first occurrence of `needle` in `haystack` with KMP.
class Solution {
    // Fills `next` with the KMP table for `pattern`: next[0] is -1 by
    // convention, and for i >= 1 next[i] is the length of the longest proper
    // prefix of pattern[0..i-1] that is also its suffix.
    // `next` must be pre-sized to at least pattern.size(); an empty `next` is
    // left untouched.
    void getNext(const string& pattern, vector<int>& next) {
        // Empty table: writing next[0] would be out of bounds.
        if (next.empty()) return;
        next[0] = -1;
        // A one-element table has no slot for next[1].
        if (next.size() == 1) return;
        next[1] = 0;
        const int patternLen = static_cast<int>(pattern.size());
        int idxP = 2;
        int nextValue = 0;  // border length carried over from next[idxP - 1]
        while (idxP < patternLen) {
            if (pattern[idxP - 1] == pattern[nextValue]) {
                next[idxP++] = ++nextValue;  // border grows by one
            } else if (nextValue > 0) {
                nextValue = next[nextValue];  // retry with a shorter border
            } else {
                next[idxP++] = 0;  // no border at all
            }
        }
    }
public:
    // Returns the index of the first occurrence of `needle` in `haystack`,
    // or -1 if there is none. An empty `needle` matches at index 0.
    int strStr(string haystack, string needle) {
        // Guard the empty needle: its next table would have zero elements.
        if (needle.empty()) return 0;

        vector<int> next(needle.size());
        getNext(needle, next);

        const int haystackLen = static_cast<int>(haystack.size());
        const int needleLen = static_cast<int>(needle.size());
        int idxH = 0;
        int idxN = 0;
        while (idxH < haystackLen && idxN < needleLen) {
            if (haystack[idxH] == needle[idxN]) {
                idxN++;
                idxH++;
            } else if (idxN > 0) {
                idxN = next[idxN];  // keep idxH, fall back inside the needle
            } else {
                idxH++;  // mismatch at needle[0]: advance in the haystack
            }
        }
        // The needle was fully consumed only if a complete match was found.
        return idxN == needleLen ? idxH - idxN : -1;
    }
};

459.重复的子字符串

题目链接： 459.重复的子字符串
移动匹配
如果字符串s由重复的子串构成，那么把s+s去掉首、尾各一个字符后，其中仍然能找到s。erase()删除首字符的时间复杂度为 $O(n)$ 。

// LeetCode 459: decide whether `s` is some shorter substring repeated two or
// more times, using the doubled-string search.
class Solution {
public:
    // Returns true if `s` can still be found inside s + s after the first and
    // last characters of the doubled string are removed. An empty `s` yields
    // false.
    bool repeatedSubstringPattern(string s) {
        // Empty input: there is no first or last character to erase below.
        if (s.empty()) return false;

        string ss = s + s;
        // Drop both ends so the two trivial matches (at offset 0 and at
        // offset s.size()) are no longer possible.
        ss.erase(ss.begin());
        ss.pop_back();
        return ss.find(s) != string::npos;
    }
};

KMP
用到了next数组。根据next数组的构成原则可得：重复子串的长度 = 字符串长度 - 最长相等前后缀的长度。如果最长相等前后缀的长度为0，直接返回false；否则当字符串长度能被重复子串的长度整除时，说明s由该子串重复构成，返回true。

// LeetCode 459 solved with the KMP next table.
class Solution {
    // Writes the KMP table of `pattern` into the pre-sized `next`:
    // next[0] is -1, and for i >= 1 next[i] is the length of the longest
    // proper prefix of pattern[0..i-1] that is also its suffix.
    void getNext(const string& pattern, vector<int>& next) {
        next[0] = -1;
        if (next.size() == 1) {
            return;
        }
        next[1] = 0;

        const int total = static_cast<int>(pattern.size());
        int border = 0;  // border length of the prefix handled last
        for (int pos = 2; pos < total; ++pos) {
            const char tail = pattern[pos - 1];
            // Shrink the candidate border until it can be extended by `tail`
            // or there is nothing left to shrink.
            while (border > 0 && tail != pattern[border]) {
                border = next[border];
            }
            if (tail == pattern[border]) {
                ++border;
            }
            next[pos] = border;
        }
    }
public:
    // Returns true when `s` consists of a shorter block repeated end to end.
    bool repeatedSubstringPattern(string s) {
        const int n = s.size();
        // One extra slot: entry i describes the characters before index i, so
        // the entry for the whole of `s` lives at index n.
        vector<int> table(n + 1);
        // The appended character only lengthens the pattern so that table[n]
        // gets computed; e.g. "aba" is processed as "abaA".
        getNext(s + 'A', table);

        const int longestBorder = table[n];
        if (longestBorder <= 0) {
            return false;
        }
        // n - longestBorder is the candidate block length; it must tile `s`.
        return n % (n - longestBorder) == 0;
    }
};