Implement strStr(). Returns the index of the first occurrence of needle in haystack, or -1 if needle is not part of haystack.
字符串模式匹配:
1.brute force:
O(M×N)
// Brute-force substring search: try each possible alignment of needle
// against haystack in turn and compare character by character.
// Returns the index of the first occurrence of needle, 0 when needle is
// empty, or -1 when needle does not occur in haystack.
int strStr(string haystack, string needle)
{
    const int hayLen = haystack.length();
    const int patLen = needle.length();
    if (patLen == 0)
        return 0;

    // The last alignment that still fits starts at hayLen - patLen. When the
    // needle is longer than the haystack this bound is negative and the loop
    // body never executes.
    const int lastStart = hayLen - patLen;
    for (int start = 0; start <= lastStart; ++start)
    {
        // Count how many leading characters agree at this alignment.
        int matched = 0;
        while (matched < patLen && haystack[start + matched] == needle[matched])
            ++matched;
        if (matched == patLen)
            return start;
    }
    return -1;
}
2.KMP algorithm
O(M+N), where M = needle length and N = haystack length
算法的细节可以看这三个链接:1 , 2 和 3。
// Substring search using the Knuth-Morris-Pratt (KMP) algorithm.
class Solution
{
private:
    // M is the needle length and N the haystack length; both are set at the
    // start of strStr and read by computeLPS.
    int M, N;

    // Builds the KMP failure table for needle.
    // On return lps has M entries, and lps[i] is the length of the longest
    // proper prefix of needle[0..i] that is also a suffix of needle[0..i].
    void computeLPS(vector<int> &lps, const string &needle)
    {
        lps.assign(M, 0);
        // len is the length of the prefix/suffix match carried over from the
        // previous position; lps[0] is always 0, so scanning starts at i = 1.
        for (int i = 1, len = 0; i < M;)
        {
            if (needle[len] == needle[i])
                lps[i++] = ++len;
            else
            {
                // Example: needle = AAACAAAA, i = 7. The previous len is 3,
                // but needle[3] ('C') != needle[7] ('A'). Falling back to
                // len = lps[2] = 2 and retrying gives needle[2] == needle[7],
                // so lps[7] = 3.
                if (len > 0)
                    len = lps[len - 1]; // retry with the next shorter border, i stays put
                else
                    lps[i++] = 0;       // no border ends at i
            }
        }
    }
public:
    // Returns the index of the first occurrence of needle in haystack,
    // 0 when needle is empty, or -1 when needle does not occur.
    int strStr(string haystack, string needle)
    {
        N = haystack.length(), M = needle.size();
        // An empty needle matches at index 0. Without this guard the scan
        // below would advance i before testing j == M and report index 1,
        // or read past the end of an empty haystack.
        if (M == 0) return 0;
        vector<int> lps;
        computeLPS(lps, needle);
        // i indexes haystack, j indexes needle; the current alignment starts
        // at i - j, which must not exceed N - M. A needle longer than the
        // haystack makes the bound negative, so the loop never runs.
        for (int i = 0, j = 0; i <= N - M + j;)
        {
            if (haystack[i] == needle[j])
                ++i, ++j;
            else
            {
                if (j == 0)
                    ++i;              // nothing matched: slide the window by one
                else
                    j = lps[j - 1];   // reuse the longest border of the matched part
            }
            if (j == M) return i - j;
        }
        return -1;
    }
};
ABABA
当j = 3时,lps = 2,前AB,后AB。
当j = 4时,判定p[lps[j - 1]] == p[j],因此lps = 3,前ABA,后ABA。
ABABC
当j = 4时,判定p[lps[j - 1]] != p[j],此时取上一步的最长前串AB,如果在后面加上C,此时的lps为0,所以ABAB在后面加上C后,lps为0。
3.Rabin-Karp Algorithm
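Average O(M+N); worst case O(M×N) when hash collisions force repeated character-by-character verification.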
Here is a slide about this algorithm.
// Substring search using the Rabin-Karp rolling hash.
// Each window of haystack is treated as an M-digit number in base d, reduced
// modulo q; a window is compared character by character only when its hash
// equals the needle's hash.
// Returns the index of the first occurrence of needle, 0 when needle is
// empty, or -1 when needle does not occur in haystack.
int strStr(string haystack, string needle)
{
    int N = haystack.length(), M = needle.length();
    if (M == 0) return 0;
    // A needle longer than the haystack cannot match. Returning here also
    // keeps the initial hashing loop from reading past the end of haystack.
    if (M > N) return -1;
    const int q = 335549; // hash modulus
    const int d = 256;    // radix: one digit per byte value
    int h = 1, h1 = 0, h2 = 0;
    // h = d^(M - 1) mod q, the weight of the leading digit of a window
    for (int j = 1; j < M; ++j)
        h = (h * d) % q;
    // Bytes are read as unsigned char so every digit lies in [0, d). With a
    // signed char, bytes >= 0x80 would produce negative hashes that no longer
    // compare equal after the rolling update normalises h2.
    for (int j = 0; j < M; ++j)
    {
        h1 = (h1 * d + static_cast<unsigned char>(needle[j])) % q;
        h2 = (h2 * d + static_cast<unsigned char>(haystack[j])) % q;
    }
    for (int i = 0; i <= N - M; ++i)
    {
        // Equal hashes may still be a collision, so confirm the match.
        if (h2 == h1 && haystack.compare(i, M, needle) == 0)
            return i;
        if (i < N - M)
        {
            // Roll the window: drop the leading digit, shift left by one
            // digit, and append the next byte.
            h2 = (h2 - static_cast<unsigned char>(haystack[i]) * h) % q;
            if (h2 < 0) h2 += q; // keep the hash in [0, q)
            h2 = (h2 * d + static_cast<unsigned char>(haystack[i + M])) % q;
        }
    }
    return -1;
}