Implement strStr().
Returns the index of the first occurrence of needle in haystack, or -1 if needle is not part of haystack.
Update (2014-11-02):
The signature of the function has been updated to return the index instead of the pointer. If you still see that your function signature returns a char * or String, please click the reload button to reset your code definition.
算法一,Brute force
此算法在leetcode上的实际执行时间为16ms。
// Brute-force substring search: try every start position in turn.
class Solution {
public:
    // Returns the index of the first occurrence of `needle` in `haystack`,
    // or -1 when `needle` does not occur. An empty `needle` yields 0.
    // Both arguments are read as NUL-terminated strings and are not modified.
    int strStr(char *haystack, char *needle) {
        for (int start = 0; ; ++start) {
            // Count how many leading needle characters match at this start.
            int matched = 0;
            while (needle[matched] &&
                   haystack[start + matched] == needle[matched]) {
                ++matched;
            }
            if (!needle[matched]) {
                return start;   // consumed the whole needle: match found
            }
            if (!haystack[start + matched]) {
                return -1;      // haystack ran out before the needle did
            }
        }
    }
};
算法二,Rabin–Karp algorithm
特点是采取Rolling hash。
先计算好待查字符串needle的Hash值。然后将被搜索字符串haystack逻辑上分成若干组,每组长度与needle长度相同。每组分别计算Hash值。这样,比较字符串,变成了比较Hash值。
所谓Rolling Hash,就是后一组的Hash计算,可以利用前一组的Hash成果。
因为后一组字符的组成与前一组的关系为,吐出尾部字符,吸纳一个新字符。这样计算Hash时,也回滚掉吐出的字符,吸纳进新的字符。
该算法在leetcode上实际执行时间为40ms。
// Rabin-Karp substring search using a rolling polynomial hash.
class Solution {
public:
    // Returns the index of the first occurrence of `needle` in `haystack`,
    // or -1 when `needle` does not occur. An empty `needle` yields 0.
    // Both arguments are read as NUL-terminated strings and are not modified.
    int strStr(char *haystack, char *needle) {
        const int q = 3355439;  // hash modulus; (q - 1) * r + 255 still fits in a 32-bit int
        const int r = 256;      // radix: one "digit" per byte

        // Hash the needle and compute weight = r^(nLen - 1) mod q, the factor
        // carried by the oldest byte of a window.
        int nLen = 0;
        int nHash = 0;
        int weight = 1;
        for (const char *p = needle; *p; ++p) {
            nHash = (nHash * r + byteOf(*p)) % q;
            if (nLen) {
                weight = (weight * r) % q;
            }
            ++nLen;
        }
        if (!nLen) {
            return 0;
        }

        // Hash the first window; bail out if the haystack is shorter than the needle.
        int hHash = 0;
        int filled = 0;
        while (filled < nLen && haystack[filled]) {
            hHash = (hHash * r + byteOf(haystack[filled])) % q;
            ++filled;
        }
        if (filled < nLen) {
            return -1;
        }

        for (int start = 0; ; ++start) {
            // Equal hashes only suggest a match; confirm by comparing the bytes.
            if (nHash == hHash && !strncmp(haystack + start, needle, nLen)) {
                return start;
            }
            const char incoming = haystack[start + nLen];
            if (!incoming) {
                return -1;  // no further window to slide to
            }
            // Roll the window: remove the outgoing byte's contribution (adding q
            // keeps the value non-negative), then shift and append the new byte.
            hHash = (hHash + q - (byteOf(haystack[start]) * weight) % q) % q;
            hHash = (hHash * r + byteOf(incoming)) % q;
        }
    }

private:
    // Maps a char to 0..255. Hashing raw (possibly signed) chars lets the needle
    // hash go negative while the rolled window hash is normalized to be
    // non-negative, so equal strings containing bytes >= 0x80 could hash
    // differently and a real match would be skipped.
    static int byteOf(char c) {
        return static_cast<unsigned char>(c);
    }
};
参考资料
http://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
http://en.wikipedia.org/wiki/Rolling_hash
算法三:KMP
此算法在leetcode上实际执行时间为40ms。
// Knuth-Morris-Pratt substring search.
class Solution {
public:
    // Returns the index of the first occurrence of `needle` in `haystack`,
    // or -1 when `needle` does not occur. An empty `needle` yields 0.
    // Both arguments are read as NUL-terminated strings and are not modified.
    int strStr(char *haystack, char *needle) {
        std::vector<int> fallback;
        preKMP(needle, fallback);

        int hayPos = 0;
        int patPos = 0;
        for (; haystack[hayPos] && needle[patPos]; ++hayPos, ++patPos) {
            // On a mismatch, follow the table until the characters agree or
            // patPos drops to -1 (restart the pattern after this character).
            while (patPos >= 0 && needle[patPos] != haystack[hayPos]) {
                patPos = fallback[patPos];
            }
        }

        if (needle[patPos]) {
            return -1;  // haystack ended before the whole needle was matched
        }
        return hayPos - patPos;
    }

    // Appends the KMP fallback table for `needle` to `next`: a leading -1
    // followed by one entry per character of `needle`. Entry k is the pattern
    // position to resume from after a mismatch at position k; -1 means
    // "advance the text and restart the pattern".
    void preKMP(char *needle, std::vector<int> &next) {
        next.push_back(-1);

        int border = -1;
        for (int pos = 0; needle[pos]; ) {
            while (border >= 0 && needle[pos] != needle[border]) {
                border = next[border];
            }
            ++pos;
            ++border;
            // If the character after the border equals the one at pos, falling
            // back to `border` would hit the same mismatch, so reuse its
            // fallback instead.
            const int link = (needle[pos] == needle[border]) ? next[border] : border;
            next.push_back(link);
        }
    }
};
参考资料:
http://monge.univ-mlv.fr/~lecroq/string/node8.html
总结,Brute force执行效率远好于其它两种算法。怪不得leetcode将此题难度标注为easy,大约原意就是只考一考Brute force。
毕竟高级字符串查找算法,只有在needle的长度越长时,才对Brute force有明显优势。