参考 https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
在计算前缀时,与kmp表示的是不同的意思
在kmp中的next数组,next[i]表示模式中从i-next[i]到i-1区间的字符与从0到next[i]-1区间的字符相等(两个区间的长度都是next[i])
而bm中的next[i]表示模式中从i-next[i]+1到i区间的字符与从0到next[i]-1区间的字符相等(即以下标i结尾的前缀的最长公共前后缀长度为next[i])
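在建立bad character rule表时,对每个字符总是只记录它在模式串中最后(最右)一次出现的下标。出现不匹配时,如果文本串不匹配位置的字符在模式串中最后一次出现的位置位于不匹配位置的左侧,就按bad character rule把该字符与文本串对齐;如果模式串的不匹配位置的前面和后面都有这个字符(最后一次出现的位置在不匹配位置的右侧),bad character rule算出的移动量不是正数,这时只使用good suffix rule的移动量。最好情况下已匹配的后缀能在模式串中找到可对齐的位置,满足good suffix rule;否则移动模式串的长度减去最长公共前后缀的长度(没有公共前后缀时就是模式串的长度)。最终移动量取两条规则中较大的一个。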
代码如下:
/**
 * Substring search ({@code strStr}) implemented with the Boyer-Moore algorithm,
 * combining the bad character rule and a good suffix rule.
 *
 * <p>The lookup tables are rebuilt on every search and kept in instance fields,
 * so a single instance must not be used by several threads at the same time.
 */
public class Solution
{
    /** Bad character table: pattern character -> index of its last occurrence in the pattern. */
    private Map<Character, Integer> skip_;

    /**
     * Good suffix table: {@code suffix_[j]} is the shift to apply when
     * {@code pattern[j..]} matched the text and the mismatch happened at
     * pattern index {@code j - 1}. Every entry is at least 1.
     */
    private int[] suffix_;

    /**
     * Builds the bad character table for {@code s}. Later occurrences overwrite
     * earlier ones, so each character ends up mapped to its rightmost index.
     */
    private void build_skip_table(String s)
    {
        skip_ = new HashMap<>();
        for (int i = 0; i < s.length(); i++)
        {
            skip_.put(s.charAt(i), i);
        }
    }

    /**
     * Computes the prefix (border) function of {@code s}: {@code prefix[i]} is the
     * length of the longest proper prefix of {@code s[0..i]} that is also a suffix
     * of {@code s[0..i]}.
     *
     * @return the prefix function; an empty array for an empty string
     */
    private int[] compute_bm_prefix(String s)
    {
        int[] prefix = new int[s.length()];
        int k = 0;
        for (int i = 1; i < s.length(); i++)
        {
            // Fall back through shorter borders until one can be extended by s[i].
            while (k > 0 && s.charAt(k) != s.charAt(i))
            {
                k = prefix[k - 1];
            }
            if (s.charAt(k) == s.charAt(i))
            {
                k++;
            }
            prefix[i] = k;
        }
        return prefix;
    }

    /**
     * Builds the good suffix table for {@code s}. Leaves {@code suffix_} untouched
     * when {@code s} is empty.
     */
    private void build_suffix_table(String s)
    {
        int n = s.length();
        if (n == 0)
        {
            return;
        }
        int[] prefix = compute_bm_prefix(s);
        String reverse_s = new StringBuilder(s).reverse().toString();
        int[] reverse_prefix = compute_bm_prefix(reverse_s);

        // Default shift: align the longest border of the whole pattern with its suffix.
        int default_shift = n - prefix[n - 1];
        suffix_ = new int[n + 1];
        for (int i = 0; i <= n; i++)
        {
            suffix_[i] = default_shift;
        }

        // A border of length len ending at index i of the reversed pattern is an earlier
        // occurrence of the pattern's length-len suffix; aligning it costs i - len + 1.
        // Keep the smallest such shift for each suffix start index n - len.
        for (int i = 0; i < n; i++)
        {
            int len = reverse_prefix[i];
            int index = n - len;
            int shift = i - len + 1;
            if (suffix_[index] > shift)
            {
                suffix_[index] = shift;
            }
        }
    }

    /**
     * Runs the Boyer-Moore scan.
     *
     * @return index of the first occurrence of {@code pattern} in {@code text},
     *         or -1 if there is none; 0 for an empty pattern
     */
    private int do_search(String text, String pattern)
    {
        int n = pattern.length();
        if (n == 0)
        {
            return 0;
        }
        build_skip_table(pattern);
        build_suffix_table(pattern);
        int index_end = text.length() - n;
        int i = 0;
        while (i <= index_end)
        {
            // Compare the current window right to left.
            int j = n;
            while (j > 0 && text.charAt(i + j - 1) == pattern.charAt(j - 1))
            {
                j--;
            }
            if (j == 0)
            {
                return i;
            }
            // The bad character is the TEXT character of the current window that failed
            // to match; look up where it last occurs in the pattern (-1 if it never does).
            Integer last = skip_.get(text.charAt(i + j - 1));
            int k = (last == null) ? -1 : last;
            int bad_shift = j - 1 - k;
            // bad_shift is <= 0 when the last occurrence lies right of the mismatch;
            // suffix_[j] >= 1 then wins and guarantees progress.
            i += Math.max(bad_shift, suffix_[j]);
        }
        return -1;
    }

    /**
     * Returns the index of the first occurrence of {@code needle} in {@code haystack}.
     *
     * @param haystack text to search in
     * @param needle   pattern to search for
     * @return the index of the first occurrence, 0 when {@code needle} is empty,
     *         or -1 when {@code needle} does not occur in {@code haystack}
     */
    public int strStr(String haystack, String needle)
    {
        if (needle.isEmpty())
        {
            return 0;
        }
        if (haystack.length() < needle.length())
        {
            return -1;
        }
        return do_search(haystack, needle);
    }
}