字符串匹配可以通过比较哈希值来完成。Rabin-Karp 算法使用滚动哈希来提高效率。
代码如下(仅比较哈希值,未使用滚动哈希):每个窗口都要重新计算一次哈希,时间复杂度为 O(mn)(m 为主串长度,n 为模式串长度),与朴素匹配相同。
package L5;
/**
 * Hash-based substring search without a rolling hash.
 *
 * <p>Every window of the text is hashed from scratch, so the search costs
 * O(m * n) character reads for a text of length m and a pattern of length n,
 * the same as a naive comparison.
 */
public class L1 {
    /** Base of the polynomial hash. */
    static final int seed = 31;

    /**
     * Computes the polynomial hash {@code ((c0 * 31 + c1) * 31 + c2) ...} of a string.
     *
     * <p>The arithmetic is done in {@code long} and silently wraps around on
     * overflow, so different strings can share a hash value.
     *
     * @param str the string to hash
     * @return the hash value; {@code 0} for the empty string
     */
    static long hash(String str) {
        long hash = 0;
        for (int i = 0; i < str.length(); i++) {
            hash = seed * hash + str.charAt(i);
        }
        return hash;
    }

    /**
     * Prints {@code "match: <index>"} for every position in {@code s} where
     * {@code p} occurs.
     *
     * @param s the text to search in
     * @param p the pattern to search for
     */
    static void match(String s, String p) {
        long patternHash = hash(p);
        int patternLength = p.length();
        for (int i = 0; i + patternLength <= s.length(); i++) {
            long windowHash = hash(s.substring(i, i + patternLength));
            // Equal hashes do not imply equal strings (e.g. "Aa" and "BB" both
            // hash to 2112), so confirm the candidate character by character.
            if (windowHash == patternHash && s.regionMatches(i, p, 0, patternLength)) {
                System.out.println("match: " + i);
            }
        }
    }

    public static void main(String[] args) {
        String s = "ababcsdabc";
        String p = "abc";
        match(s, p);
    }
}
滚动哈希:窗口每滑动一位只需 O(1) 时间更新哈希值,总时间复杂度为 O(m)(m 为主串长度)。
package L5;
/**
 * Rabin-Karp substring search using a rolling polynomial hash.
 *
 * <p>All hash arithmetic is done in {@code long} and relies on its natural
 * wraparound (i.e. it is arithmetic modulo 2^64). This keeps the directly
 * computed hash and the rolled hash of the same window identical.
 */
public class L1 {
    /** Base of the polynomial hash. */
    static final int seed = 31;

    /**
     * Computes the polynomial hash {@code ((c0 * 31 + c1) * 31 + c2) ...} of a string.
     *
     * <p>Different strings can share a hash value (the original author's note
     * estimates roughly 0-3 collisions for 100000 characters; not verified here).
     *
     * @param str the string to hash
     * @return the hash value, wrapped modulo 2^64; {@code 0} for the empty string
     */
    static long hash(String str) {
        long hash = 0;
        for (int i = 0; i < str.length(); i++) {
            hash = seed * hash + str.charAt(i);
        }
        return hash;
    }

    /**
     * Computes the hash of every length-{@code n} window of {@code s}.
     *
     * <p>Only the first window is hashed from scratch; each following window is
     * derived from the previous one in constant time.
     *
     * @param s the text
     * @param n the window length
     * @return an array whose element {@code i} equals
     *         {@code hash(s.substring(i, i + n))}; empty when {@code n} exceeds
     *         the length of {@code s}
     * @throws StringIndexOutOfBoundsException if {@code n} is negative
     */
    static long[] hash(String s, int n) {
        if (n > s.length()) {
            // No window fits; avoids allocating an array of negative size.
            return new long[0];
        }
        long[] res = new long[s.length() - n + 1];
        res[0] = hash(s.substring(0, n));

        // seed^n in wrapping long arithmetic. Math.pow with an int cast would
        // saturate at Integer.MAX_VALUE and corrupt every rolled hash.
        long highPower = 1;
        for (int k = 0; k < n; k++) {
            highPower *= seed;
        }

        for (int i = n; i < s.length(); i++) {
            char newChar = s.charAt(i);
            char oldChar = s.charAt(i - n);
            // Shift the previous window by one position, append the incoming
            // character and remove the contribution of the outgoing one.
            res[i - n + 1] = res[i - n] * seed + newChar - highPower * oldChar;
        }
        return res;
    }

    /**
     * Prints {@code "match: <index>"} for every position in {@code s} where
     * {@code p} occurs. Prints nothing when {@code p} is longer than {@code s}.
     *
     * @param s the text to search in
     * @param p the pattern to search for
     */
    static void match(String s, String p) {
        long patternHash = hash(p);
        int patternLength = p.length();
        long[] windowHashes = hash(s, patternLength);
        for (int i = 0; i < windowHashes.length; i++) {
            // Equal hashes do not imply equal strings, so confirm the candidate
            // character by character before reporting it.
            if (windowHashes[i] == patternHash && s.regionMatches(i, p, 0, patternLength)) {
                System.out.println("match: " + i);
            }
        }
    }

    public static void main(String[] args) {
        String s = "ababcsdabc";
        String p = "abc";
        match(s, p);
    }
}