Z algorithm 和其他几个字符串匹配算法其实很类似,主要利用了 pattern 自身的重复信息。
具体思想在这里,这个slides写得非常好懂。
还有个demo也很好。
下面是我的实现(暂时还没有测试是否 bug-free):
/**
 * Computes the Z array of a string.
 *
 * <p>For a string {@code s} of length {@code n}, {@code z[i]} is the length of the longest
 * substring starting at position {@code i} that is also a prefix of {@code s}. By the
 * convention used here, {@code z[0]} is {@code n}.
 *
 * <p>Example: for {@code "aabcaabxaaaz"} the Z array is
 * {@code 12, 1, 0, 0, 3, 1, 0, 0, 2, 2, 1, 0}.
 *
 * <p>Comparisons are made on {@code char} values (UTF-16 code units), as produced by
 * {@link String#toCharArray()}.
 */
public class ZAlgo {
    /** Z values of the most recently processed string; empty until {@link #populateZ} runs. */
    private int[] z;

    /** Characters of the most recently processed string. */
    private char[] text;

    /** Creates an instance with an empty Z array, so printing before populating is harmless. */
    public ZAlgo() {
        z = new int[0];
        text = new char[0];
    }

    /**
     * Returns the length of the longest match between the text starting at {@code start} and
     * the prefix of the text, given that the first {@code matched} characters are already
     * known to match.
     *
     * @param start position in the text where the candidate match begins
     * @param matched number of leading characters already known to equal the prefix
     * @return total match length, at least {@code matched}
     */
    private int extendMatch(int start, int matched) {
        int length = matched;
        while (start + length < text.length && text[start + length] == text[length]) {
            ++length;
        }
        return length;
    }

    /**
     * Computes and stores the Z array of {@code str}, replacing any previous result.
     *
     * <p>The window {@code [l, r]} (both inclusive) is the match with the prefix that reaches
     * furthest to the right among those found so far. Positions inside the window reuse
     * previously computed values; characters are only compared explicitly past {@code r}.
     * An empty string yields an empty Z array.
     *
     * @param str the string to analyse
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public void populateZ(String str) {
        text = str.toCharArray();
        z = new int[text.length];
        if (text.length == 0) {
            // Nothing to compute; z[0] does not exist for an empty string.
            return;
        }
        z[0] = text.length;
        int l = 0;
        int r = 0;
        for (int index = 1; index < text.length; ++index) {
            if (index > r) {
                // Outside the window: nothing is known, compare from scratch.
                z[index] = extendMatch(index, 0);
                if (z[index] > 0) {
                    l = index;
                    r = index + z[index] - 1;
                }
            } else {
                // Inside the window: text[index..r] equals text[index-l..r-l].
                int remaining = r - index + 1;
                if (z[index - l] < remaining) {
                    // The mirrored match ends strictly inside the window, so it carries over.
                    z[index] = z[index - l];
                } else {
                    // The match reaches the window edge; keep comparing the text past r.
                    z[index] = extendMatch(index, remaining);
                    l = index;
                    r = index + z[index] - 1;
                }
            }
        }
    }

    /**
     * Returns a copy of the Z array computed by the last call to {@link #populateZ}.
     *
     * @return a new array holding the Z values; empty if nothing has been computed yet
     */
    public int[] getZ() {
        return z.clone();
    }

    /**
     * Prints the Z array on one line of standard output: the label, a colon, then every value
     * followed by {@code ", "}.
     *
     * @param c label printed in front of the values
     */
    public void print_array(char c) {
        System.out.printf("%c:", c);
        for (int value : z) {
            System.out.printf("%d, ", value);
        }
        System.out.println();
    }

    /**
     * Demo entry point: prints the Z array of a sample string.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ZAlgo zalgo = new ZAlgo();
        String input = "aabcaabxaaaz";
        zalgo.populateZ(input);
        zalgo.print_array('z');
    }
}