Hash Function:
哈希函数是把任意长度的输入数据映射为固定长度输出(即哈希值)的函数,参见 http://en.wikipedia.org/wiki/Hash_function
Bloom Filter:
用 Java 实现的 Bloom Filter 代码如下:
import java.util.BitSet;
/**
 * A fixed-size Bloom filter for strings.
 *
 * <p>Each added value sets one bit per hash function in a shared {@link BitSet}. Bits are only
 * ever set, never cleared, so a value that was added is always reported as present, while a value
 * that was never added may occasionally be reported as present (a false positive) if all of its
 * bit positions happen to have been set by other values.
 */
public class BloomFilter {
    /**
     * Number of addressable bits: 2^25 (33,554,432). This is the same value the earlier
     * expression {@code 2 << 24} produced. It must be a power of two because
     * {@link SimpleHash#hash(String)} reduces the hash with a {@code cap - 1} bit mask.
     */
    private static final int DEFAULT_SIZE = 1 << 25;

    /** Multipliers for the polynomial hashes; one hash function is created per entry. */
    private static final int[] seed = new int[] {
        5, 7, 11, 13, 31, 37, 61
    };

    private final BitSet bits = new BitSet(DEFAULT_SIZE);
    private final SimpleHash[] func = new SimpleHash[seed.length];

    /** Creates an empty filter with one {@link SimpleHash} per seed. */
    public BloomFilter() {
        for (int i = 0; i < seed.length; i++) {
            func[i] = new SimpleHash(DEFAULT_SIZE, seed[i]);
        }
    }

    /**
     * Records {@code value} in the filter by setting the bit chosen by every hash function.
     *
     * @param value the string to add; {@code null} is ignored so that it stays consistent with
     *     {@link #contains(String)}, which never reports {@code null} as present
     */
    public void add(String value) {
        if (value == null) {
            return;
        }
        for (SimpleHash f : func) {
            bits.set(f.hash(value), true);
        }
    }

    /**
     * Tests whether {@code value} may have been added.
     *
     * @param value the string to look up
     * @return {@code false} if {@code value} is {@code null} or at least one of its bits is
     *     unset; {@code true} if every bit selected by the hash functions is set
     */
    public boolean contains(String value) {
        if (value == null) {
            return false;
        }
        for (SimpleHash f : func) {
            // One unset bit is enough to prove the value was never added.
            if (!bits.get(f.hash(value))) {
                return false;
            }
        }
        return true;
    }

    /** Polynomial string hash whose result is reduced with a {@code cap - 1} bit mask. */
    public static class SimpleHash {
        private final int cap;
        private final int seed;

        /**
         * @param cap size of the target bit range; the mask in {@link #hash(String)} only covers
         *     the whole range {@code [0, cap)} when this is a power of two
         * @param seed multiplier applied to the running hash before each character is added
         */
        SimpleHash(int cap, int seed) {
            this.cap = cap;
            this.seed = seed;
        }

        /**
         * Hashes {@code value} as {@code result = result * seed + char} over all characters.
         *
         * @param value the string to hash; must not be {@code null}
         * @return {@code (cap - 1) & hash}, which is {@code 0} for the empty string
         * @throws NullPointerException if {@code value} is {@code null}
         */
        public int hash(String value) {
            int result = 0;
            int len = value.length();
            for (int i = 0; i < len; i++) {
                // int overflow wraps around; the mask below keeps the index non-negative.
                result = result * seed + value.charAt(i);
            }
            return (cap - 1) & result;
        }
    }
}
测试程序代码如下:
/** Small demo: fills a BloomFilter with three host names and prints two membership checks. */
public class TestBloomFilter {
    public static void main(String[] args) {
        BloomFilter filter = new BloomFilter();

        // Insert the known hosts in the same order as before.
        String[] knownHosts = {"www.baidu.com", "www.sohu.com", "www.163.com"};
        for (String host : knownHosts) {
            filter.add(host);
        }

        // A host that was never added.
        System.out.println(filter.contains("www.cctv.com")
                ? "www.cctv.com is in the set"
                : "www.cctv.com is not in the set");

        // A host that was added above.
        System.out.println(filter.contains("www.baidu.com")
                ? "www.baidu.com is in the set"
                : "www.baidu.com is not in the set");
    }
}