布隆过滤器含义
- 先准备一个长度为n的二进制向量
- 再准备若干个哈希函数,每个函数计算出来的索引都落在 0 到 n-1 之间
- 某数据通过这组哈希函数计算出若干个索引,若向量中这些索引位置的值都为1,则表明该数据大概率存在;只要有一位为0,则该数据一定不存在
- 有误判率:可能把不存在的数据误判为存在(假阳性),但不会把已存在的数据判为不存在
- 设计难度在于:随机映射函数的设计 和 二进制向量的长度设为多少?
引用代码——guava实现布隆过滤器
<!-- Maven coordinates for Guava 19.0; the snippet below uses its BloomFilter and Funnels classes. -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>19.0</version>
</dependency>
/** Number of insertions the filter is sized for; passed to BloomFilter.create. */
private static final int size = 1000000;
/** False-positive probability requested from BloomFilter.create. */
private static final double fpp = 0.01;
/** Integer Bloom filter built from {@code size} and {@code fpp}. */
private static final BloomFilter<Integer> bloomFilter =
        BloomFilter.create(Funnels.integerFunnel(), size, fpp);
/**
 * Measures how many false positives the filter reports.
 *
 * <p>Inserts the integers {@code 0 .. size-1}, then probes the {@code size}
 * integers that follow, none of which were inserted. Every probe that
 * {@code mightContain} accepts is therefore a false positive; each one is
 * printed and the total is printed at the end.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // Insert exactly as many elements as the filter was sized for.
    for (int i = 0; i < size; i++) {
        bloomFilter.put(i);
    }

    // Probe values that were never inserted; any hit is a misjudgement.
    int count = 0;
    for (int i = size; i < 2 * size; i++) {
        if (bloomFilter.mightContain(i)) {
            count++;
            System.out.println(i + "误判了");
        }
    }
    System.out.println("总共的误判数:" + count);
}
引用代码——redis实现布隆过滤器
/**
 * Demo of a Bloom filter whose bit vector is stored in a Redis bitmap.
 *
 * <p>The bit count and the number of hash functions are derived from
 * {@link #expectedInsertions} and {@link #fpp}. Each key is hashed once with
 * Murmur3 (128-bit, truncated to a long) and the individual bit indexes are
 * derived from that single hash value.
 */
public class RedisMain {
    /** Number of keys the filter is sized for. */
    static final int expectedInsertions = 100;
    /** Target false-positive probability. */
    static final double fpp = 0.01;

    /** Redis key of the bitmap that backs the filter. */
    private static final String BLOOM_KEY = "codebear:bloom";
    /** Charset used when feeding string keys to the hash function. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /** Length of the bit vector. */
    private static final long numBits;
    /** Number of bit indexes computed per key. */
    private static final int numHashFunctions;

    static {
        numBits = optimalNumOfBits(expectedInsertions, fpp);
        numHashFunctions = optimalNumOfHashFunctions(expectedInsertions, numBits);
    }

    /**
     * Inserts the keys "0" .. "expectedInsertions-1" into the Redis-backed
     * filter, then queries the same keys and prints one verdict per key.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // try-with-resources so the connection is released even if a command fails.
        try (Jedis jedis = new Jedis("192.168.0.109", 6379)) {
            for (int i = 0; i < expectedInsertions; i++) {
                for (long index : getIndexs(String.valueOf(i))) {
                    jedis.setbit(BLOOM_KEY, index, true);
                }
            }

            for (int i = 0; i < expectedInsertions; i++) {
                // Exactly one verdict per key: a single unset bit proves absence.
                if (mightContain(jedis, String.valueOf(i))) {
                    System.out.println(i + "可能重复");
                } else {
                    System.out.println(i + "肯定没有重复");
                }
            }
        }
    }

    /**
     * Checks whether every bit index derived from {@code key} is set.
     *
     * @param jedis open connection used to read the bitmap
     * @param key   key to look up
     * @return {@code false} as soon as one bit is unset, {@code true} if all are set
     */
    private static boolean mightContain(Jedis jedis, String key) {
        for (long index : getIndexs(key)) {
            if (!jedis.getbit(BLOOM_KEY, index)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Computes the {@code numHashFunctions} bit indexes for a key.
     *
     * @param key key to hash
     * @return indexes in the range {@code [0, numBits)}
     */
    private static long[] getIndexs(String key) {
        long hash1 = hash(key);
        long hash2 = hash1 >>> 16;
        long[] result = new long[numHashFunctions];
        for (int i = 0; i < numHashFunctions; i++) {
            // i-th index is derived as hash1 + i * hash2 (may overflow and wrap).
            long combinedHash = hash1 + i * hash2;
            if (combinedHash < 0) {
                // Bitwise NOT maps every negative long to a non-negative one,
                // including Long.MIN_VALUE, which plain negation would not.
                combinedHash = ~combinedHash;
            }
            result[i] = combinedHash % numBits;
        }
        return result;
    }

    /**
     * Hashes a key with Murmur3 128-bit and returns the hash as a long.
     *
     * @param key key to hash
     * @return the value of {@code asLong()} on the resulting hash code
     */
    private static long hash(String key) {
        return Hashing.murmur3_128().hashObject(key, Funnels.stringFunnel(UTF_8)).asLong();
    }

    /**
     * Computes the number of hash functions as {@code round(m / n * ln 2)}, at least 1.
     *
     * @param n expected number of insertions
     * @param m number of bits in the filter
     * @return number of hash functions to use
     */
    private static int optimalNumOfHashFunctions(long n, long m) {
        return Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
    }

    /**
     * Computes the bit-vector length as {@code -n * ln(p) / (ln 2)^2}, truncated to a long.
     *
     * @param n expected number of insertions
     * @param p target false-positive probability; 0 is replaced by
     *          {@code Double.MIN_VALUE} so the logarithm stays finite
     * @return number of bits for the filter
     */
    private static long optimalNumOfBits(long n, double p) {
        if (p == 0) {
            p = Double.MIN_VALUE;
        }
        return (long) (-n * Math.log(p) / (Math.log(2) * Math.log(2)));
    }
}