package com.redis.set;

import java.util.BitSet;

/**
 * A minimal Bloom filter: a fixed-length bit array probed by three hash functions.
 *
 * <p>{@link #add(String)} sets three bits per key and {@link #find(String)} checks the same
 * three bits. Bits are never cleared, so a key that was added is always found, while a key that
 * was never added may still be reported as present (a false positive) when other keys happen to
 * have set all three of its bits.
 *
 * <p>The original author's note targets a false-positive rate of at most 0.1%. This class does
 * not enforce that; the real rate depends on the chosen size and on how many keys are added.
 */
public class BloomFilter {

    /** Number of addressable bits; every hash is reduced into {@code [0, size)}. */
    private final int size;

    /** The bit array (bitmap) backing the filter. */
    private final BitSet bits;

    /**
     * Creates an empty filter.
     *
     * @param size number of bits in the filter; must be positive
     * @throws IllegalArgumentException if {@code size} is zero or negative
     */
    public BloomFilter(int size) {
        if (size <= 0) {
            // A zero size would make every "% size" divide by zero; a negative one cannot
            // back a BitSet.
            throw new IllegalArgumentException("size must be positive: " + size);
        }
        this.size = size;
        this.bits = new BitSet(size);
    }

    /**
     * Records {@code key} by setting the bit chosen by each of the three hash functions.
     *
     * @param key the key to record; must not be {@code null}
     */
    public void add(String key) {
        bits.set(hash_1(key));
        bits.set(hash_2(key));
        bits.set(hash_3(key));
    }

    /**
     * Tests whether {@code key} might have been added.
     *
     * @param key the key to look up; must not be {@code null}
     * @return {@code false} if the key was definitely never added; {@code true} if all three of
     *     its bits are set (which can also happen for a key that was never added)
     */
    public boolean find(String key) {
        // Short-circuits exactly like the original guard chain: later hashes are only
        // computed when the earlier bits are set.
        return bits.get(hash_1(key)) && bits.get(hash_2(key)) && bits.get(hash_3(key));
    }

    /**
     * Multiplicative hash: {@code hash = 33 * hash + c} over the characters of {@code key}.
     *
     * @param key the key to hash; must not be {@code null}
     * @return a bit index in {@code [0, size)}
     */
    public int hash_1(String key) {
        int hash = 0;
        for (int i = 0; i < key.length(); i++) {
            hash = 33 * hash + key.charAt(i);
        }
        return toIndex(hash);
    }

    /**
     * FNV-style hash (xor each character in, then multiply by the prime) followed by a
     * shift/add mixing stage.
     *
     * @param key the key to hash; must not be {@code null}
     * @return a bit index in {@code [0, size)}
     */
    public int hash_2(String key) {
        final int p = 16777619;
        int hash = (int) 2166136261L;
        for (int i = 0; i < key.length(); i++) {
            // Parenthesised on purpose: "hash ^ c * p" parses as "hash ^ (c * p)", which
            // makes the loop a commutative XOR and lets all anagrams collide.
            hash = (hash ^ key.charAt(i)) * p;
        }
        // Was "<< 33"; int shift distances are taken modulo 32, so that was really "<< 1".
        hash += hash << 13;
        hash ^= hash >> 7;
        hash += hash << 3;
        hash ^= hash >> 17;
        hash += hash << 5;
        return toIndex(hash);
    }

    /**
     * Per-character add/shift hash with a final shift/add/xor mixing stage.
     *
     * @param key the key to hash; must not be {@code null}
     * @return a bit index in {@code [0, size)}
     */
    public int hash_3(String key) {
        int hash = 0;
        for (int i = 0; i < key.length(); ++i) {
            hash += key.charAt(i);
            hash += (hash << 10);
            hash += (hash >> 6);
        }
        hash += (hash << 3);
        hash ^= (hash >> 11);
        hash += (hash << 15);
        return toIndex(hash);
    }

    /**
     * Reduces a raw 32-bit hash to a valid bit index.
     *
     * <p>The hash is widened to {@code long} before taking the absolute value because
     * {@code Math.abs(Integer.MIN_VALUE)} is still negative as an {@code int}, which would
     * yield a negative index. For every other input the result equals
     * {@code Math.abs(hash) % size}.
     */
    private int toIndex(int hash) {
        return (int) (Math.abs((long) hash) % size);
    }

    /**
     * Demo: prints the three hash values for the key {@code "1"} and the approximate memory,
     * in megabytes, needed for {@code Integer.MAX_VALUE} bits.
     */
    public static void main(String[] args) {
        // Spread the bits as widely as possible (the hash functions were taken from a paper,
        // per the original author's note). Note that this allocates the full bit array,
        // about 255 MB by the arithmetic below.
        BloomFilter bloomFilter = new BloomFilter(Integer.MAX_VALUE);
        System.out.println(bloomFilter.hash_1("1"));
        System.out.println(bloomFilter.hash_2("1"));
        System.out.println(bloomFilter.hash_3("1"));

        // Memory used: Integer.MAX_VALUE is roughly 2.1 billion bits; 1 byte = 8 bits.
        int a = Integer.MAX_VALUE;
        System.out.println(a / 8);
        int b = a / 8;    // bytes
        int c = b / 1024; // KB
        int d = c / 1024; // MB
        System.out.println(d); // memory size in MB
    }
}
package com.redis.set; import com.google.common.hash.BloomFilter; import com.google.common.hash.Funnels; public class GoogleBloomFilter { public static void main(String[] args) { int dataSize = 10000000; double fpp = 0.001; //误判率 BloomFilter bloomFilter = BloomFilter.create(Funnels.integerFunnel(), dataSize, fpp); for (int i = 0; i < 10000000; i++) { bloomFilter.put(i); } //测试误判率 --测试不存在的数据 是一种概率性的问题 int t = 0; for (int i = 20000000; i < 30000000; i++) { if (bloomFilter.mightContain(i)){ t++; } //表示存在 } System.out.println("误判的个数: " + t); System.out.println("误判率 " + t / dataSize); } }