什么是布隆过滤器?
以下摘自百度百科
布隆过滤器(BloomFilter)是1970年由布隆提出的。它实际上是一个很长的二进制向量和一系列随机映射函数。布隆过滤器可以用于检索一个元素是否在一个集合中。它的优点是空间效率和查询时间都比一般的算法要好得多,缺点是有一定的误识别率和删除困难。
以下为借鉴其他资料实现的布隆过滤器代码
package com.zmx.common.common.utils.bitmap;
import java.util.BitSet;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * A simple Bloom filter for strings, backed by a {@link BitSet}.
 *
 * <p>The bit array holds {@code seeds.length * capacity} bits, and every element is mapped to one
 * bit per seed. Membership answers may be false positives but never false negatives, unless the
 * filter has been cleared in between (see the clear rate below).
 *
 * <p>When a positive clear rate is configured, the filter wipes itself once the ratio of
 * bit-set operations to total bits reaches that rate; everything stored before is forgotten.
 *
 * <p>{@link #add}, {@link #contain} and {@link #addIfNoExit} are synchronized on this instance,
 * because {@link BitSet} itself is not safe for concurrent modification.
 *
 * @Description: custom Bloom filter implementation
 * @ClassName: BloomFileter
 * @Author zhaomxr
 * @Date 2021-12-02 14:18
 */
public class BloomFileter {

    /** Expected element count used by the no-argument constructor. */
    private static final int DEFAULT_CAPACITY = 1000000;

    /**
     * Total number of bits in the filter (number of hash functions times the expected capacity).
     */
    private final int size;

    /**
     * Bit array holding the hashed positions.
     */
    private final BitSet bitSet;

    /**
     * Clear ratio: with 0.9 the filter is wiped once {@link #usedRate()} reaches 90%, after which
     * elements are stored from scratch.
     */
    private final double clearRate;

    /** Whether automatic clearing is enabled, i.e. whether {@code clearRate > 0}. */
    private final boolean isClearRate;

    /**
     * Seeds of the hash functions in use. More seeds give a lower false-positive probability at
     * the cost of more memory.
     */
    private final int[] seeds;

    /**
     * Number of bit-set operations performed since the last clear. A bit that was already set is
     * counted again, so this tracks stored elements times hash functions, not distinct bits.
     */
    private final AtomicInteger count = new AtomicInteger(0);

    /**
     * Creates a filter with {@link SeedsEnum#MIDDLE}, a default capacity of
     * {@value #DEFAULT_CAPACITY} elements and automatic clearing disabled.
     */
    public BloomFileter() {
        this(SeedsEnum.MIDDLE, DEFAULT_CAPACITY, 0);
    }

    /**
     * Creates a filter with {@link SeedsEnum#MIDDLE} hash functions.
     *
     * @param capacity  expected number of elements; must be positive
     * @param clearRate usage ratio at which the filter is wiped; values {@code <= 0} disable clearing
     * @throws IllegalArgumentException if {@code capacity} is not positive or the filter would
     *                                  need more than {@link Integer#MAX_VALUE} bits
     */
    public BloomFileter(int capacity, double clearRate) {
        this(SeedsEnum.MIDDLE, capacity, clearRate);
    }

    /**
     * Creates a filter.
     *
     * @param seedsEnum hash-function set to use
     * @param capacity  expected number of elements; must be positive
     * @param clearRate usage ratio at which the filter is wiped; values {@code <= 0} disable clearing
     * @throws NullPointerException     if {@code seedsEnum} is null
     * @throws IllegalArgumentException if {@code capacity} is not positive, the seed set is empty,
     *                                  or the filter would need more than
     *                                  {@link Integer#MAX_VALUE} bits
     */
    public BloomFileter(SeedsEnum seedsEnum, int capacity, double clearRate) {
        int[] configuredSeeds = seedsEnum.getSeeds();
        if (configuredSeeds == null || configuredSeeds.length == 0) {
            throw new IllegalArgumentException("seedsEnum must provide at least one seed");
        }
        if (capacity <= 0) {
            throw new IllegalArgumentException("capacity must be positive: " + capacity);
        }
        // Compute in long so an oversized request is reported instead of wrapping negative.
        long totalBits = (long) configuredSeeds.length * capacity;
        if (totalBits > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                    "capacity too large for " + configuredSeeds.length + " hash functions: " + capacity);
        }
        this.size = (int) totalBits;
        // Copy: the enum exposes its array through a setter/getter and could be changed later.
        this.seeds = configuredSeeds.clone();
        this.bitSet = new BitSet(this.size);
        this.clearRate = clearRate;
        this.isClearRate = clearRate > 0;
    }

    /**
     * Adds an element. If the clear threshold has been reached, the filter is wiped first.
     *
     * @param element element to add
     * @throws NullPointerException if {@code element} is null
     */
    public synchronized void add(String element) {
        // Check whether old data has to be dropped first.
        checkNeedClear();
        for (int seed : seeds) {
            setBitSet(hash(element, seed));
        }
    }

    /**
     * Tests whether an element may have been added.
     *
     * @param element element to look up
     * @return {@code false} if the element is definitely not present, {@code true} if every bit
     *         derived from it is set (which may be a false positive)
     * @throws NullPointerException if {@code element} is null
     */
    public synchronized boolean contain(String element) {
        for (int seed : seeds) {
            // A single unset bit proves the element was never added.
            if (!bitSet.get(hash(element, seed))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns {@code true} if the element already appears to be present; otherwise adds it and
     * returns {@code false}. If the clear threshold has been reached, the filter is wiped before
     * the lookup.
     *
     * @param element element to test and, if absent, add
     * @return {@code true} if the element was already (possibly falsely) present, {@code false}
     *         if it was absent and has now been added
     * @throws NullPointerException if {@code element} is null
     */
    public synchronized boolean addIfNoExit(String element) {
        // Check whether old data has to be dropped first.
        checkNeedClear();
        int[] hashCodes = new int[seeds.length];
        boolean exist = true;
        for (int i = 0; i < seeds.length; i++) {
            hashCodes[i] = hash(element, seeds[i]);
            if (!bitSet.get(hashCodes[i])) {
                exist = false;
            }
        }
        if (!exist) {
            // At least one bit was missing, so the element is new: set every one of its bits.
            for (int hashCode : hashCodes) {
                setBitSet(hashCode);
            }
        }
        return exist;
    }

    /**
     * Wipes the filter when clearing is enabled and the usage ratio has reached the clear rate.
     * Only called from the synchronized public operations.
     */
    private void checkNeedClear() {
        if (isClearRate && usedRate() >= clearRate) {
            bitSet.clear();
            count.set(0);
        }
    }

    /**
     * Sets one bit and counts the operation, whether or not the bit was already set.
     */
    private void setBitSet(int hashCode) {
        count.incrementAndGet();
        bitSet.set(hashCode, true);
    }

    /**
     * Ratio of bit-set operations since the last clear to the total number of bits.
     */
    private double usedRate() {
        return (double) this.count.intValue() / this.size;
    }

    /**
     * Maps an element to a bit index in {@code [0, size)} using the given seed.
     *
     * <p>The seed is both the start value and the multiplier of a polynomial hash over the
     * characters, so different seeds give different indexes even for one-character strings.
     */
    private int hash(String element, int seed) {
        int hashCode = seed;
        for (int i = 0; i < element.length(); i++) {
            hashCode = seed * hashCode + element.charAt(i);
        }
        // Avalanche step (MurmurHash3 32-bit finalizer): small seeds and short strings yield
        // small polynomial values, which would otherwise crowd into the low bit indexes.
        hashCode ^= hashCode >>> 16;
        hashCode *= 0x85ebca6b;
        hashCode ^= hashCode >>> 13;
        hashCode *= 0xc2b2ae35;
        hashCode ^= hashCode >>> 16;
        // Drop the sign bit so the remainder is never negative.
        return (hashCode & Integer.MAX_VALUE) % size;
    }

    /**
     * Predefined seed sets; the number of seeds is the number of bits assigned to each string.
     */
    public enum SeedsEnum {
        /**
         * 4 bits per string.
         */
        VERY_SMALL(new int[]{2, 3, 5, 7}),
        /**
         * 8 bits per string.
         */
        SMALL(new int[]{2, 3, 5, 7, 11, 13, 17, 19}),
        /**
         * 16 bits per string.
         */
        MIDDLE(new int[]{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53}),
        /**
         * 32 bits per string.
         */
        HIGH(new int[]{2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97,
                101, 103, 107, 109, 113, 127, 131});

        private int[] seeds;

        private SeedsEnum(int[] seeds) {
            this.seeds = seeds;
        }

        public int[] getSeeds() {
            return seeds;
        }

        /**
         * Replaces the seeds of this constant. Enum constants are shared process-wide, so this
         * affects every filter created afterwards; filters that already exist keep their own copy.
         */
        public void setSeeds(int[] seeds) {
            this.seeds = seeds;
        }
    }

    /**
     * Small demo: stores a few words plus one million numbers and prints timing and memory use.
     */
    public static void main(String[] args) {
        BloomFileter bloomFileter = new BloomFileter(1100000, 0.9);
        long addStart = System.currentTimeMillis();
        bloomFileter.add("爱");
        bloomFileter.add("意");
        bloomFileter.add("随");
        bloomFileter.add("风");
        bloomFileter.add("起");
        System.out.println(bloomFileter.addIfNoExit("风"));
        System.out.println(bloomFileter.addIfNoExit("起"));
        System.out.println(bloomFileter.addIfNoExit("意"));
        System.out.println(bloomFileter.addIfNoExit("难"));
        System.out.println(bloomFileter.addIfNoExit("平"));
        for (int i = 0; i < 1000000; i++) {
            bloomFileter.add(String.valueOf(i));
        }
        System.out.println("存储元素用时:" + (System.currentTimeMillis() - addStart));
        System.out.println("----------------over----------------");
        // BitSet.size() reports bits, so convert to bytes before scaling to KB/MB/GB.
        long usedBytes = bloomFileter.bitSet.size() / 8;
        System.out.println("bloomFileter占用内存:" + usedBytes / 1024 + "KB");
        System.out.println("bloomFileter占用内存:" + usedBytes / (1024 * 1024) + "MB");
        System.out.println("bloomFileter占用内存:" + usedBytes / (1024 * 1024 * 1024) + "GB");
        System.out.println("------------------------------------");
        System.out.println(bloomFileter.addIfNoExit("难"));
        System.out.println(bloomFileter.addIfNoExit("平"));
    }
}