介绍
- 布隆过滤器的优点:
支持海量数据场景下高效判断元素是否存在
布隆过滤器占用空间小:不存储数据本身,仅存储hash结果取模运算后的位标记
不存储数据本身,比较适合某些保密场景
- 布隆过滤器的缺点:
不存储数据本身,且多个元素可能共用同一个位标记,所以标准布隆过滤器只能添加、不可删除:清除某个元素对应的位,会使其他仍然存在的元素被误判为“不存在”(下文使用的 CountingBloomFilter 用计数器代替单个位,因此可以支持删除)
由于存在hash碰撞,匹配结果如果是“存在于过滤器中”,实际不一定存在
当容量快满时,hash碰撞的概率变大,查询的误判率也就随之增加了
- 布隆过滤器判断结果为“存在”时,元素不一定真的存在;判断结果为“不存在”时,则一定不存在。因此,布隆过滤器不适合那些要求结果必须精准的应用场景。
配置参数初始化
yml配置文件
black:
  # Expected number of elements, in units of 10,000 (the Java code multiplies this by 10000).
  data-quantity: 20000
  # Target false-positive rate is 1 / (false-rate * 10000); 100 means 1e-6.
  false-rate: 100
配置类设置
/**
 * Sizing parameters for the blacklist Bloom filter, injected from the
 * {@code black.*} configuration properties.
 *
 * <p>Both values are plain multipliers: the filter manager multiplies each of
 * them by 10000 before using it.
 */
@Component
public class BlackFilterConfig {

    /** Expected element count, in units of 10,000. */
    private Integer dataQuantity;

    /** Denominator of the target false-positive rate, in units of 10,000. */
    private Integer falseRate;

    /**
     * @return the configured expected element count (units of 10,000)
     */
    public Integer getDataQuantity() {
        return this.dataQuantity;
    }

    /**
     * Injected from {@code black.data-quantity}; falls back to 20000 when the
     * property is absent.
     *
     * @param dataQuantity expected element count (units of 10,000)
     */
    @Value("${black.data-quantity:20000}")
    public void setDataQuantity(Integer dataQuantity) {
        this.dataQuantity = dataQuantity;
    }

    /**
     * @return the configured false-positive denominator (units of 10,000)
     */
    public Integer getFalseRate() {
        return this.falseRate;
    }

    /**
     * Injected from {@code black.false-rate}; falls back to 100 when the
     * property is absent.
     *
     * @param falseRate false-positive denominator (units of 10,000)
     */
    @Value("${black.false-rate:100}")
    public void setFalseRate(Integer falseRate) {
        this.falseRate = falseRate;
    }
}
初始化创建布隆过滤器
@Component
@Order(value = 1)
public class InitRunner implements CommandLineRunner {
private static final Logger log = LoggerFactory.getLogger(InitRunner.class);
@Autowired
private BlackFilterConfig blackFilterConfig;
@Override
public void run(String... args) {
BlackFilterManager.getInstance().createApacheBloomFilter(blackFilterConfig.getDataQuantity(),blackFilterConfig.getFalseRate());
}
创建工具类
/**
 * Singleton holder for the blacklist {@code CountingBloomFilter}.
 *
 * <p>The filter is created once through {@link #createApacheBloomFilter} and
 * then handed out by {@link #getFilter()}. Until creation has happened,
 * {@link #getFilter()} returns {@code null}.
 */
public class BlackFilterManager {

    private static final Logger log = LoggerFactory.getLogger(BlackFilterManager.class);

    /** Both configuration values are expressed in units of 10,000 ("w"). */
    private static final int UNIT = 10000;

    /** Largest configuration value whose product with {@link #UNIT} still fits in an int. */
    private static final int MAX_CONFIG_VALUE = Integer.MAX_VALUE / UNIT;

    /** Decimal places kept for 1 / (fr * 10000); enough that the smallest legal rate is not rounded to 0. */
    private static final int RATE_SCALE = 20;

    private static final BlackFilterManager instance = new BlackFilterManager();

    /**
     * Written under the instance lock in {@link #createApacheBloomFilter} but
     * read without it in {@link #getFilter()}, hence volatile.
     */
    private volatile CountingBloomFilter filter = null;

    private BlackFilterManager() {
    }

    /**
     * @return the single shared manager instance
     */
    public static BlackFilterManager getInstance() {
        return instance;
    }

    /**
     * @return the shared filter, or {@code null} if it has not been created yet
     */
    public CountingBloomFilter getFilter() {
        return filter;
    }

    /**
     * Builds the shared filter from the configured sizing values. Does nothing
     * when either argument is {@code null} or when a filter already exists.
     *
     * @param dq expected number of elements, in units of 10,000
     * @param fr denominator of the target false-positive rate, in units of
     *           10,000 (the rate is {@code 1 / (fr * 10000)})
     * @throws IllegalArgumentException if a non-null argument is not in
     *         {@code 1..MAX_CONFIG_VALUE}; values outside that range would
     *         divide by zero or overflow {@code int}
     */
    public synchronized void createApacheBloomFilter(Integer dq, Integer fr) {
        if (dq == null || fr == null) {
            return;
        }
        if (filter != null) {
            return;
        }

        int rateDenominator = toUnits("black.false-rate", fr);
        int numMembers = toUnits("black.data-quantity", dq);

        // A generous scale keeps small rates from rounding to zero (log(0) is -Infinity).
        BigDecimal divide = BigDecimal.ONE
                .divide(BigDecimal.valueOf(rateDenominator), RATE_SCALE, java.math.RoundingMode.HALF_UP)
                .stripTrailingZeros();
        double falsePosRate = divide.doubleValue();
        String rateText = divide.toPlainString();
        log.info("加载过滤器配置[black.false-rate] :{}w | 计算后 1/({} * 10000) = {}", fr, fr, rateText);
        log.info("加载过滤器配置[black.data-quantity] :{}w | 计算后 {} * 10000 = {}", dq, dq, numMembers);

        int vectorSize = getOptimalBloomFilterSize(numMembers, falsePosRate);
        int nbHash = getOptimalK(numMembers, vectorSize);
        log.info("根据数据预估量:{} | 预定误判概率:{} ======》 得到初始化空间大小:{},Hash函数个数:{}",
                numMembers, rateText, vectorSize, nbHash);

        filter = new CountingBloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH);
    }

    /**
     * Validates a configuration value and converts it from units of 10,000
     * to an absolute number.
     */
    private static int toUnits(String property, int value) {
        if (value <= 0 || value > MAX_CONFIG_VALUE) {
            throw new IllegalArgumentException(
                    property + " must be between 1 and " + MAX_CONFIG_VALUE + ", got " + value);
        }
        return value * UNIT;
    }

    /**
     * Computes the bit-vector size {@code m = -n * ln(p) / (ln 2)^2}.
     *
     * <p>The filter constructor takes an {@code int} size, so a requirement
     * above {@link Integer#MAX_VALUE} is capped (and logged) rather than
     * silently saturated by the cast; the effective false-positive rate is
     * then higher than the configured one.
     */
    private static int getOptimalBloomFilterSize(int numRecords, double falsePosRate) {
        double ln2 = Math.log(2);
        double required = -(double) numRecords * Math.log(falsePosRate) / (ln2 * ln2);
        if (required > Integer.MAX_VALUE) {
            log.warn("布隆过滤器所需位数 {} 超过 int 上限 {},已按上限截断,实际误判率将高于配置值",
                    (long) required, Integer.MAX_VALUE);
            return Integer.MAX_VALUE;
        }
        return Math.max(1, (int) required);
    }

    /**
     * Computes the hash-function count {@code k = round(m / n * ln 2)},
     * never less than 1.
     */
    private static int getOptimalK(int numMembers, int vectorSize) {
        long k = Math.round((double) vectorSize / numMembers * Math.log(2));
        return (int) Math.max(1L, k);
    }
}
使用
key = new Key(("数据").getBytes(StandardCharsets.UTF_8));
BlackFilterManager.getInstance().getFilter().add(key);
- 判断数据(true不一定存在,false一定不存在)
if (BlackFilterManager.getInstance().getFilter().membershipTest(key)) {
}