众所周知,Guava提供的BloomFilter是不支持删除操作的。本文简单实现了一个支持删除的布隆过滤器(原理很简单,高手请绕行):将已删除的数据暂存起来,查询时先判断数据是否已被删除,同时通过定时器和阈值控制过滤器的重建。
一、实现源码
基本代码如下。代码的关键在于BloomFilter的重建逻辑:由于Guava的相关实现几乎都是内部类,访问权限严格,外部无法轻易访问其内部对象。
package com.zte.rate.filter;
// NOTE(review): BloomFilterStrategies, LongAddable and LongAddables appear to be package-private
// in Guava — confirm that these three imports actually compile.
import com.google.common.hash.BloomFilter;
import com.google.common.hash.BloomFilterStrategies;
import com.google.common.hash.Funnel;
import com.google.common.hash.LongAddable;
import com.google.common.hash.LongAddables;
import java.lang.reflect.Constructor;
import java.lang.reflect.Field;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLongArray;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.function.Supplier;
/**
 * Wrapper around a Guava {@code BloomFilter} that adds logical deletion.
 *
 * <p>Removed elements are remembered in a tombstone set that {@link #mightContain} consults before
 * asking the filter. Once enough tombstones have accumulated, the filter is reset, repopulated
 * from the data supplier, and the tombstones are dropped.
 *
 * <p>This version performs no synchronization, although the tombstone set is touched both by
 * callers and by the scheduler thread.
 *
 * @param <T> element type accepted by the filter
 */
public final class ForceBloomFilter<T> {

    /** Shared scheduler that runs the periodic rebuild check. */
    private static final ScheduledExecutorService EXE = Executors.newScheduledThreadPool(2);

    /** Number of pending deletions at which {@link #remove} rebuilds the filter immediately. */
    private static final int REBUILD_COUNT = 5000;

    /** Initial capacity used to size the tombstone set. */
    private static final int MAX_DELETE_COUNT = 10000;

    /** Elements that were removed since the last rebuild. */
    private final Set<T> delElements = new HashSet<>(MAX_DELETE_COUNT);

    /** Source of the authoritative element list used to (re)populate the filter. */
    private final Supplier<List<T>> dataSupply;

    private final BloomFilter<T> bloomFilter;

    /**
     * Populates the filter from the supplier and registers the hourly rebuild check.
     *
     * @param bloomFilter the empty filter to wrap
     * @param dataSupply supplier of the elements the filter should contain
     */
    private ForceBloomFilter(BloomFilter<T> bloomFilter, Supplier<List<T>> dataSupply) {
        this.bloomFilter = bloomFilter;
        this.dataSupply = dataSupply;
        dataSupply.get().forEach(this::put);
        // Recurring check (the original one-shot schedule() ran only once). The threshold is
        // derived from REBUILD_COUNT because remove() already rebuilds at REBUILD_COUNT, so a
        // threshold above that value could never be reached.
        EXE.scheduleAtFixedRate(() -> {
            if (delElements.size() >= REBUILD_COUNT * 0.75) {
                rebuildAndReload();
            }
        }, 1, 1, TimeUnit.HOURS);
    }

    /**
     * Creates a deletable Bloom filter pre-populated from {@code dataSupply}.
     *
     * @param funnel funnel used to hash elements
     * @param expectedInsertions expected number of insertions
     * @param fpp desired false-positive probability
     * @param dataSupply supplier of the elements the filter should contain
     * @param <T> element type
     * @return the populated filter
     */
    public static <T> ForceBloomFilter<T> create(
            Funnel<? super T> funnel, long expectedInsertions, double fpp, Supplier<List<T>> dataSupply) {
        final BloomFilter<T> bloomFilter = BloomFilter.create(funnel, expectedInsertions, fpp);
        return new ForceBloomFilter<>(bloomFilter, dataSupply);
    }

    /** Adds {@code object} to the underlying filter and returns the filter's result. */
    private boolean put(T object) {
        return this.bloomFilter.put(object);
    }

    /**
     * Marks {@code object} as deleted and rebuilds the filter once {@code REBUILD_COUNT} deletions
     * are pending.
     *
     * @param object the element to remove
     * @return {@code true} if the element was not already marked as deleted
     */
    public boolean remove(T object) {
        final boolean add = delElements.add(object);
        if (delElements.size() >= REBUILD_COUNT) {
            rebuildAndReload();
        }
        return add;
    }

    /**
     * Resets the filter, reloads it from the supplier and drops the tombstones. The reset must
     * happen before the tombstones are cleared; otherwise removed elements whose bits are still
     * set would be reported as present again.
     */
    private void rebuildAndReload() {
        rebuild();
        dataSupply.get().forEach(this::put);
        delElements.clear();
    }

    /**
     * Resets the underlying filter to its empty state. Declared without checked exceptions so
     * that callers compile.
     */
    private void rebuild() {
        // TODO: reset the filter's bits (the article's next section does this via reflection).
    }

    /**
     * Tests membership, treating elements that are marked as deleted as absent.
     *
     * @param object the element to look up
     * @return {@code false} if the element is marked as deleted; otherwise the underlying filter's
     *     answer
     */
    public boolean mightContain(T object) {
        if (delElements.contains(object)) {
            return false;
        }
        return this.bloomFilter.mightContain(object);
    }
}
虽然BloomFilter提供了copy函数,但copy出来的对象仍然保留原来的bit值。实际上只要把bit值重置一下,即可完成所谓的重建,但相关的内部类型(如LongAddable)并不对外开放。
二、布隆重建
本节解决上文中关于重建部分的问题,主要是通过反射强行访问一些私有属性,并重置对应的值。
/**
 * Resets the wrapped filter to its empty state by reaching into its private fields via
 * reflection: every word of the {@code data} array is zeroed and the {@code bitCount} counter is
 * replaced with a freshly constructed instance of the same class.
 *
 * <p>NOTE(review): this depends on the private field names {@code bits}, {@code data} and
 * {@code bitCount} of the Guava version in use — re-verify after any Guava upgrade.
 *
 * @throws RuntimeException if any reflective step fails; the original failure is attached as the
 *     cause
 */
private synchronized void rebuild() {
    try {
        final Field bitsField = this.bloomFilter.getClass().getDeclaredField("bits");
        bitsField.setAccessible(true);
        final Object bitsVal = bitsField.get(this.bloomFilter);

        // Clear every word of the backing array.
        final Field dataField = bitsVal.getClass().getDeclaredField("data");
        dataField.setAccessible(true);
        final AtomicLongArray dataVal = (AtomicLongArray) dataField.get(bitsVal);
        for (int i = 0; i < dataVal.length(); ++i) {
            dataVal.set(i, 0);
        }

        // Swap in a new counter of the same runtime class. getDeclaredConstructor() is used
        // because getConstructor() only finds public constructors, which an internal
        // implementation class is not guaranteed to have.
        final Field bitCountField = bitsVal.getClass().getDeclaredField("bitCount");
        bitCountField.setAccessible(true);
        final Constructor<?> constructor = bitCountField.get(bitsVal).getClass().getDeclaredConstructor();
        constructor.setAccessible(true);
        final Object newBitCountVal = constructor.newInstance();
        bitCountField.set(bitsVal, newBitCountVal);
    } catch (Exception e) {
        // Keep the cause so the failing reflective step can be diagnosed.
        throw new RuntimeException("ReBuild Bloom Filter error", e);
    }
}
三、并发控制
过滤器在重建时,理论上正在进行的增删操作需要被阻塞,而增删操作彼此之间是可以并发进行的,所以读写锁可以很好地解决该问题。这里提供完整的代码示例:
/**
 * Wrapper around a Guava {@code BloomFilter} that adds logical deletion and guards rebuilds with
 * a read/write lock.
 *
 * <p>Removed elements are remembered in a concurrent tombstone set that {@link #mightContain}
 * consults before asking the filter. Regular operations ({@code put}, {@code remove},
 * {@code mightContain}) share the read lock and may run concurrently; a rebuild takes the write
 * lock, resets the filter, reloads it from the data supplier and drops the tombstones as one
 * atomic step, so readers never observe a half-rebuilt filter.
 *
 * <p>{@code null} elements are not supported because the tombstone set rejects them. Each
 * instance registers a recurring task on the shared scheduler that is never cancelled.
 *
 * @param <T> element type accepted by the filter
 */
public final class ForceBloomFilter<T> {

    /**
     * Shared scheduler that runs the periodic rebuild check. Its threads are named and marked as
     * daemons so that an idle scheduler does not keep the JVM alive after {@code main} returns.
     */
    private static final ScheduledExecutorService EXE = Executors.newScheduledThreadPool(2, task -> {
        final Thread worker = new Thread(task, "force-bloom-filter-rebuild");
        worker.setDaemon(true);
        return worker;
    });

    /** Number of pending deletions at which {@link #remove} rebuilds the filter immediately. */
    private static final int REBUILD_COUNT = 5000;

    /** Initial capacity used to size the tombstone set. */
    private static final int MAX_DELETE_COUNT = 10000;

    /** Shared by regular operations (read lock) and rebuilds (write lock). */
    private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();

    /**
     * Elements removed since the last rebuild. A concurrent set is required because several
     * threads may add to it while holding the (shared) read lock.
     */
    private final Set<T> delElements = ConcurrentHashMap.newKeySet(MAX_DELETE_COUNT);

    /** Source of the authoritative element list used to (re)populate the filter. */
    private final Supplier<List<T>> dataSupply;

    private final BloomFilter<T> bloomFilter;

    /**
     * Populates the filter from the supplier and registers the hourly rebuild check.
     *
     * @param bloomFilter the empty filter to wrap
     * @param dataSupply supplier of the elements the filter should contain
     */
    private ForceBloomFilter(BloomFilter<T> bloomFilter, Supplier<List<T>> dataSupply) {
        this.bloomFilter = bloomFilter;
        this.dataSupply = dataSupply;
        dataSupply.get().forEach(this::put);
        // The timer rebuilds once pending deletions reach 3/4 of REBUILD_COUNT.
        // NOTE(review): an exception escaping this task cancels all later runs of it — add
        // logging/handling here once the project's logging facility is known.
        EXE.scheduleAtFixedRate(
                () -> rebuildIfDeletedAtLeast(REBUILD_COUNT * 0.75), 1, 1, TimeUnit.HOURS);
    }

    /**
     * Creates a deletable Bloom filter pre-populated from {@code dataSupply}.
     *
     * @param funnel funnel used to hash elements
     * @param expectedInsertions expected number of insertions
     * @param fpp desired false-positive probability
     * @param dataSupply supplier of the elements the filter should contain
     * @param <T> element type
     * @return the populated filter
     */
    public static <T> ForceBloomFilter<T> create(
            Funnel<? super T> funnel, long expectedInsertions, double fpp, Supplier<List<T>> dataSupply) {
        final BloomFilter<T> bloomFilter = BloomFilter.create(funnel, expectedInsertions, fpp);
        return new ForceBloomFilter<>(bloomFilter, dataSupply);
    }

    /** Adds {@code object} to the underlying filter under the read lock. */
    private boolean put(T object) {
        lock.readLock().lock();
        try {
            return this.bloomFilter.put(object);
        } finally {
            lock.readLock().unlock();
        }
    }

    /**
     * Marks {@code object} as deleted and rebuilds the filter once {@code REBUILD_COUNT} deletions
     * are pending.
     *
     * @param object the element to remove; must not be {@code null}
     * @return {@code true} if the element was not already marked as deleted
     */
    public boolean remove(T object) {
        final boolean add;
        lock.readLock().lock();
        try {
            add = delElements.add(object);
        } finally {
            lock.readLock().unlock();
        }
        // The read lock must be released first: a read lock cannot be upgraded to a write lock.
        rebuildIfDeletedAtLeast(REBUILD_COUNT);
        return add;
    }

    /**
     * Rebuilds the filter if at least {@code threshold} deletions are pending. The reset, the
     * reload and the clearing of the tombstones all happen under the write lock.
     *
     * <p>The supplier runs while the write lock is held, so it should return promptly. It is
     * called before the filter is reset so that a failing supplier leaves the filter untouched.
     *
     * @param threshold minimum number of pending deletions that triggers a rebuild
     */
    private void rebuildIfDeletedAtLeast(double threshold) {
        if (delElements.size() < threshold) {
            return;
        }
        lock.writeLock().lock();
        try {
            // Re-check: another thread may have rebuilt while this one waited for the lock.
            if (delElements.size() < threshold) {
                return;
            }
            final List<T> current = dataSupply.get();
            rebuild();
            // put() re-acquires the read lock, which the write-lock holder is allowed to do.
            current.forEach(this::put);
            delElements.clear();
        } finally {
            lock.writeLock().unlock();
        }
    }

    /**
     * Resets the wrapped filter to its empty state by reaching into its private fields via
     * reflection: every word of the {@code data} array is zeroed and the {@code bitCount} counter
     * is replaced with a freshly constructed instance of the same class.
     *
     * <p>NOTE(review): this depends on the private field names {@code bits}, {@code data} and
     * {@code bitCount} of the Guava version in use — re-verify after any Guava upgrade.
     *
     * @throws RuntimeException if any reflective step fails; the original failure is attached as
     *     the cause
     */
    private void rebuild() {
        // Acquire outside the try block so the finally clause never unlocks a lock that was
        // not obtained.
        lock.writeLock().lock();
        try {
            final Field bitsField = this.bloomFilter.getClass().getDeclaredField("bits");
            bitsField.setAccessible(true);
            final Object bitsVal = bitsField.get(this.bloomFilter);

            final Field dataField = bitsVal.getClass().getDeclaredField("data");
            dataField.setAccessible(true);
            final AtomicLongArray dataVal = (AtomicLongArray) dataField.get(bitsVal);
            for (int i = 0; i < dataVal.length(); ++i) {
                dataVal.set(i, 0);
            }

            // getDeclaredConstructor() also finds non-public constructors, which an internal
            // implementation class may have; getConstructor() would not.
            final Field bitCountField = bitsVal.getClass().getDeclaredField("bitCount");
            bitCountField.setAccessible(true);
            final Constructor<?> constructor =
                    bitCountField.get(bitsVal).getClass().getDeclaredConstructor();
            constructor.setAccessible(true);
            final Object newBitCountVal = constructor.newInstance();
            bitCountField.set(bitsVal, newBitCountVal);
        } catch (Exception e) {
            throw new RuntimeException("ReBuild Bloom Filter error", e);
        } finally {
            lock.writeLock().unlock();
        }
    }

    /**
     * Tests membership, treating elements that are marked as deleted as absent. Runs under the
     * read lock so that it cannot observe the filter in the middle of a rebuild.
     *
     * @param object the element to look up; must not be {@code null}
     * @return {@code false} if the element is marked as deleted; otherwise the underlying filter's
     *     answer
     */
    public boolean mightContain(T object) {
        lock.readLock().lock();
        try {
            if (delElements.contains(object)) {
                return false;
            }
            return this.bloomFilter.mightContain(object);
        } finally {
            lock.readLock().unlock();
        }
    }
}
四、测试
测试:当删除数据量达到REBUILD_COUNT时主动触发过滤器重建
package com.zte.rate.filter;
import com.google.common.collect.Lists;
import com.google.common.hash.Funnels;
import org.springframework.util.Assert;
import java.nio.charset.Charset;
import java.util.List;
/**
 * Manual check of the removal-triggered rebuild of {@code ForceBloomFilter}.
 *
 * @Author 10184538
 * @Date 2022/8/30 15:09
 **/
public class TestBloomFilter {

    /** Number of times {@link #getData()} has been called so far. */
    public static int count = 0;

    /**
     * Data source handed to the filter: the first call returns the initial data set, every later
     * call returns the data set that is expected after a rebuild.
     */
    public static List<String> getData() {
        count++;
        return count == 1
                ? Lists.newArrayList("A", "B", "C", "D")
                : Lists.newArrayList("B", "F", "D", "E", "F");
    }

    public static void main(String[] args) {
        final ForceBloomFilter<String> filter = ForceBloomFilter.create(
                Funnels.stringFunnel(Charset.defaultCharset()), 10000, 0.1,
                TestBloomFilter::getData);

        // Initial load: A is present, E is not.
        Assert.isTrue(filter.mightContain("A"), "Exist A");
        Assert.isTrue(!filter.mightContain("E"), "Not Exist E");

        // Remove A.
        filter.remove("A");
        Assert.isTrue(!filter.mightContain("A"), "Not Exist A");

        // Remove B, which triggers a rebuild (for this test, set REBUILD_COUNT=2).
        filter.remove("B");
        Assert.isTrue(filter.mightContain("E"), "Exist E");
        Assert.isTrue(!filter.mightContain("A"), "Not Exist A");

        System.out.println("end");
    }
}
测试:定时任务在删除数据量达到REBUILD_COUNT的3/4时进行过滤器重建
/**
 * Manual check of the timer-triggered rebuild: removes several elements, waits for the scheduled
 * task to run, then verifies the filter reflects the second data set.
 */
public static void main(String[] args) throws InterruptedException {
    final ForceBloomFilter<String> filter = ForceBloomFilter.create(
            Funnels.stringFunnel(Charset.defaultCharset()), 10000, 0.1,
            TestBloomFilter::getData);

    // Initial load: A is present, E is not.
    Assert.isTrue(filter.mightContain("A"), "Exist A");
    Assert.isTrue(!filter.mightContain("E"), "Not Exist E");

    // Remove A.
    filter.remove("A");
    Assert.isTrue(!filter.mightContain("A"), "Not Exist A");

    // Remove B, C and D so the timer rebuilds the filter. For this test, set REBUILD_COUNT=4
    // (3/4 of it is 3) and change the schedule to 10, 10, TimeUnit.SECONDS.
    filter.remove("B");
    filter.remove("C");
    filter.remove("D");
    Thread.sleep(20000);

    Assert.isTrue(filter.mightContain("E"), "Exist E");
    Assert.isTrue(!filter.mightContain("A"), "Not Exist A");

    System.out.println("end");
}