布隆过滤器特点:
1. 过滤器判定值存在时,该值实际上不一定存在(可能出现误判);
2. 过滤器判定值不存在时,该值一定不存在。
布隆过滤器的底层数据结构是一个bit数组(位图)。放入数据时,对值进行多次hash计算,每个hash值按映射关系对应bit数组中的一个下标,并把这些下标上的bit置为1;校验数据时,用同样的方式计算出下标,只有当这些下标上的bit全部为1时,才判定校验对象可能存在于过滤器中,只要有一个bit为0,就说明该对象一定不存在。
通俗来讲,
布隆过滤器放入数据,相当于在纸上画一条线;布隆过滤器校验数据,相当于拿一条线到纸上比对是否完全重合。当纸上所画的线条足够多时,重合的点也会足够多,点多成线——即使某条线从未画过,也可能被其他线条上的点恰好拼出来。所以过滤器判定值存在时,结果可能不准;过滤器判定值不存在时,该值一定不存在。
布隆过滤器工具类
/**
 * Computes the bit offsets that represent a value in a Bloom filter.
 *
 * <p>The bit-array size and the number of hash functions are derived once, in the
 * constructor, from the expected number of insertions and the desired false-positive
 * probability. {@link #hashOffset(Object)} then maps a value to one offset per hash
 * function; storing and reading the bits is left to the caller.
 *
 * @param <T> type of the values placed in the filter
 */
public class BloomFilterHelper<T> {
    /** Number of hash functions applied to each value. */
    private final int numHashFunctions;
    /** Size of the bit array; always at least 1 so it is a valid modulus. */
    private final int bitSize;
    /** Feeds the fields of a value into the hash function. */
    private final Funnel<T> funnel;

    /**
     * Creates a helper sized for the given capacity and error rate.
     *
     * @param funnel             converts a value into hash input; must not be null
     * @param expectedInsertions number of values expected to be inserted; must not be negative
     *                           (0 is treated as 1)
     * @param fpp                desired false-positive probability, in {@code [0, 1)};
     *                           0 is replaced by {@link Double#MIN_VALUE}
     * @throws IllegalArgumentException if any argument violates the constraints above
     */
    public BloomFilterHelper(Funnel<T> funnel, int expectedInsertions, double fpp) {
        Preconditions.checkArgument(funnel != null, "funnel不能为空");
        Preconditions.checkArgument(expectedInsertions >= 0, "expectedInsertions不能为负数");
        // Written as a conjunction so that NaN is rejected as well.
        Preconditions.checkArgument(fpp >= 0.0 && fpp < 1.0, "fpp必须在[0, 1)范围内");
        this.funnel = funnel;
        // With 0 insertions the sizing formulas yield 0 bits and 0/0 hash functions.
        long insertions = Math.max(1, expectedInsertions);
        bitSize = optimalNumOfBits(insertions, fpp);
        numHashFunctions = optimalNumOfHashFunctions(insertions, bitSize);
    }

    /**
     * Computes the bit offsets for a value, one per hash function.
     *
     * <p>A single 128-bit murmur3 hash is taken; the low and high 32-bit halves of its
     * first 64 bits are combined as {@code hash1 + i * hash2} to derive each offset.
     *
     * @param value the value to hash
     * @return an array of {@code numHashFunctions} offsets, each in {@code [0, bitSize)}
     */
    public int[] hashOffset(T value) {
        int[] offset = new int[numHashFunctions];
        long hash64 = Hashing.murmur3_128().hashObject(value, funnel).asLong();
        int hash1 = (int) hash64;
        int hash2 = (int) (hash64 >>> 32);
        for (int i = 1; i <= numHashFunctions; i++) {
            // int overflow is intentional here; the sign is repaired below.
            int nextHash = hash1 + i * hash2;
            if (nextHash < 0) {
                // Bitwise complement maps every negative int to a non-negative one
                // (unlike negation, which leaves Integer.MIN_VALUE negative).
                nextHash = ~nextHash;
            }
            offset[i - 1] = nextHash % bitSize;
        }
        return offset;
    }

    /**
     * Computes the bit-array length as {@code -n * ln(p) / (ln 2)^2}.
     *
     * @param insertions number of values expected to be inserted
     * @param errorRatio desired false-positive probability; 0 is replaced by
     *                   {@link Double#MIN_VALUE} so the logarithm stays finite
     * @return the number of bits, clamped to {@code [1, Integer.MAX_VALUE]}
     */
    private int optimalNumOfBits(long insertions, double errorRatio) {
        if (errorRatio == 0) {
            errorRatio = Double.MIN_VALUE;
        }
        int bits = (int) (-insertions * Math.log(errorRatio) / (Math.log(2) * Math.log(2)));
        // The cast truncates toward zero, so small inputs could yield 0, which would
        // make the modulo in hashOffset throw ArithmeticException.
        return Math.max(1, bits);
    }

    /**
     * Computes the number of hash functions as {@code round(m / n * ln 2)}, at least 1.
     *
     * @param n number of values expected to be inserted
     * @param m number of bits in the bit array
     * @return the number of hash functions to apply per value
     */
    private int optimalNumOfHashFunctions(long n, long m) {
        return Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
    }
}
/**
 * Stores and queries Bloom filter bits in a Redis bitmap.
 *
 * <p>The offsets to set or test are supplied by a {@link BloomFilterHelper}; this class
 * only reads and writes the bits under the given Redis key.
 */
public class BloomfilterService {
    // NOTE(review): raw type kept as in the original; parameterizing it could change
    // which bean is injected — confirm against the application's Redis configuration.
    @Autowired
    private RedisTemplate redisTemplate;

    /**
     * Adds a value to the Bloom filter stored under {@code key}.
     *
     * <p>Sets every bit offset computed for the value and, if the key has no expiry
     * (TTL of -1), gives it a one-hour expiry.
     *
     * @param bloomFilterHelper computes the bit offsets for the value; must not be null
     * @param key               Redis key holding the bitmap
     * @param value             value to add
     * @throws IllegalArgumentException if {@code bloomFilterHelper} is null
     */
    public <T> void addByBloomFilter(BloomFilterHelper<T> bloomFilterHelper, String key, T value) {
        Preconditions.checkArgument(bloomFilterHelper != null, "bloomFilterHelper不能为空");
        int[] offset = bloomFilterHelper.hashOffset(value);
        for (int i : offset) {
            redisTemplate.opsForValue().setBit(key, i, true);
        }
        // TTL in seconds. Kept boxed: Spring Data Redis documents a null result when
        // the call runs inside a pipeline/transaction, and unboxing it would throw NPE.
        Long expire = redisTemplate.getExpire(key);
        // Only set an expiry when the key is known to have none; an unknown (null)
        // TTL leaves the key untouched.
        if (expire != null && expire == -1) {
            redisTemplate.expire(key, 1, TimeUnit.HOURS);
        }
    }

    /**
     * Tests whether a value may be present in the Bloom filter stored under {@code key}.
     *
     * @param bloomFilterHelper computes the bit offsets for the value; must not be null
     * @param key               Redis key holding the bitmap
     * @param value             value to test
     * @return {@code false} as soon as one of the value's bits is unset; {@code true}
     *         if all of them are set
     * @throws IllegalArgumentException if {@code bloomFilterHelper} is null
     * @throws IllegalStateException    if Redis returns no result for a bit
     */
    public <T> boolean includeByBloomFilter(BloomFilterHelper<T> bloomFilterHelper, String key, T value) {
        Preconditions.checkArgument(bloomFilterHelper != null, "bloomFilterHelper不能为空");
        int[] offset = bloomFilterHelper.hashOffset(value);
        for (int i : offset) {
            Boolean bit = redisTemplate.opsForValue().getBit(key, i);
            if (bit == null) {
                // A missing result cannot be read as "bit unset": that would report the
                // value as definitely absent. Fail explicitly instead of with an NPE.
                throw new IllegalStateException(
                        "getBit returned null for key=" + key + ", offset=" + i);
            }
            if (!bit) {
                return false;
            }
        }
        return true;
    }
}
/**
 * Simple mutable key/value pair used as the element type of the Bloom filter.
 *
 * <p>Accessors are declared explicitly so the class is usable on its own; both
 * fields are {@code null} until set.
 */
public class BloomModel {
    private String key;
    private String value;

    /**
     * @return the key, or {@code null} if it has not been set
     */
    public String getKey() {
        return key;
    }

    /**
     * @param key the key to store
     */
    public void setKey(String key) {
        this.key = key;
    }

    /**
     * @return the value, or {@code null} if it has not been set
     */
    public String getValue() {
        return value;
    }

    /**
     * @param value the value to store
     */
    public void setValue(String value) {
        this.value = value;
    }
}
使用测试
/** Service that reads and writes the Bloom filter bits; injected via {@code @Autowired}. */
@Autowired
private BloomfilterService bloomfilterService;
/**
 * Offset calculator for {@link BloomModel} values, sized for 100000 expected
 * insertions and a 0.000001 false-positive probability. A model is hashed from
 * its key followed by its value, both as UTF-8 strings.
 */
private final BloomFilterHelper<BloomModel> modelBloomFilterHelper = new BloomFilterHelper<BloomModel>(
        (model, sink) -> {
            sink.putString(model.getKey(), Charsets.UTF_8).putString(model.getValue(), Charsets.UTF_8);
        },
        100000,
        0.000001);
/**
 * Checks whether the given value may already be in the "FLOW_VALUES" Bloom filter.
 *
 * @param values the value to look up
 * @return a message stating whether the filter reports the value, followed by the value
 */
@Override
public String bloomTest(String values) {
    BloomModel model = new BloomModel();
    model.setKey("FLOW_VALUES");
    // Must be the method parameter; `valus` is not a declared identifier.
    model.setValue(values);
    if (bloomfilterService.includeByBloomFilter(modelBloomFilterHelper, "FLOW_VALUES", model)) {
        return "过滤器有值---------------" + values;
    } else {
        return "过滤器无值+++++++++++++++" + values;
    }
}
/**
 * Adds the given ICCID to the "FLOW_VALUES" Bloom filter.
 *
 * @param iccid the value to add
 * @return a fixed confirmation message
 */
@Override
public String bloomAdd(String iccid) {
    final String filterKey = "FLOW_VALUES";
    BloomModel entry = new BloomModel();
    entry.setKey(filterKey);
    entry.setValue(iccid);
    bloomfilterService.addByBloomFilter(modelBloomFilterHelper, filterKey, entry);
    return "添加成功";
}