手写布隆过滤器
**过滤器代码**
package com.ming.filter;
import com.google.common.hash.Funnels;
import com.google.common.hash.Hashing;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.dao.DataAccessException;
import org.springframework.data.redis.connection.RedisConnection;
import org.springframework.data.redis.core.RedisCallback;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.stereotype.Component;
import org.springframework.web.bind.annotation.PostMapping;
import javax.annotation.Nullable;
import javax.annotation.PostConstruct;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.List;
/**
 * Bloom filter whose bit array is stored in a Redis bitmap.
 *
 * <p>The bit array size and the number of hash functions are derived from
 * {@code expectedInsertions} and {@code fpp} in {@link #init()}. Every key is mapped to
 * {@code numHashFunctions} bit offsets; {@link #put(String)} sets those bits and
 * {@link #isExist(String)} reads them back, each in one pipelined round trip.
 *
 * <p>NOTE: Redis bitmaps only accept offsets below 2^32, so the configured sizing must keep
 * {@code numBits} under that limit.
 */
@Component
public class BloomFilter {

    /** Redis key of the bitmap that backs this filter, encoded once with a fixed charset. */
    private static final byte[] BITMAP_KEY = "zhao".getBytes(StandardCharsets.UTF_8);

    /** Expected number of stored elements (roughly the number of ids in the table; err on the high side). */
    private long expectedInsertions = 200000;

    /** Tolerated false-positive probability; every false positive falls through to the database. */
    private double fpp = 0.001;

    @Autowired
    private RedisTemplate<String, Object> redisTemplate;

    /** Length of the bit array, computed by {@link #init()}. */
    private long numBits;

    /** Number of hash functions, computed by {@link #init()}. */
    private int numHashFunctions;

    public long getExpectedInsertions() {
        return expectedInsertions;
    }

    /**
     * Sets the expected number of insertions. The sizing is only recomputed by
     * {@link #init()}, so call this before initialization (or call {@code init()} again).
     */
    public void setExpectedInsertions(long expectedInsertions) {
        this.expectedInsertions = expectedInsertions;
    }

    public double getFpp() {
        return fpp;
    }

    /**
     * Sets the tolerated false-positive probability. The sizing is only recomputed by
     * {@link #init()}, so call this before initialization (or call {@code init()} again).
     */
    public void setFpp(double fpp) {
        this.fpp = fpp;
    }

    /**
     * Computes the bit array length and the number of hash functions from the configured
     * {@code expectedInsertions} and {@code fpp}.
     *
     * @throws IllegalArgumentException if {@code expectedInsertions} is not positive or
     *         {@code fpp} is outside {@code [0, 1)}
     */
    @PostConstruct
    public void init() {
        // Fail fast: a non-positive size would otherwise surface later as a division by
        // zero or as negative bit offsets in getIndexs().
        if (expectedInsertions <= 0) {
            throw new IllegalArgumentException(
                    "expectedInsertions must be positive: " + expectedInsertions);
        }
        if (!(fpp >= 0 && fpp < 1)) {
            throw new IllegalArgumentException("fpp must be in [0, 1): " + fpp);
        }
        this.numBits = optimalNumOfBits(expectedInsertions, fpp);
        this.numHashFunctions = optimalNumOfHashFunctions(expectedInsertions, numBits);
    }

    /** Returns the hash function count {@code round(m / n * ln 2)}, never less than 1. */
    private int optimalNumOfHashFunctions(long n, long m) {
        return Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
    }

    /** Returns the bit count {@code -n * ln(p) / (ln 2)^2}, never less than 1. */
    private long optimalNumOfBits(long n, double p) {
        if (p == 0) {
            // log(0) is -Infinity; substitute the smallest positive double instead.
            p = Double.MIN_VALUE;
        }
        return Math.max(1L, (long) (-n * Math.log(p) / (Math.log(2) * Math.log(2))));
    }

    /**
     * Checks whether {@code key} may have been added to the filter.
     *
     * @param key the value to look up
     * @return {@code false} if at least one of the key's bits is unset (the key was never
     *         added); {@code true} otherwise (the key was added, or this is a false positive)
     */
    public boolean isExist(String key) {
        long[] indexs = getIndexs(key);
        // executePipelined opens and closes the pipeline itself and owns the connection,
        // so the callback must only queue commands and return null.
        List<Object> bits = redisTemplate.executePipelined((RedisCallback<Object>) connection -> {
            for (long index : indexs) {
                connection.getBit(BITMAP_KEY, index);
            }
            return null;
        });
        return !bits.contains(Boolean.FALSE);
    }

    /**
     * Adds {@code key} to the filter by setting all of its bits in the Redis bitmap.
     *
     * @param key the value to add
     */
    public void put(String key) {
        long[] indexs = getIndexs(key);
        // No try/finally here: the template manages the connection, and any Redis failure
        // must propagate to the caller instead of being swallowed.
        redisTemplate.executePipelined((RedisCallback<Object>) connection -> {
            for (long index : indexs) {
                connection.setBit(BITMAP_KEY, index, true);
            }
            return null;
        });
    }

    /**
     * Maps {@code key} to {@code numHashFunctions} bit offsets in {@code [0, numBits)}.
     *
     * <p>Offsets are derived from one 64-bit hash using the double-hashing scheme
     * {@code hash1 + i * hash2}; the derivation is kept unchanged so that offsets stay
     * compatible with bits already stored in Redis.
     *
     * @param key the value to map
     * @return the bit offsets for {@code key}
     */
    public long[] getIndexs(String key) {
        long hash1 = hash(key);
        long hash2 = hash1 >>> 16;
        long[] result = new long[numHashFunctions];
        for (int i = 0; i < numHashFunctions; i++) {
            long combinedHash = hash1 + i * hash2;
            if (combinedHash < 0) {
                // Flip all bits to obtain a non-negative value before taking the modulus.
                combinedHash = ~combinedHash;
            }
            result[i] = combinedHash % numBits;
        }
        return result;
    }

    /**
     * Hashes {@code key} (as UTF-8) with Guava's 128-bit Murmur3 and returns the hash
     * as a {@code long}.
     *
     * @param key the value to hash
     * @return the hash value
     */
    public long hash(String key) {
        return Hashing.murmur3_128()
                .hashObject(key, Funnels.stringFunnel(StandardCharsets.UTF_8))
                .asLong();
    }
}
谷歌guava工具类对布隆过滤器的实现
1 引入jar包
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>24.0-jre</version>
</dependency>
2 使用过滤器
package com.ming.service;
import com.google.common.base.Charsets;
import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;
import com.ming.mapper.SkuMapper;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.List;
/**
 * Demonstrates Guava's in-memory {@code BloomFilter}: loads all SKU ids, adds them to a
 * filter, and prints membership checks for three sample ids.
 */
@Service
public class SkuService {

    @Autowired
    private SkuMapper skuMapper;

    /**
     * Loads every SKU id, feeds the ids into a freshly created Bloom filter, and prints
     * whether the filter might contain each of three sample ids.
     *
     * @return the ids returned by {@code skuMapper.findAllId()}
     */
    public List<String> findAll() {
        List<String> skuIds = skuMapper.findAllId();

        // Argument 1: the funnel that feeds ids into the hash; for Integer ids use
        //             Funnels.integerFunnel() instead.
        // Argument 2: expected number of ids added to the filter.
        // Argument 3: tolerated false-positive probability.
        final int expectedIds = 100000;
        final double falsePositiveRate = 0.0001;
        BloomFilter<String> idFilter =
                BloomFilter.create(Funnels.stringFunnel(Charsets.UTF_8), expectedIds, falsePositiveRate);

        skuIds.forEach(idFilter::put);

        for (String probe : new String[] {"3306", "6379", "8080"}) {
            System.out.println(idFilter.mightContain(probe));
        }
        return skuIds;
    }
}
不建议在需要持久化或多实例共享的场景中使用该方式:Guava 的 BloomFilter 把位数组和哈希算法封装在同一个对象里,数据只保存在当前进程的内存中,进程重启后即丢失。而且这种方式与 Redis 没有任何关联。第一种手写过滤器把位数组存放在 Redis 的 bitmap 数据类型中,可以直接与 Redis 交互。