布隆过滤器模块(RedisBloom)目前仅支持 Redis 4.0 及以上版本
安装插件
git clone https://github.com/RedisBloom/RedisBloom.git
cd RedisBloom
make  # 编译,会生成一个 rebloom.so 文件
redis-server --loadmodule /path/to/rebloom.so
redis-cli -h 127.0.0.1 -p 6379
添加依赖:目前 Redis 的 Java 客户端 jar 包不支持布隆过滤器命令,需要额外引入 JReBloom
<dependency>
<groupId>com.redislabs</groupId>
<artifactId>jrebloom</artifactId>
<version>1.0.1</version>
</dependency>
代码示例
/**
 * Adds the given hashes to the user's Bloom filter for the current day of the year.
 *
 * <p>The filter key is {@code "rebloom:" + userid + "_" + dayOfYear}, where
 * {@code dayOfYear} is read from {@link Calendar#DAY_OF_YEAR} at call time.
 * Exceptions raised inside the {@code try} block are printed with
 * {@code printStackTrace()} and are not rethrown.
 *
 * @param userid user identifier embedded in the filter key
 * @param hashs  values to add to the filter
 */
private void addTobloomFilter(String userid, String[] hashs) {
    Client client = null;
    try {
        // Bloom-filter de-duplication: one filter key per user per day of the year.
        int dayOfYear = Calendar.getInstance().get(Calendar.DAY_OF_YEAR);
        client = new Client("192.168.17.49", 6379, 100000, 1000);
        String bloomKey = "rebloom:" + userid + "_" + dayOfYear;
        client.addMulti(bloomKey, hashs);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // The constructor may have thrown, leaving client null; closing
        // unconditionally would raise a NullPointerException from this finally block.
        if (client != null) {
            client.close();
        }
    }
}
/**
 * Reports, for each given hash, whether any of the user's Bloom filters already contains it.
 *
 * <p>Filters are keyed {@code "rebloom:" + userid + "_" + k} for every {@code k} returned by
 * {@code getKeys()} (per the original comment these cover the last 7 days — confirm against
 * {@code getKeys()}). A hash is reported as existing when at least one of those filters
 * reports it. Before querying, creation of the filter for the current day of the year is
 * attempted; a failure of that attempt is deliberately ignored.
 *
 * <p>Exceptions raised while querying are printed with {@code printStackTrace()} and not
 * rethrown; in that case the flags computed so far are returned.
 *
 * @param userid user identifier embedded in the filter keys
 * @param hashs  values to look up
 * @return an array parallel to {@code hashs}; {@code true} means the hash was reported present
 */
private boolean[] bloomFilter(String userid, String[] hashs) {
    boolean[] exists = new boolean[hashs.length];
    Client client = null;
    try {
        // Bloom-filter de-duplication.
        int dayOfYear = Calendar.getInstance().get(Calendar.DAY_OF_YEAR);
        client = new Client("192.168.17.49", 6379, 100000, 1000);
        String bloomKey = "rebloom:" + userid;
        try {
            // Create today's filter. Arguments: (filter name, expected item count, error rate).
            client.createFilter(bloomKey + "_" + dayOfYear, 1000000, 0.0001);
        } catch (Exception ignored) {
            // Best effort: no API was found to test whether a filter already exists,
            // so creation is simply attempted and any failure is ignored.
        }
        // A hash counts as existing if any one of the per-day filters reports it.
        for (int k : getKeys()) {
            boolean[] found = client.existsMulti(bloomKey + "_" + k, hashs);
            for (int i = 0; i < hashs.length; i++) {
                exists[i] = exists[i] || found[i];
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // The constructor may have thrown, leaving client null; guard against an NPE here.
        if (client != null) {
            client.close();
        }
    }
    LOGGER.info("rebloom-hash是否被去重:");
    LOGGER.info(JSON.toJSONString(hashs));
    LOGGER.info(JSON.toJSONString(exists));
    return exists;
}
实战中利用枚举完成单例并实现bloom
//枚举单例创建bloomClient
/**
 * Enum-based singleton that holds one shared Bloom filter {@code Client}.
 *
 * <p>The client is built once, when {@code INSTANCE} is initialised, from the
 * {@code bloom.redis.host} and {@code bloom.redis.port} configuration properties.
 */
public enum BloomRedisPool {
    INSTANCE;

    private final Client client;

    BloomRedisPool() {
        String host = ConfigUtil.getConfProp("bloom.redis.host");
        int port = Integer.parseInt(ConfigUtil.getConfProp("bloom.redis.port"));
        this.client = new Client(host, port, 100000, 100);
    }

    /**
     * Returns the shared client instance.
     *
     * @return the client created when this enum was initialised
     */
    public Client getClient() {
        return this.client;
    }
}
//向bloom过滤器中添加查重内容
/**
 * Adds one entry to the {@code vote_url_filter} Bloom filter through the shared
 * {@code BloomRedisPool} client.
 */
public void addBloom() {
    BloomRedisPool.INSTANCE
            .getClient()
            .add("vote_url_filter", "业务中需要查重的内容 如:url+account");
}
//BloomRedis 没有提供选库的操作,需要手动在 0 库创建一个 bloom 过滤器 vote_url_filter
public int checkRepeat(){
//布隆过滤器 进行查重返回[true,false.....] true--已存在 false-不存在
String url = "www.baidu.com"
Set<String> accounts = new HashSet<>();
accounts.add("account1");
accounts.add("account2");
String[] bloomKeys = accounts.stream().map(accountName->url+accountName).toArray(String[]::new);
Client client = BloomRedisPool.INSTANCE.getClient();
int usedNum = 0;
if (bloomKeys.length > 0) {
boolean[] bloomResult = client.existsMulti("vote_url_filter", bloomKeys);
for (int i = 0; i < bloomResult.length; i++) {
if (bloomResult[i]) {
usedNum++;
}
}
}
return usedNum;
}
布隆过滤器特点:
1.预期数据量越大占用内存越大,容错率越低占用内存越大
2.判断结果为不存在,则一定不存在;判断结果为存在,则实际上可能并不存在(存在一定的误判率)
3.不支持删除元素