一、布隆过滤器
简介
布隆过滤器(Bloom Filter)的核心实现是一个超大的位数组和几个哈希函数。假设位数组的长度为m,哈希函数的个数为k,以上图为例,具体的操作流程:假设集合里面有3个元素{x, y, z},哈希函数的个数为3。首先将位数组进行初始化,将里面每个位都设置为0。对于集合里面的每一个元素,将元素依次通过3个哈希函数进行映射,每次映射都会产生一个哈希值,这个值对应位数组上面的一个点,然后将位数组对应的位置标记为1。查询W元素是否存在集合中的时候,同样的方法将W通过哈希映射到位数组上的3个点。如果3个点的其中有一个点不为1,则可以判断该元素一定不存在集合中。反之,如果3个点都为1,则该元素可能存在集合中。注意:此处不能判断该元素是否一定存在集合中,可能存在一定的误判率。可以从图中可以看到:假设某个元素通过映射对应下标为4,5,6这3个点。虽然这3个点都为1,但是很明显这3个点是不同元素经过哈希得到的位置,因此这种情况说明元素虽然不在集合中,也可能对应的都是1,这是误判率存在的原因。
特点:巧妙的使用hash算法和bitmap位存储的方式,极大的节约了空间。
由于主要用的是hash算法的特点,所有满足和hash算法相同的规则:当过滤器返回 true时(表示很有可能该值是存在的),有一定概率是误判的,即可能不存在;当过滤器返回false时(表示确定不存在),是可以完全相信的。
我们换个数据的角度来看规则:当数据添加到布隆过滤器中时,对该数据的查询一定会返回true;当数据没有插入过滤器时,对该数据的查询大部分情况返回false,但有小概率返回true,也就是误判。
我们知道它最终满足的规则和hash的规则是一致的,只是组合了多个hash,使用了bitmap来存储,大大优化了存储的空间和判断的效率。
布隆过滤器安装(redis 服务器大于等于4.X)
2.下载地址:
wget https://github.com/RedisLabsModules/rebloom/archive/v2.2.2.tar.gz
解压:tar -zxvf v2.2.2.tar.gz
编译:cd RedisBloom-2.2.2 make
加载模块 loadmodule /mnt/RedisBloom-2.2.2/redisbloom.so
重新启动redis 服务器
备注: 报错 Could not get a resource from the pool
解决方法:
vim redis.conf
注释 #bind 127.0.0.1
protected-mode yes 改为protected-mode no
springboot 整合redis + 布隆过滤器
Redis 配置
package com.redis.rebloom; import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.PropertyAccessor; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Charsets; import com.google.common.hash.Funnel; import org.springframework.cache.CacheManager; import org.springframework.cache.annotation.EnableCaching; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.data.redis.cache.RedisCacheManager; import org.springframework.data.redis.connection.RedisConnectionFactory; import org.springframework.data.redis.core.RedisTemplate; import org.springframework.data.redis.core.StringRedisTemplate; import org.springframework.data.redis.serializer.Jackson2JsonRedisSerializer; @Configuration @EnableCaching public class RedisConfig { @Bean public CacheManager cacheManager(RedisConnectionFactory connectionFactory) { RedisCacheManager rcm = RedisCacheManager.create(connectionFactory); return rcm; } @Bean public RedisTemplate<String, Object> redisTemplate(RedisConnectionFactory factory) { RedisTemplate<String, Object> redisTemplate = new RedisTemplate<String, Object>(); redisTemplate.setConnectionFactory(factory); Jackson2JsonRedisSerializer jackson2JsonRedisSerializer = new Jackson2JsonRedisSerializer(Object.class); ObjectMapper om = new ObjectMapper(); om.setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.ANY); om.enableDefaultTyping(ObjectMapper.DefaultTyping.NON_FINAL); jackson2JsonRedisSerializer.setObjectMapper(om); //序列化设置 ,这样计算是正常显示的数据,也能正常存储和获取 redisTemplate.setKeySerializer(jackson2JsonRedisSerializer); redisTemplate.setValueSerializer(jackson2JsonRedisSerializer); redisTemplate.setHashKeySerializer(jackson2JsonRedisSerializer); redisTemplate.setHashValueSerializer(jackson2JsonRedisSerializer); return redisTemplate; } @Bean public StringRedisTemplate stringRedisTemplate(RedisConnectionFactory factory) { StringRedisTemplate stringRedisTemplate = new StringRedisTemplate(); stringRedisTemplate.setConnectionFactory(factory); return stringRedisTemplate; } //初始化布隆过滤器,放入到spring容器里面 @Bean public BloomFilterHelper<String> initBloomFilterHelper() { return new BloomFilterHelper<>((Funnel<String>) (from, into) -> into.putString(from, Charsets.UTF_8).putString(from, Charsets.UTF_8), 1000000, 0.01); } }
package com.redis.rebloom; import com.google.common.base.Preconditions; import com.google.common.hash.Funnel; import com.google.common.hash.Hashing; public class BloomFilterHelper<T> { private int numHashFunctions; private int bitSize; private Funnel<T> funnel; public BloomFilterHelper(Funnel<T> funnel, int expectedInsertions, double fpp) { Preconditions.checkArgument(funnel != null, "funnel不能为空"); this.funnel = funnel; // 计算bit数组长度 bitSize = optimalNumOfBits(expectedInsertions, fpp); // 计算hash方法执行次数 numHashFunctions = optimalNumOfHashFunctions(expectedInsertions, bitSize); } public int[] murmurHashOffset(T value) { int[] offset = new int[numHashFunctions]; long hash64 = Hashing.murmur3_128().hashObject(value, funnel).asLong(); int hash1 = (int) hash64; int hash2 = (int) (hash64 >>> 32); for (int i = 1; i <= numHashFunctions; i++) { int nextHash = hash1 + i * hash2; if (nextHash < 0) { nextHash = ~nextHash; } offset[i - 1] = nextHash % bitSize; } return offset; } /** * 计算bit数组长度 */ private int optimalNumOfBits(long n, double p) { if (p == 0) { // 设定最小期望长度 p = Double.MIN_VALUE; } int sizeOfBitArray = (int) (-n * Math.log(p) / (Math.log(2) * Math.log(2))); return sizeOfBitArray; } /** * 计算hash方法执行次数 */ private int optimalNumOfHashFunctions(long n, long m) { int countOfHash = Math.max(1, (int) Math.round((double) m / n * Math.log(2))); return countOfHash; } }
package com.redis.rebloom; import com.google.common.base.Preconditions; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.redis.core.RedisTemplate; import org.springframework.stereotype.Component; @Component public class RedisBloomFilter { @Autowired private RedisTemplate redisTemplate; /*** * 根据给定的布隆过滤器添加值 * @param bloomFilterHelper * @param key * @param value * @param <T> */ public <T> void addByBloomFilter(BloomFilterHelper<T> bloomFilterHelper,String key, T value){ Preconditions.checkArgument(bloomFilterHelper != null ,"bloomFilterHelper 不能为空"); int[] offset = bloomFilterHelper.murmurHashOffset(value); for (int i : offset){ System.out.println("key: "+ key + " "+"value :"+i); redisTemplate.opsForValue().setBit(key, i ,true); } } public <T> boolean includeByBloomFilter(BloomFilterHelper<T> bloomFilterHelper, String key , T value){ Preconditions.checkArgument(bloomFilterHelper != null, "bloomFilterHelper不能为空"); int[] offset = bloomFilterHelper.murmurHashOffset(value); for (int i : offset) { System.out.println("key : " + key + " " + "value : " + i); if (!redisTemplate.opsForValue().getBit(key, i)) { return false; } } return true; } }
package com.redis.rebloom; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.redis.core.RedisTemplate; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; @RestController public class RedisController { @Autowired private RedisBloomFilter redisBloomFilter; @Autowired private BloomFilterHelper bloomFilterHelper; @Autowired private RedisTemplate redisTemplate; @RequestMapping(value = "/ad") public String addBloomFiltera(String id){ try { redisTemplate.opsForValue().setBit("v1", Long.parseLong(id),true); }catch (Exception e){ e.printStackTrace(); return "添加失败"; } return "添加成功"; } /*** * 添加到redis * @param id * @return */ @RequestMapping(value = "/add") public String addBloomFilter(String id){ try { redisBloomFilter.addByBloomFilter(bloomFilterHelper,"bloom",id); }catch (Exception e){ e.printStackTrace(); return "添加失败"; } return "添加成功"; } /*** * 查询是否存在 * @param id * @return */ @RequestMapping(value = "/check") public boolean checkBloomFilter(String id){ boolean b = redisBloomFilter.includeByBloomFilter(bloomFilterHelper,"bloom",id); return b; } }
@Configuration
@EnableCaching
public class RedisConfig @Bean
public CacheManager cacheManager(RedisConnectionFactory connectionFactory) {
RedisCacheManager rcm = RedisCacheManager.create(connectionFactory);
return rcm;
}
@Bean
public RedisTemplate<String, Object> redisTemplate(RedisConnectionFactory factory) {
RedisTemplate<String, Object> redisTemplate = new RedisTemplate<String, Object>();
redisTemplate.setConnectionFactory(factory);
Jackson2JsonRedisSerializer jackson2JsonRedisSerializer = new
Jackson2JsonRedisSerializer(Object.class);
ObjectMapper om = new ObjectMapper();
om.setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.ANY);
om.enableDefaultTyping(ObjectMapper.DefaultTyping.NON_FINAL);
jackson2JsonRedisSerializer.setObjectMapper(om);
//序列化设置 ,这样计算是正常显示的数据,也能正常存储和获取
redisTemplate.setKeySerializer(jackson2JsonRedisSerializer);
redisTemplate.setValueSerializer(jackson2JsonRedisSerializer);
redisTemplate.setHashKeySerializer(jackson2JsonRedisSerializer);
redisTemplate.setHashValueSerializer(jackson2JsonRedisSerializer);
return redisTemplate;
}
@Bean
public StringRedisTemplate stringRedisTemplate(RedisConnectionFactory factory) {
StringRedisTemplate stringRedisTemplate = new StringRedisTemplate();
stringRedisTemplate.setConnectionFactory(factory);
return stringRedisTemplate;
}
//初始化布隆过滤器,放入到spring容器里面
@Bean
public BloomFilterHelper<String> initBloomFilterHelper() {
return new BloomFilterHelper<>((Funnel<String>) (from, into) -> into.putString(from, Charsets.UTF_8).putString(from, Charsets.UTF_8), 1000000, 0.01);
}
}
布隆过滤器工具类
public class BloomFilterHelper<T> {
private int numHashFunctions;
private int bitSize;
private Funnel<T> funnel;
public BloomFilterHelper(Funnel<T> funnel, int expectedInsertions, double fpp) {
Preconditions.checkArgument(funnel != null, "funnel不能为空");
this.funnel = funnel;
// 计算bit数组长度
bitSize = optimalNumOfBits(expectedInsertions, fpp);
// 计算hash方法执行次数
numHashFunctions = optimalNumOfHashFunctions(expectedInsertions, bitSize);
}
public int[] murmurHashOffset(T value) {
int[] offset = new int[numHashFunctions];
long hash64 = Hashing.murmur3_128().hashObject(value, funnel).asLong();
int hash1 = (int) hash64;
int hash2 = (int) (hash64 >>> 32);
for (int i = 1; i <= numHashFunctions; i++) {
int nextHash = hash1 + i * hash2;
if (nextHash < 0) {
nextHash = ~nextHash;
}
offset[i - 1] = nextHash % bitSize;
}
return offset;
}
/**
* 计算bit数组长度
*/
private int optimalNumOfBits(long n, double p) {
if (p == 0) {
// 设定最小期望长度
p = Double.MIN_VALUE;
}
int sizeOfBitArray = (int) (-n * Math.log(p) / (Math.log(2) * Math.log(2)));
return sizeOfBitArray;
}
/**
* 计算hash方法执行次数
*/
private int optimalNumOfHashFunctions(long n, long m) {
int countOfHash = Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
return countOfHash;
}
}
布隆过滤器工具类
@Component
public class RedisBloomFilter {
@Autowired
private RedisTemplate redisTemplate;
/***
* 根据给定的布隆过滤器添加值
* @param bloomFilterHelper
* @param key
* @param value
* @param <T>
*/
public <T> void addByBloomFilter(BloomFilterHelper<T> bloomFilterHelper,String key, T value){
Preconditions.checkArgument(bloomFilterHelper != null ,"bloomFilterHelper 不能为空");
int[] offset = bloomFilterHelper.murmurHashOffset(value);
for (int i : offset){
System.out.println("key: "+ key + " "+"value :"+i);
redisTemplate.opsForValue().setBit(key, i ,true);
}
}
public <T> boolean includeByBloomFilter(BloomFilterHelper<T> bloomFilterHelper, String key , T value){
Preconditions.checkArgument(bloomFilterHelper != null, "bloomFilterHelper不能为空");
int[] offset = bloomFilterHelper.murmurHashOffset(value);
for (int i : offset) {
System.out.println("key : " + key + " " + "value : " + i);
if (!redisTemplate.opsForValue().getBit(key, i)) {
return false;
}
}
return true;
}
}