记录一次将布隆过滤器(Bloom Filter)与 Redis 相结合的实现
需要在 pom.xml 中引入的依赖:
<dependency> <groupId>redis.clients</groupId> <artifactId>jedis</artifactId> <version>2.9.0</version> </dependency> <dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-pool2</artifactId> <version>2.6.2</version> </dependency> <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> <version>27.1-jre</version> </dependency>
import com.base.constants.ConfigProperty; import com.google.common.base.Charsets; import com.google.common.primitives.Longs; import redis.clients.jedis.Jedis; import redis.clients.jedis.JedisPool; import redis.clients.jedis.JedisPoolConfig; import redis.clients.jedis.Pipeline; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import static com.google.common.hash.Funnels.stringFunnel; import static com.google.common.hash.Hashing.murmur3_128;
/** * 已解决的问题 * 1.使用redis进行持久化操作 * 2.value值超过redis所支持的最大512M时所产生的问题 * 3.可以对错误进行回滚 * 3.静态方法所产生的线程安全问题(redis的pipeline会有原子性问题,但对目前的实际使用没有影响,在对相同的key进行多线程判重的时候会出现问题) * 4.静态方法所产生的参数不一致问题 * 可解决的问题 * 目前可以先这样进行使用,待达到速度瓶颈后,可以使用本地与redis交互,本地进行去重,开多线程对redis进行持久化 * 目前的速度大约1ms 1.3次判重(测试服务器,正式推测为2次,网络消耗严重) * 本地判重可达到1ms 15w次判重(根据处理器性能变化,3.9GHz频率下可达38w) * @author kuroha */ public class BloomFilter { /** * redis key和value不能超过512M * 256M对应的bit数 */ private static final long MAX_BIT = 2147483648L; /** * 256M对应的bit位数 */ private static final int BIT_NUM = 31; private static Map<String,ThreadLocal<ArrayList<Long>>> threadLocalMap; private static Map<String,Object> map; private static JedisPool jedisPool; /** * 初始化参数 */ static { ConfigProperty configProperty = ConfigProperty.getInstance(); // redis服务器地址 String host = configProperty.getRedisHost(); // redis服务端口 int port = Integer.parseInt(configProperty.getRedisPort()); // redis访问密码 String password = configProperty.getRedisPassword(); // redisPool最大空闲连接数 int maxIdle = Integer.parseInt(configProperty.getRedisPoolMaxIdle()); // redisPool最大连接数 int maxTotal = Integer.parseInt(configProperty.getRedisPoolMaxActive()); // redisPool获取连接时的最大等待毫秒数(如果设置为阻塞时BlockWhenExhausted),如果超时就抛异常, 小于零:阻塞不确定的时间, 默认-1 long maxWaitMillis = Long.parseLong(configProperty.getRedisPoolMaxWait()); // redisPool最小空闲连接数, 默认0 int minIdle = Integer.parseInt(configProperty.getRedisPoolMinIdle()); // redisPool逐出连接的最小空闲时间 默认1800000毫秒(30分钟) int timeout = Integer.parseInt(configProperty.getRedisTimeout()); JedisPoolConfig jedisPoolConfig = new JedisPoolConfig(); jedisPoolConfig.setMaxIdle(maxIdle); jedisPoolConfig.setMaxTotal(maxTotal); jedisPoolConfig.setMaxWaitMillis(maxWaitMillis); jedisPoolConfig.setMinIdle(minIdle); jedisPool = new JedisPool(jedisPoolConfig,host,port,timeout,password); threadLocalMap = new ConcurrentHashMap<>(4); map = new ConcurrentHashMap<>(3); // 获取redis数据 // 如果需要对其他地方添加布隆去重算法,重新生成以下内容,并将jedis,numBits,numHashFunctions放入map中,用key区分 // 获取bloomFlter的参数 // 
预期插入数量 long expectedInsertions = Long.parseLong(configProperty.getBfExpectedInsertions()); // 容错率 double fpp = Double.parseDouble(configProperty.getBfFpp()); long numBits = optimalNumOfBits(expectedInsertions, fpp); int numHashFunctions = optimalNumOfHashFunctions(expectedInsertions, numBits); Jedis jedis = new Jedis(host,port,0); jedis.auth(password); map.put(Const.BF_KEY_RK_CONTEXT_SEARCH+"numBits",numBits); map.put(Const.BF_KEY_RK_CONTEXT_SEARCH+"numHashFunctions",numHashFunctions); // 存入不同的ThreadLocal ThreadLocal<ArrayList<Long>> rkThreadLocal = new ThreadLocal<>(); threadLocalMap.put(Const.BF_KEY_RK_CONTEXT_SEARCH,rkThreadLocal); } public static void set(String key,String value) { jedisPool.getResource().set(key,value); } public static String get(String key){ return jedisPool.getResource().get(key); } /** * 判断value是否存在于集合 */ public static boolean isExist(String key, String value) { long[] indexs = getIndexs(key, value); boolean result = false; Jedis jedis = jedisPool.getResource(); try (Pipeline pipeline = jedis.pipelined()) { for (long index : indexs) { pipeline.getbit(getRedisKey(key, index), index % MAX_BIT); } List<Object> returnAll = pipeline.syncAndReturnAll(); result = !returnAll.contains(false); }catch (Exception e) { e.printStackTrace(); } jedis.close(); return result; } /** * 回滚操作 * @param key */ public static void putRollBack(String key) { ThreadLocal<ArrayList<Long>> threadLocal = threadLocalMap.get(key); ArrayList<Long> indexs = threadLocal.get(); threadLocal.remove(); Jedis jedis = jedisPool.getResource(); try (Pipeline pipeline = jedis.pipelined()) { for (long index : indexs) { if (index == -1) { break; } pipeline.setbit(getRedisKey(key, index), index % MAX_BIT, false); } pipeline.sync(); } catch (IOException e) { e.printStackTrace(); } jedis.close(); } /** * 成功删除线程内变量 * @param key */ public static void putSuccess(String key){ threadLocalMap.get(key).remove(); } /** * 将value存入redis bitmap */ public static void put(String key, String value) { 
long[] indexs = getIndexs(key,value); ArrayList<Long> threadLocalList = threadLocalMap.get(key).get(); if (threadLocalList == null) { threadLocalList = new ArrayList<>(); } Jedis jedis = jedisPool.getResource(); try (Pipeline pipeline = jedis.pipelined()) { for (long index : indexs) { pipeline.setbit(getRedisKey(key, index), index % MAX_BIT, true); } List<Object> returnAll = pipeline.syncAndReturnAll(); for (int i = 0; i < returnAll.size(); i++) { if (!(boolean) returnAll.get(i)) { threadLocalList.add(indexs[i]); } } } catch (IOException e) { e.printStackTrace(); } jedis.close(); threadLocalMap.get(key).set(threadLocalList); } /** * 用于使用前导入数据,相同key只需要一次 * @param key * @param list */ public static void puts(String key,List<String> list) { Jedis jedis = jedisPool.getResource(); try (Pipeline pipeline = jedis.pipelined()) { for (String value : list) { long[] indexs = getIndexs(key, value); for (long index : indexs) { pipeline.setbit(getRedisKey(key, index), index % MAX_BIT, true); } } pipeline.sync(); } catch (IOException e) { e.printStackTrace(); } jedis.close(); } /** * 根据key获取bitmap下标 */ private static long[] getIndexs(String key,String value) { byte[] bytes = hash(value); long hash1 = lowerEight(bytes); long hash2 = upperEight(bytes); long combinedHash = hash1; int numHashFunctions = (int) map.get(key + "numHashFunctions"); long numBits = (long) map.get(key + "numBits"); long[] result = new long[numHashFunctions]; for (int i = 0; i < numHashFunctions; i++) { combinedHash = (combinedHash & Long.MAX_VALUE) % numBits; result[i] = combinedHash; combinedHash += hash2; } return result; } /** * 计算hash函数个数,guava中的实现 * @param n * @param m * @return */ private static int optimalNumOfHashFunctions(long n, long m) { return Math.max(1, (int) Math.round((double) m / n * Math.log(2))); } /** * 计算bit数组长度,guava中的实现 * @param n * @param p * @return */ private static long optimalNumOfBits(long n, double p) { if (p == 0) { p = Double.MIN_VALUE; } double log2 = Math.log(2); double logP 
= Math.log(p); return (long)(-n * logP / log2 / log2); } /** * 获取一个hash值,,guava中的实现 */ private static byte[] hash(String key) { return murmur3_128().hashObject(key, stringFunnel(Charsets.UTF_8)).asBytes(); } /** * 取hash的前8位,,guava中的实现 * @param bytes * @return */ private static long lowerEight(byte[] bytes) { return Longs.fromBytes(bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]); } /** * 取hash的后8位,,guava中的实现 * @param bytes * @return */ private static long upperEight(byte[] bytes) { return Longs.fromBytes(bytes[15], bytes[14], bytes[13], bytes[12], bytes[11], bytes[10], bytes[9], bytes[8]); } /** * 获取redis键 * @param key * @param index * @return */ private static String getRedisKey(String key,long index) { return StringUtil.splicingString("bf-",key,":",(index >> BIT_NUM)); }