布隆过滤器实现对ip过滤，以及ip加入黑名单

qq154532295

已于 2023-04-14 21:00:53 修改

阅读量542

点赞数 2

文章标签： java ip

于 2023-04-14 19:26:51 首次发布

本文链接：https://blog.csdn.net/qq154532295/article/details/130160406

版权

Java 通过布隆过滤器对 IP 地址进行过滤

add添加一条数据

/**
 * Records {@code data} in the filter by setting one bit per seed.
 *
 * @param data the string to add
 */
public void add(String data) {
    // Give the filter a chance to reset itself before inserting.
    checkNeedClear();
    lock.lock();
    try {
        // All bits belonging to one element are written inside a single critical section.
        for (int seed : seeds) {
            bitMap.set(getHash(data, seed));
        }
    } finally {
        lock.unlock();
    }
}

check查询一条数据

/**
 * Tests whether every bit derived from {@code data} is set.
 *
 * @param data the string to look up
 * @return {@code false} as soon as one of the element's bits is clear,
 *         {@code true} when all of them are set
 */
public boolean check(String data) {
    lock.lock();
    try {
        for (int seed : seeds) {
            if (!bitMap.get(getHash(data, seed))) {
                return false;
            }
        }
        return true;
    } finally {
        lock.unlock();
    }
}

获取 Hash 值：模仿 String 自带的 hashCode() 的多项式计算方式，并对位图大小取模

/**
 * Polynomial hash of {@code data}: starts at 1 and folds in each char as
 * {@code (acc * base + ch) % size}.
 *
 * @param data the string to hash
 * @param base multiplier applied at every step
 * @return the accumulated value reduced modulo {@code size}
 */
public int getHash(String data, int base) {
    long acc = 1;
    for (int idx = 0, len = data.length(); idx < len; idx++) {
        acc = (acc * base + data.charAt(idx)) % size;
    }
    return (int) (acc % size);
}

主要项目结构
在这里插入图片描述

主要代码 JudgeRate枚举类

package com.cn.bl.enm;
/**
 * Seed sets for the Bloom filter's hash functions: one hash function per seed.
 *
 * <p>More seeds mean more bits per element and a lower false-positive rate. The figures
 * quoted by the original author (not verified here) are roughly 14% for 4 seeds, 2% for
 * 8 seeds, 0.4% for 16 seeds and 0.002% for 32 seeds.
 * NOTE(review): the author derives these as powers of 0.14, but 0.14^4 is about 0.04%,
 * not 0.4% -- confirm the real rates by measurement.
 *
 * @author WangZhiHua
 */
public enum JudgeRate {
    /* Prime numbers are used as the hash bases. */

    /** Four hash functions per element. */
    BASE_4(new int[] { 2, 3, 5, 7 }),
    /** Eight hash functions per element. */
    BASE_8(new int[] { 2, 3, 5, 7, 11, 13, 17, 19 }),
    /** Sixteen hash functions per element. */
    BASE_16(new int[] { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53 }),
    /** Thirty-two hash functions per element. */
    BASE_32(new int[] { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97,
            101, 103, 107, 109, 113, 127, 131 });

    /** Hash bases of this constant; never handed out or stored without copying. */
    private int[] seeds;

    JudgeRate(int[] seeds) {
        this.seeds = seeds;
    }

    /**
     * Returns the hash bases of this constant.
     *
     * @return a fresh copy, so callers cannot alter the enum's shared state through it
     */
    public int[] getSeeds() {
        return seeds.clone();
    }

    /**
     * Replaces the hash bases of this constant.
     *
     * <p>Enum constants are shared by the whole JVM, so this changes the seeds seen by every
     * later {@link #getSeeds()} call; filters already holding bits computed with the old
     * seeds would no longer match.
     *
     * @param seeds the new bases; copied, so later changes to the argument have no effect
     * @throws NullPointerException if {@code seeds} is {@code null}
     */
    public void setSeeds(int[] seeds) {
        this.seeds = seeds.clone();
    }
}

BloomFilter类

package com.cn.bl.util;

import com.cn.bl.enm.JudgeRate;
import org.springframework.stereotype.Component;

import java.util.BitSet;
import java.util.concurrent.locks.ReentrantLock;

/**
 * Bloom filter over strings, backed by a {@link BitSet}.
 *
 * <p>The bit array holds {@code seeds.length * EXPECTED_INSERTIONS} bits and every element
 * sets one bit per seed. All reads and writes of the bit array happen while holding a
 * single {@link ReentrantLock}.
 *
 * <p>The filter wipes itself once the share of set bits reaches {@code AUTO_CLEAR_RATIO};
 * elements added before a wipe are reported as absent afterwards.
 *
 * @author Wang
 */
@Component
public class BloomFilter {

    /** Seed set; its length is both the number of hash functions and the bits budgeted per element. */
    private static final JudgeRate RATE = JudgeRate.BASE_16;

    /** Number of elements the bit array is sized for. */
    private static final int EXPECTED_INSERTIONS = 100000;

    /** Fill ratio (set bits / total bits) at or above which the filter is cleared. */
    private static final double AUTO_CLEAR_RATIO = 0.95;

    /** Hash bases, one per hash function. */
    private final int[] seeds;

    /** The bit array. */
    private final BitSet bitMap;

    /** Number of addressable bits; every hash is reduced modulo this value. */
    private final int size;

    /** Guards {@code bitMap} and {@code setBits}. */
    private final ReentrantLock lock = new ReentrantLock();

    /** Number of bits currently set; kept in step with {@code bitMap} so no full scan is needed. */
    private int setBits;

    /**
     * Sizes the bit array from the configured seed set and expected element count.
     *
     * @throws IllegalStateException if the required number of bits does not fit in an {@code int}
     */
    public BloomFilter() {
        // Private copy so changes to the array handed out by the enum cannot affect this filter.
        seeds = RATE.getSeeds().clone();
        // Widen before multiplying: an int product would wrap around before the range check.
        long bitSize = (long) seeds.length * EXPECTED_INSERTIONS;
        if (bitSize < 0 || bitSize > Integer.MAX_VALUE) {
            throw new IllegalStateException("位数太大溢出了，降低数据大小");
        }
        size = (int) bitSize;
        bitMap = new BitSet(size);
    }

    /**
     * Adds one element, clearing the filter first if it has become too full.
     *
     * @param data the string to add
     */
    public void add(String data) {
        lock.lock();
        try {
            // The lock is reentrant, so the fullness check and the insert form one atomic step.
            checkNeedClear();
            for (int seed : seeds) {
                int hashIndex = getHash(data, seed);
                if (!bitMap.get(hashIndex)) {
                    bitMap.set(hashIndex);
                    setBits++;
                }
            }
        } finally {
            lock.unlock();
        }
    }

    /**
     * Tests whether every bit derived from {@code data} is set.
     *
     * @param data the string to look up
     * @return {@code false} if at least one of the element's bits is clear, meaning the element
     *         is not in the filter; {@code true} if all of its bits are set
     */
    public boolean check(String data) {
        lock.lock();
        try {
            for (int seed : seeds) {
                if (!bitMap.get(getHash(data, seed))) {
                    return false;
                }
            }
            return true;
        } finally {
            lock.unlock();
        }
    }

    /**
     * Clears the whole filter when the share of set bits has reached {@code AUTO_CLEAR_RATIO}.
     */
    public void checkNeedClear() {
        lock.lock();
        try {
            if ((double) setBits / size >= AUTO_CLEAR_RATIO) {
                bitMap.clear();
                setBits = 0;
            }
        } finally {
            lock.unlock();
        }
    }

    /**
     * Polynomial hash of {@code data}: starts at 1 and folds in each char as
     * {@code (res * base + ch) % size}.
     *
     * @param data the string to hash
     * @param base multiplier applied at every step
     * @return the accumulated value reduced modulo {@code size}
     */
    public int getHash(String data, int base) {
        long res = 1;
        for (int i = 0, len = data.length(); i < len; i++) {
            // res stays below size (an int), so the long product cannot overflow.
            res = (res * base + data.charAt(i)) % size;
        }
        return (int) (res % size);
    }

}

测试类

package com.cn.bl;

import com.cn.bl.util.BloomFilter;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;

import java.util.HashSet;
import java.util.Random;

/**
 * Spring Boot test that compares the {@link BloomFilter} bean with a {@link HashSet}
 * on randomly generated IPv4 address strings.
 */
@SpringBootTest
class BlApplicationTests {

    /** Number of addresses inserted, and also the number of addresses looked up. */
    private static final int SAMPLE_COUNT = 100000;

    @Autowired
    private BloomFilter bloomFilter;

    @Test
    void contextLoads() {
    }

    /**
     * Inserts {@code SAMPLE_COUNT} random addresses into both structures, counts lookups on
     * which they disagree, and prints rough lookup timings for each.
     *
     * <p>Author's note (unverified): for one million strings the Bloom filter is said to need
     * about 7 MB, and a HashSet about 96 times as much. This test uses 100,000 strings.
     */
    @Test
    void testBoolFilterTime() {
        Random r = new Random();
        // Addresses to insert, then an independent batch of addresses to look up.
        String[] ipStr = randomIps(r, SAMPLE_COUNT);
        String[] testStr = randomIps(r, SAMPLE_COUNT);

        HashSet<String> set = new HashSet<>();
        int res = 0;
        for (String ip : ipStr) {
            bloomFilter.add(ip);
            if (set.add(ip)) {
                res++;
            }
        }
        System.out.println("插入有效字符串："+res+"个");

        int fail = 0;
        for (String candidate : testStr) {
            boolean check = bloomFilter.check(candidate);
            boolean contains = set.contains(candidate);
            if (contains != check) {
                System.out.println(candidate);
                fail++;
            }
        }
        System.out.println("错了: "+fail+"个");
        // The rate is mismatches per lookup, so divide by the number of lookups,
        // not by the number of distinct inserted strings.
        System.out.println("错误率: "+1.0*fail/testStr.length*100+"%");

        long st1 = System.currentTimeMillis();
        for (String candidate : testStr) {
            bloomFilter.check(candidate);
        }
        long ed1 = System.currentTimeMillis();
        System.out.println("布隆过滤器比较100000个ip用了: "+(ed1-st1));

        long st2 = System.currentTimeMillis();
        for (String candidate : testStr) {
            set.contains(candidate);
        }
        long ed2 = System.currentTimeMillis();
        System.out.println("哈希表比较100000个ip用了: "+(ed2-st2));
    }

    /** Builds {@code count} dotted-quad strings whose four parts are each drawn from 0..255. */
    private static String[] randomIps(Random random, int count) {
        String[] ips = new String[count];
        for (int i = 0; i < count; i++) {
            int a1 = random.nextInt(256);
            int a2 = random.nextInt(256);
            int a3 = random.nextInt(256);
            int a4 = random.nextInt(256);
            ips[i] = "" + a1 + "." + a2 + "." + a3 + "." + a4;
        }
        return ips;
    }

}