package com.study.jpa.begin.util;
/**
 * Bloom filter over string keys.
 *
 * <p>Each key is mapped to three slots by three different hash functions.
 * {@link #add(String)} marks those slots; {@link #check(String)} reports
 * {@code true} only when all three slots are marked. A {@code false} answer is
 * definitive; a {@code true} answer may be a false positive caused by other keys
 * that marked the same slots.
 */
public class BloomFilters {
    /**
     * Number of slots in the filter.
     */
    private final int arraySize;
    /**
     * Slot table; a slot is {@code true} once some added key has hashed to it.
     */
    private final boolean[] array;

    /**
     * Creates an empty filter.
     *
     * @param arraySize number of slots; must be positive
     * @throws IllegalArgumentException if {@code arraySize} is zero or negative
     */
    public BloomFilters(int arraySize) {
        if (arraySize <= 0) {
            throw new IllegalArgumentException("arraySize must be positive: " + arraySize);
        }
        this.arraySize = arraySize;
        this.array = new boolean[arraySize];
    }

    /**
     * Records a key in the filter by marking its three slots.
     *
     * @param key the key to add; must not be {@code null}
     */
    public void add(String key) {
        array[multiplicativeHash(key) % arraySize] = true;
        array[fnvHash(key) % arraySize] = true;
        array[oneAtATimeHash(key) % arraySize] = true;
    }

    /**
     * Tests whether a key may have been added.
     *
     * @param key the key to look up; must not be {@code null}
     * @return {@code false} if at least one of the key's three slots is unmarked,
     *         {@code true} if all three are marked
     */
    public boolean check(String key) {
        // Short-circuits: later hashes are only computed when earlier slots are marked.
        return array[multiplicativeHash(key) % arraySize]
                && array[fnvHash(key) % arraySize]
                && array[oneAtATimeHash(key) % arraySize];
    }

    /**
     * Maps a raw hash to a non-negative int.
     *
     * <p>{@code Math.abs(Integer.MIN_VALUE)} is still {@code Integer.MIN_VALUE}, which
     * would produce a negative slot index after {@code %}; that single value is mapped
     * to 0. Every other value keeps its {@code Math.abs} result.
     */
    private static int nonNegative(int hash) {
        return hash == Integer.MIN_VALUE ? 0 : Math.abs(hash);
    }

    /**
     * Hash 1: multiplies the running hash by 33 and adds each character.
     *
     * @param key the string to hash
     * @return a non-negative hash value
     */
    private static int multiplicativeHash(String key) {
        int hash = 0;
        for (int i = 0; i < key.length(); ++i) {
            hash = 33 * hash + key.charAt(i);
        }
        return nonNegative(hash);
    }

    /**
     * Hash 2: xor-then-multiply per character (multiplier 16777619, seed 2166136261
     * truncated to int), followed by a fixed sequence of shift/add/xor mixing steps.
     *
     * @param data the string to hash
     * @return a non-negative hash value
     */
    private static int fnvHash(String data) {
        final int p = 16777619;
        int hash = (int) 2166136261L;
        for (int i = 0; i < data.length(); i++) {
            hash = (hash ^ data.charAt(i)) * p;
        }
        hash += hash << 13;
        hash ^= hash >> 7;
        hash += hash << 3;
        hash ^= hash >> 17;
        hash += hash << 5;
        return nonNegative(hash);
    }

    /**
     * Hash 3: adds each character, then mixes with shift-add and shift-xor steps,
     * with a final three-step mix after the loop.
     *
     * @param key the string to hash
     * @return a non-negative hash value
     */
    private static int oneAtATimeHash(String key) {
        int hash = 0;
        for (int i = 0; i < key.length(); ++i) {
            hash += key.charAt(i);
            hash += (hash << 10);
            hash ^= (hash >> 6);
        }
        hash += (hash << 3);
        hash ^= (hash >> 11);
        hash += (hash << 15);
        return nonNegative(hash);
    }

    /**
     * Demo: adds the decimal strings of 0..9,999,999, looks one up, and prints the
     * result and the elapsed time in milliseconds.
     */
    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        BloomFilters bloomFilters = new BloomFilters(10000000);
        for (int i = 0; i < 10000000; i++) {
            bloomFilters.add(i + "");
        }
        System.out.println(bloomFilters.check(8783 + ""));
        long end = System.currentTimeMillis();
        System.out.println("执行时间:" + (end - start));
    }
}
package com.study.jpa.begin.util;
import java.util.ArrayList;
import java.util.List;
/**
 * Fixed-capacity bitmap for non-negative ints, packed 32 bits per {@code int} word.
 *
 * <p>Value {@code n} is stored at bit {@code n & 0x1F} of word {@code n >> 5}.
 */
public class BitMap {
    private static final int N = 10000000;
    /** Backing words; {@code N / 32 + 1} of them, so every value in {@code [0, N]} fits. */
    private final int[] a = new int[N / 32 + 1];

    /**
     * Sets the bit for {@code n} to 1.
     *
     * @param n the value to record
     * @throws ArrayIndexOutOfBoundsException if {@code n} is negative or its word index
     *         {@code n >> 5} is beyond the backing array
     */
    public void addValue(int n) {
        // Word index: equivalent to n / 32 for non-negative n.
        int row = n >> 5;
        // Bit position inside the word: equivalent to n % 32 for non-negative n.
        a[row] |= 1 << (n & 0x1F);
    }

    /**
     * Tests whether the bit for {@code n} is set.
     *
     * @param n the value to look up
     * @return {@code true} if {@code n} was previously passed to {@link #addValue(int)}
     * @throws ArrayIndexOutOfBoundsException if {@code n} is negative or its word index
     *         {@code n >> 5} is beyond the backing array
     */
    public boolean exits(int n) {
        int row = n >> 5;
        // The masked word is either 0 or the single-bit mask (which is negative for
        // bit 31), so the bit is set exactly when the result is non-zero.
        return (a[row] & (1 << (n & 0x1F))) != 0;
    }

    /**
     * Prints the first {@code row} words, one per line, as lists of 32 bits ordered
     * from least significant to most significant.
     *
     * @param row number of words to print, starting at word 0
     * @throws ArrayIndexOutOfBoundsException if {@code row} exceeds the number of words
     */
    public void display(int row) {
        System.out.println("BitMap位图展示");
        for (int i = 0; i < row; i++) {
            List<Integer> bits = new ArrayList<Integer>();
            int word = a[i];
            for (int j = 0; j < 32; j++) {
                bits.add(word & 1);
                word >>= 1;
            }
            System.out.println("a[" + i + "]" + bits);
        }
    }

    /**
     * Demo: records 0..9,999,999, looks one value up, and prints the elapsed time in
     * milliseconds.
     */
    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        BitMap map = new BitMap();
        for (int i = 0; i < 10000000; i++) {
            map.addValue(i);
        }
        int temp = 8783;
        if (map.exits(temp)) {
            System.out.println("temp:" + temp + "has already exists");
        }
        long end = System.currentTimeMillis();
        System.out.println("执行时间:" + (end - start));
    }
}
对于整数类型的海量数据,判断某个值是否存在时,BitMap 算法比布隆过滤器算法更快。