本文只是简单地体会 Bloom Filter 算法的基本原理,设计并实现一个生成100万个不重复随机数的程序。
选择3个分布均匀的质数。质数的选择还是挺有讲究的:要注意不能太小,必须与 Bloom Filter 的空间大小相匹配,不然还没有找到100万个不重复的随机数,整个空间就已经全是1了。不多说,上代码。
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<cstdbool>
/* Size of the Bloom filter in bits; the filter is allocated as MAXNUM / 8 bytes. */
#define MAXNUM 10000000
/*
 * Map an integer key to a bucket index by reducing it modulo select_number.
 *
 * dst:            key to hash; may be negative.
 * select_number:  number of buckets; must be positive.
 *
 * Returns a value in [0, select_number).  C's % operator yields a negative
 * remainder for a negative dst, which cannot be used as an array or bit
 * index, so such remainders are shifted back into range.  A non-positive
 * select_number is treated as invalid and yields 0 instead of dividing by
 * zero.
 */
int hash_fuction(int dst, int select_number)
{
    if (select_number <= 0) {
        return 0;
    }

    int remainder = dst % select_number;
    if (remainder < 0) {
        /* remainder is in (-select_number, 0), so this cannot overflow. */
        remainder += select_number;
    }
    return remainder;
}
/*
 * Produce generate_number pairwise-distinct pseudo-random integers, each in
 * [0, maxValue), using a Bloom filter to reject any candidate that may
 * already have been emitted.
 *
 * Candidates are drawn from rand(); this function does not seed the
 * generator.  Each candidate is hashed with three modulo hashes.  If at least
 * one of its three filter bits is still clear, the candidate has definitely
 * not been emitted before: it is stored and its bits are set.  If all three
 * bits are set the candidate is discarded and a new one is drawn (this also
 * discards some unseen values - Bloom filter false positives).
 *
 * generate_number:  how many values to produce.
 * maxValue:         exclusive upper bound of the produced values.
 *
 * Returns a malloc'ed array of generate_number ints that the caller must
 * free(), or NULL when
 *   - generate_number is negative or maxValue is not positive,
 *   - generate_number exceeds maxValue (not enough distinct values exist),
 *   - generate_number exceeds the largest hash modulus (every accepted value
 *     sets at least one previously clear bit, and only that many bits are
 *     reachable, so the request could never complete), or
 *   - memory allocation fails.
 *
 * Termination is probabilistic: requests close to the limits above may need
 * a very large number of draws.
 */
int * byte_bloomfilter_random(int generate_number, int maxValue)
{
    /* Moduli of the three hash functions (three primes). */
    static const int moduli[3] = { 524287, 1046527, 3967 };
    enum { HASH_COUNT = 3, LARGEST_MODULUS = 1046527 };

    unsigned char *bloomfilter;
    int *dst;

    if (generate_number < 0 || maxValue <= 0) {
        return NULL;
    }
    if (generate_number > maxValue || generate_number > LARGEST_MODULUS) {
        return NULL;
    }

    /*
     * calloc hands back a zeroed bit array.  Every bit index used below is
     * smaller than LARGEST_MODULUS, so MAXNUM must not be smaller than that.
     * unsigned char keeps the bit operations independent of the signedness of
     * plain char.  The casts keep the code valid when built as C++.
     */
    bloomfilter = (unsigned char *)calloc((size_t)MAXNUM / 8, sizeof(unsigned char));
    /* Allocate at least one element so that NULL always means failure. */
    dst = (int *)malloc((size_t)(generate_number > 0 ? generate_number : 1) * sizeof(int));
    if (bloomfilter == NULL || dst == NULL) {
        free(bloomfilter);
        free(dst);
        return NULL;
    }

    for (int i = 0; i < generate_number; i++) {
        for (;;) {
            size_t byte_index[HASH_COUNT];
            unsigned char mask[HASH_COUNT];
            int maybe_seen = 1;

            /*
             * Multiply in long long: rand() * rand() overflows int when
             * RAND_MAX is large, which is undefined behaviour and in practice
             * produced negative candidates and negative bit indices.
             */
            int candidate = (int)((long long)rand() * rand() % maxValue);

            for (int k = 0; k < HASH_COUNT; k++) {
                int bit = hash_fuction(candidate, moduli[k]);

                byte_index[k] = (size_t)bit >> 3;
                mask[k] = (unsigned char)(1u << (bit & 7));
                if ((bloomfilter[byte_index[k]] & mask[k]) == 0) {
                    maybe_seen = 0;
                }
            }

            if (maybe_seen) {
                continue; /* possibly emitted before: draw another candidate */
            }

            for (int k = 0; k < HASH_COUNT; k++) {
                bloomfilter[byte_index[k]] =
                    (unsigned char)(bloomfilter[byte_index[k]] | mask[k]);
            }
            dst[i] = candidate;
            break;
        }
    }

    free(bloomfilter);
    return dst;
}