BloomFilter的一个简单实现(C语言)

/*BloomFilter 的简单设计*/
#include<stdio.h>
#include<stdlib.h>
#include<stdint.h>
#include<unistd.h>
#include<string.h>

#include<sys/types.h>
#include<sys/stat.h>
#include<fcntl.h>

#define HNUM 4 /* number of hash seeds kept per filter (one bit probe per seed) */

/*
 * State of one Bloom filter.
 *
 * hashf     : HNUM seed values. Each seed is passed to the hash routine as
 *             its starting value, so one routine yields HNUM different hashes.
 * bit_table : byte array that backs the bit set.
 * bitSize   : size of the bit set, counted in bits.
 */
struct BloomFilter {
	uint32_t  hashf[HNUM];
	uint8_t  *bit_table;
	uint32_t  bitSize;
};

typedef struct BloomFilter BFILTER;

 /* Single-bit masks: bitMask[n] has only bit n set, bit 0 being the least significant. */
 const unsigned char bitMask[8] = {
	1u << 0, /* 00000001 */
	1u << 1, /* 00000010 */
	1u << 2, /* 00000100 */
	1u << 3, /* 00001000 */
	1u << 4, /* 00010000 */
	1u << 5, /* 00100000 */
	1u << 6, /* 01000000 */
	1u << 7  /* 10000000 */
 };
 
/*
 * generate_seed - fill psBFilter->hashf with HNUM hash seeds.
 *
 * The seeds are derived from a fixed salt table, so every filter gets the
 * same seed values.
 */
void generate_seed(BFILTER *psBFilter)
{
	/* Fixed salt table; only the first HNUM entries are consumed. */
	const uint32_t predef_salt[16] = {
		0x0CD5DA28, 0x6E9E355A, 0x689B563E, 0x0C9831A8,
		0x6753C18B, 0xA622689B, 0x8CA63C47, 0x42CC2884,
		0x8E89919B, 0x6EDBD7D3, 0x15B6796C, 0x1D6FDFE4,
		0x63FF9092, 0xE7401432, 0xEFFE9412, 0xAEAEDF79,
	};
	uint32_t *seed = psBFilter->hashf;
	int k;

	/* Pass 1: start every seed from its raw salt. */
	for (k = 0; k < HNUM; ++k)
		seed[k] = predef_salt[k];

	/*
	 * Pass 2: in ascending order, mix each seed with the seed three slots
	 * ahead (wrapping around). Slots already mixed feed into later ones,
	 * so this pass must run after pass 1 has completed.
	 */
	k = 0;
	while (k < HNUM) {
		uint32_t partner = seed[(k + 3) % HNUM];

		seed[k] = seed[k] * partner + 0xA5A5A5A5;
		++k;
	}
}

 

 /*
  * hash_ap - hash a byte buffer.
  *
  * str:  bytes to hash
  * nlen: number of bytes to read from str
  * hash: starting value (seed); returned unchanged when nlen is 0
  *
  * Bytes are consumed two at a time, each byte of a pair going through its
  * own mixing step; a trailing odd byte gets the first mixing step only.
  */
uint32_t hash_ap(uint8_t* str, uint32_t nlen, uint32_t hash) {
	const uint32_t paired = nlen & ~(uint32_t)1; /* bytes covered by whole pairs */
	uint32_t pos;

	for (pos = 0; pos < paired; pos += 2) {
		const uint32_t first = str[pos];
		const uint32_t second = str[pos + 1];

		hash ^= (hash << 7) ^ (first * (hash >> 3));
		hash ^= ~((hash << 11) + (second ^ (hash >> 5)));
	}

	if (nlen & 1) {
		const uint32_t last = str[pos];

		hash ^= (hash << 7) ^ (last * (hash >> 3));
	}

	return hash;
}
 

/*
 * B_init - allocate a Bloom filter and optionally preload its bit table.
 *
 * tblSize: size of the bit table in bytes; the filter holds tblSize*8 bits.
 * fname:   if non-NULL, path of a file whose contents are read into the bit
 *          table. At most tblSize bytes are taken; a shorter file leaves the
 *          rest of the table zeroed. If NULL, the filter starts empty.
 *
 * Returns the new filter, owned by the caller (release it with B_free), or
 * NULL when tblSize is 0, when tblSize*8 does not fit in 32 bits, or when
 * memory cannot be allocated. A file that cannot be opened or read is
 * reported on stdout and the filter is still returned.
 */
BFILTER *B_init(uint32_t tblSize,char *fname) {
	BFILTER *psBFilter;

	/* bitSize is a uint32_t bit count: reject sizes it cannot represent. */
	if (tblSize == 0 || tblSize > UINT32_MAX / 8) {
		return NULL;
	}

	psBFilter = malloc(sizeof *psBFilter);
	if (psBFilter == NULL) {
		return NULL;
	}

	/* calloc hands back a zeroed table, i.e. an empty filter. */
	psBFilter->bit_table = calloc(tblSize, 1);
	if (psBFilter->bit_table == NULL) {
		free(psBFilter);
		return NULL;
	}
	psBFilter->bitSize = tblSize * 8;

	/* One hash routine, HNUM different seeds -> HNUM different hash values. */
	generate_seed(psBFilter);

	if (fname) {
		int fds = open(fname, O_RDONLY);

		if (fds < 0) {
			printf("%s,%d,can't open the file %s\n",__FILE__,__LINE__,fname);
		} else {
			const size_t chunk = 64 * 1024;
			size_t filled = 0;

			/* Never request more than the space left in the table. */
			while (filled < tblSize) {
				size_t want = tblSize - filled;
				ssize_t got;

				if (want > chunk) {
					want = chunk;
				}
				got = read(fds, psBFilter->bit_table + filled, want);
				if (got < 0) {
					printf("%s,%d,can't read the file %s\n",__FILE__,__LINE__,fname);
					break;
				}
				if (got == 0) {
					break; /* end of file */
				}
				filled += (size_t)got;
			}
			close(fds);
		}
	}
	return psBFilter;
}
/*
 * B_set - insert a key into the filter.
 *
 * pb:   filter to update
 * key:  bytes of the key
 * nLen: number of bytes in key
 *
 * For each of the HNUM seeds the key is hashed and the bit at
 * (hash % (bitSize - 1)) is set. The divisor is bitSize-1 rather than
 * bitSize; the original note here advises that the divisor should
 * preferably be prime. B_get probes with the same divisor.
 *
 * Does nothing when pb or its table is NULL, when nLen is negative, when key
 * is NULL with a positive nLen, or when the filter has fewer than 2 bits.
 */
void B_set(BFILTER *pb,char* key,int nLen) {
	uint32_t divisor;
	int i;

	if (pb == NULL || pb->bit_table == NULL || pb->bitSize < 2) {
		return;
	}
	if (nLen < 0 || (key == NULL && nLen > 0)) {
		return;
	}

	divisor = pb->bitSize - 1;
	for (i = 0; i < HNUM; i++) {
		/* Unsigned index: an int would go negative once bitSize exceeds 2^31. */
		uint32_t bitIndex = hash_ap((uint8_t *)key, (uint32_t)nLen, pb->hashf[i]) % divisor;

		pb->bit_table[bitIndex / 8] |= bitMask[bitIndex % 8];
	}
}
/*
 * B_get - test whether a key may have been inserted.
 *
 * pb:   filter to query
 * key:  bytes of the key
 * nLen: number of bytes in key
 *
 * Probes the same HNUM bit positions that B_set sets for this key.
 * Returns 1 when every probed bit is set (the key is possibly present) and
 * 0 as soon as one probed bit is clear (the key was never inserted).
 * Also returns 0 when pb or its table is NULL, when nLen is negative, when
 * key is NULL with a positive nLen, or when the filter has fewer than 2 bits.
 */
int B_get(BFILTER *pb,char* key,int nLen) {
	uint32_t divisor;
	int i;

	if (pb == NULL || pb->bit_table == NULL || pb->bitSize < 2) {
		return 0;
	}
	if (nLen < 0 || (key == NULL && nLen > 0)) {
		return 0;
	}

	divisor = pb->bitSize - 1;
	for (i = 0; i < HNUM; i++) {
		/* Unsigned index: an int would go negative once bitSize exceeds 2^31. */
		uint32_t bitIndex = hash_ap((uint8_t *)key, (uint32_t)nLen, pb->hashf[i]) % divisor;

		if ((pb->bit_table[bitIndex / 8] & bitMask[bitIndex % 8]) == 0) {
			return 0;
		}
	}
	return 1; /* all probed bits set */
}

/*
 * B_save - write the filter's bit table to a file.
 *
 * pb:    filter whose bit table (bitSize/8 bytes) is written
 * fname: destination path; created with mode 0600 or truncated if it exists
 *
 * Only the raw bit table is stored; the seeds are not written.
 * Returns 1 when the whole table was written and the file closed cleanly,
 * 0 otherwise (a failure is also reported on stdout).
 */
int B_save(BFILTER *pb,char * fname) {
	const size_t chunk = 64 * 1024;
	const uint8_t *cursor;
	size_t remaining;
	int ok = 1;
	int fd;

	if (pb == NULL || pb->bit_table == NULL || fname == NULL) {
		return 0;
	}

	fd = open(fname, O_RDWR | O_CREAT | O_TRUNC, 00600);
	if (fd < 0) {
		printf("wrong \n");
		return 0;
	}

	cursor = pb->bit_table;
	remaining = pb->bitSize / 8;
	while (remaining > 0) {
		size_t want = remaining < chunk ? remaining : chunk;
		ssize_t done = write(fd, cursor, want);

		/* Stop on error instead of looping forever on a failed write. */
		if (done <= 0) {
			ok = 0;
			break;
		}
		/* write() may be partial: advance by what was actually written. */
		cursor += done;
		remaining -= (size_t)done;
	}

	if (close(fd) != 0) {
		ok = 0;
	}
	if (!ok) {
		printf("wrong \n");
	}
	return ok;
}

/*
 * B_free - release a filter and its bit table.
 *
 * pb: filter to release; may be NULL
 *
 * Returns 1 when a filter was released and 0 when pb was NULL.
 * The caller's own pointer is not modified and must not be used afterwards.
 */
int B_free(BFILTER *pb){
	if (pb == NULL) {
		return 0;
	}
	free(pb->bit_table);
	free(pb);
	return 1;
}
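/* False-positive estimate for a filter of m bits holding n keys, with k hash functions:
   p0 = (1 - 1/m)^(n*k)   probability that a given bit is still 0 after all insertions
   p  = (1 - p0)^k        probability that all k probed bits are 1 for a key never inserted
*/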
/*
 * Demo: build an 80-bit filter, insert ten sample keys, query one inserted
 * key and one that was never inserted, then save the bit table to "bf.txt".
 *
 * Exits with EXIT_FAILURE when the filter cannot be created or saved.
 */
int main(void)
{
	char *s[10]={"1234","agdfsdjglsdjgkdlf","sdlfserer545","345kjgb","12312cad",
			"dsfs","345892qs","pionn98765234","0099888777","mvnnvvnx."};
	const size_t count = sizeof s / sizeof s[0];
	int status = EXIT_SUCCESS;
	BFILTER *bf;
	size_t i;

	bf = B_init(10, NULL);
	if (bf == NULL) {
		printf("can't create the filter\n");
		return EXIT_FAILURE;
	}

	for (i = 0; i < count; i++) {
		B_set(bf, s[i], (int)strlen(s[i]));
	}

	if (B_get(bf, "dsfs", 4)) {
		printf("found \n");
	} else {
		printf("not found dsfs\n");
	}

	if (B_get(bf, "12dsfs", 6)) {
		printf("found \n");
	} else {
		printf("not found\n");
	}

	if (!B_save(bf, "bf.txt")) {
		status = EXIT_FAILURE;
	}
	B_free(bf);
	return status;
}


 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值