#include<cassert>
#include<fstream>
#include<iostream>
#include<list>
#include<string>
#include<vector>
#include<boost/filesystem.hpp>
#include<boost/filesystem/path.hpp>
#include "hashFun.h.h"
//#include<boost/uuid/uuid_generators.hpp>
//#include<boost/uuid/uuid.hpp>
//#include<boost/uuid/uuid_io.hpp>
/*
* bloom.h
*
* Created on: 2012-2-22
* Author: xiaojay
*/
#ifndef BLOOM_H_
#define BLOOM_H_
/**
 * Bloom filter over C strings.
 *
 * Membership is recorded as bits in a heap-allocated byte array. Each
 * HashFun in the list maps a string to a bit position; add() sets those
 * bits and check() reports whether all of them are set.
 *
 * The HashFun objects are borrowed: the destructor releases only the bit
 * array, so callers must keep the hash objects alive for as long as the
 * filter is used and release them themselves.
 */
class Bloom
{
public:
    /**
     * @param size          number of bytes in the bit array (the filter
     *                      holds size * CHARBITSIZE bits).
     * @param hashfunclist  hash functions used to derive bit positions;
     *                      must not be empty.
     */
    Bloom(int size, std::vector<HashFun*> hashfunclist);
    ~Bloom();
    /** Records text in the filter. */
    void add(const char * text);
    /**
     * @return false if text was definitely not added; true if every bit
     *         selected by the hash functions is set (which can also happen
     *         for text that was never added).
     */
    bool check(const char * text);
private:
    // The class owns `arr` through a raw pointer, so a compiler-generated
    // copy would make two objects free the same buffer. Copying is disabled
    // by declaring these private and leaving them undefined.
    Bloom(const Bloom&);
    Bloom& operator=(const Bloom&);

    const static int CHARBITSIZE = 8;       // bits stored per array element
    int size;                               // length of arr, in bytes
    char * arr;                             // bit array, owned by this object
    std::vector<HashFun*> hashfunclist;     // borrowed hash functions
    inline void setbit(long pos);           // sets bit number pos
    inline bool getbit(long pos);           // tests bit number pos
};
#endif
/**
 * Builds an empty filter.
 *
 * @param size          number of bytes to allocate for the bit array;
 *                      must be positive.
 * @param hashfunclist  hash functions used by add() and check(); must
 *                      contain at least one entry. The pointers are stored
 *                      as given.
 */
Bloom::Bloom(int size, std::vector<HashFun*> hashfunclist)
{
    assert(size > 0);
    assert(hashfunclist.size() > 0);
    this->size = size;
    this->hashfunclist = hashfunclist;
    // The trailing () value-initialises the array so every bit starts at 0.
    // Without it the buffer holds indeterminate bytes and check() could
    // report entries that were never added.
    this->arr = new char[size]();
}
/**
 * Releases the bit array. The HashFun objects referenced by the filter are
 * not touched.
 */
Bloom::~Bloom()
{
    // arr comes from new char[...], so it must be released with delete[];
    // plain delete on an array allocation is undefined behaviour.
    // delete[] on a null pointer is a no-op, so no guard is needed.
    delete[] this->arr;
}
void Bloom::add(const char * text)
{
int nfunc = hashfunclist.size();
long code = 0;
for (int i = 0; i<nfunc; i++)
{
code = hashfunclist.at(i)->gethashval(text);
if (code / CHARBITSIZE>size) return;
else
{
setbit(code);
}
}
}
bool Bloom::check(const char * text)
{
int nfunc = hashfunclist.size();
long code = 0;
for (int i = 0; i<nfunc; i++)
{
code = hashfunclist.at(i)->gethashval(text);
if (code / CHARBITSIZE>size)
return false;
else
{
if (getbit(code))
continue;
else
return false;
}
}
return true;
}
/**
 * Turns on bit number `code` of the bit array. No range checking is done
 * here.
 */
inline void Bloom::setbit(long code)
{
    const long byteIndex = code / CHARBITSIZE;      // which array element
    const int mask = 1 << (code % CHARBITSIZE);     // which bit inside it
    arr[byteIndex] |= mask;
}
/**
 * Reports whether bit number `code` of the bit array is set. No range
 * checking is done here.
 */
inline bool Bloom::getbit(long code)
{
    const long byteIndex = code / CHARBITSIZE;      // which array element
    const int mask = 1 << (code % CHARBITSIZE);     // which bit inside it
    return (arr[byteIndex] & mask) != 0;
}
/**
 * String hash that folds each byte into the state with shifts, additions
 * and an exclusive-or.
 */
class HashFunA : public HashFun
{
public:
    /**
     * @param key  NUL-terminated string to hash.
     * @return a value in the range [0, 79999].
     */
    virtual long gethashval(const char * key)
    {
        unsigned int hash = 0;
        for (const char * cursor = key; *cursor != '\0'; ++cursor)
        {
            const unsigned int mixed =
                (hash << 5) + (hash >> 2) + static_cast<unsigned char>(*cursor);
            hash ^= mixed;
        }
        return hash % 80000;
    }
};
/**
 * String hash that combines each byte with two left-shifted copies of the
 * state minus the state itself (the recurrence used by the sdbm hash).
 */
class HashFunB : public HashFun
{
public:
    /**
     * @param key  NUL-terminated string to hash.
     * @return a value in the range [0, 79999].
     */
    virtual long gethashval(const char * key)
    {
        unsigned int hash = 0;
        for (const char * cursor = key; *cursor != '\0'; ++cursor)
        {
            const unsigned int previous = hash;
            hash = static_cast<unsigned char>(*cursor)
                   + (previous << 6) + (previous << 16) - previous;
        }
        return hash % 80000;
    }
};
/**
 * Interactive demo: fills a Bloom filter with a few fixed words, then
 * reads words from standard input and reports whether each one is found.
 * The loop ends when standard input is exhausted or fails.
 */
int main()
{
    // Automatic storage: Bloom's destructor does not free the hash objects,
    // so keeping them on the stack releases them without manual delete.
    // They are declared before `bloom` and therefore outlive it.
    HashFunA funa;
    HashFunB funb;
    std::vector<HashFun*> hashfunclist;
    hashfunclist.push_back(&funa);
    hashfunclist.push_back(&funb);
    /*
    * Create Bloom object with two parameters :
    * size of the store array and list of hash functions
    */
    Bloom bloom(10000, hashfunclist);
    // Add some words to the bloom filter
    bloom.add("hello");
    bloom.add("world");
    bloom.add("ipad");
    bloom.add("iphone4");
    bloom.add("ipod");
    bloom.add("apple");
    bloom.add("banana");
    bloom.add("hello");
    /*
    * Test
    */
    // std::string grows as needed; reading into a fixed char[20] would
    // overflow on any word of 20 or more characters.
    std::string word;
    while (true)
    {
        std::cout << "Please input a word : " << std::endl;
        // Stop on EOF or a stream error instead of looping forever on a
        // failed stream.
        if (!(std::cin >> word))
        {
            break;
        }
        if (bloom.check(word.c_str()))
        {
            std::cout << "Word :" << word << " has been set in bloom filter." << std::endl;
        }
        else
        {
            std::cout << "Word :" << word << " not exist !" << std::endl;
        }
    }
    return 0;
}
// Bloom Filter algorithm implementation
// (Source web page footer: latest recommended article published 2022-04-02 21:46:06)