目录
1.位图
位图的简单实现
#pragma once
namespace bit
{
template<size_t N>
class BitSet
{
public:
BitSet()
{
_bits.resize(N / 32 + 1, 0);
}
// 把x映射的位标记成1
void Set(size_t x)
{
assert(x < N);
// 算出x映射的位在第i个整数
// 算出x映射的位在这个整数的第j个位
size_t i = x / 32;
size_t j = x % 32;
// _bits[i] 的第j位标记成1,并且不影响他的其他位
_bits[i] |= (1 << j);
}
void Reset(size_t x)
{
assert(x < N);
size_t i = x / 32;
size_t j = x % 32;
// _bits[i] 的第j位标记成0,并且不影响他的其他位
_bits[i] &= (~(1 << j));
}
bool Test(size_t x)
{
assert(x < N);
size_t i = x / 32;
size_t j = x % 32;
// 如果第j位是1,结果是非0,非0就是真
// 如果第j为是0,结果是0,0就是假
return _bits[i] & (1 << j);
}
private:
vector<int> _bits;
};
void TestBitSet()
{
//BitSet<4294967295u> bs;
BitSet<-1> bs;
}
}
位图相关练习题
2.布隆过滤器
#pragma once
#include "BitSet.h"
//三种字符串哈希算法
struct HashBKDR
{
// "int" "insert"
// 字符串转成对应一个整形值,因为整形才能取模算映射位置
// 期望->字符串不同,转出的整形值尽量不同
// "abcd" "bcad"
// "abbb" "abca"
size_t operator()(const std::string& s)
{
// BKDR Hash
size_t value = 0;
for (auto ch : s)
{
value += ch;
value *= 131;
}
return value;
}
};
struct HashAP
{
// "int" "insert"
// 字符串转成对应一个整形值,因为整形才能取模算映射位置
// 期望->字符串不同,转出的整形值尽量不同
// "abcd" "bcad"
// "abbb" "abca"
size_t operator()(const std::string& s)
{
// AP Hash
register size_t hash = 0;
size_t ch;
for (long i = 0; i < s.size(); i++)
{
ch = s[i];
if ((i & 1) == 0)
{
hash ^= ((hash << 7) ^ ch ^ (hash >> 3));
}
else
{
hash ^= (~((hash << 11) ^ ch ^ (hash >> 5)));
}
}
return hash;
}
};
struct HashDJB
{
// "int" "insert"
// 字符串转成对应一个整形值,因为整形才能取模算映射位置
// 期望->字符串不同,转出的整形值尽量不同
// "abcd" "bcad"
// "abbb" "abca"
size_t operator()(const std::string& s)
{
// BKDR Hash
register size_t hash = 5381;
for (auto ch : s)
{
hash += (hash << 5) + ch;
}
return hash;
}
};
template<size_t N, class K = std::string,
class Hash1 = HashBKDR,
class Hash2 = HashAP,
class Hash3 = HashDJB>
class BloomFilter
{
public:
void Set(const K& key)
{
//Hash1 hf1;
//size_t i1 = hf1(key);
size_t i1 = Hash1()(key) % N;
size_t i2 = Hash2()(key) % N;
size_t i3 = Hash3()(key) % N;
cout << i1 << " " << i2 << " " << i3 << endl;
_bitset.Set(i1);
_bitset.Set(i2);
_bitset.Set(i3);
}
bool Test(const K& key)
{
//分别判断三个映射位
size_t i1 = Hash1()(key) % N;
if (_bitset.Test(i1) == false)
{
return false;
}
size_t i2 = Hash2()(key) % N;
if (_bitset.Test(i2) == false)
{
return false;
}
size_t i3 = Hash3()(key) % N;
if (_bitset.Test(i3) == false)
{
return false;
}
// 这里3个位都在,有可能是其他key占了,在是不准确的,存在误判
// 不在是准确的
return true;
}
private:
bit::BitSet<N> _bitset;
bit::vector<char> _bitset;
};
void TestBloomFilter()
{
//简单测试
/*BloomFilter<100> bf;
bf.Set("张三");
bf.Set("李四");
bf.Set("牛魔王");
bf.Set("红孩儿");
cout << bf.Test("张三") << endl;
cout << bf.Test("李四") << endl;
cout << bf.Test("牛魔王") << endl;
cout << bf.Test("红孩儿") << endl;
cout << bf.Test("孙悟空") << endl;*/
BloomFilter<600> bf;
size_t N = 100;
std::vector<std::string> v1;
for (size_t i = 0; i < N; ++i)
{
std::string url = "https://mp.csdn.net/mp_blog/creation/editor/126629245";
url += std::to_string(1234 + i);
v1.push_back(url);
}
for (auto& str : v1)
{
bf.Set(str);
}
for (auto& str : v1)
{
cout << bf.Test(str) << endl;
}
cout << endl << endl;
std::vector<std::string> v2;
for (size_t i = 0; i < N; ++i)
{
std::string url = "https://blog.csdn.net/weixin_48953972/article/details/125643506";
url += std::to_string(6789 + i);
v2.push_back(url);
}
size_t n2 = 0;
for (auto& str : v2)
{
if (bf.Test(str))
{
++n2;
}
}
cout << "相似字符串误判率:" << (double)n2 / (double)N << endl;
std::vector<std::string> v3;
for (size_t i = 0; i < N; ++i)
{
std::string url = "https://zhuanlan.zhihu.com/p/43263751";
url += std::to_string(6789 + i);
v3.push_back(url);
}
size_t n3 = 0;
for (auto& str : v3)
{
if (bf.Test(str))
{
++n3;
}
}
cout << "不相似字符串误判率:" << (double)n3 / (double)N << endl;
}
3.哈希切分