哈希

cxpxatu521

已于 2022-02-23 11:50:08 修改

阅读量164

点赞数

分类专栏：数据结构和算法文章标签： hash

于 2021-08-18 08:01:42 首次发布

本文链接：https://blog.csdn.net/m0_51765966/article/details/119543144

版权

数据结构和算法专栏收录该内容

14 篇文章 1 订阅

订阅专栏

哈希

一.哈希概念

一种采用直接寻址方式（哈希函数）高效查找数据的数据结构

哈希冲突:不同关键字通过相同哈希函数计算出相同的哈希地址的现象

二.哈希冲突解决方案

1.闭散列

闭散列：也叫开放定址法，当发生哈希冲突时，如果哈希表未被装满，说明在哈希表中必然还有空位置，那么可以把新元素存放到冲突位置中的下一个空位置中去，而下一个空位置的查找又有线性探测和二次探测两种方式

(1)线性探测

线性探测：从发生冲突的位置开始，依次向后探测，直到寻找到下一个空位置为止

代码实现

#pragma once

#include <vector>

// State of a single slot in the closed-hashing (open-addressing) table.
enum State {
	EMPTY,	// never occupied; ends a probe sequence
	EXIST,	// holds a live element
	DELETE	// tombstone left by Erase; probing must continue past it
};

// One slot of the table: the stored value together with its state.
template<class V>
struct HashData {
	V _data;
	State _state = EMPTY;
};

// Closed-hashing hash table that resolves collisions by linear probing.
//   K      - key type; must support `key % size_t` and operator==
//   V      - stored value type
//   KeyOfV - functor that extracts the key from a stored value
template<class K, class V,class KeyOfV>
class HashTable {
public:
	// Qualified with `::` so the member alias does not change the meaning of
	// a name already used inside the class (GCC rejects the unqualified form).
	typedef ::HashData<V> HashData;

	// Inserts val. Returns false (and stores nothing) when an element with
	// the same key is already present, true otherwise.
	bool Insert(const V& val) 
	{
		KeyOfV kofv;// extracts the key from a value

		// Look the key up along the whole probe sequence first: stopping at
		// the first tombstone would allow a duplicate of a key stored later
		// in the sequence.
		if (Find(kofv(val)) != nullptr)
			return false;

		// Load factor = live elements / table size. Grow at >= 0.7 so that a
		// free slot always exists and probe sequences stay short.
		if (_tables.size() == 0 || _num * 10 / _tables.size() >= 7)
			Grow();

		// Linear probing: the first slot at or after the home position that
		// holds no live element (EMPTY or tombstone).
		size_t pos = kofv(val) % _tables.size();
		while (_tables[pos]._state == EXIST)
		{
			++pos;
			if (pos == _tables.size())
				pos = 0;
		}

		_tables[pos]._data = val;
		_tables[pos]._state = EXIST;
		++_num;

		return true;
	}

	// Returns a pointer to the slot holding key, or nullptr when absent.
	HashData* Find(const K& key)
	{
		// Nothing stored yet; also avoids `% 0`.
		if (_tables.empty())
			return nullptr;

		KeyOfV kofv;
		size_t pos = key % _tables.size();

		// At most size() probes: a table whose slots are all EXIST/DELETE has
		// no EMPTY slot to stop on, so the loop must be bounded.
		for (size_t probes = 0; probes < _tables.size(); ++probes)
		{
			HashData& slot = _tables[pos];
			if (slot._state == EMPTY)
				return nullptr;
			if (slot._state == EXIST && kofv(slot._data) == key)
				return &slot;

			++pos;
			if (pos == _tables.size())
				pos = 0;
		}

		return nullptr;// not found
	}

	// Removes key by turning its slot into a tombstone.
	// Returns true when an element was removed.
	bool Erase(const K& key)
	{
		HashData* pos = Find(key);
		if (pos != nullptr)
		{
			pos->_state = DELETE;
			--_num;
			return true;
		}
		return false;
	}
private:
	// Rebuilds the table at 10 slots (first use) or twice the current size.
	// Live elements are re-inserted through a temporary table, which also
	// drops all tombstones; only the slot vectors are swapped, so _num of
	// this object stays valid.
	void Grow()
	{
		HashTable<K, V, KeyOfV> bigger;
		bigger._tables.resize(_tables.empty() ? 10 : 2 * _tables.size());
		for (size_t i = 0; i < _tables.size(); ++i)
		{
			if (_tables[i]._state == EXIST)
				bigger.Insert(_tables[i]._data);
		}
		_tables.swap(bigger._tables);
	}

	std::vector<HashData> _tables;	// slots
	size_t _num = 0;				// number of live elements
};

(2)二次探测

二次探测:每次以二次方的形式递增，寻找下一个空位置

代码实现

#pragma once

#include <vector>

// State of a single slot in the closed-hashing (open-addressing) table.
enum State {
	EMPTY,	// never occupied; ends a probe sequence
	EXIST,	// holds a live element
	DELETE	// tombstone left by Erase; probing must continue past it
};

// One slot of the table: the stored value together with its state.
template<class V>
struct HashData {
	V _data;
	State _state = EMPTY;
};

// Closed-hashing hash table that resolves collisions by quadratic probing:
// the i-th probe inspects slot (home + i*i) % size.
//   K      - key type; must support `key % size_t` and operator==
//   V      - stored value type
//   KeyOfV - functor that extracts the key from a stored value
template<class K, class V,class KeyOfV>
class HashTable {
public:
	// Qualified with `::` so the member alias does not change the meaning of
	// a name already used inside the class (GCC rejects the unqualified form).
	typedef ::HashData<V> HashData;

	// Inserts val. Returns false (and stores nothing) when an element with
	// the same key is already present, true otherwise.
	bool Insert(const V& val) 
	{
		KeyOfV kofv;// extracts the key from a value

		// Check the whole probe sequence for the key before reusing a
		// tombstone, otherwise a duplicate could be stored.
		if (Find(kofv(val)) != nullptr)
			return false;

		// Load factor = live elements / table size; grow at >= 0.7.
		if (_tables.size() == 0 || _num * 10 / _tables.size() >= 7)
			Grow();

		for (;;)
		{
			const size_t size = _tables.size();
			const size_t start = kofv(val) % size;// home position

			// i*i mod size repeats with period size, so size probes visit
			// every slot this sequence can ever reach.
			for (size_t i = 0; i < size; ++i)
			{
				HashData& slot = _tables[(start + i * i) % size];
				if (slot._state != EXIST)
				{
					slot._data = val;
					slot._state = EXIST;
					++_num;
					return true;
				}
			}

			// The quadratic sequence does not reach every slot, so it can be
			// exhausted below the load-factor limit. Grow and retry instead
			// of probing forever.
			Grow();
		}
	}

	// Returns a pointer to the slot holding key, or nullptr when absent.
	// Follows the same quadratic sequence as Insert.
	HashData* Find(const K& key)
	{
		// Nothing stored yet; also avoids `% 0`.
		if (_tables.empty())
			return nullptr;

		KeyOfV kofv;
		const size_t size = _tables.size();
		const size_t start = key % size;

		for (size_t i = 0; i < size; ++i)
		{
			HashData& slot = _tables[(start + i * i) % size];
			if (slot._state == EMPTY)
				return nullptr;
			if (slot._state == EXIST && kofv(slot._data) == key)
				return &slot;
		}

		return nullptr;// not found
	}

	// Removes key by turning its slot into a tombstone.
	// Returns true when an element was removed.
	bool Erase(const K& key)
	{
		HashData* pos = Find(key);
		if (pos != nullptr)
		{
			pos->_state = DELETE;
			--_num;
			return true;
		}
		return false;
	}
private:
	// Rebuilds the table at 10 slots (first use) or twice the current size.
	// Live elements are re-inserted through a temporary table, which also
	// drops all tombstones; only the slot vectors are swapped, so _num of
	// this object stays valid.
	void Grow()
	{
		HashTable<K, V, KeyOfV> bigger;
		bigger._tables.resize(_tables.empty() ? 10 : 2 * _tables.size());
		for (size_t i = 0; i < _tables.size(); ++i)
		{
			if (_tables[i]._state == EXIST)
				bigger.Insert(_tables[i]._data);
		}
		_tables.swap(bigger._tables);
	}

	std::vector<HashData> _tables;	// slots
	size_t _num = 0;				// number of live elements
};

测试代码:

#include <iostream>
#include "Hash.h"

// Key extractor for set-like tables: the stored value is its own key,
// so the argument is handed back unchanged (by reference).
template<class K>
struct setKeyOfValue
{
	auto operator()(const K& value) -> const K&
	{
		return value;
	}
};

void test()
{
	HashTable<int, int,setKeyOfValue<int>> table;
	table.Insert(3);
	table.Insert(5);
	table.Insert(6);
	table.Insert(4);
	table.Insert(13);
	table.Insert(23);
	table.Insert(14);
	table.Insert(15);

	HashData<int> *target = table.Find(3);
	if (target != nullptr)
		std::cout << target->_data << std::endl;
	else
		std::cout << "Not found" << std::endl;

	table.Erase(3);
	table.Erase(23);
	table.Erase(4);
}
// Program entry point: runs the demo.
int main()
{
	test();
	// Reaching the end of main is equivalent to `return 0;`.
}

2.开散列

开散列法又叫链地址法,首先对关键码集合用散列函数计算散列地址，具有相同地址的关键码归于同一子集合，每一个子集合称为一个桶，各个桶中的元素通过一个单链表链接起来，各链表的头结点存储在哈希表中

#pragma once

//////////////////////////////////
//开散列
//////////////////////////////////

#include <vector>

// Node of a bucket's singly linked list.
template<class V>
struct HashNode {
	V _data;
	HashNode<V>* _next;
	HashNode(const V& data)
		:_data(data)
		, _next(nullptr)
	{}
};


// Open-hashing (separate chaining) hash table: every bucket is the head of a
// singly linked list of the elements whose key maps to that bucket.
//   K      - key type; must support `key % size_t` and operator==
//   V      - stored value type
//   KeyOfV - functor that extracts the key from a stored value
template<class K, class V, class KeyOfV>
class Hash {
	typedef HashNode<V> Node;
public:
	Hash() = default;

	// The table owns its nodes through raw pointers, so a member-wise copy
	// would make two tables free the same nodes. Copying is disabled.
	Hash(const Hash&) = delete;
	Hash& operator=(const Hash&) = delete;

	// Frees every node still held by the table.
	~Hash()
	{
		for (size_t i = 0; i < _tables.size(); ++i)
		{
			Node* cur = _tables[i];
			while (cur)
			{
				Node* next = cur->_next;
				delete cur;
				cur = next;
			}
			_tables[i] = nullptr;
		}
	}

	// Inserts val. Returns false when an element with the same key already
	// exists, true otherwise.
	bool Insert(const V& val)
	{
		KeyOfV kofv;
		// Grow once the load factor reaches 1 to keep the chains short.
		if (_tables.size() == 0 || _num >= _tables.size())
		{
			// 1) Allocate the new bucket array.
			std::vector<Node*> newTables;
			size_t newSz = _tables.size() == 0 ? 10 : 2 * _tables.size();
			newTables.resize(newSz, nullptr);

			// 2) Relink the existing nodes into the new array (no node is
			//    copied or reallocated).
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				Node* curNode = _tables[i];
				while (curNode)
				{
					Node* next = curNode->_next;

					// Recompute the bucket and push the node at its front.
					size_t index = kofv(curNode->_data) % newSz;
					curNode->_next = newTables[index];
					newTables[index] = curNode;

					curNode = next;
				}
				_tables[i] = nullptr;
			}

			// 3) Swap; the old (now empty) array is released with newTables.
			_tables.swap(newTables);
		}

		// 1. Bucket of the new element.
		size_t index = kofv(val) % _tables.size();

		// 2. Reject duplicates.
		Node* node = _tables[index];
		while (node)
		{
			if (kofv(node->_data) == kofv(val))
				return false;
			node = node->_next;
		}

		// 3. Push the new node at the front of the chain.
		Node* cur = new HashNode<V>(val);
		cur->_next = _tables[index];
		_tables[index] = cur;
		++_num;

		return true;
	}

	// Returns the node holding key, or nullptr when absent.
	Node* Find(const K& key)
	{
		// Nothing stored yet; also avoids `% 0`.
		if (_tables.empty())
			return nullptr;

		KeyOfV kofv;
		// Same bucket computation as Insert, so any key Insert accepted
		// (including negative ones) can be found again.
		size_t index = key % _tables.size();

		Node* cur = _tables[index];
		while (cur)
		{
			if (kofv(cur->_data) == key)
				return cur;
			else
				cur = cur->_next;
		}
		return nullptr;
	}
	
	// Removes the element with the given key.
	// Returns true when an element was removed.
	bool Erase(const K& key)
	{
		// Nothing stored yet; also avoids `% 0`.
		if (_tables.empty())
			return false;

		KeyOfV kofv;
		size_t index = key % _tables.size();

		Node* cur = _tables[index];
		Node* prev = nullptr;
		while (cur)
		{
			if (kofv(cur->_data) == key)
			{
				if (prev == nullptr)
				{
					// Removing the head: relink the bucket itself. The slot
					// is the bucket index, not the key.
					_tables[index] = cur->_next;
				}
				else
				{
					prev->_next = cur->_next;
				}
				--_num;
				delete cur;

				return true;
			}
			else
			{
				prev = cur;
				cur = cur->_next;
			}
		}

		return false;
	}
private:
	std::vector<Node*> _tables;	// bucket heads
	size_t _num = 0;// number of stored elements
};

测试代码:

#include <iostream>
#include "hash.hpp"

using namespace std;

// Key extractor for set-like tables: the stored value is its own key,
// so the argument is handed back unchanged (by reference).
template<class K>
struct setKeyOfValue
{
	auto operator()(const K& value) -> const K&
	{
		return value;
	}
};
void test()
{
	Hash<int, int, setKeyOfValue<int>> ht;
	ht.Insert(2);
	ht.Insert(3);
	ht.Insert(5);
	ht.Insert(15);
	ht.Insert(1);
	ht.Insert(25);
	ht.Insert(13);
	ht.Insert(7);
	ht.Insert(22);
	ht.Insert(23);
	ht.Insert(46);

	HashNode<int>* node = ht.Find(3);
	if (node)
		cout << "node: " << node->_data << endl;
	else
		cout << "node not exist" << endl;

	ht.Erase(3);
	ht.Erase(2);
}
// Program entry point: runs the demo.
int main()
{
	test();
	// Reaching the end of main is equivalent to `return 0;`.
}

开散列一些桶挂的数据很多，哈希冲突很严重，如何解决？

1）当一个桶中链的长度超过一定值时，将链表换成红黑树；

2）控制负载因子；

三.unordered_map&unordered_set模拟实现

hash.hpp

#pragma once

//////////////////////////////////
//开散列
//////////////////////////////////

#include <vector>
#include <string>

using std::vector;
using std::string;

// Node of a bucket's singly linked list: the stored value plus a link to the
// next node in the same bucket (nullptr for the last node).
template<class V>
struct HashNode {
	V _data;
	HashNode* _next;

	// A freshly built node is not linked to anything yet.
	HashNode(const V& data) : _data(data), _next(nullptr) {}
};

// Forward declaration: the iterator walks the table's buckets, and the table
// in turn names the iterator type.
template<class K, class V, class KeyOfV, class Hash>
class HashTable;

// Forward iterator over every element stored in a HashTable.
// Holds the current node and a pointer to the owning table; the table is
// needed to move on to the next non-empty bucket once a chain is exhausted.
// A null node represents end().
template<class K,class V,class KeyOfV,class Hash>
struct HashTableIterator {
	typedef HashTableIterator<K, V, KeyOfV,Hash> Self;
	typedef HashNode<V> Node;
	typedef HashTable<K, V, KeyOfV, Hash> HT;

	Node* _node;
	HT* _ht;

	HashTableIterator(Node* node,HT* ht)
		:_node(node)
		,_ht(ht)
	{}

	// Access to the current element.
	V& operator*()
	{
		return _node->_data;
	}

	// Member access to the current element (it->member).
	V* operator->()
	{
		return &_node->_data;
	}

	// Kept for source compatibility with code that relied on it: yields the
	// address of the current element rather than of the iterator object.
	V* operator&()
	{
		return &_node->_data;
	}

	// Pre-increment: moves to the next element and returns this iterator.
	Self& operator++()
	{
		Advance();
		return *this;
	}

	// Post-increment: moves to the next element and returns the position
	// held before the move.
	Self operator++(int)
	{
		Self previous(*this);
		Advance();
		return previous;
	}

	bool operator!=(const Self& s) const
	{
		return _node != s._node;
	}

	bool operator==(const Self& s) const
	{
		return _node == s._node;
	}

private:
	// Steps to the next node of the current chain, or to the head of the
	// next non-empty bucket; becomes end() (null node) when none is left.
	void Advance()
	{
		// Already at end(): nothing to advance.
		if (_node == nullptr)
			return;

		// Current chain not finished yet.
		if (_node->_next)
		{
			_node = _node->_next;
			return;
		}

		// Chain finished: recompute the current bucket from the element's
		// key and scan the following buckets.
		KeyOfV kofv;
		size_t i = _ht->HashFunc(kofv(_node->_data)) % _ht->_tables.size();
		_node = nullptr;
		for (++i; i < _ht->_tables.size(); ++i)
		{
			if (_ht->_tables[i])
			{
				_node = _ht->_tables[i];
				return;
			}
		}
	}
};

// Default functor turning a key into something that can be taken modulo the
// bucket count: for integral keys the key itself is used.
template<class K>
struct _Hash {
	const K& operator()(const K& key) const
	{
		return key;
	}
};

// Specialization for std::string keys: BKDR string hash (multiplier 131).
template<>
struct _Hash<std::string> {
	size_t operator()(const std::string& key) const
	{
		// Accumulate in an unsigned type: wrap-around is well defined there,
		// whereas overflowing a signed int is undefined behavior. Bytes are
		// read as unsigned char so the result does not depend on whether
		// plain char is signed on the platform.
		size_t ret = 0;
		for (size_t i = 0; i < key.size(); ++i)
		{
			ret = ret * 131 + static_cast<unsigned char>(key[i]);
		}

		return ret;
	}
};

template<class K, class V, class KeyOfV, class Hash>
class HashTable {
public:
	typedef HashNode<V> Node;
	typedef HashTableIterator<K, V, KeyOfV, Hash> iterator;
	friend struct HashTableIterator<K,V,KeyOfV,Hash>;
	iterator begin()
	{
		for (int i = 0; i < _tables.size(); ++i)
		{
			if (_tables[i])
			{
				return iterator(_tables[i],this);
			}
		}
		return end();
	}

	iterator end()
	{
		return iterator(nullptr,this);
	}

	~HashTable()
	{
		Clear();
	}

	void Clear()
	{
		for (int i = 0; i < _tables.size(); ++i)
		{
			Node* cur = _tables[i];
			while (cur)
			{
				Node* next = cur->_next;
				delete cur;
				cur = next;
			}
			_tables[i] = nullptr;
		}
	}
	
	//将key转换为可以取模的类型
	size_t HashFunc(const K& key)
	{
		Hash hash;
		return hash(key);
	}

	bool Insert(const V& val)
	{
		KeyOfV kofv;
		//负载因子等于1开始增容，避免大量的哈希冲突
		if (_tables.size() == 0 || _num >= _tables.size())
		{
			//1) 开新表
			std::vector<Node*> newTables;
			size_t newSz = _tables.size() == 0 ? 10 : 2 * _tables.size();
			newTables.resize(newSz, nullptr);

			//2) 将旧表中的数据重新映射
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				Node* curNode = _tables[i];
				while (curNode)
				{
					Node* next = curNode->_next;

					//重新计算位置，并头插到新链表
					size_t index = HashFunc(kofv(curNode->_data)) % newSz;
					curNode->_next = newTables[index];
					newTables[index] = curNode;

					curNode = next;
				}
				_tables[i] = nullptr;
			}

			//3) 释放旧表
			_tables.swap(newTables);
		}

		//1.计算哈希位置
		size_t index = HashFunc(kofv(val)) % _tables.size();

		//2.检查表中是否存在相同的值
		Node* node = _tables[index];
		while (node)
		{
			if (HashFunc(kofv(node->_data)) == HashFunc(kofv(val)))
				return false;
			node = node->_next;
		}

		//3.头插挂到链表中
		Node* cur = new HashNode<V>(val);
		cur->_next = _tables[index];
		_tables[index] = cur;
		++_num;

		return true;
	}

	Node* Find(const K& key)
	{
		KeyOfV kofv;
		if (key < 0)
			return nullptr;
		size_t index = HashFunc(key) % _tables.size();

		Node* cur = _tables[index];
		while (cur)
		{
			if (kofv(cur->_data) == key)
				return cur;
			else
				cur = cur->_next;
		}
		return nullptr;
	}
	
	bool Erase(const K& key)
	{
		KeyOfV kofv;
		if (key < 0 )
			return false;
		size_t index = HashFunc(key) % _tables.size();

		Node* cur = _tables[index];
		Node* prev = nullptr;
		while (cur)
		{
			if (HashFunc(kofv(cur->_data)) == HashFunc(key))
			{
				if (prev == nullptr)
				{
					_tables[key] = cur->_next;
				}
				else
				{
					prev->_next = cur->_next;
				}
				--_num;
				delete cur;

				return true;
			}
			else
			{
				prev = cur;
				cur = cur->_next;
			}
		}

		return false;
	}
private:
	vector<Node*> _tables;
	size_t _num = 0;//有效元素个数
};

MyUnorderedMap.h

#pragma once

#include "hash.hpp"
#include <utility>

namespace cxp
{
	template<class K, class V, class Hash = _Hash<K>>
	class Unordered_map 
	{
	private:
		struct MapKOfV
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};

	public:
		//typename进行模板声明
		typedef typename HashTable<K, K, MapKOfV, Hash>::iterator iterator;

		bool Insert(const pair<K, V>& kv)
		{
			return _ht.Insert(kv);
		}

		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

	private:
		HashTable<K, pair<K,V>, MapKOfV, Hash> _ht;
	};
}

MyUnorderedSet.h

#pragma once

#include "hash.hpp"
#include <utility>

namespace cxp
{
	template<class K, class Hash = _Hash<K>>
	class Unordered_Set
	{
	private:
		struct SetKOfV
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};

	public:
		//typename进行模板声明
		typedef typename HashTable<K, K, SetKOfV, Hash>::iterator iterator;

		bool Insert(const K& key)
		{
			return _ht.Insert(key);
		}

		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

	private:
		HashTable<K, K, SetKOfV, Hash> _ht;
	};
}

四.位图

1.位图思想

用每一位来存放某种状态的数据结构，适用于海量数据，数据无重复的场景。通常是用来判断某个数据存不存在。
位图查找的时间复杂度为O(1)

2.位图代码实现

#pragma once

#include <vector>
using std::vector;

namespace cxp
{
	// Bitmap: one bit per non-negative integer key, packed 32 bits per word.
	class BitSet
	{
	public:
		// Num: largest key that has to be representable. Allocates
		// (Num / 32) + 1 words; a negative Num still yields one word.
		BitSet(int Num)
			:_set(Num > 0 ? (static_cast<std::size_t>(Num) >> 5) + 1 : 1)
			,_num(0)
		{}

		// Sets the bit for key. Keys outside the bitmap are ignored.
		void Set(int key)
		{
			if (!InRange(key))
				return;

			unsigned int& word = _set[key >> 5];	// word holding the bit
			const unsigned int mask = 1u << (key % 32);	// bit inside that word

			// Count only real 0 -> 1 transitions so validSize() stays exact
			// when the same key is set twice.
			if ((word & mask) == 0)
			{
				word |= mask;
				++_num;
			}
		}

		// Clears the bit for key. Keys outside the bitmap are ignored.
		void Reset(int key)
		{
			if (!InRange(key))
				return;

			unsigned int& word = _set[key >> 5];
			const unsigned int mask = 1u << (key % 32);

			// Count only real 1 -> 0 transitions.
			if ((word & mask) != 0)
			{
				word &= ~mask;
				--_num;
			}
		}

		// True when the bit for key is set; false for keys outside the bitmap.
		bool isExist(int key) const
		{
			if (!InRange(key))
				return false;
			return (_set[key >> 5] & (1u << (key % 32))) != 0;
		}

		// Total number of bits the bitmap can hold.
		int getSum() const
		{
			return static_cast<int>(_set.size() * 32);
		}

		// Number of bits currently set.
		int validSize() const
		{
			return _num;
		}

	private:
		// A key is addressable when it is non-negative and its word exists.
		bool InRange(int key) const
		{
			return key >= 0 && static_cast<std::size_t>(key >> 5) < _set.size();
		}

		// Unsigned words: shifting 1 into bit 31 of a signed int is not
		// well defined, with unsigned it is.
		std::vector<unsigned int> _set;
		int _num;	// number of bits set
	};
}

3.位图优点

占用存储空间小，查询效率高（O(1)）；

五.布隆过滤器

1.思想

位图+哈希算法：
将一个字符串通过多个不同的哈希算法映射到一个位图多个位置，当这些位置全为1则说明这个字符串存在。

布隆过滤器判断字符串不存在是准确的，判断字符串存在则有一定概率出错，因为其他字符串映射到的比特位可能会对该字符串造成干扰。
可以通过映射位置个数的增加来减少误判，哈希函数的个数的计算公式如下：

k：哈希函数的个数
m：需要的bit位大小
n：元素个数
$k = \frac{m}{n}\ln 2$
$m = \frac{k\,n}{\ln 2} \approx 1.44\,k\,n$

2.常见的字符串哈希算法


// BKDR string hash: folds every character into the running value with the
// multiplier 131 (31, 1313, 13131, ... are other common choices).
size_t BKDRHash(const std::string& str)
{
	size_t result = 0;
	for (char ch : str)
		result = result * 131 + ch;
	return result;
}

// RS string hash: like a polynomial hash, but the multiplier itself is
// scaled by 378551 after every character (starting from 63689).
size_t RSHash(const std::string& str)
{
	size_t result = 0;
	size_t factor = 63689;
	for (char ch : str)
	{
		result = result * factor + ch;
		factor *= 378551;
	}
	return result;
}

// SDBM string hash: polynomial hash with the multiplier 65599, which is the
// same as ch + (hash << 6) + (hash << 16) - hash.
size_t SDBMHash(const std::string& str)
{
	size_t result = 0;
	for (char ch : str)
		result = 65599 * result + ch;
	return result;
}

3.代码实现

bitSet.h

#pragma once

#include <vector>
using std::vector;

namespace cxp
{
	// Bitmap: one bit per non-negative integer key, packed 32 bits per word.
	class BitSet
	{
	public:
		// Num: largest key that has to be representable. Allocates
		// (Num / 32) + 1 words; a negative Num still yields one word.
		BitSet(int Num)
			:_set(Num > 0 ? (static_cast<std::size_t>(Num) >> 5) + 1 : 1)
			,_num(0)
		{}

		// Sets the bit for key. Keys outside the bitmap are ignored.
		void Set(int key)
		{
			if (!InRange(key))
				return;

			unsigned int& word = _set[key >> 5];	// word holding the bit
			const unsigned int mask = 1u << (key % 32);	// bit inside that word

			// Count only real 0 -> 1 transitions so validSize() stays exact
			// when the same key is set twice.
			if ((word & mask) == 0)
			{
				word |= mask;
				++_num;
			}
		}

		// Clears the bit for key. Keys outside the bitmap are ignored.
		void Reset(int key)
		{
			if (!InRange(key))
				return;

			unsigned int& word = _set[key >> 5];
			const unsigned int mask = 1u << (key % 32);

			// Count only real 1 -> 0 transitions.
			if ((word & mask) != 0)
			{
				word &= ~mask;
				--_num;
			}
		}

		// True when the bit for key is set; false for keys outside the bitmap.
		bool isExist(int key) const
		{
			if (!InRange(key))
				return false;
			return (_set[key >> 5] & (1u << (key % 32))) != 0;
		}

		// Total number of bits the bitmap can hold.
		int getSum() const
		{
			return static_cast<int>(_set.size() * 32);
		}

		// Number of bits currently set.
		int validSize() const
		{
			return _num;
		}

	private:
		// A key is addressable when it is non-negative and its word exists.
		bool InRange(int key) const
		{
			return key >= 0 && static_cast<std::size_t>(key >> 5) < _set.size();
		}

		// Unsigned words: shifting 1 into bit 31 of a signed int is not
		// well defined, with unsigned it is.
		std::vector<unsigned int> _set;
		int _num;	// number of bits set
	};
}

BloomFilter.h

#pragma once

#include <string>
#include "bitSet.h"

using std::string;
using cxp::BitSet;

// Functor form of the BKDR string hash: folds every character into the
// running value with the multiplier 131 (31, 1313, 13131, ... also work).
struct BKDRHash
{
	size_t operator()(const std::string& str)
	{
		size_t result = 0;
		for (char ch : str)
			result = result * 131 + ch;
		return result;
	}
};

// Functor form of the RS string hash: the multiplier starts at 63689 and is
// itself scaled by 378551 after every character.
struct RSHash
{
	size_t operator()(const std::string& str)
	{
		size_t result = 0;
		size_t factor = 63689;
		for (char ch : str)
		{
			result = result * factor + ch;
			factor *= 378551;
		}
		return result;
	}
};


// Functor form of the SDBM string hash: polynomial hash with the multiplier
// 65599, which is the same as ch + (hash << 6) + (hash << 16) - hash.
struct SDBMHash
{
	size_t operator()(const std::string& str)
	{
		size_t result = 0;
		for (char ch : str)
			result = 65599 * result + ch;
		return result;
	}
};

// Bloom filter over strings: every string is mapped by three hash functions
// to three bits of a bitmap. A lookup costs O(k), k being the number of hash
// functions (3 here).
//   T           - not used by the visible code (Set/isExist take std::string)
//   Hash1..3    - functors mapping a std::string to size_t
template<class T = std::string,
	class Hash1 = BKDRHash, 
	class Hash2 = RSHash ,
	class Hash3 = SDBMHash>
class BloomFilter
{
public:
	/*
		num: expected number of elements.
		k: number of hash functions
		m: number of bits needed
		n: number of elements
		k = m / n * ln2
		m = k * n * 1.4
		With k = 3 that is 3 * 1.4 * num bits, rounded up to 5 * num.
		At least one bit is always allocated so that the `% _size` in
		Set/isExist never divides by zero when num is 0.
	*/
	BloomFilter(size_t num)
		:_set(static_cast<int>(num == 0 ? 1 : 5 * num))
		,_size(num == 0 ? 1 : 5 * num)
	{}

	// Records str by setting its three bits.
	void Set(const std::string& str)
	{
		// A hash value may exceed the number of bits, so it is reduced
		// modulo the bitmap size.
		size_t index1 = Hash1()(str) % _size;
		size_t index2 = Hash2()(str) % _size;
		size_t index3 = Hash3()(str) % _size;

		_set.Set(index1);
		_set.Set(index2);
		_set.Set(index3);
	}

	// Returns false when str was definitely never added, true when it may
	// have been added (other strings can have set the same three bits, so a
	// positive answer can be wrong; a negative one cannot).
	bool isExist(const std::string& str)
	{
		size_t index1 = Hash1()(str) % _size;
		size_t index2 = Hash2()(str) % _size;
		size_t index3 = Hash3()(str) % _size;

		// One clear bit is enough to prove the string is absent.
		if (!_set.isExist(index1))
			return false;
		if (!_set.isExist(index2))
			return false;
		if (!_set.isExist(index3))
			return false;

		// The string may be present.
		return true;
	}

	// Deletion is not supported: clearing a bit could remove other strings
	// sharing it. Supporting it would need a counter per bit position.
private:
	BitSet _set;
	size_t _size; // total number of bits
};

六.一致性哈希

大佬博客:https://www.zsythink.net/archives/1182

cxpxatu521

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
哈希

哈希一.哈希概念二.哈希冲突解决方案1.闭散列(1)线性探测代码实现(2)二次探测代码实现开散列一.哈希概念一种采用直接寻址方式（哈希函数）高效查找数据的数据结构哈希冲突:不同关键字通过相同哈希函数计算出相同的哈希地址的现象二.哈希冲突解决方案1.闭散列闭散列：也叫开放定址法，当发生哈希冲突时，如果哈希表未被装满，说明在哈希表中必然还有空位置，那么可以把存放到冲突位置中的下一个空位置中去，而下一个空位置的查找又有线性探测和二次探测两种方式(1)线性探测线性探测：从发生冲突的位置开始，依次
复制链接

扫一扫