[C++]哈希

最新推荐文章于 2024-09-14 22:25:23 发布

杯酒问苍天

最新推荐文章于 2024-09-14 22:25:23 发布

阅读量1k

点赞数 12

分类专栏：数据结构 C++ 文章标签：哈希算法算法

本文链接：https://blog.csdn.net/m0_71071692/article/details/140396685

版权

C++ 同时被 2 个专栏收录

23 篇文章 0 订阅

订阅专栏

数据结构

18 篇文章 0 订阅

订阅专栏

一、概念

在顺序结构以及平衡树中，元素关键码（key）与其存储位置之间没有对应的关系，因此在查找一个元素时，必须要经过关键码（key）的多次比较。顺序查找时间复杂度为O(N)，平衡树中为树的高度，即O(log2 N)，搜索的效率取决于搜索过程中元素的比较次数。
理想的搜索方法：可以不经过任何比较，一次直接从表中得到要搜索的元素。如果构造一种存储结构，通过某种函数(HashFunc)使元素的存储位置与它的关键码（key）之间能够建立一一映射的关系，那么在查找时通过该函数可以很快找到该元素。

哈希函数：

哈希方法中使用的转换函数称为哈希(散列)函数，构造出来的结构称为哈希表(Hash Table)(或者称散列表)

哈希函数设置为：hash(key) = key % capacity; capacity为存储元素底层空间总的大小。

哈希冲突：

对于两个数据元素的关键字和 (i != j)，有 != ，但有：Hash( ) == Hash( )，即：不同关键字通过
相同哈希哈数计算出相同的哈希地址，该种现象称为哈希冲突或哈希碰撞。
把具有不同关键码而具有相同哈希地址的数据元素称为“同义词”。
发生哈希冲突该如何处理呢？

常见处理：

引起哈希冲突的一个原因可能是：哈希函数设计不够合理。哈希函数设计原则：

哈希函数的定义域必须包括需要存储的全部关键码，而如果散列表允许有m个地址时，其值域必须在0到m-1之间
哈希函数计算出来的地址能均匀分布在整个空间中
哈希函数应该比较简单

1. 直接定址法

取关键字的某个线性函数为散列地址：Hash（Key）= A*Key + B 优点：简单、均匀缺点：需要事先知道关键字的分布情况使用场景：适合查找比较小且连续的情况。

2. 除留余数法

设散列表中允许的地址数为m，取一个不大于m，但最接近或者等于m的质数p作为除数，按照哈希函数：Hash(key) = key% p(p<=m),将关键码转换成哈希地址。

此外，还有平方取中法、折叠法、数学分析法等
注意：哈希函数设计的越精妙，产生哈希冲突的可能性就越低，但是无法避免哈希冲突

解决哈希冲突两种常见的方法是：闭散列和开散列

二、闭散列（开放定址法）

1、概念

当发生哈希冲突时，如果哈希表未被装满，说明在哈希表中必然还有空位置，那么可以把key存放到冲突位置中的“下一个” 空位置中去。

线性探测：

从发生冲突的位置开始，依次向后探测，直到寻找到下一个空位置为止。

优点：实现非常简单，
缺点：一旦发生哈希冲突，所有的冲突连在一起，容易产生数据“堆积”，即：不同关键码占据
了可利用的空位置，使得寻找某关键码的位置需要许多次比较，导致搜索效率降低。如何缓解呢？

二次探测：

线性探测的缺陷是产生冲突的数据堆积在一块儿，这与其找下一个空位置有关系，因为找空位置的方式就是挨着往后逐个去找，因此二次探测为了避免该问题，找下一个空位置的方法为： = (H0 + i^2)% m,或者： = (H0 - i^2)% m。其中：i = 1,2,3…，H0是通过散列函数Hash(x)对元素的关键码 key 进行计算得到的位置，m是表的大小。

2、模拟实现：

namespace Close_Hash //闭散列
{
	enum State 
	{ 
		EMPTY, 
		EXIST, 
		DELETE 
	};

	template<class K, class V>
	class HashTable
	{
		struct Node
		{
			pair<K, V> _val;
			State _state;
		};

	public:
		HashTable(size_t capacity = 3)
			: _ht(capacity)
			, _size(0)
			, _totalSize(0)
		{
			for (size_t i = 0; i < capacity; ++i)
				_ht[i]._state = EMPTY;
		}

		// 插入
		bool Insert(const pair<K, V>& val)
		{
			if (_totalSize == 0)
			{
				_totalSize = 3;
			}
			else if (Find(val.first))
			{
				return false;
			}
			else if (_size * 10 / _totalSize >= 7)
			{
				size_t totalsize = _totalSize * 2;
				HashTable newht;
				newht._ht.resize(totalsize);
				newht._totalSize = totalsize;
				for (size_t i = 0; i < _totalSize; i++)
				{
					if (_ht[i]._state == EXIST)
					{
						newht.Insert(_ht[i]._val);
					}
				}
				Swap(newht);
			}

			size_t hashi = val.first % _totalSize;
			while (_ht[hashi]._state == EXIST)
			{
				hashi++;
				hashi %= _totalSize;
			}

			
			_ht[hashi]._val = val;
			_ht[hashi]._state = EXIST;

			_size++;

		}

		// 查找
		size_t Find(const K& key)
		{
			size_t hashi = key % _totalSize;
			for (int i = 0; i < _totalSize; i++)
			{
				if (_ht[hashi]._state == EXIST && _ht[hashi]._val.first == key)
				{
					return true;
				}

				hashi++;
				hashi %= _totalSize;
			}

			return false;
		}

		// 删除
		bool Erase(const K& key)
		{
			size_t hashi = key % _totalSize;
			for (int i = 0; i < _totalSize; i++)
			{
				if (_ht[hashi]._state == EXIST && _ht[hashi]._val.first == key)
				{
					_ht[hashi]._state = DELETE;
					_size--;
					return true;
				}

				hashi++;
				hashi %= _totalSize;

			}

			return false;
		}

		size_t Size()const
		{
			return _size;
		}

		bool Empty() const
		{
			return _size == 0;
		}

		void Swap(HashTable<K, V>& ht)
		{
			swap(_size, ht._size);
			swap(_totalSize, ht._totalSize);
			_ht.swap(ht._ht);
		}

		void Print()
		{
			for (int i = 0; i < _totalSize; i++)
			{
				if (_ht[i]._state == EXIST)
				{
					cout << "[" << i << "]->" << _ht[i]._val.first << endl;
				}
				else if (_ht[i]._state == EMPTY)
				{
					cout << "[" << i << "]->EMPTY" << endl;
				}
				else if (_ht[i]._state == DELETE)
				{
					cout << "[" << i << "]->DELTET" << endl;
				}
			}
		}

	private:
		size_t HashFunc(const K& key)
		{
			return key % _ht.capacity();
		}

		void CheckCapacity();
	private:
		vector<Node> _ht;
		size_t _size;
		size_t _totalSize;  
	};
}

三、开散列

1、概念（哈希桶）

首先对关键码（key）集合用散列函数计算散列地址，具有相同地址的关键码归于同一子集合，每一个子集合称为一个桶，各个桶中的元素通过一个单链表链接起来，各链表的头结点存储在哈希表中。

2、模拟实现

template<class T>
struct DefHashF
{
	size_t operator()(T& data)
	{
		return (size_t)data;
	}
};

template<>
struct DefHashF<string>
{
	size_t operator()(const string& key)
	{
		// BKDR
		size_t hash = 0;
		for (auto e : key)
		{
			hash *= 31;
			hash += e;
		}

		cout << key << ":" << hash << endl;
		return hash;
	}
};

template<class T>
struct HashBucketNode
{
	HashBucketNode(const T& data = T())
		:_data(data)
		,_pNext(nullptr)
	{}

	T _data;
	HashBucketNode<T>* _pNext;
};


	template<class K, class V, class KeyOfValue, class HF>
	class HashBucket;


	template <class K, class V, class KeyOfValue, class HF = DefHashF<K>>
	struct Iterator
	{
		typedef HashBucketNode<V>* PNode;
		typedef Iterator<K, V, KeyOfValue, HF> Self;

		Iterator(PNode pNode = nullptr, HashBucket<K, V, KeyOfValue, HF>* pHt = nullptr)
		{
			_pNode = pNode;
			_pHt = pHt;
		}

		Self& operator++()
		{
			// 当前迭代器所指节点后还有节点时直接取其下一个节点
			if (_pNode->_pNext)
				_pNode = _pNode->_pNext;
			else
			{
				// 找下一个不空的桶，返回该桶中第一个节点
				size_t bucketNo = _pHt->HF(KeyOfValue()(_pNode->_data)) + 1;
				for (; bucketNo < _pHt->BucketCount(); ++bucketNo)
				{
					if (_pNode = _pHt->_ht[bucketNo])
						break;
				}
			}

			return *this;
		}

		Self operator++(int)
		{
			Self cur = *this;
			if (_pNode->_pNext)
				_pNode = _pNode->_pNext;
			else
			{
				// 找下一个不空的桶，返回该桶中第一个节点
				size_t bucketNo = _pHt->HashFunc(KeyOfValue()(_pNode->_data)) + 1;
				for (; bucketNo < _pHt->BucketCount(); ++bucketNo)
				{
					if (_pNode = _pHt->_ht[bucketNo])
						break;
				}
			}

			return cur;
		}

		V& operator*()
		{
			return _pNode->_data;
		}

		V* operator->()
		{
			return &(_pNode->_data);
		}

		bool operator==(const Self& it) const
		{
			return _pNode == it._pNode;
		}

		bool operator!=(const Self& it) const
		{
			return _pNode != it._pNode;
		}


		PNode _pNode;             // 当前迭代器关联的节点
		const HashBucket<K, V, KeyOfValue, HF>* _pHt;         // 哈希桶--主要是为了找下一个空桶时候方便
	};

	
	template<class K, class V, class KeyOfValue, class HF>
	class HashBucket
	{
		template <class K, class V, class KeyOfValue, class HF>
		friend struct Iterator;
		KeyOfValue kov;
		HF hf;
		typedef Iterator<K, V, KeyOfValue, HF> iterator;
		typedef HashBucketNode<V> Node;
	public:
		HashBucket()
		{
			_tables.resize(10);
		}

		iterator begin()
		{
			if (_n == 0)
			{
				return iterator(nullptr, this);
			}

			//哈希表中有数据
			for (int i = 0; i < _tables.size(); i++)
			{
				if (_tables[i])
				{
					return iterator(_tables[i], this);
				}
			}
		}

		iterator end()
		{
			return iterator(nullptr, this);
		}

		size_t size()const
		{
			return _n;
		}

		bool empty()const
		{
			return _n == 0;
		}


		pair<iterator, bool> InsertUnique(const V& data)
		{
			K key = kov(data);
			iterator it = Find(key);
			if (it._pNode != nullptr)
			{
				return make_pair(it, false);
			}

			if (_n == _tables.size())
			{
				//扩容
				vector<Node*> newt;
				newt.resize(_tables.size() * 2, nullptr);

				for (int i = 0; i < _tables.size(); i++)
				{
					if (_tables[i])
					{
						//桶里有数据
						Node* cur = _tables[i];
						Node* next = nullptr;
						while (cur)
						{
							//放到新桶中
							size_t hashi = kov(cur->_data) % newt.size();
							if (newt[hashi] == nullptr)
							{
								next = cur->_pNext;	//记录下一个数据
								cur->_pNext = nullptr;
								newt[hashi] = cur;

								cur = next;
							}
							else
							{
								Node* ntcur = newt[hashi];
								while (ntcur->_pNext)
								{
									ntcur = ntcur->_pNext;
								}
								next = cur->_pNext;
								ntcur->_pNext = cur;
								cur->_pNext = nullptr;

								cur = next;

							}


						}

						_tables[i] = nullptr;
					}
				}
				swap(newt, _tables);

			}

			Node* newnode = new Node;
			newnode->_data = data;
			newnode->_pNext = nullptr;

			size_t hashi = kov(data) % _tables.size();
			if (_tables[hashi] == nullptr)
			{
				_tables[hashi] = newnode;
			}
			else
			{
				Node* cur = _tables[hashi];

				while (cur->_pNext)
				{
					cur = cur->_pNext;
				}

				cur->_pNext = newnode;
				_n++;

			}
			return make_pair(iterator(newnode, this), true);


		}

		iterator Find(const K& key)
		{
			size_t hashi = key % _tables.size();
			Node* cur = _tables[hashi];
			while (cur)
			{
				if (kov(cur->_data) == key)
				{
					return iterator(cur, this);
				}

				cur = cur->_pNext;
			}

			return iterator(nullptr, this);
		}

		size_t Count(const K& key)
		{

		}

		pair<iterator, bool> Insert(const V value)
		{
			K key = kov(value);
			iterator it = Find(key);
			if (it._pNode != nullptr)
			{
				return make_pair(it, false);
			}

			if (_n == _tables.size())
			{
				//扩容
				vector<Node*> newt;
				newt.resize(_tables.size() * 2, nullptr);

				for (int i = 0; i < _tables.size(); i++)
				{
					if (_tables[i])
					{
						//桶里有数据
						Node* cur = _tables[i];
						Node* next = nullptr;
						while (cur)
						{
							//放到新桶中
							size_t hashi = kov(cur->_data) % newt.size();
							if (newt[hashi] == nullptr)
							{
								next = cur->_pNext;	//记录下一个数据
								cur->_pNext = nullptr;
								newt[hashi] = cur;

								cur = next;
							}
							else
							{
								Node* ntcur = newt[hashi];
								while (ntcur->_pNext)
								{
									ntcur = ntcur->_pNext;
								}
								next = cur->_pNext;
								ntcur->_pNext = cur;
								cur->_pNext = nullptr;

								cur = next;

							}


						}

						_tables[i] = nullptr;
					}
				}
				swap(newt, _tables);

			}

			Node* newnode = new Node;
			newnode->_data = value;
			newnode->_pNext = nullptr;

			size_t hashi = kov(value) % _tables.size();
			if (_tables[hashi] == nullptr)
			{
				_tables[hashi] = newnode;
			}
			else
			{
				Node* cur = _tables[hashi];

				while (cur->_pNext)
				{
					cur = cur->_pNext;
				}

				cur->_pNext = newnode;


			}

			_n++;
			iterator nit(newnode, this);
			return make_pair(nit, true);

		}

		iterator Erase(iterator position)
		{
			Node* pos = position._pNode;

			size_t hashi = kov(pos->_data) % _tables.size();
			if (_tables[hashi] == nullptr)
			{
				//该桶无数据
				return iterator(nullptr, this);
			}
			else
			{
				Node* cur = _tables[hashi];
				if (cur == pos)
				{
					//释放的是该桶第一个结点,即_tables[hashi]
					if (cur->_pNext == nullptr)
					{
						delete cur;
						_n--;
						_tables[hashi] = nullptr;
						for (int i = hashi + 1; i < _tables.size(); i++)
						{
							//返回下一个结点
							if (_tables[i])
							{
								return iterator(_tables[i], this);
							}
						}

						return iterator(nullptr, this);
					}
					else
					{
						Node* pnext = pos->_pNext;
						delete pos;
						_n--;
						_tables[hashi] = pnext;

						return iterator(pnext, this);
					}
				}

				//pos不在_tables[hashi]的位置，查找pos是否在该桶中
				Node* prev = cur;
				cur = cur->_pNext;
				Node* next = cur;
				while (cur)
				{
					next = cur->_pNext;
					if (cur == pos)
					{
						prev->_pNext = next;
						delete cur;
						_n--;

						//返回下一个结点
						if (next)
						{
							return iterator(next, this);
						}
						else
						{
							for (int i = hashi + 1; i < _tables.size(); i++)
							{
								//返回下一个结点
								if (_tables[i])
								{
									return iterator(_tables[i], this);
								}
							}

							return iterator(nullptr, this);
						}
					}

					prev = cur;
					cur = cur->_pNext;
					if (cur)
						next = cur->_pNext;
				}

				//没找到
				return iterator(nullptr, this);
			}
		}

		size_t BucketCount()
		{
			size_t cnt = 0;
			for (int i = 0; i < _tables.size(); i++)
			{
				if (_tables[i])
				{
					cnt++;
				}
			}

			return cnt;
		}

		size_t BucketSize(K& key)
		{
			size_t hashi = key % _tables.size();
			Node* cur = _tables[hashi];
			size_t cnt = 0;

			while (cur)
			{
				cnt++;
				cur = cur->_pNext;
			}

			return cnt;
		}

	private:
		vector<Node*> _tables;
		size_t _n = 0;
	};