数据结构——哈希

最新推荐文章于 2024-04-15 08:17:08 发布

小琛

最新推荐文章于 2024-04-15 08:17:08 发布

阅读量140

点赞数

分类专栏： C++ 数据结构

本文链接：https://blog.csdn.net/qq_44745063/article/details/109226503

版权

C++ 同时被 2 个专栏收录

33 篇文章 1 订阅

订阅专栏

数据结构

20 篇文章 0 订阅

订阅专栏

作者：小琛
欢迎转载，请标明出处

文章目录

哈希思想

在之前的存储结构中（搜索树、红黑树），都有其缺点。

例如：在查找一个元素时，必须要经过关键值的多次比较。顺序查找时间复杂度为O(N)，平衡树中为树的高度，即O(logN )，搜索的效率取决于搜索过程中元素的比较次数。这与其本身特质有关，它们毕竟是一种类似遍历的方法查找。

在这里插入图片描述
何为理想的搜索方法？
可以不经过任何比较，一次直接从表中得到要搜索的元素。 通过某种函数(hashFunc)使元素的存储位置与它的关键码之间能够建立一一映射的关系，那么在查找时通过该函数可以很快找到该元素。
关键字：一一映射

例如：设定一个数组，该数组的每个成员对应不同的值，查找9时，直接去该数组第9个元素查找
在这里插入图片描述

哈希函数

常见哈希函数

直接定制法
取关键字的某个线性函数为散列地址：Hash（Key）= A*Key + B
优点：简单、均匀缺点：需要事先知道关键字的分布情况使用场景：适合查找比较小且连续的情况
除留余数法
设散列表中允许的地址数为m，取一个不大于m，但最接近或者等于m的质数p作为除数，按照哈希函数：Hash(key) = key% p(p<=m),将关键码转换成哈希地址

哈希冲突

对于两个数据元素的关键字j和k，有Hash(j)==Hash(k)。即：不同关键字通过相同哈希哈数计算出相同的哈希地址，该种现象称为哈希冲突或哈希碰撞。
把具有不同关键码而具有相同哈希地址的数据元素称为“同义词”。
例如：
在这里插入图片描述

闭散列解决哈希冲突

线性探测
先按照哈希函数寻找对应的位置，如果该位置中没有元素则直接插入新元素，如果该位置中有元素发生哈希冲突，使用线性探测找到下一个空位置，插入新元素。
二次探测
线性探测的缺陷是产生冲突的数据堆积在一块，这与其找下一个空位置有关系，因为找空位置的方式就是挨着往后逐个去找，因此二次探测为了避免该问题，找下一个空位置的方法为：H = (J + K)% m,
或者：H = ( J-K )% m。其中：i = 1,2,3…，是通过散列函数Hash(x)对元素的关键码 key 进行计算得到的位置，m是表的大小。

研究表明：当表的长度为质数且表装载因子a不超过0.5时，新的表项一定能够插入，而且任何一个位置都不会被探查两次。因此只要表中有一半的空位置，就不会存在表满的问题。在搜索时可以不考虑表装满的情况，但在插入时必须确保表的装载因子a不超过0.5，如果超出必须考虑增容。

因此：闭散列哈希表的最大缺陷就是空间利用率低，是一种典型的空间换时间的做法

namespace CLOSE_HASH
{
	// unordered_set<K>   ->HashTable<K, K>
	// unordered_map<K,V> ->HashTable<K, pair<K,V>>
	enum State
	{
		EMPTY,
		EXITS,
		DELETE,
	};

	template<class T>
	struct HashData
	{
		T _data;
		State _state;
	};

	template<class K, class T, class KeyOfT>
	class HashTable
	{
		typedef HashData<T> HashData;
	public:
		bool Insert(const T& d)
		{
			KeyOfT koft;

			// 负载因子 = 表中数据/表的大小 衡量哈希表满的程度
			// 表越接近满，插入数据越容易冲突，冲突越多，效率越低
			// 哈希表并不是满了才增容，开放定制法中，一般负载因子到了0.7左右就开始增容
			// 负载因子越小，冲突概率越低，整体效率越高，但是负载因子越小，浪费的空间越大，
			// 所以负载因子一般取一个折中值。
			if (_tables.size() == 0 || _num * 10 / _tables.size() >= 7)
			{
				// 1.开2倍大小的新表
				// 2.遍历旧表的数据，重新计算在新表中位置
				// 3.释放旧表
				//vector<HashData> newtables;
				//size_t newsize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				//newtables.resize(newsize);
				//for (size_t i = 0; i < _tables.size(); ++i)
				//{
				//	if (_tables[i]._state == EXITS)
				//	{
				//		// 计算在新表中的位置并处理冲突
				//		size_t index = koft(_tables[i]._data) % newtables.size();
				//		while (newtables[index]._state == EXITS)
				//		{
				//			++index;
				//			if (index == _tables.size())
				//			{
				//				index = 0;
				//			}
				//		}

				//		newtables[index] = _tables[i];
				//	}
				//}
				//_tables.swap(newtables);

				HashTable<K, T, KeyOfT> newht;
				size_t newsize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				newht._tables.resize(newsize);
				for (size_t i = 0; i < _tables.size(); ++i)
				{
					if (_tables[i]._state == EXITS)
					{
						newht.Insert(_tables[i]._data);
					}
				}

				_tables.swap(newht._tables);
			}

			// 线性探测
			 计算d中的key在表中映射的位置
			//size_t  index = koft(d) % _tables.size();
			//while (_tables[index]._state == EXITS)
			//{
			//	if (koft(_tables[index]._data) == koft(d))
			//	{
			//		return false;
			//	}

			//	++index;
			//	if (index == _tables.size())
			//	{
			//		index = 0;
			//	}
			//}

			//_tables[index]._data = d;
			//_tables[index]._state = EXITS;
			//_num++;

			// 二次探测
			// 计算d中的key在表中映射的位置
			size_t  start = koft(d) % _tables.size();
			size_t index = start;
			int i = 1;
			while (_tables[index]._state == EXITS)
			{
				if (koft(_tables[index]._data) == koft(d))
				{
					return false;
				}

				index = start + i*i;
				++i;
				index %= _tables.size();
			}

			_tables[index]._data = d;
			_tables[index]._state = EXITS;
			_num++;

			return true;
		}

		HashData* Find(const K& key)
		{
			KeyOfT koft;
			// 计算d中的key在表中映射的位置
			size_t index = key % _tables.size();
			while (_tables[index]._state != EMPTY)
			{
				if (koft(_tables[index]._data) == key)
				{
					if (_tables[index]._state == EXITS)
					{
						return &_tables[index];
					}
					else if (_tables[index]._state == DELETE)
					{
						return nullptr;
					}
				}

				++index;
				if (index == _tables.size())
				{
					index = 0;
				}
			}

			return nullptr;
		}

		bool Erase(const K& key)
		{
			HashData* ret = Find(key);
			if (ret)
			{
				ret->_state = DELETE;
				--_num;
				return true;
			}
			else
			{
				return false;
			}
		}

	private:
		vector<HashData> _tables;
		size_t    _num = 0;    // 存了几个有效数据
	};

开散列解决哈希冲突问题(重点)

开散列的概念
开散列通常称为哈希桶，首先对关键码集合用散列函数计算散列地址，具有相同地址的关键码归于同一子集合，每一个子集合称为一个桶，各个桶中的元素通过一个单链表链接起来，各链表的头结点存储在哈希表中。

通俗的讲：哈希桶就是在原本的映射条件下，将发生冲突的元素挂接在一起。具体实现就是数组内存放节点指针，将若干个取余后相同的节点链起来。

	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;

		HashNode(const T& data)
			:_data(data)
			, _next(nullptr)
		{}
	};

	// 前置声明
	template<class K, class T, class KeyOfT, class Hash>
	class HashTable;
	
	template<class K, class T, class KeyOfT, class Hash>
	struct __HashTableIterator 
	{
		typedef __HashTableIterator<K, T, KeyOfT, Hash> Self;
		typedef HashTable<K, T, KeyOfT, Hash> HT;
		typedef HashNode<T> Node;
		Node* _node;
		HT* _pht; 

		__HashTableIterator(Node* node, HT* pht)
			:_node(node)
			, _pht(pht)
		{}

		T& operator*()
		{
			return _node->_data;
		}

		T* operator->()
		{
			return &_node->_data;
		}

		Self operator++()
		{
			if (_node->_next)
			{
				_node = _node->_next;
			}
			else
			{
				// 如果一个桶走完了，找到下一个桶继续遍历
				KeyOfT koft;
				size_t i = _pht->HashFunc(koft(_node->_data)) % _pht->_tables.size();
				++i;
				for (; i < _pht->_tables.size(); i++)
				{
					Node* cur = _pht->_tables[i];
					if (cur)
					{
						_node = cur;
						return *this;
					}
				}

				_node = nullptr;
			}

			return *this;
		}

		bool operator!=(const Self& s)
		{
			return _node != s._node;
		}
	};

	template<class K>
	struct _Hash
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};

	// 特化
	template<>
	struct _Hash < string >
	{
		size_t operator()(const string& key)
		{
			// BKDR Hash
			size_t hash = 0;
			for (size_t i = 0; i < key.size(); ++i)
			{
				hash *= 131;
				hash += key[i];
			}

			return hash;
		}
	};

	//struct _HashString
	//{
	//	size_t operator()(const string& key)
	//	{
	//		// BKDR Hash
	//		size_t hash = 0;
	//		for (size_t i = 0; i < key.size(); ++i)
	//		{
	//			hash *= 131;
	//			hash += key[i];
	//		}

	//		return hash;
	//	}
	//};

	template<class K, class T, class KeyOfT, class Hash>
	class HashTable
	{
		typedef HashNode<T> Node;
	public:
		friend struct __HashTableIterator < K, T, KeyOfT, Hash>;
		typedef __HashTableIterator<K, T, KeyOfT, Hash> iterator;

		iterator begin()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				if (_tables[i])
				{
					return iterator(_tables[i], this);
				}
			}

			return end();
		}

		iterator end()
		{
			return iterator(nullptr, this);
		}

		~HashTable()
		{
			Clear();
		}

		void Clear()
		{
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}

				_tables[i] = nullptr;
			}
		}

		size_t HashFunc(const K& key)
		{
			Hash hash;
			return hash(key);
		}

		pair<iterator, bool> Insert(const T& data)
		{
			KeyOfT koft;
			// 如果负载因子等于1，则增容，避免大量的哈希冲突
			if (_tables.size() == _num)
			{
				// 1.开2倍大小的新表（不一定是2倍）
				// 2.遍历旧表的数据，重新计算在新表中位置
				// 3.释放旧表
				vector<Node*> newtables;
				size_t newsize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				newtables.resize(newsize);
				for (size_t i = 0; i < _tables.size(); ++i)
				{
					// 将旧表中的节点取下来重新计算在新表中的位置，并插入进去
					Node* cur = _tables[i];
					while (cur)
					{
						Node* next = cur->_next;
						size_t index = HashFunc(koft(cur->_data)) % newtables.size();
						cur->_next = newtables[index];
						newtables[index] = cur;
						
						cur = next;
					}

					_tables[i] = nullptr;
				}

				_tables.swap(newtables);
			}

			// 计算数据在表中映射的位置
			size_t index = HashFunc(koft(data)) % _tables.size();

			// 1、先查找这个值在不在表中
			Node* cur = _tables[index];
			while (cur)
			{
				if (koft(cur->_data) == koft(data))
				{
					return make_pair(iterator(cur, this), false);
				}
				else
				{
					cur = cur->_next;
				}
			}

			// 2、头插到挂的链表中 (尾插也可以)
			Node* newnode = new Node(data);
			newnode->_next = _tables[index];
			_tables[index] = newnode;

			++_num;
			return make_pair(iterator(newnode, this), false);;
		}

		Node* Find(const K& key)
		{
			KeyOfT koft;
			size_t index = HashFunc(key) % _tables.size();
			Node* cur = _tables[index];
			while (cur)
			{
				if (koft(cur->_data) == key)
				{
					return cur;
				}
				else
				{
					cur = cur->_next;
				}
			}

			return nullptr;
		}

		bool Erase(const K& key)
		{
			KeyOfT koft;
			size_t index = HashFunc(key) % _tables.size();
			Node* prev = nullptr;
			Node* cur = _tables[index];
			while (cur)
			{
				if (koft(cur->_data) == key)
				{
					if (prev == nullptr)
					{
						// 表示要删的值在第一个节点
						_tables[index] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}
					delete cur;

					return true;
				}
				else
				{
					prev = cur;
					cur = cur->_next;
				}
			}

			return false;
		}

	private:
		vector<Node*> _tables;
		size_t _num = 0;   // 记录表中存储的数据个数
	};