学习-＞C++篇十六：哈希表与unordered_map、unordered_set

且随疾风前行->

已于 2022-11-17 13:21:31 修改

阅读量575

点赞数 3

分类专栏：数据结构 C++ 文章标签：散列表数据结构

于 2022-11-17 13:20:27 首次发布

本文链接：https://blog.csdn.net/u014801954/article/details/127480201

版权

C++ 同时被 2 个专栏收录

22 篇文章 2 订阅

订阅专栏

数据结构

6 篇文章 0 订阅

订阅专栏

3.封装unordered_map,unordered_set

1.哈希表概念

顺序结构以及平衡树中，元素键值与其存储位置之间没有对应的关系，因此在查找一个元素

时，必须要经过键值的多次比较。顺序查找时间复杂度为O(N)，平衡树中为树的高度，即

O(log2N)，搜索的效率取决于搜索过程中元素的比较次数，倘若无需进行多次比较，直接从表中获得所需的元素，这就需将键值和存储位置建立映射关系，以便快速查找元素。

该方法为 哈希法（散列法），哈希方法中使用将键值转换为存储位置的转换函数称为哈希(散列)函数，构造出来的结构称为 哈希表(或散列表）。

哈希冲突：不同关键字通过相同哈希函数计算出相同的哈希地址，该种现象称为哈希冲突或哈希碰撞。

2.常用哈希函数：

1. 直接定址法--(常用) 取关键字的某个线性函数为散列地址：Hash（Key）= A*Key + B

优点：简单、均匀

缺点：需要事先知道关键字的分布情况

使用场景：适合查找比较小且连续的情况

2. 除留余数法--(常用) 设散列表中的地址数为m，取一个不大于m，但最接近或者等于m的质数p作为除数，按照哈希函数：Hash(key) = key% p(p<=m),将关键码转换成哈希地址。

解决哈希冲突的两个方法：

一.闭散列法（开放定址法）：

1.线性探测法：

代码模拟实现：

#pragma once
#include<unordered_map>
#include<vector>
using std::vector;
using std::pair;

//线性探测法
// 
//给每一个空间标记
enum state{EXIST,DELETE,EMPTY};
template<class K,class V>
class hashtable
{
	//表中存储elem结构，包含pair键值对和标记状态的state
	struct elem
	{
		pair<K, V> _val;
		state _state;
	};

public:
	hashtable(size_t capacity=3)
		:_ht(capacity)
		,_size(0)
	{
		for (size_t i = 0; i < capacity; i++)
		{
			_ht[i]._state = EMPTY;
		}
	}
	~hashtable()
	{}

	bool Insert(const pair<K, V>& val)
	{
		checkCapacity();
		size_t hashi = val.first % _ht.size();
		while (_ht[hashi]._state != EMPTY)
		{
			if (_ht[hashi]._state == EXIST 
				&& _ht[hashi]._val.first == val.first)
				return false;
			++hashi;
			if (hashi == _ht.size())
				hashi = 0;
		}
		_ht[hashi]._state = EXIST;
		_ht[hashi]._val = val;
		_size++;
		return true;
	}

	elem* Find(const K& k)
	{
		if (_ht.size() == 0)
			return nullptr;
		size_t hashi = k % _ht.size();
		while (_ht[hashi]._state != EMPTY)
		{
			if (_ht[hashi]._state == EXIST && _ht[hashi]._val.first == k)
				return &_ht[hashi];
			++hashi;
			if (hashi == _ht.size())
				hashi = 0;
		}
		return nullptr;
	}
private:
	void checkCapacity()
	{
		if (_size * 10 / _ht.size() >= 7)
		{
			size_t newsize = _ht.size() * 2;
			hashtable<K,V>newht(newsize);
			for (auto e : _ht)
			{
				if (e._state == EXIST)
					newht.Insert({ e._val.first,e._val.second });
			}
			newht._size = _size;
			newht._ht.swap(_ht);
		}
	}

	bool Erase(const K&k)
	{
		if (elem* p = Find(k))
		{
			p->_state = DELETE;
			--_size;
			return true;
		}
		return false;
	}
	size_t HashFunc(const K& k)
	{
		return k % _ht.size();
	}
private:
	vector<elem>_ht;
	size_t _size;
};

2.二次探测法：

与线性探测法类似，将插入和查找的探测元素的顺序变成hashi，hashi+1^2，hashi+2^2,hashi+3^2......即可，记得计算好的哈希地址对哈希表的总大小取模，代码如下：

//二次探测法
	bool Insert(const pair<K, V>& val)
	{
		checkCapacity();
		size_t hashi = val.first % _ht.size();
		size_t index = hashi;
		int i = 0;
		while (_ht[hashi]._state != EMPTY)
		{
			if (_ht[hashi]._state == EXIST
				&& _ht[hashi]._val.first == val.first)
				return false;
			++i;
			hashi=index+i*i;
			hashi %= _ht.size();
		}
		_ht[hashi]._state = EXIST;
		_ht[hashi]._val = val;
		_size++;
		return true;
	}


	//二次探测法
	elem* Find(const K& k)
	{
		if (_ht.size() == 0)
			return nullptr;
		size_t hashi = k % _ht.size();
		size_t index = hashi;
		int i = 0;
		while (_ht[hashi]._state != EMPTY)
		{
			if (_ht[hashi]._state == EXIST && _ht[hashi]._val.first == k)
				return &_ht[hashi];
			i++;
			hashi=index+i*i;
			hashi %= _ht.size();
		}
		return nullptr;
	}

二.开散列法（哈希桶）：

开散列法：首先对关键码的集合通过哈希函数映射到对应地址，具有相同地址的的关键码归于同一个子集合，每一个子集合称为一个桶，各个桶中的元素通过一个单链表链接起来，各链表的头结点存储在哈希表中。（即将哈希冲突的元素放在单链表中）

简单实现：

namespace openhash
{
	template<class V>
	struct hashBucketNode
	{
		hashBucketNode(const V& data)
			:_next(nullptr)
			, _data(data)
		{}

		hashBucketNode<V>* _next;
		V _data;
	};

	template<class V>
	class hashBucket
	{
		typedef hashBucketNode<V> Node;
	public:
		hashBucket(size_t capacity = 3)
			:_size(0)
		{
			_ht.resize(capacity, nullptr);
		}

		Node* insert(const V& data)
		{
			//几号桶
			size_t Bucketi = data % _ht.size();
			Node* cur = _ht[Bucketi];
			while (cur)
			{
				if (cur->_data == data)
					return cur;
				cur = cur->_next;
			}
			Node* newnode = new Node(data);
			newnode->_next = _ht[Bucketi];
			_ht[Bucketi] = newnode;
			_size++;
			return newnode;
		}

		Node* erase(const V& data)
		{
			size_t Bucketi = data % _ht.size();
			Node* cur = _ht[Bucketi], prev = nullptr;
			while (cur)
			{
				if (cur->_data == data)
				{
					if (prev)
						prev->_next = cur->_next;
					else
						_ht[Bucketi] = cur->_next;
					//返回删除元素的下一个节点
					prev = cur->_next;
					delete cur;
					--_size;
					return prev;
				}
			}
			return nullptr;
		}

		Node* Find(const V& data)
		{
			size_t Bucketi = data % _ht.size();
			Node* cur = _ht[Bucketi];
			while (cur)
			{
				if (cur->_data == data)
					return cur;
				cur = cur->_next;
			}
			return nullptr;
		}
		size_t size()const { return _size; }
		bool Empty()const { return _size == 0; }
		void clear()
		{
			for (int i = 0; i < _ht.size(); i++)
			{
				while (_ht[i])
				{
					Node* cur = _ht[i];
					_ht[i] = cur->_next;
					delete cur;
				}
			}
			_size = 0;
		}
		~hashBucket() { clear(); }
	private:
		size_t hashFunc(const V& data) { return data % _ht.size(); }
	private:
		vector<Node*> _ht;
		size_t _size;
	};
}

开散列法的扩容：

因为扩容时桶的总数改变了，每个元素需要重新映射地址，所以有大的性能消耗。理想情况下是每个桶一个元素，于是，可以在桶的个数等于存储的元素个数的时候进行哈希表的扩容操作。

扩容代码：（将原链表节点一个个插入到新的哈希桶）

size_t BucketCount()const
		{
			size_t bc = 0;
			for (auto p : _ht)
				if (p)++bc;
			return bc;
		}
	private:
		void checkCapacity()
		{
			if (BucketCount() == _size)
			{
				vector<Node*>newht(_size == 0 ? 3:_size * 2);
				for (auto& p : _ht)
				{
					Node* pnode = p;
					while (pnode)
					{
						p = p->_next;
						size_t Bucketi = pnode->_data % newht.size();
						pnode->_next = newht[Bucketi];
						newht[Bucketi] = pnode;
						pnode = p;
					}
				}
				_ht.swap(newht);
			}
		}

3.封装unordered_map,unordered_set

上述实现是将key取模得到存储地址，如果要处理字符串，则需提供字符串哈希函数，如果要处理自定义类型也需提供哈希函数，可以通过传入仿函数实现，为封装unordered_map和unordered_set还需实现迭代器，与红黑树类似，还需有一个取value中key的仿函数。

简单实现封装如下：

//hashtable11-3.h

namespace openhash
{
	template<class T>
	struct HashFunc
	{
		//默认的哈希函数
		size_t operator()(const T& val) { return val; }
	};
	//针对字符串哈希函数->模板特化
	template<>
	struct HashFunc<string>
	{
		size_t operator()(const string& s)
		{
			const char* str = s.c_str();
			size_t seed = 31;
			size_t hash = 0;
			while (*str)
			{
				hash = hash * seed + (*str++);
			}
			return hash;
		}
	};

	//定义节点：存储的T可能是K,也可能是pair<K,V>
	template<class T>
	struct HashNode
	{
		HashNode(const T&data)
			:_next(nullptr)
			,_data(data)
		{}
		HashNode<T>* _next;
		T _data;
	};

	//迭代器需用到哈希桶，要做前置声明
	template<class K,class T,class KeyOfT,class HashFunc>
	class HashBucket;

	//实现迭代器，其中有一个节点指针和哈希桶的指针
	template<class K,class T,class KeyOfT,class HashFunc>
	class __HBIterator
	{
		typedef HashNode<T> Node;
		typedef HashBucket<K, T, KeyOfT, HashFunc>* Phb;
		typedef __HBIterator<K, T, KeyOfT, HashFunc> Self;
	public:
		__HBIterator(){}
		__HBIterator(Node*node,Phb phb)
			:_node(node),_phb(phb)
		{}

		//单向迭代器只需实现++
		Self& operator++()//前置加加
		{
			assert(_node);//当_node为空(即end，不可以++)
			if (_node->_next)
				_node = _node->_next;
			else
			{
				KeyOfT kot;
				HashFunc hf;
				size_t sz = _phb->_table.size();
				//计算下一个桶
				size_t hashi = hf(kot(_node->_data)) % sz + 1;
				//寻找下一个不为nullptr的桶
				while (hashi < sz && !_phb->_table[hashi])
					++hashi;
				if (hashi == sz)
					_node = nullptr;//已经走完最后一个哈希桶
				else
					_node = _phb->_table[hashi];
			}
			return *this;
		}

		Self& operator++(int)//后置加加
		{
			Self ret(_node, _phb);
			operator++();
			return ret;
		}

		T& operator*() { return _node->_data; }
		T* operator->() { return &_node->_data; }
		bool operator!=(const Self& s) const { return _node != s._node; }
		bool operator==(const Self& s) const { return _node == s._node; }
	private:
		Node* _node;
		Phb _phb;
	};

	template<class K,class T,class KeyOfT,class HashFunc>
	class HashBucket
	{
		template<class K,class T,class KeyOfT,class hashFunc>
		friend class __HBIterator;//将迭代器作为哈希桶的友元类模板，以便能访问哈希表(遍历桶)
		typedef HashNode<T> Node;
	public:
		typedef __HBIterator<K, T, KeyOfT, HashFunc> Iterator;
		Iterator begin()
		{
			for (auto p : _table)
				if (p)return Iterator(p, this);
			return Iterator(nullptr, this);
		}
		Iterator end() { return Iterator(nullptr, this); }

		HashBucket(size_t capacity = 0)
			:_table(GetNextPrime(_table.size()),nullptr)
			,_size(0)
		{}

		~HashBucket() { Clear(); }

		void Clear()
		{
			for (size_t i = 0; i < _table.size(); i++)
			{
				while (_table[i])
				{
					Node* cur = _table[i];
					_table[i] = cur->_next;
					delete cur;
				}
			}
			_size = 0;
		}

		Iterator find(const K& key)
		{
			HashFunc hf;
			size_t hashi = hf(key) % _table.size();
			Node* cur = _table[hashi];
			while (cur)
			{
				if (KeyOfT()(cur->_data) == key)
					return Iterator(cur, this);
				cur = cur->_next;
			}
			return Iterator(nullptr, this);
		}

		pair<Iterator, bool> insert(const T& data)
		{
			Iterator it = find(KeyOfT()(data));
			if (it != end())
				return make_pair(it, false);
			checkCapacity();
			Node* newnode = new Node(data);
			size_t hashi = HashFunc()(KeyOfT()(data)) % _table.size();
			newnode->_next = _table[hashi];
			_table[hashi] = newnode;
			++_size;
			return make_pair(Iterator(newnode, this), true);
		}

		size_t erase(const K& key)
		{
			KeyOfT kot;
			HashFunc hf;
			size_t hashi = hf(key)%_table.size();
			Node* prev = nullptr;
			Node* cur = _table[hashi];
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					if (prev)
						prev->_next = cur->_next;
					else
						_table[hashi] = cur->_next;
					delete cur;
					--_size;
					return 1;
				}
				prev = cur;
				cur = cur->_next;
			}
			return 0;
		}

	private:
		//将桶的数量设置为一个素数，据说能减少哈希冲突
		size_t GetNextPrime(size_t prime)
		{
			const int PRIMECOUNT = 28;
			//素数表
			static const size_t primeList[PRIMECOUNT] =
			{
				53ul, 97ul, 193ul, 389ul, 769ul,
				1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
				49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
				1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
				50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
				1610612741ul, 3221225473ul, 4294967291ul
			};

			// 获取比prime大那一个素数
			size_t i = 0;
			for (; i < PRIMECOUNT; ++i)
			{
				if (primeList[i] > prime)
					return primeList[i];
			}
			return primeList[i];
		}

		void checkCapacity()
		{
			if (_size == _table.size())
			{
				KeyOfT kot;
				HashFunc hf;
				vector<Node*>newtable(GetNextPrime(_table.size()),nullptr);
				for (auto p : _table)
				{
					auto cur = p;
					while (cur)
					{
						Node* next = cur->_next;
						cur->_next = newtable[hf(kot(p->_data))];
						newtable[hf(kot(p->_data))] = cur;
						cur = next;
					}
				}
				newtable.swap(_table);
			}
		}
	private:
		vector<Node*>_table;
		size_t _size;
	};
}

封装unordered_map和unordered_set类似：

unordered_map主要要多实现一个operator[]函数，注意这个函数是先调用insert实现的。

#pragma once
#include"hashtable11-3.h"
namespace openhash
{
	template<class K, class V, class HashFunc = HashFunc<K> >
	class unordered_map
	{
		struct MapKeyOfT { const K& operator()(const pair<K, V>& p) { return p.first; } };
	public:
		typedef typename HashBucket<K, pair<K, V>, MapKeyOfT, HashFunc>::Iterator iterator;
		iterator begin() { return _hb.begin(); }
		iterator end() { return _hb.end(); }

		iterator find(const K& key) {
			return _hb.find(key);
		}
		size_t erase(const K& key) {
			return _hb.erase(key);
		}
		pair<iterator, bool>insert(const pair<K, V>& p) {
			return _hb.insert(p);
		}
		V& operator[](const K& k) {
			pair<iterator, bool>p = insert(make_pair(k, V()));
			return p.first->second;
		}
	private:
		HashBucket<K, pair<K, V>, MapKeyOfT, HashFunc>_hb;
	};
}

#pragma once
namespace openhash
{
#include"hashtable11-3.h"
	template<class K, class HashFunc = HashFunc<K> >
	class unordered_set
	{
		struct SetKeyOfT { const K& operator()(const K& k) { return k; } };
	public:
		typedef typename HashBucket<K,K,SetKeyOfT, HashFunc>::Iterator iterator;
		iterator begin() { return _hb.begin(); }
		iterator end() { return _hb.end(); }

		iterator find(const K& key) {
			return _hb.find(key);
		}
		size_t erase(const K& key) {
			return _hb.erase(key);
		}
		pair<iterator, bool>insert(const K& k) {
			return _hb.insert(k);
		}
	private:
		HashBucket<K, K, SetKeyOfT, HashFunc>_hb;
	};
}

且随疾风前行->

关注

3
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
学习-＞C++篇十六：哈希表与unordered_map、unordered_set

目录1.哈希表概念 2.常用哈希函数：解决哈希冲突的两个方法：一.闭散列法（开放定址法）：二.开散列法（哈希桶）：3.封装unordered_map,unordered_set
复制链接

扫一扫