【C++高阶】哈希的应用（封装unordered_map和unordered_set）

island1314

于 2024-07-30 16:58:32 发布

阅读量390

点赞数 32

分类专栏： C++学习—进步之路文章标签： c++ 学习哈希算法散列表算法

本文链接：https://blog.csdn.net/island1314/article/details/140798817

版权

C++学习—进步之路专栏收录该内容

26 篇文章 2 订阅

订阅专栏

✨ 世事漫随流水，算来一生浮梦 🌏

📃个人主页：island1314

🔥个人专栏：C++学习

🚀 欢迎关注：👍点赞 👂🏽留言 😍收藏 💞 💞 💞

🚀 前言

哈希类的实现参考上一篇文章：【C++高阶】哈希函数底层原理全面探索和深度解析-CSDN博客

之前我们已经学习了如何手搓哈希，现在让我们来对哈希进行改造，并且封装成unordered_map和unordered_set。

注意：本篇我们采用开散列的方式来模拟实现unordered

1. 哈希的改造

📒改造HashTable以适配unordered_map和unordered_set容器，主要涉及到如何根据这两种容器的特性来设计和实现HashTable节点的存储以及相应的操作。unordered_map和unordered_set的主要区别在于它们存储的元素类型：map存储键值对（key-value pairs），而set仅存储唯一的键值（通常是键本身作为值）。尽管如此，它们在底层数据结构（如HashTable）的实现上有很多相似之处

改造内容如下：

K：key的类型
T：如果是unordered_map，则为pair<K, V>; 如果是unordered_set，则为K
KeyOfT：通过T来获取key的一个仿函数类
Hash：哈希函数仿函数对象类型。哈希函数使用除留余数法，需要先将 Key 转换为整型数字才能取模

// unordered_set 与 unordered_map
// unordered_set -> HashTable<K, K>
// unordered_map -> HashTable<K, pair<K, V>>

🌈1.1. 哈希节点的改造

// Singly linked bucket node. T is the stored payload: the key itself for
// unordered_set, a key/value pair for unordered_map.
template<typename T>
struct HashNode
{
	T _data;          // payload held by this node
	HashNode* _next;  // next node in the same bucket chain; nullptr at the tail

	// Copies `data` into the node and leaves the node unlinked.
	HashNode(const T& data)
		: _data(data), _next(nullptr)
	{
	}
};

注意：在上一篇文章中，我们介绍了一个用于把非整型关键字转换成整型值的仿函数 HashFunc，在模拟实现时可以直接把它加在模拟实现的类上。

// Default hash functor: converts a key to size_t so the table can reduce it
// modulo the bucket count. Usable for any K that a C-style cast can turn into size_t.
template <class K>
struct HashFunc{
	// const-qualified so the functor can also be invoked through a const object or reference.
	size_t operator()(const K& key) const{
		return (size_t)key;  // reinterpret the key as a number
	}
};

// Specialization for string keys: the running hash is multiplied by 31 before
// each character is added, so character order affects the result.
template<>
struct HashFunc<string>{
	size_t operator()(const string& key) const{
		size_t hash = 0;
		for (auto e : key){
			hash *= 31;
			hash += e;
		}

		return hash;
	}
};

🌈1.2 哈希的主体框架

// Separate-chaining hash table shared by unordered_set and unordered_map.
//   K      - key type
//   T      - stored element type (K for the set, pair<K, V> for the map)
//   KeyOfT - functor that extracts the key from a T
//   Hash   - functor that turns a key into size_t
template<class K, class T, class KeyOfT, class Hash>
class HashTable
{
	// Friend declaration: the iterator reads _tables directly when it advances to the next bucket.
	// A friend class template has to repeat its template parameter list. The parameter names are
	// deliberately different from K/T/KeyOfT/Hash because a template parameter must not be
	// redeclared inside the scope of the enclosing template.
	template<class K2, class T2, class Ptr2, class Ref2, class KeyOfT2, class Hash2>
	friend struct HTIterator;

	typedef HashNode<T> Node; // bucket chain node

public:
	typedef HTIterator<K, T, T*, T&, KeyOfT, Hash> Iterator;  // mutable iterator
	typedef HTIterator<K, T, const T*,const T&, KeyOfT, Hash> ConstIterator; // read-only iterator

   // ... the remaining member functions are developed in the following sections

private:
	vector<Node*> _tables; // bucket array; each slot holds the head pointer of a chain
	size_t _n = 0; // number of stored elements; initialized so it is never read indeterminate
};

2. 哈希的迭代器

🎈2.1 迭代器基本设计

// To keep the implementation simple, the bucket iterator needs access to the HashTable itself, so HashTable is forward-declared here, and HashTable in turn declares the iterator as a friend.

// Forward declaration
template<class K, class T, class KeyOfT, class Hash>
class HashTable;

// Iterator skeleton. Ref and Ptr are template parameters so that a single class template can
// serve as both the mutable and the const iterator.
// NOTE(review): the rest of this article names the iterator HTIterator and passes Ptr before Ref
// (e.g. HTIterator<K, T, T*, T&, ...>); this skeleton uses a different name and the opposite
// parameter order — confirm which one is intended.
template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
struct __HTIterator
{
	typedef HashNode<T> Node;
	typedef __HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;
	Node* _node; // node the iterator currently refers to

	// Access the element stored in the current node.
	Ref operator*()
	{
		return _node->_data;
	}

	// Address of the element stored in the current node.
	Ptr operator->()
	{
		return &(_node->_data);
	}

	// Two iterators differ when they refer to different nodes.
	bool operator!=(const Self& s)
	{
		return _node != s._node;
	}
    // Placeholder only: the body is empty and returns nothing; the full operator++ is given in section 2.3.
    Self& operator++()
    {}
};

🎈2.2 begin()与end()

在构建迭代器的 begin() 与 end() 时，模拟实现 const 版本会遇到一个新的问题：const 成员函数中的 this 是指向 const HashTable 的指针，而最初实现的迭代器构造函数接收的是非 const 的 HashTable 指针，把 const 指针传给它属于权限的放大，因此调不动。为了解决这个问题，我们修改了迭代器的构造函数，让它接收 const 版本的 HashTable 指针（见 2.3 节）。

示例代码如下：

// Mutable iterator: instantiated with T* and T&.
using Iterator = HTIterator<K, T, T*, T&, KeyOfT, Hash>;
// Read-only iterator: instantiated with const T* and const T&.
using ConstIterator = HTIterator<K, T, const T*, const T&, KeyOfT, Hash>;

// Returns an iterator to the head node of the first non-empty bucket,
// or End() when the table holds no elements.
Iterator Begin()
{
	if (_n != 0)
	{
		for (Node* head : _tables)
		{
			if (head != nullptr)
				return Iterator(head, this);
		}
	}
	return End();
}
// Past-the-end iterator: a null node bound to this table.
Iterator End()
{
	Iterator sentinel(nullptr, this);
	return sentinel;
}


// Const overload: returns a read-only iterator to the head node of the first
// non-empty bucket, or End() when the table holds no elements.
ConstIterator Begin() const
{
	if (_n != 0)
	{
		for (Node* head : _tables)
		{
			if (head != nullptr)
				return ConstIterator(head, this);
		}
	}
	return End();
}
// Const overload of the past-the-end iterator: a null node bound to this table.
ConstIterator End() const
{
	ConstIterator sentinel(nullptr, this);
	return sentinel;
}

🎈2.3 operator++()

📚因为哈希桶在底层是单链表结构，所以哈希桶的迭代器不需要operator--()操作，在operator++()的设计上，我们的问题是在走完这个桶之后，如何找到下一个桶，因此我们需要记录来方便寻找，于是我们引入了两个变量

// Table being iterated, held as pointer-to-const
const HashTable<K, T, KeyOfT, Hash>* _pht;
// Index of the current bucket
// NOTE(review): the constructor and operator++ shown in this section never read or write _hashi — confirm it is still needed.
size_t _hashi;

对引入两变量的构造：

// The table is taken as pointer-to-const so the iterator can also be built
// inside const member functions of HashTable without widening access.
HTIterator(Node* node, const HashTable<K, T, KeyOfT, Hash>* pht)
	: _node(node), _pht(pht)
{
}

operator++()代码示例如下：

// Pre-increment: step to the next node of the current chain, otherwise to the head
// of the next non-empty bucket; becomes the end iterator (null node) when none is left.
Self& operator++()
{
	// More nodes remain in the current bucket.
	if (_node->_next != nullptr)
	{
		_node = _node->_next;
		return *this;
	}

	// Chain exhausted: recover the current bucket index from the node's key,
	// then scan the following buckets. _tables is private to HashTable, which is
	// why the iterator has to be declared a friend (or the table expose a getter).
	KeyOfT keyOf;
	Hash hasher;
	const size_t bucketCount = _pht->_tables.size();
	size_t idx = hasher(keyOf(_node->_data)) % bucketCount + 1;
	while (idx < bucketCount && !_pht->_tables[idx])
	{
		++idx;
	}

	_node = (idx == bucketCount) ? nullptr : _pht->_tables[idx];
	return *this;
}

3. 哈希表相关接口的改造

✨3.1 Find 函数的改造

查找成功，返回指向该节点的迭代器；查找失败，就返回 End()。

// Looks up `key`. Returns an iterator to the matching node, or End() when the key is absent.
Iterator Find(const K& key)
{
	Hash hasher;
	KeyOfT keyOf;

	// Only the bucket the key hashes to is searched.
	const size_t bucket = hasher(key) % _tables.size();
	for (Node* node = _tables[bucket]; node != nullptr; node = node->_next)
	{
		if (keyOf(node->_data) == key)
			return Iterator(node, this);
	}
	return End();
}

✨3.2 Insert 函数的改造

map 里的 operator[] 需要依赖 Insert 的返回值

// Inserts `data` unless an element with the same key is already present.
// Returns {iterator to the new element, true} on insertion, or
// {iterator to the existing element, false} when the key was found.
// unordered_map::operator[] relies on this return value.
pair<Iterator, bool> Insert(const T& data)
{
	KeyOfT kot;
	// Reject duplicates.
	Iterator it = Find(kot(data));
	if (it != End())
		return make_pair(it, false);

	Hash hs;
	// Grow when the load factor reaches 1.
	// A lower load factor costs space but keeps chains short; a higher one saves space
	// but makes chains longer.
	if (_n == _tables.size()){
		vector<Node*> newtables(_tables.size() * 2, nullptr);
		for (size_t i = 0; i < _tables.size(); i++)
		{
			Node* cur = _tables[i];
			while (cur) {
				Node* next = cur->_next;
				// Relink the existing node at the head of its bucket in the new table;
				// nodes are moved, not reallocated.
				size_t newi = hs(kot(cur->_data)) % newtables.size();
				cur->_next = newtables[newi];
				newtables[newi] = cur;

				cur = next;
			}
			_tables[i] = nullptr;
		}
		_tables.swap(newtables);
	}

	// Compute the bucket only now. The bucket count may just have changed, and an index
	// taken before the rehash would file the node under the wrong bucket, making it
	// unreachable for Find() and confusing the iterator's bucket scan.
	size_t hashi = hs(kot(data)) % _tables.size();

	// Push the new node at the head of its chain.
	Node* newnode = new Node(data);
	newnode->_next = _tables[hashi];
	_tables[hashi] = newnode;
	++_n;

	return make_pair(Iterator(newnode, this),true);
}

4. Unordered_Set的模拟实现

🧩4.1 Unordered_Set的设计

template<class K, class Hash = HashFunc<K>>
class unordered_set
{
	struct SetKeyOfT
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};
public:
	
	// 因为 unordered_set的特性K是不能够修改的，
	// 所以我们在 const迭代器和非const迭代器上，都用 const来修饰K来起到不能修改K的特点
	typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::Iterator iterator;
	typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::ConstIterator const_iterator;


	iterator begin()
	{
		return _ht.Begin();
	}

	iterator end()
	{
		return _ht.End();
	}

	const_iterator begin() const
	{
		return _ht.Begin();
	}

	const_iterator end() const
	{
		return _ht.End();
	}

	pair<iterator, bool> insert(const K& key)
	{
		return _ht.Insert(key);
	}

	iterator Find(const K& key)
	{
		return _ht.Find(key);
	}

	bool Erase(const K& key)
	{
		return _ht.Erase(key);
	}

private:
	hash_bucket::HashTable<K, const K, SetKeyOfT, Hash> _ht;
};

🧩4.2Unordered_Set的测试

// Smoke test: fills the set, prints it with a range-for and with an explicit
// iterator loop, then passes it to Print().
void test_set()
{
	const int values[] = { 4, 2, 6, 1, 3, 5, 15, 7, 16, 14 };
	unordered_set<int> s;
	for (int v : values)
		s.insert(v);

	for (auto e : s)
		cout << e << " ";
	cout << endl;

	for (unordered_set<int>::iterator it = s.begin(); it != s.end(); ++it)
	{
		//*it += 1; // would not compile: the elements are const
		cout << *it << " ";
	}
	cout << endl;

	Print(s);
}

5. Unordered_Map的模拟实现

🌸5.1 Unordered_Map的设计

// unordered_map built on hash_bucket::HashTable. Elements are stored as pair<const K, V>:
// the key cannot be modified through an iterator, the mapped value can.
template<class K, class V, class Hash = HashFunc<K>>
class unordered_map
{
	// Key extractor for the stored element type.
	struct MapKeyOfT
	{
		// The parameter is exactly the type kept in the table. Declaring it as pair<K, V>
		// would force every call to convert the element into a temporary pair (copying key
		// and value) and to hand back a reference into that temporary.
		const K& operator()(const pair<const K, V>& kv)
		{
			return kv.first;
		}
	};
public:

	// Only kv.first has to be immutable; kv.second stays writable, hence pair<const K, V>.
	typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::Iterator iterator;
	typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::ConstIterator const_iterator;

	iterator begin()
	{
		return _ht.Begin();
	}

	iterator end()
	{
		return _ht.End();
	}

	const_iterator begin() const
	{
		return _ht.Begin();
	}

	const_iterator end() const
	{
		return _ht.End();
	}

	// Forwards to HashTable::Insert and returns its {iterator, inserted} pair.
	pair<iterator, bool> insert(const pair<K, V>& kv)
	{
		return _ht.Insert(kv);
	}

	// Returns a reference to the value mapped to `key`, inserting a value-initialized V
	// first when the key is absent. Insert() returns the element's iterator in both cases.
	V& operator[](const K& key)
	{
		pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));

		return ret.first->second;
	}

	// Forwards to HashTable::Find.
	iterator Find(const K& key)
	{
		return _ht.Find(key);
	}

	// Forwards to HashTable::Erase and returns its result.
	bool Erase(const K& key)
	{
		return _ht.Erase(key);
	}

private:
	hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht;
};

🌸5.2 Unordered_Map的测试

// Smoke test for unordered_map: insert(), operator[] on existing and new keys,
// then an iterator walk that modifies every mapped value.
void test_map()
{
	unordered_map<string, string> dict;
	dict.insert({ "sort", "排序" });
	dict.insert({ "left", "左边" });
	dict.insert(make_pair("right", "右边"));

	dict["left"] = "左边，剩余";   // key already present: overwrites the mapped value
	dict["insert"] = "插入";       // key absent: inserts, then assigns
	dict["string"];                // key absent: inserts a default-constructed value

	for (unordered_map<string, string>::iterator it = dict.begin(); it != dict.end(); ++it)
	{
		// first is read-only, second is writable:
		//it->first += 'x'; // would not compile, the element type is pair<const K, V>
		it->second += 'x';
		cout << it->first << ":" << it->second << endl;
	}
	cout << endl;
}

📖哈希改造的完整代码及总结

#pragma once
#include <iostream>
#include <string>
#include <utility>
#include <vector>
using namespace std;

// Default hash functor: converts a key to size_t so the table can reduce it
// modulo the bucket count. Usable for any K that a C-style cast can turn into size_t.
template <class K>
struct HashFunc
{
	// const-qualified so the functor can also be invoked through a const object or reference.
	size_t operator()(const K& key) const
	{
		return (size_t)key;  // reinterpret the key as a number
	}
};


// Specialization for string keys: the running hash is multiplied by 31 before
// each character is added, so character order affects the result.
template<>
struct HashFunc<string>
{
	size_t operator()(const string& key) const
	{
		size_t hash = 0;
		for (auto e : key)
		{
			hash *= 31;
			hash += e;
		}

		return hash;
	}
};


namespace hash_bucket  //哈希桶-链式
{
	// Singly linked bucket node. The single parameter T decides what the table stores.
	template<typename T>
	struct HashNode
	{
		T _data;          // payload held by this node
		HashNode* _next;  // next node in the same bucket chain; nullptr at the tail

		// Copies `data` into the node and leaves the node unlinked.
		HashNode(const T& data)
			: _data(data), _next(nullptr)
		{
		}
	};

	// Forward declaration: HTIterator below stores a pointer to HashTable before HashTable is defined.
	template<class K, class T, class KeyOfT, class Hash>
	class HashTable;

	//template<class K, class T, class KeyOfT, class Hash>
	template<class K, class T, class Ptr, class Ref, class KeyOfT, class Hash> 
	struct HTIterator
	{
		typedef HashNode<T> Node;
		typedef HTIterator<K, T, Ptr, Ref, KeyOfT, Hash> Self;

		Node* _node;
		const HashTable<K, T, KeyOfT, Hash>* _pht; //由于向上找不到，老是会报_pht找不到，因此我们需要加个前置声明

		HTIterator(Node* node, const HashTable<K, T, KeyOfT, Hash>* pht) //需要用const版本，防止权限放大
			:_node(node)
			,_pht(pht)
		{}

		Ref operator*(){return _node->_data;}

		Ptr operator->(){return &_node->_data;}

		bool operator!=(const Self& s){return _node != s._node;}


		Self& operator++()
		{
			if (_node->_next) //当前桶还有节点{ 
				_node = _node->_next;
			}
			else //当前桶遍历完毕，找下一个不为空的桶
			{
				KeyOfT kot;
				Hash hs;
				size_t hashi = hs(kot(_node->_data)) % _pht->_tables.size();
				
				++hashi;
				while (hashi < _pht->_tables.size()) //由于pht要访问其成员私有，但是无法访问，我们解决方法就是提法gettable或者提供友元
				{
					if (_pht->_tables[hashi]) 
						break; //找到不为空的桶 
					++hashi;
				}
				if (hashi == _pht->_tables.size()) //说明已经走完
				{
					_node = nullptr;
				}
				else _node = _pht->_tables[hashi];
			}
			return *this;
		}


	};

	template<class K, class T, class KeyOfT, class Hash>
	class HashTable
	{
		//友元声明
		template<class K, class T, class Ptr, class Ref, class KeyOfT, class Hash> //普通类的友元直接友元声明即可，而类模板需要把模板参数带上
		friend struct HTIterator;

		typedef HashNode<T> Node;
	
	public:
		typedef HTIterator<K, T, T*, T&, KeyOfT, Hash> Iterator;  //迭代器
		typedef HTIterator<K, T, const T*,const T&, KeyOfT, Hash> ConstIterator; //const迭代器


		Iterator Begin()
		{
			if (_n == 0) return End(); //如果没有数据
			for (size_t i = 0; i < _tables.size(); i++){
				Node* cur = _tables[i];
				if (cur){
					return Iterator(cur, this);
				}
			}
			return End();
		}

		Iterator End(){return Iterator(nullptr, this);}


		ConstIterator Begin() const
		{
			if (_n == 0) return End(); //如果没有数据
			for (size_t i = 0; i < _tables.size(); i++){
				Node* cur = _tables[i];
				if (cur){
					return ConstIterator(cur, this);
				}
			}
			return End();
		}

		ConstIterator End()const {return ConstIterator(nullptr, this); }

	public:
		HashTable()
		{
			_tables.resize(10, nullptr);
		}

		~HashTable()
		{ // 依次把每个桶释放
			for (size_t i = 0; i < _tables.size(); i++){
				Node* cur = _tables[i];
				while (cur){
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_tables[i] = nullptr;
			}
		}


		pair<Iterator, bool> Insert(const T& data) //使用的是头插
		{
			KeyOfT kot;
			Iterator it = Find(kot(data));
			//去重
			if (it != End()) //return false;
				return make_pair(it, false);
				
			Hash hs;
			size_t hashi = hs(kot(data)) % _tables.size();
			//负载因子 == 1扩容，
			// 越低，空间效率越低，时间效率越高
			///越高，空间效率越高，时间效率越低
			if (_n == _tables.size()){

				vector<Node*> newtables(_tables.size() * 2, nullptr);
				for (size_t i = 0; i < _tables.size(); i++){
					Node* cur = _tables[i];
					while (cur) { 
						Node* next = cur->_next;
						//旧表中节点，挪动新表重新映射的位置
						size_t hashi = hs(kot(cur->_data)) % newtables.size();
						//头插到新表
						cur->_next = newtables[hashi];
						newtables[hashi] = cur;
						
						cur = next;
					}
					_tables[i] = nullptr;
				}
				_tables.swap(newtables);

			}

			//头插
			Node* newnode = new Node(data);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;
			++_n;

			return make_pair(Iterator(newnode, this),true);
		}

		Iterator Find(const K& key){
			KeyOfT kot;

			Hash hs;
			size_t hashi = hs(key) % _tables.size();
			Node* cur = _tables[hashi];
			while (cur){
				if (kot(cur->_data) == key) return Iterator(cur, this);
				cur = cur->_next;
			}
			return End();
		}

		bool Erase(const K& key)
		{
			KeyOfT kot;
			Hash hs;
			size_t hashi = hs(key) % _tables.size();
			Node* prev = nullptr;
			Node* cur = _tables[hashi];
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					if (prev == nullptr) //如果是第一个节点 
					{
						_tables[hashi] = cur->_next;
					}
					else //否则，则让前一个指向我的后一个
					{
						prev->_next = cur->_next;
					}

					delete cur;
					--_n;
					return true;
				}

				prev = cur;  
				cur = cur->_next;
			}

			return false;
		}

	private:
		vector<Node*> _tables; //指针数组，数组的每个位置存的是指针
		size_t _n; //表中存储数据个数
	};

}

以上就是哈希改造的全部内容，让我们来总结一下哈希的实现及改造封装吧

——————————步骤——————————

1、实现哈希表
2、封装unordered_map和unordered_set 解决KeyOfT
3、实现 iterator 迭代器
4、实现 const_iterator 迭代器

5、修改Key的问题
6、解决operator[]

本篇到此就结束，希望我的这篇博客可以给你提供有益的参考和启示，感谢大家支持！！！

祝大家天天顺心如意。

island1314

关注

32
点赞
踩
25

收藏

觉得还不错? 一键收藏
9
评论
【C++高阶】哈希的应用（封装unordered_map和unordered_set）

【C++高阶】哈希函数底层原理全面探索和深度解析-CSDN博客之前我们已经学习了如何手搓哈希，现在让我们来对哈希进行改造，并且封装成unordered_map和unordered_set。注意：本篇我们采用开散列的方式来模拟实现unordered//转成数字，把key// 特化template<>hash *= 31;hash += e;namespace hash_bucket //哈希桶-链式template<class T> //用一个值来确定底层存的是什么*/
复制链接

扫一扫

专栏目录