＜unordered_set、unordered_map的模拟实现＞——《C++高阶》

最新推荐文章于 2024-09-22 18:31:44 发布

新晓·故知

最新推荐文章于 2024-09-22 18:31:44 发布

阅读量716

点赞数 3

分类专栏：《C++面向对象程序设计》文章标签： c++ 开发语言哈希算法算法后端

本文链接：https://blog.csdn.net/m0_57859086/article/details/126861898

版权

《C++面向对象程序设计》专栏收录该内容

41 篇文章 7 订阅

订阅专栏

本文深入探讨了C++中unordered_set和unordered_map的内部结构，包括哈希表的改造、迭代器操作的增加以及通过key获取value的方法。通过详细的代码示例，模拟实现了这两个容器，详细解析了它们的插入、查找和删除操作。此外，还介绍了开散列哈希表的实现，包括哈希函数、迭代器和桶的管理。

摘要由CSDN通过智能技术生成

1.unordered_set、unordered_map的结构分析:

1.1 哈希表的改造

1.2 unordered_map模型分析：

2.unordered_set、unordered_map模拟实现：

2.1 unordered_set模拟实现:

2.2 unordered_map模拟实现:

2.3 附用的模拟实现的开散列哈希表

后记：●由于作者水平有限，文章难免存在谬误之处，敬请读者斧正，俚语成篇，恳望指教！

——By 作者：新晓·故知

1.unordered_set、unordered_map的结构分析:

1.1 哈希表的改造

1. 模板参数列表的改造

// K:关键码类型
// V: 不同容器V的类型不同，如果是unordered_map，V代表一个键值对，如果是
unordered_set,V 为 K
// KeyOfValue: 因为V的类型不同，通过value取key的方式就不同，详细见
unordered_map/set的实现
// HF: 哈希函数仿函数对象类型，哈希函数使用除留余数法，需要将Key转换为整形数字才能
取模
template<class K, class V, class KeyOfValue, class HF = DefHashF<T> >
class HashBucket;

2. 增加迭代器操作

// 为了实现简单，在哈希桶的迭代器类中需要用到hashBucket本身，
template<class K, class V, class KeyOfValue, class HF>
class HashBucket;
// 注意：因为哈希桶在底层是单链表结构，所以哈希桶的迭代器不需要--操作
template <class K, class V, class KeyOfValue, class HF>
struct HBIterator
{
 typedef HashBucket<K, V, KeyOfValue, HF> HashBucket;
 typedef HashBucketNode<V>* PNode;
 typedef HBIterator<K, V, KeyOfValue, HF> Self;
 HBIterator(PNode pNode = nullptr, HashBucket* pHt = nullptr);
 Self& operator++()
 {
         // 当前迭代器所指节点后还有节点时直接取其下一个节点
 if (_pNode->_pNext)
 _pNode = _pNode->_pNext;
 else
 {
 // 找下一个不空的桶，返回该桶中第一个节点
 size_t bucketNo = _pHt->HashFunc(KeyOfValue()(_pNode- >_data))+1;
 for (; bucketNo < _pHt->BucketCount(); ++bucketNo)
 {
 if (_pNode = _pHt->_ht[bucketNo])
 break;
 }
 }return *this;
 }
 Self operator++(int);
    V& operator*();
 V* operator->();
 bool operator==(const Self& it) const;
 bool operator!=(const Self& it) const;
 PNode _pNode;             // 当前迭代器关联的节点
 HashBucket* _pHt;         // 哈希桶--主要是为了找下一个空桶时候方便
};

3. 增加通过key获取value操作

template<class K, class V, class KeyOfValue, class HF = DefHashF<T> >
class HashBucket
{
 friend HBIterator<K, V, KeyOfValue, HF>;
    // ......
public:
 typedef HBIterator<K, V, KeyOfValue, HF> Iterator;
 //
    // ...
 // 迭代器
 Iterator Begin()
 {
 size_t bucketNo = 0;
 for (; bucketNo < _ht.capacity(); ++bucketNo)
 {
 if (_ht[bucketNo])
 break;
 }
 if (bucketNo < _ht.capacity())
 return Iterator(_ht[bucketNo], this);
 else
 return Iterator(nullptr, this);
 }
 Iterator End(){ return Iterator(nullptr, this);}
    Iterator Find(const K& key);
 Iterator Insert(const V& data);
 Iterator Erase(const K& key);
    
    // 为key的元素在桶中的个数
 size_t Count(const K& key)
 {
 if(Find(key) != End())
            return 1;
        
        return 0;
 }
    
    size_t BucketCount()const{ return _ht.capacity();}
    size_t BucketSize(size_t bucketNo)
   {
        size_t count = 0;
   PNode pCur = _ht[bucketNo];
        while(pCur)
       {
            count++;
            pCur = pCur->_pNext;
       }
        
        return count;
   }
    
    // ......
};

1.2 unordered_map模型分析：

// unordered_map中存储的是pair<K, V>的键值对，K为key的类型，V为value的类型，HF哈希
函数类型
// unordered_map在实现时，只需将hashbucket中的接口重新封装即可
template<class K, class V, class HF = DefHashF<K>>
class unordered_map
{
 typedef pair<K, V> ValueType;
typedef HashBucket<K, ValueType, KeyOfValue, HF> HT;
// 通过key获取value的操作
 struct KeyOfValue
 {
 const K& operator()(const ValueType& data)
 { return data.first;}
 };
public:
 typename typedef HT::Iterator iterator;
public:
 unordered_map(): _ht()
 {}
 
 iterator begin(){ return _ht.Begin();}
 iterator end(){ return _ht.End();}
 
 // capacity
 size_t size()const{ return _ht.Size();}
 bool empty()const{return _ht.Empty();}
 ///
 // Acess
 V& operator[](const K& key)
 {
 return (*(_ht.InsertUnique(ValueType(key, V())).first)).second;
 }
 const V& operator[](const K& key)const;
 //
 // lookup
 iterator find(const K& key){ return _ht.Find(key);}
 size_t count(const K& key){ return _ht.Count(key);}
 /
 // modify
 pair<iterator, bool> insert(const ValueType& valye)
 { return _ht.Insert(valye);}
 iterator erase(iterator position)
 { return _ht.Erase(position);}

 // bucket
 size_t bucket_count(){ return _ht.BucketCount();}
 size_t bucket_size(const K& key){ return _ht.BucketSize(key);}
private:
 HT _ht;
};

2.unordered_set、unordered_map模拟实现：

2.1 unordered_set模拟实现:

#include"OpenHT.h"

namespace my
{
	template<class K, class HashFunc = DefaultHash<K>>
	class unordered_set
	{
		//内部类
		struct SetKeyOfT
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};
	public:
		typedef typename Bucket::HashTable<K, K, SetKeyOfT, HashFunc>::iterator iterator;

		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}
		//bool insert(const K& key)
		pair<iterator, bool> insert(const K& key)
		{
			return _ht.Insert(key);
		}
		iterator find(const K& key)
		{
			return _ht.Find(key);
		}
		bool erase(const K& key)
		{
			return _ht.Erase(key);
		}
	private:
		Bucket::HashTable<K, K, SetKeyOfT,HashFunc> _ht;
	};
	
}

2.2 unordered_map模拟实现:

#include"OpenHT.h"

namespace my
{
	template<class K, class V, class HashFunc = DefaultHash<K>>
	class unordered_map
	{
		//内部类
		struct MapKeyOfT
		{
			const K& operator()(const pair<K,V>& kv)
			{
				return kv.first;
			}
		};
	public:
		//定义map的迭代器
		typedef typename Bucket::HashTable<K, pair<K, V>, MapKeyOfT, HashFunc> ::iterator iterator;
		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}
		//bool insert(const pair<K, V>& kv)
		pair<iterator,bool> insert(const pair<K, V>& kv)
		{
			return _ht.Insert(kv);

		}
		iterator find(const K& key)
		{
			return _ht.Find(key);
		}
		bool erase(const K& key)
		{
			return _ht.Erase(key);
		}
		V& operator[](const K& key)
		{
			//pair<iterator, bool> ret = insert(make_pair(key, V()));
			auto ret = insert(make_pair(key, V()));
			return ret.first->second;  //first是迭代器，而map里面访问second要用->,其实这里有两个->,但为了可读性
		}
	private:
		Bucket::HashTable<K, pair<K, V>, MapKeyOfT,HashFunc> _ht;
	};
}

2.3 附用的模拟实现的开散列哈希表

OpenHT.h:

#pragma once
#include<iostream>
#include<string>
#include<vector>
#include<map>
#include<set>
#include<unordered_map>
#include<unordered_set>
using namespace std;

//仿函数
template<class K>
struct DefaultHash        //1.普通类直接强转
{
	size_t operator()(const K& key)
	{
		return(size_t)key; //支持取模，强转为整数
	}
};
template<>
struct DefaultHash<string>     //String类特化
{
	size_t operator()(const string& key)
	{
		//4.BKDR法
		size_t hash = 0;
		for (auto ch : key)
		{
			hash = hash * 131 + ch;
		}
		return hash;
	}
};
namespace Bucket
{
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;
		HashNode(const T& data)
			:_data(data)
			, _next(nullptr)
		{}
	};

	template<class K, class T, class KeyOfT, class HashFunc>
	class HashTable;

	template<class K, class T, class KeyOfT, class HashFunc>
	class __HTIterator   //单向迭代器
	{
		typedef HashNode<T> Node;
		typedef __HTIterator<K, T, KeyOfT, HashFunc> Self;
	public:
		Node* _node;
		HashTable<K, T, KeyOfT, HashFunc>* _pht;
		__HTIterator(){}   //默认哈希迭代器构造函数
		__HTIterator(Node* node, HashTable<K, T, KeyOfT, HashFunc>* pht)
			:_node(node)
			,_pht(pht)
		{}

		Self& operator++()
		{
			if (_node->_next)
			{
				_node = _node->_next;
			}
			else
			{
				KeyOfT kot;
				HashFunc hf;
				size_t hashi = hf(kot(_node->_data))%_pht->_tables.size();
				++hashi;
				//找下一个不为空的桶
				for (; hashi < _pht->_tables.size(); ++hashi)
				{
					if (_pht->_tables[hashi])
					{
						_node = _pht->_tables[hashi];
						break;
					}
				}

				// 没有找到不为空的桶，用nullptr去做end标识
				if (hashi == _pht->_tables.size())
				{
					_node = nullptr;
				}
			}

			return *this;
		}

		T& operator*()
		{
			return _node->_data;
		}

		T* operator->()
		{
			return &_node->_data;
		}

		bool operator!=(const Self& s) const
		{
			return _node != s._node;
		}

		bool operator==(const Self& s) const
		{
			return _node == s._node;
		}
	};

	//unordered_set--->HashTable<K, K, SetKeyOfT> _ht;
	//unordered_map--->HashTable<K, pair<K, V>, MapKeyOfT> _ht;
	template<class K, class T, class KeyOfT, class HashFunc>
	class HashTable
	{
		template<class K, class T, class KeyOfT, class HashFunc>
		friend class __HTIterator;
		typedef HashNode<T> Node;
	public:
		typedef __HTIterator<K, T, KeyOfT, HashFunc> iterator;
		iterator begin()
		{
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				Node* cur = _tables[i];
				if (cur)
				{
					return iterator(cur, this);
				}
			}

			return end();
		}

		iterator end()
		{
			return iterator(nullptr, this);
		}
		//手动实现Node的析构函数
		~HashTable()
		{
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_tables[i] = nullptr;
			}
		}
		
		///
		HashTable() = default;  //强制生成默认构造函数

		链表桶的深拷贝
		//HashTable(const HashTable<K, V> ht)
		//{
		//	int sz = ht._tables.size();
		//	for (int i = 0; i < sz; i++)
		//	{
		//		Node* cur = ht._tables[i];
		//		while (cur)
		//		{
		//			this->Insert(cur->_kv);
		//			cur = cur->_next;
		//		}
		//	}
		//	_n = ht._n;

		//}
		链表桶的赋值重载
		//HashTable<K, V>& operator=(HashTable<K, V> ht)
		//{
		//	//现代写法
		//	ht._tablle.swap(_tables);
		//	_n = ht._n;
		//}
		///
		size_t GetNextPrime(size_t prime)
		{
			const int PRIMECOUNT = 28;
			static const size_t primeList[PRIMECOUNT] =
			{
				53ul, 97ul, 193ul, 389ul, 769ul,
				1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
				49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
				1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
				50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
				1610612741ul, 3221225473ul, 4294967291ul
			};

			// 获取比prime大那一个素数
			size_t i = 0;
			for (; i < PRIMECOUNT; ++i)
			{
				if (primeList[i] > prime)
					return primeList[i];
			}

			return primeList[i];
		}
		//bool Insert(const T& data)
		pair<iterator,bool> Insert(const T& data)
		{
			HashFunc hf;
			KeyOfT kot;
			//if (Find(kot(data)))  //使用仿函数，不知道data是set还是map
			//if (Find(kot(data))!=end())  //使用仿函数，不知道data是set还是map

			iterator pos = Find(kot(data));
			if(pos!=end())
			{
				//return false;
				return make_pair(pos,false);
			}

			//负载因子（这里设置为1）  扩容
			if (_tables.size() == _n)
			{
				//写法2：扩容
				//size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				//写法3：
				size_t newSize = GetNextPrime(_tables.size());
				if (newSize != _tables.size())
				{
					vector<Node*> newTable;
					newTable.resize(newSize, nullptr);

					for (size_t i = 0; i < _tables.size(); ++i)
					{
						Node* cur = _tables[i];
						while (cur)
						{
							size_t hashi = hf(kot(cur->_data)) % newSize;  //hf转换为整型,kot()取k
							cur->_next = newTable[hashi];
							newTable[hashi] = cur;
							cur = cur->_next;
						}
						_tables[i] = nullptr;
					}
					newTable.swap(_tables);

				}
			}
			size_t hashi = hf(kot(data)); //两层仿函数调用
			hashi %= _tables.size();
			//头插到对应的桶即可
			Node* newnode = new Node(data);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;
			++_n;
			//return true;
			return make_pair(iterator(newnode,this),false);
		}
		//Node* Find(const K& key)
		iterator Find(const K& key)
		{
			if (_tables.size() == 0)
			{
				//return nullptr;
				return iterator(nullptr,this);

			}
			
			KeyOfT kot;
			//写法1：有名对象
			HashFunc hf;
			size_t hashi = hf(key);

			//写法2：匿名对象
			//size_t hashi = HashFunc()(key);

			hashi %= _tables.size();
			Node* cur = _tables[hashi];
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					//return cur;
					return iterator(cur, this);
				}
				cur = cur->_next;
			}
			//return nullptr;  //效率高，是因为直接返回了指针
			return iterator(nullptr, this);
		}
		bool Erase(const K& key)
		{
			if (_tables.size() == 0)
			{
				return false;
			}
			KeyOfT kot;

			//写法1：有名对象
			HashFunc hf;
			size_t hashi = hf(key);
			hashi %= _tables.size();
			Node* cur = _tables[hashi];
			Node* prev = nullptr;
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					if (prev == nullptr)
					{
						_tables[hashi] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}
					delete cur;
					return true;
					
				}
				cur = cur->_next;
			}
			return false;
		}
	private:
		//指针数组
		//没有使用list，是因为list是双向链表，成本大 vector<list<>> _tables; 但不用手动写析构函数
		vector<Node*> _tables;    
		//开散列采用挂起，如果新表扩容，那么当旧表释放，vector会将自己的释放，
		//但是挂在vector的结点Node* 不会自动释放，因为Node* 是内置类型，需要手动释放。
		size_t _n = 0;
	};
}

Test.cpp:

#include"OpenHT.h"
#include"UnorderedSet.h"
#include"UnorderedMap.h"

//
//1.库中的UnorderedSet、UnorderedMap测试
void test_std_UnorderedSet()
{
	std::unordered_set<int> s;
	s.insert(9);
	s.insert(7);
	s.insert(2);
	s.insert(1);
	s.insert(6);
	s.insert(5);
	//1.迭代器遍历
	//unordered_set<int>::iterator it = s.begin();
	//auto it = s.begin();
	//写法2：
	std::unordered_set<int>::iterator it;
	it = s.begin();
	while (it != s.end())
	{
		cout << *it << " ";
		++it;
	}
	cout << endl;
	//2.范围for遍历
	for (auto e : s)
	{
		cout << e << " ";
	}
	cout << endl;
}
void test_std_UnorderedMap()
{
	std::unordered_map<string, string> dict;
	dict.insert(make_pair("sort", "排序"));
	dict.insert(make_pair("left", "左边"));
	dict.insert(make_pair("left", "剩余"));
	dict["string"];
	dict["left"] = "剩余";
	dict["string"] = "字符串";
	//迭代器遍历
	//std::unordered_map<string, string>::iterator it = dict.begin();
	auto it = dict.begin();
	while (it != dict.end())
	{
		cout << it->first << " " << it->second << endl;
		++it;
	}
	cout << endl;
	//范围for遍历
	for (auto& kv : dict)
	{
		cout << kv.first << " " << kv.second << endl;
	}
}
//
//2.模拟实现UnorderedSet、UnorderedMap测试
void test_my_UnorderedSet()
{
	my::unordered_set<int> s;
	s.insert(9);
	s.insert(7);
	s.insert(2);
	s.insert(1);
	s.insert(6);
	s.insert(5);
	//1.迭代器遍历
	//my::unordered_set<int>::iterator it = s.begin();
	//auto it = s.begin();
	//写法2：
	my::unordered_set<int>::iterator it;
	it = s.begin();
	while (it != s.end())
	{
		cout << *it << " ";
		++it;
	}
	cout << endl;
	//2.范围for遍历
	for (auto e : s)
	{
		cout << e << " ";
	}
	cout << endl;
}
void test_my_UnorderedMap()
{
	my::unordered_map<string, string> dict;
	dict.insert(make_pair("sort", "排序"));
	dict.insert(make_pair("left", "左边"));
	dict.insert(make_pair("left", "剩余"));
	dict["string"];
	dict["left"] = "剩余";
	dict["string"] = "字符串";
	//迭代器遍历
	//my::unordered_map<string, string>::iterator it = dict.begin();
	auto it = dict.begin();
	while (it != dict.end())
	{
		cout << it->first << " " << it->second << endl;
		++it;
	}
	cout << endl;
	//范围for遍历
	for (auto& kv : dict)
	{
		cout << kv.first << " " << kv.second << endl;
	}
}
int main()
{
	//test_std_UnorderedSet();
	//test_std_UnorderedMap();

	//test_my_UnorderedSet();
	test_my_UnorderedMap();

	return 0;
}

测试示例：