从零带你底层实现unordered_map的代码补全（2）

最新推荐文章于 2024-07-19 21:56:09 发布

陈大大陈

最新推荐文章于 2024-07-19 21:56:09 发布

阅读量768

点赞数 16

文章标签：哈希算法、算法、数据结构

本文链接：https://blog.csdn.net/weixin_73534885/article/details/134696245

版权

先把代码发出来，大家先看，有什么不懂的都可以私信问我。

这部分的迭代器比较复杂，需要仔细看看。

💯 博客内容：代码补全

😀 作者：陈大大陈

🚀 个人简介：一个正在努力学技术的准C++后端工程师，专注基础和实战分享，欢迎私信！

💖 欢迎大家：这里是CSDN，我总结知识和写笔记的地方，喜欢的话请三连，有问题请私信 😘 😘 😘

HashTable.h

#pragma once
#include<vector>
#include<iostream>
using namespace std;
// Default hash functor: turns a key into a hash value by converting it to
// size_t. Usable as-is for key types that such a cast accepts (for example
// HashFunc<int>); other key types need a specialization or a custom functor.
template<class K>
struct HashFunc
{
	// Returns `key` converted to size_t.
	// Declared const so the functor can also be invoked through a const
	// object or a const reference.
	size_t operator()(const K& key) const
	{
		return (size_t)key;
	}
};

// 11:46继续
// HashFunc<string>: specialization for string keys using the BKDR scheme,
// i.e. hash = hash * 31 + character, applied to every character in order.
template<>
struct HashFunc<string>
{
	// Returns the BKDR hash of `key` (0 for the empty string).
	// The functor has no side effects: the per-call debug print that used to
	// live here was removed because it wrote to stdout on every lookup.
	size_t operator()(const string& key) const
	{
		size_t hash = 0;
		for (char ch : key)
		{
			hash = hash * 31 + ch;
		}
		return hash;
	}
};

namespace open_address
{
	// State of one slot in the open-addressing table.
	enum Status
	{
		EMPTY = 0,   // no element stored; also the value a value-initialized slot gets
		EXIST = 1,   // slot currently holds a live element
		DELETE = 2   // slot held an element that has since been erased
	};

	// One slot of the open-addressing table: the stored key/value pair plus
	// the slot's state.
	template<class K, class V>
	struct HashData
	{
		pair<K, V> _kv;        // stored element; only meaningful while _s == EXIST
		Status _s = EMPTY;     // slot state; initialized so a default-constructed
		                       // slot never carries an indeterminate state
	};

	//struct HashFuncString
	//{
	//	size_t operator()(const string& key)
	//	{
	//		// BKDR
	//		size_t hash = 0;
	//		for (auto e : key)
	//		{
	//			hash *= 31;
	//			hash += e;
	//		}

	//		cout << key << ":" << hash << endl;
	//		return hash;
	//	}
	//};

	// Hash table using open addressing with linear probing.
	//
	// K    - key type, compared with operator==.
	// V    - mapped value type.
	// Hash - functor converting a K to size_t (defaults to HashFunc<K>).
	//
	// Erased slots are marked DELETE instead of being reset to EMPTY, so probe
	// sequences that pass through them are not cut short.
	template<class K, class V, class Hash = HashFunc<K>>
	class HashTable
	{
	public:
		// Creates a table with 10 value-initialized slots.
		HashTable()
		{
			_tables.resize(10);
		}

		// Inserts `kv` if its key is not present yet.
		// Returns true when the element was inserted, false when the key
		// already exists (the stored value is left untouched).
		bool Insert(const pair<K, V>& kv)
		{
			if (Find(kv.first))
				return false;

			// Grow once the load factor reaches 0.7. `>=` is used instead of
			// `==` so the check still fires if the exact value is ever passed.
			if (_n * 10 / _tables.size() >= 7)
			{
				// Re-insert every live element into a table twice the size;
				// slots marked DELETE are dropped in the process.
				HashTable<K, V, Hash> newHT;
				newHT._tables.resize(_tables.size() * 2);
				for (size_t i = 0; i < _tables.size(); i++)
				{
					if (_tables[i]._s == EXIST)
					{
						newHT.Insert(_tables[i]._kv);
					}
				}

				_tables.swap(newHT._tables);
			}

			Hash hf;
			// Linear probing: take the first slot that is not occupied
			// (EMPTY or DELETE). Terminates because fewer than size() slots
			// are EXIST after the load-factor check above.
			size_t hashi = hf(kv.first) % _tables.size();
			while (_tables[hashi]._s == EXIST)
			{
				hashi = (hashi + 1) % _tables.size();
			}

			_tables[hashi]._kv = kv;
			_tables[hashi]._s = EXIST;
			++_n;

			return true;
		}

		// Returns a pointer to the slot holding `key`, or nullptr if the key
		// is not stored.
		HashData<K, V>* Find(const K& key)
		{
			Hash hf;

			size_t hashi = hf(key) % _tables.size();
			// Probe at most size() slots: after enough insert/erase cycles
			// every slot can be EXIST or DELETE, and an unbounded
			// "until EMPTY" loop would then never stop.
			for (size_t probes = 0;
				probes < _tables.size() && _tables[hashi]._s != EMPTY;
				++probes)
			{
				if (_tables[hashi]._s == EXIST
					&& _tables[hashi]._kv.first == key)
				{
					return &_tables[hashi];
				}

				hashi = (hashi + 1) % _tables.size();
			}

			return nullptr;
		}

		// Lazy deletion: marks the slot holding `key` as DELETE.
		// Returns true if the key was found and erased, false otherwise.
		bool Erase(const K& key)
		{
			HashData<K, V>* ret = Find(key);
			if (ret == nullptr)
			{
				return false;
			}

			ret->_s = DELETE;
			--_n;
			return true;
		}

		// Prints one line per slot: "[i]->key:value" for a live element,
		// "[i]->" for an EMPTY slot and "[i]->D" for a DELETE slot, followed
		// by a blank line.
		void Print()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				// Streamed instead of printf("%d", i): i is a size_t, which
				// does not match the %d conversion.
				cout << "[" << i << "]->";
				if (_tables[i]._s == EXIST)
				{
					cout << _tables[i]._kv.first << ":" << _tables[i]._kv.second;
				}
				else if (_tables[i]._s == DELETE)
				{
					cout << "D";
				}
				cout << '\n';
			}

			cout << endl;
		}

	private:
		vector<HashData<K, V>> _tables;
		size_t _n = 0; // number of live (EXIST) elements
	};

	void TestHT1()
	{
		HashTable<int, int> ht;
		int a[] = { 4,14,24,34,5,7,1 };
		for (auto e : a)
		{
			ht.Insert(make_pair(e, e));
		}

		ht.Insert(make_pair(3, 3));
		ht.Insert(make_pair(3, 3));
		ht.Insert(make_pair(-3, -3));
		ht.Print();

		ht.Erase(3);
		ht.Print();

		if (ht.Find(3))
		{
			cout << "3存在" << endl;
		}
		else
		{
			cout << "3不存在" << endl;
		}

		ht.Insert(make_pair(3, 3));
		ht.Insert(make_pair(23, 3));
		ht.Print();
	}

	void TestHT2()
	{
		string arr[] = { "香蕉", "甜瓜","苹果", "西瓜", "苹果", "西瓜", "苹果", "苹果", "西瓜", "苹果", "香蕉", "苹果", "香蕉" };
		//HashTable<string, int, HashFuncString> ht;
		HashTable<string, int> ht;
		for (auto& e : arr)
		{
			//auto ret = ht.Find(e);
			HashData<string, int>* ret = ht.Find(e);
			if (ret)
			{
				ret->_kv.second++;
			}
			else
			{
				ht.Insert(make_pair(e, 1));
			}
		}

		ht.Print();

		ht.Insert(make_pair("apple", 1));
		ht.Insert(make_pair("sort", 1));

		ht.Insert(make_pair("abc", 1));
		ht.Insert(make_pair("acb", 1));
		ht.Insert(make_pair("aad", 1));

		ht.Print();
	}
}


namespace hash_bucket
{
	// One node of a bucket's singly linked list.
	template<class T>
	struct HashNode
	{
		HashNode<T>* _next;   // next node in the same bucket, or nullptr
		T _data;              // the stored element

		// Copies `data` into a node that is not linked to anything yet.
		// Initializers are written in declaration order (_next, then _data).
		HashNode(const T& data)
			:_next(nullptr)
			, _data(data)
		{}
	};

	// Forward declaration: the iterator stores a pointer to its table.
	template<class K, class T, class KeyOfT, class Hash>
	class HashTable;

	// Forward iterator over all elements of a HashTable.
	//
	// Ref / Ptr select the reference and pointer types handed out by
	// operator* and operator-> (T& / T* for iterator, const T& / const T* for
	// const_iterator). The end position is represented by _node == nullptr.
	template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
	struct __HTiterator
	{
		typedef HashNode<T> Node;
		typedef __HTiterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;

		Node* _node;                                  // current node, nullptr at end
		const HashTable<K, T, KeyOfT, Hash>* _pht;    // table being iterated
		size_t _hashi;                                // index of the bucket holding _node

		// A single constructor taking a pointer-to-const table is enough: a
		// pointer to a non-const table converts to it implicitly.
		__HTiterator(Node* node, const HashTable<K, T, KeyOfT, Hash>* pht, size_t hashi)
			:_node(node)
			, _pht(pht)
			, _hashi(hashi)
		{}

		// Pre-increment: moves to the next node of the current bucket, or to
		// the first node of the next non-empty bucket, or to the end position
		// when no further bucket holds a node.
		// Must not be called on an end iterator (_node is dereferenced).
		Self& operator++()
		{
			if (_node->_next)
			{
				_node = _node->_next;
				return *this;
			}

			// Current bucket exhausted: scan the following buckets.
			_node = nullptr;
			for (++_hashi; _hashi < _pht->_tables.size(); ++_hashi)
			{
				if (_pht->_tables[_hashi])
				{
					_node = _pht->_tables[_hashi];
					break;
				}
			}
			return *this;
		}

		// Post-increment: advances and returns the previous position.
		Self operator++(int)
		{
			Self old(*this);
			++(*this);
			return old;
		}

		// Two iterators are equal when they refer to the same node.
		bool operator==(const Self& s) const
		{
			return _node == s._node;
		}

		bool operator!=(const Self& s) const
		{
			return _node != s._node;
		}

		Ref operator*() const
		{
			return _node->_data;
		}

		Ptr operator->() const
		{
			return &_node->_data;
		}
	};

	// Hash table using separate chaining: each bucket is a singly linked list
	// of HashNode<T>.
	//
	// K      - key type used for lookup, compared with operator==.
	// T      - stored element type.
	// KeyOfT - functor extracting the key from a T.
	// Hash   - functor converting a K to size_t.
	template<class K, class T, class KeyOfT, class Hash>
	class HashTable
	{
		typedef HashNode<T> Node;

		// The iterator reads _tables directly. The friend template uses its
		// own parameter names: re-using K, T, ... would shadow the enclosing
		// template's parameters, which is ill-formed.
		template<class K2, class T2, class Ref2, class Ptr2, class KeyOfT2, class Hash2>
		friend struct __HTiterator;
	public:
		typedef __HTiterator<K, T, T&, T*, KeyOfT, Hash> iterator;
		typedef __HTiterator<K, T, const T&, const T*, KeyOfT, Hash> const_iterator;

		// Iterator to the first node of the first non-empty bucket, or end().
		iterator begin()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				if (_tables[i])
				{
					return iterator(_tables[i], this, i);
				}
			}
			return end();
		}

		// End position: null node; the bucket index is size_t's maximum value.
		iterator end()
		{
			return iterator(nullptr, this, static_cast<size_t>(-1));
		}

		const_iterator begin() const
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				if (_tables[i])
				{
					return const_iterator(_tables[i], this, i);
				}
			}
			return end();
		}

		const_iterator end() const
		{
			return const_iterator(nullptr, this, static_cast<size_t>(-1));
		}

		// Creates a table with 10 empty buckets.
		HashTable()
		{
			_tables.resize(10);
		}

		// Deep copy. The table owns its nodes through raw pointers, so the
		// implicitly generated copy would share them between two tables and
		// delete them twice.
		HashTable(const HashTable& other)
			:_tables(other._tables.size(), nullptr)
		{
			try
			{
				for (size_t i = 0; i < other._tables.size(); i++)
				{
					// Append at the tail so each bucket keeps its node order.
					Node** tail = &_tables[i];
					for (const Node* cur = other._tables[i]; cur; cur = cur->_next)
					{
						*tail = new Node(cur->_data);
						tail = &(*tail)->_next;
						++_n;
					}
				}
			}
			catch (...)
			{
				// The destructor does not run for a partially constructed
				// object, so release the nodes created so far.
				Clear();
				throw;
			}
		}

		// Copy assignment via copy-and-swap; self-assignment is a no-op.
		HashTable& operator=(const HashTable& other)
		{
			if (this != &other)
			{
				HashTable tmp(other);
				_tables.swap(tmp._tables);
				size_t n = _n;
				_n = tmp._n;
				tmp._n = n;
			}
			return *this;
		}

		~HashTable()
		{
			Clear();
		}

		// Inserts `data` if no element with the same key is stored.
		// Returns {iterator to the element with that key, true if inserted}.
		//
		// The key is re-extracted with kot(...) inside each expression rather
		// than cached in a reference, so a KeyOfT that returns a reference
		// into a temporary is still used safely.
		std::pair<iterator, bool> Insert(const T& data)
		{
			Hash hf;
			KeyOfT kot;

			iterator it = Find(kot(data));
			if (it != end())
				return std::make_pair(it, false);

			// Load factor reached 1: double the bucket count and relink the
			// existing nodes (no node is copied or reallocated).
			if (_n == _tables.size())
			{
				std::vector<Node*> newTables(_tables.size() * 2, nullptr);
				for (size_t i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];
					while (cur)
					{
						Node* next = cur->_next;

						// Head-insert into the bucket chosen by the node's own
						// hash in the new table (not the old bucket index i).
						size_t hashi = hf(kot(cur->_data)) % newTables.size();
						cur->_next = newTables[hashi];
						newTables[hashi] = cur;

						cur = next;
					}

					_tables[i] = nullptr;
				}

				_tables.swap(newTables);
			}

			size_t hashi = BucketOf(kot(data));
			Node* newnode = new Node(data);

			// Head insertion into the target bucket.
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;
			++_n;

			return std::make_pair(iterator(newnode, this, hashi), true);
		}

		// Returns an iterator to the element whose key equals `key`, or end().
		iterator Find(const K& key)
		{
			size_t hashi = BucketOf(key);
			Node* node = FindInBucket(hashi, key);
			return node ? iterator(node, this, hashi) : end();
		}

		// Const overload of Find, returning a const_iterator.
		const_iterator Find(const K& key) const
		{
			size_t hashi = BucketOf(key);
			Node* node = FindInBucket(hashi, key);
			return node ? const_iterator(node, this, hashi) : end();
		}

		// Removes the element whose key equals the key of `data`.
		// Returns true if an element was removed, false if none matched.
		// Matching is done on the key only, the same notion of identity that
		// Insert and Find use, and the element count is decremented.
		bool Erase(const T& data)
		{
			KeyOfT kot;
			size_t hashi = BucketOf(kot(data));
			Node* prev = nullptr;
			for (Node* cur = _tables[hashi]; cur; prev = cur, cur = cur->_next)
			{
				if (kot(cur->_data) == kot(data))
				{
					if (prev == nullptr)
					{
						_tables[hashi] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}
					delete cur;
					--_n;

					return true;
				}
			}

			return false;
		}

		// Prints bucket statistics: total bucket count, number of non-empty
		// buckets, longest bucket and average length of non-empty buckets
		// (reported as 0 when every bucket is empty).
		void Some()
		{
			size_t bucketSize = 0;
			size_t maxBucketLen = 0;
			size_t sum = 0;

			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				if (cur)
				{
					++bucketSize;
				}

				size_t bucketLen = 0;
				while (cur)
				{
					++bucketLen;
					cur = cur->_next;
				}

				sum += bucketLen;

				if (bucketLen > maxBucketLen)
				{
					maxBucketLen = bucketLen;
				}
			}

			double averageBucketLen = bucketSize ? (double)sum / (double)bucketSize : 0.0;

			// %zu is the conversion for size_t; %d expects an int.
			printf("all bucketSize:%zu\n", _tables.size());
			printf("bucketSize:%zu\n", bucketSize);
			printf("maxBucketLen:%zu\n", maxBucketLen);
			printf("averageBucketLen:%lf\n\n", averageBucketLen);
		}

	private:
		// Index of the bucket that `key` hashes to.
		size_t BucketOf(const K& key) const
		{
			Hash hf;
			return hf(key) % _tables.size();
		}

		// Walks bucket `hashi` and returns the node whose key equals `key`,
		// or nullptr if there is none.
		Node* FindInBucket(size_t hashi, const K& key) const
		{
			KeyOfT kot;
			Node* cur = _tables[hashi];
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					return cur;
				}
				cur = cur->_next;
			}
			return nullptr;
		}

		// Deletes every node, leaving all buckets empty and the count at 0.
		void Clear()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_tables[i] = nullptr;
			}
			_n = 0;
		}

		std::vector<Node*> _tables;   // bucket heads
		size_t _n = 0;                // number of stored elements
	};

}

MyUnorderedSet.h

#pragma once
#include"HashTable.h"
namespace bit
{
	template<class K,class Hash=HashFunc<K>>
	class unordered_set
	{
		struct SetKeyOfT
		{
			const K& operator()(const K& Key)
			{
				return Key;
			}
		};
	public:
		typedef typename hash_bucket::HashTable<K, K, SetKeyOfT,Hash>::const_iterator iterator;
		typedef typename hash_bucket::HashTable<K, K, SetKeyOfT, Hash>::const_iterator const_iterator;
		pair<const_iterator, bool> insert(const K& key)
		{
			auto ret = _ht.Insert(key);
			return pair<const_iterator, bool>(const_iterator(ret.first._node, ret.first._pht, ret.first._hashi), ret.second);
		}
		/*iterator begin()
		{
			return _ht.begin();
		}
		iterator end()
		{
			return _ht.end();
		}*/
		const_iterator begin() const
		{
			return _ht.begin();
		}
		const_iterator end() const
		{
			return _ht.end();
		}
	private:
		hash_bucket::HashTable<K, K, SetKeyOfT,Hash> _ht;
	};
	// Demo: inserts a few ints into an unordered_set and prints the elements
	// twice, once with an explicit iterator loop and once with range-for.
	void test_set()
	{
		unordered_set<int> us;
		const int values[] = { 5, 15, 32, 3 };
		for (int v : values)
		{
			us.insert(v);
		}

		for (unordered_set<int>::iterator it = us.begin(); it != us.end(); ++it)
		{
			cout << *it << ' ';
		}
		cout << endl;

		for (auto e : us)
		{
			cout << e << ' ';
		}
		cout << endl;
	}
}

MyUnorderedMap.h

#pragma once
#include"HashTable.h"
namespace bit
{
	// Map adaptor over hash_bucket::HashTable storing pair<const K, V>.
	//
	// K    - key type.
	// V    - mapped value type.
	// Hash - functor converting a K to size_t (defaults to HashFunc<K>).
	template<class K, class V,class Hash=HashFunc<K>>
	class unordered_map
	{
		// Key extractor for the table.
		// The parameter type is exactly the stored element type
		// pair<const K, V>. Taking pair<K, V> instead would make every call
		// convert the element into a temporary pair and return a reference
		// into that temporary.
		struct MapKeyOfT
		{
			const K& operator()(const pair<const K, V>& kv) const
			{
				return kv.first;
			}
		};
	public:
		typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::iterator iterator;
		typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::const_iterator const_iterator;

		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

		const_iterator begin() const
		{
			return _ht.begin();
		}

		const_iterator end() const
		{
			return _ht.end();
		}

		// Inserts `kv` if its key is absent. Returns the position of the
		// element with that key and whether an insertion took place.
		pair<iterator, bool> insert(const pair<const K,V>& kv)
		{
			return _ht.Insert(kv);
		}

		// Returns a reference to the value mapped to `key`, inserting a
		// value-initialized V first when the key is not present.
		V& operator[](const K& key)
		{
			// Build the stored element type directly to avoid an extra
			// pair<K, V> -> pair<const K, V> conversion.
			pair<iterator, bool> ret = _ht.Insert(pair<const K, V>(key, V()));
			return ret.first->second;
		}

	private:
		hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT,Hash> _ht;
	};

	void test_map()
	{
		unordered_map<string, string> dict;
		dict.insert(make_pair("sort", ""));
		dict.insert(make_pair("string", "ַ"));
		dict.insert(make_pair("insert", ""));

		for (auto& kv : dict)
		{
			//kv.first += 'x';
			kv.second += 'x';

			cout << kv.first << ":" << kv.second << endl;
		}
		cout << endl;

		string arr[] = { "橘子", "苹果","香蕉", "香蕉", "橘子", "橘子", "可乐", "砂糖橘" };
		unordered_map<string, int> count_map;
		for (auto& e : arr)
		{
			count_map[e]++;
		}

		for (auto& kv : count_map)
		{
			cout << kv.first << ":" << kv.second << endl;
		}
		cout << endl;
	}
}

test.cpp

#include<iostream>
#include<string>
#include<unordered_set>
#include<unordered_map>
#include<map>
#include<set>
using namespace std;

#include"HashTable.h"

//int main()
//{
//	unordered_set<int> s;
//	s.insert(5);
//	s.insert(2);
//	s.insert(6);
//	s.insert(1);
//	s.insert(4);
//
//	unordered_set<int>::iterator it = s.begin();
//	while (it != s.end())
//	{
//		cout << *it << " ";
//		++it;
//	}
//	cout << endl;
//
//	for (auto e : s)
//	{
//		cout << e << " ";
//	}
//	cout << endl;
//
//	cout << s.load_factor() << endl;
//	cout << s.max_load_factor() << endl;
//
//	unordered_map<string, string> dict;
//	dict["sort"];
//	dict["sort"] = "排序";
//	dict["string"] = "字符串";
//	dict["abc"] = "xx";
//
//	for (auto& kv : dict)
//	{
//		cout << kv.first << ":" << kv.second << endl;
//	}
//	cout << endl;
//
//	return 0;
//}

//int main()
//{
//	const size_t N = 100000;
//
//	unordered_set<int> us;
//	set<int> s;
//
//	vector<int> v;
//	v.reserve(N);
//	srand(time(0));
//	for (size_t i = 0; i < N; ++i)
//	{
//		//v.push_back(rand()); // N比较大时，重复值比较多
//		//v.push_back(rand()+i); // 重复值相对少
//		v.push_back(i); // 没有重复，有序
//	}
//
//	// 21:15
//	size_t begin1 = clock();
//	for (auto e : v)
//	{
//		s.insert(e);
//	}
//	size_t end1 = clock();
//	cout << "set insert:" << end1 - begin1 << endl;
//
//	size_t begin2 = clock();
//	for (auto e : v)
//	{
//		us.insert(e);
//	}
//	size_t end2 = clock();
//	cout << "unordered_set insert:" << end2 - begin2 << endl;
//
//
//	size_t begin3 = clock();
//	for (auto e : v)
//	{
//		s.find(e);
//	}
//	size_t end3 = clock();
//	cout << "set find:" << end3 - begin3 << endl;
//
//	size_t begin4 = clock();
//	for (auto e : v)
//	{
//		us.find(e);
//	}
//	size_t end4 = clock();
//	cout << "unordered_set find:" << end4 - begin4 << endl << endl;
//
//	cout <<"插入数据个数："<< s.size() << endl;
//	cout <<"插入数据个数：" << us.size() << endl << endl;
//
//	size_t begin5 = clock();
//	for (auto e : v)
//	{
//		s.erase(e);
//	}
//	size_t end5 = clock();
//	cout << "set erase:" << end5 - begin5 << endl;
//
//	size_t begin6 = clock();
//	for (auto e : v)
//	{
//		us.erase(e);
//	}
//	size_t end6 = clock();
//	cout << "unordered_set erase:" << end6 - begin6 << endl << endl;
//	
//	return 0;
//}

#include"MyUnorderedMap.h"
#include"MyUnorderedSet.h"

// Entry point: runs the unordered_map demo, then the unordered_set demo.
int main()
{
	bit::test_map();
	bit::test_set();
	// Falling off the end of main is equivalent to `return 0;`.
}