unordered_map，unordered_set模拟实现

最新推荐文章于 2024-07-24 22:13:06 发布

亿多白云朵

最新推荐文章于 2024-07-24 22:13:06 发布

阅读量95

点赞数

文章标签：哈希算法算法 c++ c语言迭代器模式散列表数据结构

本文链接：https://blog.csdn.net/Pursue_Harmony/article/details/134485211

版权

三 .实现unordered_map, unordered_set

1. 初步实现unordered_map, unordered_set

2.加上迭代器（自行理解）

3.测试用例

一 . 底层结构--哈希

哈希思想：构造一种存储结构，通过某种函数(hashFunc)使元素的存储位置与它的关键码之间能够建立一一映射的关系，那么在查找时通过该函数可以很快找到该元素。
哈希方法构造出来的结构称为哈希表(Hash Table)(或者称散列表)

1.直接定址法

2. 除留余数法 + 哈希桶

如果数据过多，并且数据很散，直接定址法不适合。

3. 一些定义

不同关键字（4，14，24，84）通过相同的方法（% 10）计算出相同的哈希地址，该种现象称为哈希冲突或哈希碰撞。

解决哈希冲突两种常见的方法是：闭散列和开散列

闭散列：也叫开放定址法，当发生哈希冲突时，如果哈希表未被装满，说明在哈希表中必然还有
空位置，那么可以把key存放到冲突位置中的“下一个” 空位置中去

开散列法又叫链地址法(开链法)：首先对关键码集合用散列函数计算散列地址，具有相同地
址的关键码归于同一子集合，每一个子集合称为一个桶，各个桶中的元素通过一个单链表链
接起来，各链表的头结点存储在哈希表中。
（我们采用的是开散列法）

二 . 模拟实现哈希表

1.哈希表框架

代码：

// Bucket-chain node: holds one key/value pair plus the link to the next node.
template<class K, class V>
struct HashNode
{
	std::pair<K, V> _kv;               // key in .first, mapped value in .second
	HashNode<K, V>* _next = nullptr;   // next node in the same bucket; nullptr ends the chain

	// Copies kv into the node; the node starts out unlinked.
	HashNode(const std::pair<K, V>& kv)
		: _kv(kv)
	{}
};

template<class K, class V>
{
    typedef HashNode<K, V> Node;
public:
	HashTable()
	{
		_tables.resize(10, nullptr);
	}
    ~HashTable()
	{
		for (size_t i = 0; i < _tables.size(); i++)
		{
			Node* cur = _tables[i];
			while (cur)
			{
				Node* next = cur->_next;
				delete cur;
				cur = next;
			}
		}
	}
    //删除
    //插入
    //查找

private:
	vector<Node*> _tables;
	size_t _n = 0; // 哈希表实际元素个数
}

_n存在的意义：判断_tables什么时候扩容。

开散列最好的情况是：每个哈希桶中刚好挂一个节点，再继续插入元素时，每一次都会发生哈希冲突，因此，在元素个数刚好等于桶的个数时，可以给哈希表增容。

2.插入

// Inserts kv unless a node with the same key already exists.
// Returns true when a new node was added, false when the key was already present.
bool Insert(const std::pair<K, V>& kv)
	{
		if (Find(kv.first))
			return false;

		// Grow once there are as many elements as buckets (load factor 1).
		if (_n >= _tables.size())
		{
			size_t newSize = _tables.size() * 2;
			std::vector<Node*> newTables;
			newTables.resize(newSize, nullptr);
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					// Save the successor first: relinking overwrites cur->_next,
					// and without advancing cur the loop would never end.
					Node* next = cur->_next;
					size_t hashi = cur->_kv.first % newTables.size();
					cur->_next = newTables[hashi];   // push front into the new bucket
					newTables[hashi] = cur;
					cur = next;
				}
				_tables[i] = nullptr;   // nodes were moved, not copied
			}
			_tables.swap(newTables);
		}

		// Push the new node at the front of its bucket.
		size_t hashi = kv.first % _tables.size();
		Node* newnode = new Node(kv);
		newnode->_next = _tables[hashi];
		_tables[hashi] = newnode;
		_n++;

		return true;
	}

3.查找

// Looks up key; returns the node holding it, or nullptr when it is absent.
Node* Find(const K& key)
	{
		// The bucket index is the key modulo the bucket count.
		const size_t bucket = key % _tables.size();
		for (Node* node = _tables[bucket]; node != nullptr; node = node->_next)
		{
			if (node->_kv.first == key)
				return node;
		}
		return nullptr;
	}

4 . 删除

	bool erase(const K& key)
	{
		size_t hashi = key % _tables.size();
		Node* cur = _tables[hashi];
		Node* prev = nullptr;
		if (cur->_kv.first == key)
		{
			_tables[hashi] = nullptr;
			return true;
		}
		while (cur)
		{
			if (cur->_kv.first == key)
			{
				prev->_next = cur->_next;
				delete cur;
				return true;
			}
			prev = cur;
			cur = cur->_next;
		}
		return false;
	}

5.解决使用问题

如果kv.first为string或其他不能直接进行 % 运算的类型，`kv.first % _tables.size()` 将无法通过编译，因此需要先用一个仿函数把 key 转换成整数。

解决：

// Default hash functor: converts the key to size_t with a plain cast.
template<class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		return size_t(key);
	}
};

// String keys: fold the characters into one value, multiplying the running
// hash by 31 before each character is added.
template<>
struct HashFunc<std::string>
{
	size_t operator()(const std::string& key)
	{
		size_t hash = 0;
		for (size_t i = 0; i < key.size(); ++i)
		{
			hash = hash * 31 + key[i];
		}
		return hash;
	}
};

接着把所有用除留余数法的部分进行修改：把 `key % _tables.size()` 改为 `hf(key) % _tables.size()`（hf 是 Hash 类型的仿函数对象）。

6.完整代码

#pragma once

// Bucket-chain node: holds one key/value pair plus the link to the next node.
template<class K, class V>
struct HashNode
{
	std::pair<K, V> _kv;               // key in .first, mapped value in .second
	HashNode<K, V>* _next = nullptr;   // next node in the same bucket; nullptr ends the chain

	// Copies kv into the node; the node starts out unlinked.
	HashNode(const std::pair<K, V>& kv)
		: _kv(kv)
	{}
};


// Default hash functor: converts the key to size_t with a plain cast.
template<class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		return size_t(key);
	}
};

// String keys: fold the characters into one value, multiplying the running
// hash by 31 before each character is added.
template<>
struct HashFunc<std::string>
{
	size_t operator()(const std::string& key)
	{
		size_t hash = 0;
		for (size_t i = 0; i < key.size(); ++i)
		{
			hash = hash * 31 + key[i];
		}
		return hash;
	}
};

//哈希桶
template<class K, class V, class Hash = HashFunc<K>>
class HashTable
{
	typedef HashNode<K, V> Node;
	Hash hf;
public:
	HashTable()
	{
		_tables.resize(10, nullptr);
	}

	~HashTable()
	{
		for (size_t i = 0; i < _tables.size(); i++)
		{
			Node* cur = _tables[i];
			while (cur)
			{
				Node* next = cur->_next;
				delete cur;
				cur = next;
			}
		}
	}

	bool Insert(const pair<K, V>& kv)
	{
		if(Find(kv.first))
			return false;
	    //扩容
		if (_n / _tables.size() == 1)
		{
			size_t newSize = _tables.size() * 2;
			vector<Node*> newTables;
			newTables.resize(newSize, nullptr);
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					size_t hashi = hf(cur->_kv.first) % newTables.size();
					cur->_next = newTables[hashi];
					newTables[hashi] = cur;
				}
				_tables[i] = nullptr;
			}
			_tables.swap(newTables);
		}

		size_t hashi = hf(kv.first) % _tables.size();
		Node* newnode = new Node(kv);
		newnode->_next = _tables[hashi];
		_tables[hashi] = newnode;
		_n++;

		return false;
	}

	Node* Find(const K& key)
	{
		size_t hashi = hf(key) % _tables.size();
		Node* cur = _tables[hashi];
		while (cur)
		{
			if (cur->_kv.first == key)
			{
				return cur;
			}
			cur = cur->_next;
		}
		return nullptr;
	}

	bool erase(const K& key)
	{
		size_t hashi = hf(key) % _tables.size();
		Node* cur = _tables[hashi];
		Node* prev = nullptr;
		if (cur->_kv.first == key)
		{
			_tables[hashi] = nullptr;
			return true;
		}
		while (cur)
		{
			if (cur->_kv.first == key)
			{
				prev->_next = cur->_next;
				delete cur;
				return true;
			}
			prev = cur;
			cur = cur->_next;
		}
		return false;
	}

private:
	vector<Node*> _tables;
	size_t _n = 0;
};

三 .实现unordered_map, unordered_set

1. 初步实现unordered_map, unordered_set

这部分内容类似红黑树封装map,set。

unordered_set.h

#pragma once

template<class K, class Hash = HashFunc<K>>
class unordered_set
{
	struct KeyOfT
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};

public:
	bool insert(const K& key)
	{
		return _ht.Insert(key);
	}

	bool erase(const K& key)
	{
		return _ht.Erase(key);
	}

	HashNode<K>* find(const K& key)
	{
		return _ht.Find(key);
	}


private:
	HashTable<K, K, KeyOfT> _ht;
};

unordered_map.h

#pragma once

template<class K, class V, class Hash = HashFunc<K>>
class unordered_map
{
	struct KeyOfT
	{
		const K& operator()(const pair<K, V>& kv)
		{
			return kv.first;
		}
	};

public:
	bool insert(const pair<K, V>& key)
	{
		return _ht.Insert(key);
	}

	bool erase(const K& key)
	{
		return _ht.Erase(key);
	}

	HashNode<pair<K, V>>* find(const K& key)
	{
		return _ht.Find(key);
	}

private:
	HashTable<K, pair<K, V>, KeyOfT> _ht;
};

HashTable.h

#pragma once


// Bucket-chain node storing one element of type T plus the link to the next node.
template<class T>
struct HashNode
{
	T _data;                        // the stored element
	HashNode<T>* _next = nullptr;   // next node in the same bucket; nullptr ends the chain

	// Copies data into the node; the node starts out unlinked.
	HashNode(const T& data)
		: _data(data)
	{}
};


// Default hash functor: converts the key to size_t with a plain cast.
template<class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		return size_t(key);
	}
};

// String keys: fold the characters into one value, multiplying the running
// hash by 31 before each character is added.
template<>
struct HashFunc<std::string>
{
	size_t operator()(const std::string& key)
	{
		size_t hash = 0;
		for (size_t i = 0; i < key.size(); ++i)
		{
			hash = hash * 31 + key[i];
		}
		return hash;
	}
};

//哈希桶
template<class K, class T, class KeyOfT>
class HashTable
{
	typedef HashNode<T> Node;
	HashFunc<K> hf;
	KeyOfT kot;
public:
	HashTable()
	{
		_tables.resize(10, nullptr);
	}

	~HashTable()
	{
		for (size_t i = 0; i < _tables.size(); i++)
		{
			Node* cur = _tables[i];
			while (cur)
			{
				Node* next = cur->_next;
				delete cur;
				cur = next;
			}
		}
	}

	bool Insert(const T& data)
	{
		if(Find(kot(data)))
			return false;
	    //扩容
		if (_n / _tables.size() == 1)
		{
			size_t newSize = _tables.size() * 2;
			vector<Node*> newTables;
			newTables.resize(newSize, nullptr);
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					size_t hashi = hf(kot(cur->_data)) % newTables.size();
					cur->_next = newTables[hashi];
					newTables[hashi] = cur;
				}
				_tables[i] = nullptr;
			}
			_tables.swap(newTables);
		}

		size_t hashi = hf(kot(data)) % _tables.size();
		Node* newnode = new Node(data);
		newnode->_next = _tables[hashi];
		_tables[hashi] = newnode;
		_n++;

		return false;
	}

	Node* Find(const K& key)
	{
		size_t hashi = hf(key) % _tables.size();
		Node* cur = _tables[hashi];
		while (cur)
		{
			if (kot(cur->_data) == key)
			{
				return cur;
			}
			cur = cur->_next;
		}
		return nullptr;
	}

	bool Erase(const K& key)
	{
		size_t hashi = hf(key) % _tables.size();
		Node* cur = _tables[hashi];
		Node* prev = nullptr;
		if (kot(cur->_data) == key)
		{
			_tables[hashi] = nullptr;
			return true;
		}
		while (cur)
		{
			if (kot(cur->_data) == key)
			{
				prev->_next = cur->_next;
				delete cur;
				return true;
			}
			prev = cur;
			cur = cur->_next;
		}
		return false;
	}

private:
	vector<Node*> _tables;
	size_t _n = 0;
};

2.加上迭代器（自行理解）

unordered_map.h

#pragma once

template<class K, class V, class Hash = HashFunc<K>>
class unordered_map
{
	struct KeyOfT
	{
		const K& operator()(const pair<K, V>& kv)
		{
			return kv.first;
		}
	};
public:
	typedef typename HashTable<K, pair<const K, V>, KeyOfT, Hash>::iterator iterator;

	iterator begin()
	{
		return _ht.begin();
	}

	iterator end()
	{
		return _ht.end();
	}

	pair<iterator, bool> insert(const pair<K, V>& kv)
	{
		return _ht.Insert(kv);
	}

	V& operator[](const K& key)
	{
		pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
		return ret.first->second;
	}

	const V& operator[](const K& key) const
	{
		pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
		return ret.first->second;
	}

	iterator find(const K& key)
	{
		return _ht.Find(key);
	}

	bool erase(const K& key)
	{
		return _ht.Erase(key);
	}

private:
	HashTable<K, pair<const K, V>, KeyOfT, Hash> _ht;
};

unordered_set.h

#pragma once


template<class K, class Hash = HashFunc<K>>
class unordered_set
{
	struct KeyOfT
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};

public:
	typedef typename HashTable<K, K, KeyOfT, Hash>::const_iterator iterator;
	typedef typename HashTable<K, K, KeyOfT, Hash>::const_iterator const_iterator;

	const_iterator begin() const
	{
		return _ht.begin();
	}

	const_iterator end() const
	{
		return _ht.end();
	}

	pair<const_iterator, bool> insert(const K& key)
	{
		auto ret = _ht.Insert(key);
		return pair<const_iterator, bool>(const_iterator(ret.first._node, ret.first._pht, ret.first._hashi), ret.second);
	}

	iterator find(const K& key)
	{
		return _ht.Find(key);
	}

private:
	HashTable<K, K, KeyOfT, Hash> _ht;
};

HashTable.h

#pragma once


// Bucket-chain node storing one element of type T plus the link to the next node.
template<class T>
struct HashNode
{
	T _data;                        // the stored element
	HashNode<T>* _next = nullptr;   // next node in the same bucket; nullptr ends the chain

	// Copies data into the node; the node starts out unlinked.
	HashNode(const T& data)
		: _data(data)
	{}
};


// Default hash functor: converts the key to size_t with a plain cast.
template<class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		return size_t(key);
	}
};

// String keys: fold the characters into one value, multiplying the running
// hash by 31 before each character is added.
template<>
struct HashFunc<std::string>
{
	size_t operator()(const std::string& key)
	{
		size_t hash = 0;
		for (size_t i = 0; i < key.size(); ++i)
		{
			hash = hash * 31 + key[i];
		}
		return hash;
	}
};

//前置声明
template<class K, class T, class KeyOfT, class Hash>
class HashTable;

//迭代器
template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash = HashFunc<K>>
struct _HTIterator
{
	typedef HashNode<T> Node;
	typedef _HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;
	Node* _node;
	size_t _hashi;
	const HashTable<K, T, KeyOfT, Hash>* _pht;

	_HTIterator(Node* node, HashTable<K, T, KeyOfT, Hash>* pht, size_t hashi)
		:_node(node)
		, _hashi(hashi)
		, _pht(pht)
	{}

	_HTIterator(Node* node, const HashTable<K, T, KeyOfT, Hash>* pht, size_t hashi)
		:_node(node)
		, _hashi(hashi)
		, _pht(pht)
	{}

	Self& operator++()
	{
		if (_node->_next)
		{
			_node = _node->_next;
		}
		else//需要哈希表
		{
			++_hashi;
			while (_hashi < _pht->_tables.size())
			{
				if (_pht->_tables[_hashi])
				{
					_node = _pht->_tables[_hashi];
					break;
				}
				++_hashi;
			}
			if (_hashi == _pht->_tables.size())
			{
				_node = nullptr;
			}
		}
		return *this;
	}

	bool operator!=(const Self& s)
	{
		return _node != s._node;
	}

	Ref operator*()
	{
		return _node->_data;
	}

	Ptr operator->()
	{
		return &(_node->_data);
	}

};




// Separate-chaining hash table shared by unordered_map and unordered_set.
// K is the key type, T the stored element type, KeyOfT a functor extracting
// the key from an element, and Hash a functor turning a key into a size_t.
template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>
class HashTable
{
public:
	typedef HashNode<T> Node;

	// The iterator reads the private _tables member. The friend template uses
	// its own parameter names: re-declaring K, T, KeyOfT or Hash here would
	// redeclare the enclosing template's parameters, which is ill-formed.
	template<class K2, class T2, class Ref2, class Ptr2, class KeyOfT2, class Hash2>
	friend struct _HTIterator;
	typedef _HTIterator<K, T, T&, T*, KeyOfT, Hash> iterator;
	typedef _HTIterator<K, T, const T&, const T*, KeyOfT, Hash> const_iterator;

	Hash hf;
	KeyOfT kot;

	// Starts with 10 empty buckets.
	HashTable()
	{
		_tables.resize(10, nullptr);
	}

	// The table owns its nodes through raw pointers, so a member-wise copy
	// would make two tables delete the same nodes. Copying is disabled.
	HashTable(const HashTable&) = delete;
	HashTable& operator=(const HashTable&) = delete;

	// Walks every bucket chain and frees each node.
	~HashTable()
	{
		for (size_t i = 0; i < _tables.size(); i++)
		{
			Node* cur = _tables[i];
			while (cur)
			{
				Node* next = cur->_next;   // read the link before the node is freed
				delete cur;
				cur = next;
			}
		}
	}

	// Iterator to the head of the first non-empty bucket, or end() when empty.
	iterator begin()
	{
		for (size_t i = 0; i < _tables.size(); i++)
		{
			if (_tables[i])
			{
				return iterator(_tables[i], this, i);
			}
		}

		return end();
	}

	// Past-the-end iterator: a null node; the bucket index is a placeholder.
	iterator end()
	{
		return iterator(nullptr, this, static_cast<size_t>(-1));
	}

	const_iterator begin() const
	{
		for (size_t i = 0; i < _tables.size(); i++)
		{
			if (_tables[i])
			{
				return const_iterator(_tables[i], this, i);
			}
		}

		return end();
	}

	const_iterator end() const
	{
		return const_iterator(nullptr, this, static_cast<size_t>(-1));
	}

	// Inserts data unless an element with the same key is already stored.
	// Returns the iterator to the element with that key, plus true when a
	// node was added or false when the key already existed.
	std::pair<iterator, bool> Insert(const T& data)
	{
		iterator it = Find(kot(data));
		if (it != end())
			return std::make_pair(it, false);

		// Grow once there are as many elements as buckets (load factor 1).
		if (_n >= _tables.size())
		{
			size_t newSize = _tables.size() * 2;
			std::vector<Node*> newTables;
			newTables.resize(newSize, nullptr);
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					// Save the successor first: relinking overwrites cur->_next,
					// and without advancing cur the loop would never end.
					Node* next = cur->_next;
					size_t hashi = hf(kot(cur->_data)) % newTables.size();
					cur->_next = newTables[hashi];   // push front into the new bucket
					newTables[hashi] = cur;
					cur = next;
				}
				_tables[i] = nullptr;   // nodes were moved, not copied
			}
			_tables.swap(newTables);
		}

		// Push the new node at the front of its bucket.
		size_t hashi = hf(kot(data)) % _tables.size();
		Node* newnode = new Node(data);
		newnode->_next = _tables[hashi];
		_tables[hashi] = newnode;
		_n++;

		return std::make_pair(iterator(newnode, this, hashi), true);
	}

	// Returns the iterator to the element whose key equals key, or end().
	iterator Find(const K& key)
	{
		size_t hashi = hf(key) % _tables.size();
		Node* cur = _tables[hashi];
		while (cur)
		{
			if (kot(cur->_data) == key)
			{
				return iterator(cur, this, hashi);
			}
			cur = cur->_next;
		}
		return end();
	}

	// Removes the node whose key equals key.
	// Returns true when a node was removed, false when the key was not found.
	bool Erase(const K& key)
	{
		size_t hashi = hf(key) % _tables.size();
		Node* prev = nullptr;
		Node* cur = _tables[hashi];   // may be nullptr when the bucket is empty
		while (cur)
		{
			if (kot(cur->_data) == key)
			{
				if (prev)
					prev->_next = cur->_next;      // unlink from the middle or tail
				else
					_tables[hashi] = cur->_next;   // unlink the head, keep the rest of the chain
				delete cur;
				--_n;
				return true;
			}
			prev = cur;
			cur = cur->_next;
		}
		return false;
	}

private:
	std::vector<Node*> _tables;   // bucket heads
	size_t _n = 0;                // number of stored elements
};

3.测试用例

#include<iostream>
#include<vector>
#include<string>
#include<assert.h>
#include<utility>

using namespace std;
#include"HashTable.h"
#include"myunordered_set.h"
#include"myunordered_map.h"

// Manual check for the first, pair-based HashTable (Insert / Find on string keys).
// The whole body is commented out, so calling this function currently does nothing.
void test()
{
	//HashTable<string, int> ht;
	//ht.Insert(make_pair("西瓜", 1));
	//HashNode<string, int>* ret = ht.Find("西瓜");
	//ret->_kv.second++;
	//cout << ret->_kv.first << ":" << ret->_kv.second << endl;;
	//ht.Insert(make_pair("桃子", 1));
	//ht.Insert(make_pair("桃子", 2));
	//ht.Insert(make_pair("苹果", 1));
}

//void testset()
//{
//	unordered_set<string> us;
//	us.insert("西瓜");
//	us.insert("香蕉");
//	us.insert("苹果");
//	us.insert("西瓜");
//
//	us.erase("西瓜");
//	HashNode<string>* ret = us.find("香蕉");
//	cout << ret->_data << endl;
//}

//void testmap()
//{
//	string arr[] = { "西瓜", "香蕉", "西瓜", "苹果", "西瓜", "西瓜", "香蕉", "西瓜" };
//	unordered_map<string, int> up;
//	for (auto e : arr)
//	{
//		HashNode<pair<string, int>>* ret = up.find(e);
//		if (ret)
//		{
//			ret->_data.second++;
//			cout << ret->_data.first << ":" << ret->_data.second << endl;
//
//		}
//		else
//		{
//			up.insert(make_pair(e, 1));
//		}
//	}
//	
//}

void test_set()
{
	// 17:05
	unordered_set<int> us;
	us.insert(5);
	us.insert(15);
	us.insert(52);
	us.insert(3);

	unordered_set<int>::iterator it = us.begin();
	while (it != us.end())
	{
		cout << *it << " ";
		++it;
	}
	cout << endl;

	for (auto e : us)
	{
		cout << e << " ";
	}
	cout << endl;
}

void test_map()
{
	//unordered_map<string, string> dict;
	//dict.insert(make_pair("sort", ""));
	//dict.insert(make_pair("string", "ַ"));
	//dict.insert(make_pair("insert", ""));

	//for (auto& kv : dict)
	//{
	//	//kv.first += 'x';
	//	kv.second += 'x';

	//	cout << kv.first << ":" << kv.second << endl;
	//}
	//cout << endl;

	string arr[] = { "西瓜", "苹果", "苹果", "胡萝卜", "梨子", "橘子", "哈密瓜", "桃子", "西瓜", "西瓜", "梨子" };
	unordered_map<string, int> count_map;
	for (auto& e : arr)
	{
		count_map[e]++;
	}

	for (auto& kv : count_map)
	{
		cout << kv.first << ":" << kv.second << endl;
	}
	cout << endl;
}

// Entry point: runs the currently selected demo; the others stay disabled.
int main()
{
	// Disabled demos:
	//test();
	//testset();
	//testmap();
	//test_set();

	test_map();
	// Falling off the end of main is equivalent to returning 0.
}