C++必修：unordered_set与unordered_map的实现

最新推荐文章于 2024-10-23 18:09:55 发布

Betty’s Sweet

最新推荐文章于 2024-10-23 18:09:55 发布

阅读量1.3k

点赞数 21

分类专栏： C++ 文章标签： c++ STL unordered_set unordered_map

本文链接：https://blog.csdn.net/Bettysweetyaaa/article/details/141645560

版权

C++ 专栏收录该内容

25 篇文章 8 订阅

订阅专栏

✨✨ 欢迎大家来到贝蒂大讲堂✨✨

🎈🎈养成好习惯，先赞后看哦~🎈🎈

所属专栏：C++学习
贝蒂的主页：Betty’s blog

1. unordered_set与unordered_map的结构

我们知道STL中的unordered_set与unordered_map底层就是一个开散列的哈希表，接下来我们模拟set与map肯定需要哈希表作为我们的成员变量，如果在此之前并不了解哈希表，可以参考——哈希表。

1.1 unordered_set的结构

我们知道unordered_set其实就是K模型，所以unordered_set容器对红黑树的封装如下：

template<class K>
class unordered_set
{
public:
    //成员函数
private:
    HashTable<K, K> _ht;
};

1.2 unordered_map的结构

我们也知道unordered_map其实就是KV模型，所以unordered_map容器对红黑树的封装如下：

template<class K, class V>
class unordered_map
{
public:
    //成员函数
private:
    HashTable<K, pair<const K,V>> _ht;
};

其中为了防止pair中K被修改，我们可以加上const修饰。其实unordered_set中的K同样不能修改，但是我们可以在其迭代器实现中进行统一处理。

2. 改造哈希表

其中哈希表的接口如下：

template<class K, class V>
class HashTable

第一个参数代表key，第二个参数为key或者key与value形成的键值对。对于unordered_set来说，第一个模版参数与第二个模版参数相同；对于unordered_map来说，第一个参数为key，第二个参数为key与value形成的键值对。

2.1 改造哈希表的节点

其中哈希表的节点类型就是模版参数T，所以我们对节点改造如下：

//节点
template<class T>
struct HashNode
{
	T _value;
	HashNode<T>* _next;
	HashNode(const T& value)
		:_value(value)
		, _next(nullptr)
	{}
};

2.2 改造哈希表

2.2.1 增加仿函数

首先为了方便不同类型之间的比较，所以我们需要定义一个仿函数，而不同类型需要的仿函数可能是不同的。所以我们需要模版参数显示传参：

template<class K>
class unordered_set
{
    //增加仿函数
    struct SetKeyOfT
    {
        const K& operator()(const K& key)
        {
            return key;
        }
    };
public:
    //成员函数
private:
    HashTable<K, K, SetKeyOfT> _ht;
};

template<class K, class V>
class unordered_map
{
    //仿函数
    struct MapKeyOfT
    {
        const K& operator()(const pair<K, V>& kv)
        {
            return kv.first;
        }
    };
public:
    //成员函数
private:
    HashTable<K, pair<const K,V> ,MapKeyOfT> _ht;
};

然后我们以Find函数为例：

iterator Find(const K& key)
{
    //使用仿函数
    HashFunc hs;
    KeyOfT kot;
    if (_table.size() == 0)
    {
        return iterator(nullptr,this);
    }
    size_t index = hs(key) % _table.size();
    Node* cur = _table[index];
    while (cur)
    {
        if (kot(cur->_value) == key)
        {
            return iterator(cur,this);
        }
        cur = cur->_next;
    }
    return iterator(nullptr, this);
}

但是还有一个问题就是：在哈希表中的哈希函数一般都需要进行取模操作，而有些自定义类型如string就无法直接进行取模操作，这时我们就需要通过某种方法将string转化为整型，然后再带入哈希函数求对应的下标。但遗憾的是，我们无法找到一种能实现字符串和整型之间一对一转换的方法，因为在计算机中，整型的大小是有限的，比如用无符号整型能存储的最大数字是4294967295，但是不同字符串能组合的数字是无限的，以无限对有限，这就意味着无论哪种哈希函数都可能存在哈希冲突。

经过大量研究发现BKDRHash算法无论是在实际效果还是编码实现中，效果都是最突出的。该算法由于在Brian Kernighan与Dennis Ritchie的《The C Programing Language》一书被展示而得名，是一种简单快捷的hash算法，也是Java目前采用的字符串的hash算法

所以同理我们也需要在哈希表中的模版参数添加一个仿函数作为默认的转换函数，这个转换函数我们将其设为缺省值，方便使用者根据不同类型传参不同的仿函数。

template<class K, class T, class KeyOfT, class HashFunc = Hash<K>>

然后我们在哈希表中实现对应的转换函数，并针对特定的类型进行特化：

template<class K>
struct Hash
{
	size_t operator()(const K& key)
	{
		return key;
	}
};
//string类型的特化
template<>
struct Hash<string>
{
	size_t operator()(const string& s) //BKDRHash算法
	{
		size_t value = 0;
		for (auto ch : s)
		{
			value = value * 131 + ch;
		}
		return value;
	}
};

2.2.2 改造插入

首先我们改造Insert函数，除了比较需要用仿函数替换，以及需要将key通过对应的转换函数转换以外，我们还需要对返回值进行改造：返回值是一个pair，第一个参数是一个迭代器，第二个参数是一个bool值

若待插入元素的键值key在容器当中不存在，则insert函数插入成功，并返回插入后元素的迭代器和true。
若待插入元素的键值key在容器当中已经存在，则insert函数插入失败，并返回容器当中键值为key的元素的迭代器和false。

pair<iterator,bool> Insert(const T& value)
{
	HashFunc hs;
	KeyOfT kot;
	iterator it = Find(kot(value));
	if (it != end())
	{
		return make_pair(it, false);
	}
	// 如果负载因子等于 1 进行扩容
	if (_n == _table.size())
	{
		vector<Node*> newHT;
		// 如果原哈希表大小为 0，则新哈希表大小为 10，否则为原大小的两倍
		//size_t newsize = _table.size() == 0 ? 10 : _table.size() * 2;
		//newtable.resize(newsize);
		newHT.resize(GetNextPrime(_table.size()));
		for (int i = 0; i < _table.size(); i++)
		{
			if (_table[i])
			{
				Node* cur = _table[i];
				// 遍历链表进行头插更新节点进新的哈希表
				while (cur)
				{
					// 记录下一个节点
					Node* next = cur->_next;
					size_t index = hs(kot(cur->_value)) % newHT.size();
					// 进行头插
					cur->_next = newHT[index];
					newHT[index] = cur;
					cur = next;
				}
				// 将原哈希桶置空
				_table[i] = nullptr;
			}
		}
		// 通过交换让原本哈希表自动回收，同时新哈希表成为当前使用的哈希表
		_table.swap(newHT);
	}
	// 计算插入位置
	size_t index = hs(kot(value)) % _table.size();
	Node* newnode = new Node(value);
	// 进行头插
	newnode->_next = _table[index];
	_table[index] = newnode;
	_n++;
	return make_pair(iterator(newnode,this),true);
}

2.2.3 改造删除

删除函数中除了比较需要用仿函数替换，以及需要将key通过对应的转换函数转换以外没什么需要注意的地方。

bool Erase(const K& key)
{
    HashFunc hs;
    KeyOfT kot;
    size_t index = hs(key) % _table.size();
    //记录前一个节点方便链接
    Node* prev = nullptr;
    Node* cur = _table[index];
    while (cur)
    {
        //找到
        if (kot(cur->_data) == key)
        {
            //如果为头节点
            if (prev == nullptr)
            {
                _table[index] = cur->_next;
            }
            else
            {
                prev->_next = cur->_next;
            }
            delete cur;
            --_n;
            return true;
        }
        //没找到的话就遍历下一个
        prev = cur;
        cur = cur->_next;
    }
    return false;
}

3. 迭代器

因为unordered_map/unordered_set底层为开散列实现的哈希表，有些节点与节点之间并不连续，所以迭代器也肯定需要进行封装。为了同时支持const迭代器与普通迭代器，我们需要三个模版参数，第一个模版参数T代表数据存储类型，第二个模版参数Ref代表存储类型的引用，第三个模版参数Ptr代表存储类型的指针。
在迭代器中应该有两个参数，一个就是节点指针，另一个就是我们的哈希表的指针，因为我们可能需要遍历哈希表的下一个桶的位置。其中为了方便访问哈希表的私有成员我们可以将迭代器设为友元类，并且在最开始要给出哈希表的声明。然后我们重载如*，->等常见操作符。

template<class K, class T, class KeyOfT, class HashFunc>
class HashTable;//声明

template<class K, class T, class Ref, class Ptr, class KeyOfT, class HashFunc>
struct __HashIterator
{
	typedef HashNode<T> Node;
    typedef HashTable<K, T, KeyOfT, HashFunc> HashTable;
	typedef __HashIterator<K, T, Ref, Ptr, KeyOfT, HashFunc> self;
	Node* _node;
	HashTable* _ht;
	__HashIterator(Node* node, HashTable* ht)
		:_node(node)
		, _ht(ht)
	{}
	Ref operator*()
	{
		return _node->_value;
	}

	Ptr operator->()
	{
		return &_node->_value;
	}
	bool operator == (const self& it)
	{
		return _node == it._node;
	}
	bool operator!=(const self& it)
	{
		return _node != it._node;
	}
};

接下来我们需要在哈希表中编写常见的迭代器函数，所以begin()就是哈希表的第一个有数据的节点，而end()我们可以先将其设置nullptr。

iterator begin()
{
	Node* cur = nullptr;
	for (size_t i = 0; i < _table.size(); i++)
	{
		if (_table[i])
		{
			cur = _table[i];
			break;
		}
	}
	return iterator(cur, this);
}
iterator end()
{
	return iterator(nullptr, this);
}

const_iterator begin()const
{
	Node* cur = nullptr;
	for (size_t i = 0; i < _table.size(); i++)
	{
		if (_table[i])
		{
			cur = _table[i];
			break;
		}
	}
	return const_iterator(cur, this);
}
const_iterator end()const
{
	return const_iterator(nullptr, this);
}

接下来我们重载++操作符，其中需要注意的是：哈希表的迭代器一般为是单向迭代器，即没有反向迭代器，也没有实现–-等运算符的重载，当然若是想让哈希表支持双向遍历，可以考虑将哈希桶中存储的单链表结构换为双链表结构。

//前置++
self& operator++()
{
	HashFunc hs;
	KeyOfT kot;
	if (_node->_next != nullptr)
	{
		_node = _node->_next;
	}
	else
	{
		//计算当前桶的位置
		size_t hashi =hs(kot(_node->_value)) % (_ht->_table.size());
		//++走到下一个桶
		++hashi;
		while (hashi < _ht->_table.size())
		{
			//不为空
			if (_ht->_table[hashi])
			{
				_node = _ht->_table[hashi];
				break;
			}
			else
			{
				++hashi;
			}
		}
		//如果后面没有非空桶返回nullptr
		if (hashi == _ht->_table.size())
		{
			_node = nullptr;
		}
	}
	return *this;
}

当然++运算符的实现不止上面一种，我同样可将哈希表中的数据维护成一个链表。这时桶中节点就有两个指针，一个维护哈希桶，一个维护单链表。这时begin()就可以指向单链表的起始节点，end()同样为nullptr，++运算符就可以直接通过下一个节点指针来得到。

然后我们可以在unordered_set/unordered_map中增加相应的迭代器，其中需要注意的是unordered_set中的迭代器为了防止key被修改，将普通迭代器也设为const迭代器。但这就引发了一个问题就是：普通迭代器调用仍是哈希表的普通迭代器，返回普通迭代器并不与const迭代器兼容，为了解决这个问题我们仍需要在迭代器中重载一个普通迭代器初始化const迭代器的构造函数。

typedef __HashIterator<K, T, T&, T*, KeyOfT, HashFunc> Iterator;
//普通迭代器构造const迭代器
__HashIterator(const Iterator&it)
    :_node(it._node)
    ,_ht(it._ht)
{}

最后我们直接复用哈希表中的接口就可以实现unordered_set/unordered_map的成员函数。

template<class K>
class unordered_set
{
    //增加仿函数
    struct SetKeyOfT
    {
        const K& operator()(const K& key)
        {
            return key;
        }
    };
public:
    //成员函数
    typedef typename HashTable<K, K, SetKeyOfT>::const_iterator iterator;
    typedef typename HashTable<K, K, SetKeyOfT>::const_iterator const_iterator;

    iterator begin()
    {
        return _ht.begin();
    }
    iterator end()
    {
        return _ht.end();
    }
    //插入函数
    pair<iterator, bool> insert(const K& key)
    {
        return _ht.Insert(key);
    }
    //删除函数
    void erase(const K& key)
    {
        _ht.Erase(key);
    }
    //查找函数
    iterator find(const K& key)
    {
        return _ht.Find(key);
    }
private:
    HashTable<K, K, SetKeyOfT> _ht;
};

template<class K, class V>
class unordered_map
{
    //仿函数
    struct MapKeyOfT
    {
        const K& operator()(const pair<K, V>& kv)
        {
            return kv.first;
        }
    };
public:
    //成员函数
    typedef typename HashTable<K, pair<const K, V>, MapKeyOfT>::iterator iterator;
    typedef typename HashTable<K, pair<const K, V>, MapKeyOfT>::const_iterator const_iterator;
    iterator begin()
    {
        return _ht.begin();
    }

    iterator end()
    {
        return _ht.end();
    }

    const_iterator begin()const
    {
        return _ht.begin();
    }

    const_iterator end()const
    {
        return _ht.end();
    }

    pair<iterator, bool> insert(const pair<K, V>& kv)
    {
        return _ht.Insert(kv);
    }

    V& operator[](const K& key)
    {
        pair<iterator, bool> ret = insert(make_pair(key, V()));
        return ret.first->second;
    }
    iterator find()
    {
        return _ht.Find();
    }
    void erase()
    {
        return _ht.Erase();
    }
private:
    HashTable<K, pair<const K,V> ,MapKeyOfT> _ht;
};

4. 源码

4.1 HashTable.h

#pragma once
#include<utility>
#include<vector>
template<class K>
struct Hash
{
	size_t operator()(const K& key)
	{
		return key;
	}
};
//string类型的特化
template<>
struct Hash<string>
{
	size_t operator()(const string& s) //BKDRHash算法
	{
		size_t value = 0;
		for (auto ch : s)
		{
			value = value * 131 + ch;
		}
		return value;
	}
};
//节点
template<class T>
struct HashNode
{
	T _value;
	HashNode<T>* _next;
	HashNode(const T& value)
		:_value(value)
		, _next(nullptr)
	{}
};
template<class K, class T, class KeyOfT, class HashFunc>
class HashTable;//声明

template<class K, class T, class Ref, class Ptr, class KeyOfT, class HashFunc>
struct __HashIterator
{
	typedef HashNode<T> Node;
    typedef HashTable<K, T, KeyOfT, HashFunc> HashTable;
	typedef __HashIterator<K, T, Ref, Ptr, KeyOfT, HashFunc> self;
	//普通迭代器
	typedef __HashIterator<K, T, T&, T*, KeyOfT, HashFunc> Iterator;
	Node* _node;
	HashTable* _ht;
	__HashIterator(Node* node, HashTable* ht)
		:_node(node)
		, _ht(ht)
	{}
	//普通迭代器构造const迭代器
	__HashIterator(const Iterator&it)
		:_node(it._node)
		,_ht(it._ht)
	{}
	Ref operator*()
	{
		return _node->_value;
	}

	Ptr operator->()
	{
		return &_node->_value;
	}
	bool operator == (const self& it)
	{
		return _node == it._node;
	}
	bool operator!=(const self& it)
	{
		return _node != it._node;
	}
	//前置++
	self& operator++()
	{
		HashFunc hs;
		KeyOfT kot;
		if (_node->_next != nullptr)
		{
			_node = _node->_next;
		}
		else
		{
			//计算当前桶的位置
			size_t hashi =hs(kot(_node->_value)) % (_ht->_table.size());
			//++走到下一个桶
			++hashi;
			while (hashi < _ht->_table.size())
			{
				//不为空
				if (_ht->_table[hashi])
				{
					_node = _ht->_table[hashi];
					break;
				}
				else
				{
					++hashi;
				}
			}
			//如果后面没有非空桶返回nullptr
			if (hashi == _ht->_table.size())
			{
				_node = nullptr;
			}
		}
		return *this;
	}
};
template<class K, class T, class KeyOfT, class HashFunc = Hash<K>>
class HashTable
{
	template<class K, class T, class Ref, class Ptr, class KeyOfT, class HashFunc>
	friend struct __HashIterator;
	typedef HashNode<T> Node;
public:
	typedef __HashIterator<K, T, T&, T*, KeyOfT, HashFunc> iterator;
	typedef __HashIterator<K, T, const T&, const T*, KeyOfT, HashFunc> const_iterator;
	iterator begin()
	{
		Node* cur = nullptr;
		for (size_t i = 0; i < _table.size(); i++)
		{
			if (_table[i])
			{
				cur = _table[i];
				break;
			}
		}
		return iterator(cur, this);
	}
	iterator end()
	{
		return iterator(nullptr, this);
	}

	const_iterator begin()const
	{
		Node* cur = nullptr;
		for (size_t i = 0; i < _table.size(); i++)
		{
			if (_table[i])
			{
				cur = _table[i];
				break;
			}
		}
		return const_iterator(cur, this);
	}
	const_iterator end()const
	{
		return const_iterator(nullptr, this);
	}

	//构造函数
	HashTable() = default;

	//拷贝构造函数
	HashTable(const HashTable& ht)
	{
		//1、将哈希表的大小调整为ht._table的大小
		_table.resize(ht._table.size());
		for (size_t i = 0; i < ht._table.size(); i++)
		{
			if (ht._table[i]) //桶不为空
			{
				Node* cur = ht._table[i];
				while (cur) //将该桶的结点取完为止
				{
					Node* copy = new Node(cur->_data); 
					//将拷贝结点头插到当前桶
					copy->_next = _table[i];
					_table[i] = copy;
					cur = cur->_next; 
				}
			}
		}
		_n = ht._n;
	}
	//赋值运算符重载函数
	HashTable& operator=(HashTable ht)
	{
		//交换哈希表中两个成员变量的数据
		_table.swap(ht._table);
		swap(_n, ht._n);

		return *this; //支持连续赋值
	}
	//析构函数
	~HashTable()
	{
		for (size_t i = 0; i < _table.size(); i++)
		{
			if (_table[i]) //桶不为空
			{
				Node* cur = _table[i];
				while (cur) //将该桶的结点取完为止
				{
					Node* next = cur->_next; //记录下一个结点
					delete cur; //释放结点
					cur = next;
				}
				_table[i] = nullptr; 
			}
		}
	}
	//获取本次增容后哈希表的大小
	size_t GetNextPrime(size_t prime)
	{
		const int PRIMECOUNT = 28;
		//素数序列
		const size_t primeList[PRIMECOUNT] =
		{
			53ul, 97ul, 193ul, 389ul, 769ul,
			1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
			49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
			1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
			50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
			1610612741ul, 3221225473ul, 4294967291ul
		};
		size_t i = 0;
		for (i = 0; i < PRIMECOUNT; i++)
		{
			if (primeList[i] > prime)
				return primeList[i];
		}
		return primeList[i];
	}
	pair<iterator,bool> Insert(const T& value)
	{
		HashFunc hs;
		KeyOfT kot;
		iterator it = Find(kot(value));
		if (it != end())
		{
			return make_pair(it, false);
		}
		// 如果负载因子等于 1 进行扩容
		if (_n == _table.size())
		{
			vector<Node*> newHT;
			// 如果原哈希表大小为 0，则新哈希表大小为 10，否则为原大小的两倍
			//size_t newsize = _table.size() == 0 ? 10 : _table.size() * 2;
			//newtable.resize(newsize);
			newHT.resize(GetNextPrime(_table.size()));
			for (int i = 0; i < _table.size(); i++)
			{
				if (_table[i])
				{
					Node* cur = _table[i];
					// 遍历链表进行头插更新节点进新的哈希表
					while (cur)
					{
						// 记录下一个节点
						Node* next = cur->_next;
						size_t index = hs(kot(cur->_value)) % newHT.size();
						// 进行头插
						cur->_next = newHT[index];
						newHT[index] = cur;
						cur = next;
					}
					// 将原哈希桶置空
					_table[i] = nullptr;
				}
			}
			// 通过交换让原本哈希表自动回收，同时新哈希表成为当前使用的哈希表
			_table.swap(newHT);
		}
		// 计算插入位置
		size_t index = hs(kot(value)) % _table.size();
		Node* newnode = new Node(value);
		// 进行头插
		newnode->_next = _table[index];
		_table[index] = newnode;
		_n++;
		return make_pair(iterator(newnode,this),true);
	}

	iterator Find(const K& key)
	{
		//使用仿函数
		HashFunc hs;
		KeyOfT kot;
		if (_table.size() == 0)
		{
			return iterator(nullptr,this);
		}
		size_t index = hs(key) % _table.size();
		Node* cur = _table[index];
		while (cur)
		{
			if (kot(cur->_value) == key)
			{
				return iterator(cur,this);
			}
			cur = cur->_next;
		}
		return iterator(nullptr, this);
	}
	bool Erase(const K& key)
	{
		HashFunc hs;
		KeyOfT kot;
		size_t index = hs(key) % _table.size();
		//记录前一个节点方便链接
		Node* prev = nullptr;
		Node* cur = _table[index];
		while (cur)
		{
			//找到
			if (kot(cur->_data) == key)
			{
				//如果为头节点
				if (prev == nullptr)
				{
					_table[index] = cur->_next;
				}
				else
				{
					prev->_next = cur->_next;
				}
				delete cur;
				--_n;
				return true;
			}
			//没找到的话就遍历下一个
			prev = cur;
			cur = cur->_next;
		}
		return false;
	}
private:
	vector<Node*> _table;//哈希表
	size_t _n = 0;//有效数据个数
};

4.2 unordered_set.h

#pragma once
#include "Hash.h"
namespace betty
{
	template<class K>
	class unordered_set
	{
		//增加仿函数
		struct SetKeyOfT
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};
	public:
		//成员函数
		typedef typename HashTable<K, K, SetKeyOfT>::const_iterator iterator;
		typedef typename HashTable<K, K, SetKeyOfT>::const_iterator const_iterator;

		iterator begin()
		{
			return _ht.begin();
		}
		iterator end()
		{
			return _ht.end();
		}
		//插入函数
		pair<iterator, bool> insert(const K& key)
		{
			return _ht.Insert(key);
		}
		//删除函数
		void erase(const K& key)
		{
			_ht.Erase(key);
		}
		//查找函数
		iterator find(const K& key)
		{
			return _ht.Find(key);
		}
	private:
		HashTable<K, K, SetKeyOfT> _ht;
	};
}

4.3 unordered_map.h

#pragma once
#include "Hash.h"
namespace betty
{
	template<class K, class V>
	class unordered_map
	{
		//仿函数
		struct MapKeyOfT
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};
	public:
		//成员函数
		typedef typename HashTable<K, pair<const K, V>, MapKeyOfT>::iterator iterator;
		typedef typename HashTable<K, pair<const K, V>, MapKeyOfT>::const_iterator const_iterator;
		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

		const_iterator begin()const
		{
			return _ht.begin();
		}

		const_iterator end()const
		{
			return _ht.end();
		}

		pair<iterator, bool> insert(const pair<K, V>& kv)
		{
			return _ht.Insert(kv);
		}

		V& operator[](const K& key)
		{
			pair<iterator, bool> ret = insert(make_pair(key, V()));
			return ret.first->second;
		}
		iterator find()
		{
			return _ht.Find();
		}
		void erase()
		{
			return _ht.Erase();
		}
	private:
		HashTable<K, pair<const K,V> ,MapKeyOfT> _ht;
	};

}