基于哈希的unordered_系列容器实现

最新推荐文章于 2023-09-23 12:47:57 发布

爱马拉松的东宫高兴

最新推荐文章于 2023-09-23 12:47:57 发布

阅读量392

点赞数

分类专栏： C++

本文链接：https://blog.csdn.net/qq_43780350/article/details/100164891

版权

C++ 专栏收录该内容

18 篇文章 0 订阅

订阅专栏

文章目录

关联式容器：哈希结构

关联式容器：哈希结构

哈希表的改造

Common.hpp

// Number of entries in primeList.
const int PRIMECOUNT = 28;
// Ascending table of candidate bucket counts. Each entry is roughly double
// the previous one (except the last), so moving to the next entry grows the
// table by about a factor of two.
const size_t primeList[PRIMECOUNT] =
{
	53ul, 97ul, 193ul, 389ul, 769ul,
	1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
	49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
	1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
	50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
	1610612741ul, 3221225473ul, 4294967291ul
};
// Returns the first entry of primeList that is strictly greater than `prime`.
// When no entry is greater, the largest entry is returned.
// Declared inline because the definition lives in a header.
inline size_t GetNextPrime(size_t prime)
{
	for (int i = 0; i < PRIMECOUNT; ++i)
	{
		if (primeList[i] > prime)
			return primeList[i];
	}

	// Nothing larger is available: clamp to the last entry. Indexing with the
	// loop counter here would read one element past the end of the array.
	return primeList[PRIMECOUNT - 1];
}

HashBucket.hpp

#pragma once

//开散列：一个链表的集合---产生相同哈希地址的元素放到同一个链表中
#include <assert.h>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
#include "Common.hpp"
// Node of a singly linked bucket chain.
//   _pNext : the node that follows this one (nullptr when there is none)
//   _data  : the element stored in this node
template<typename T>
struct HashNode
{
	HashNode<T>* _pNext;
	T _data;

	// Builds a node with no successor that holds a copy of `data`.
	// Called without an argument, the payload is a value-initialized T.
	HashNode(const T& data = T())
		: _pNext(nullptr), _data(data)
	{
	}
};
// Default key-to-integer functor: hands the argument back unchanged.
// Suitable when T is already an integral type that can be used with `%`.
template<class T>
struct DefD2INIT
{
	// Returns a reference to `data` itself (no copy is made).
	// const-qualified so the functor can also be invoked through a const object.
	const T& operator()(const T& data) const
	{
		return data;
	}
};
// Key-to-integer functor for std::string keys.
struct Str2INT
{
	// Computes a BKDR-style polynomial hash (multiplier 131) over the bytes of
	// `s`. The result depends only on the characters, so equal strings always
	// produce the same value; casting c_str() to an integer would hash the
	// buffer address instead, which differs between equal strings.
	// Returns 0 for the empty string.
	size_t operator()(const std::string& s) const
	{
		size_t hash = 0;
		for (std::string::size_type i = 0; i < s.size(); ++i)
		{
			// Go through unsigned char so bytes >= 0x80 are not sign-extended.
			hash = hash * 131 + static_cast<unsigned char>(s[i]);
		}
		return hash;
	}
};
// Forward declaration so hashbucketIterator can hold a pointer to its table.
// DTOINT converts a key (type K) to an integer, so its default is the
// identity functor over K rather than over the element type T.
template<class T, class K, class KOFV, class DTOINT = DefD2INIT<K>>
class hashbucket;
// Forward iterator over every element of a hashbucket.
// It stores the current node plus a pointer to the owning table, which is
// needed to hop to the next non-empty bucket once a chain is exhausted.
// A null node pointer represents end().
template<class T, class K, class KOFV,class DTOINT>
struct hashbucketIterator
{
	typedef HashNode<T> Node;
	typedef hashbucketIterator<T, K, KOFV, DTOINT> Self;
public:
	hashbucketIterator(Node* pNode = nullptr, hashbucket<T, K, KOFV, DTOINT>* ht=nullptr)
		:_pNode(pNode)
		, _pHt(ht)
	{}
	// Element access; the iterator must not be end().
	T& operator*() const
	{
		return _pNode->_data;
	}
	T* operator->() const
	{
		return  &(operator*());
	}
	// Pre-increment: advance, then return this iterator.
	Self& operator++()
	{
		Next();
		return *this;
	}
	// Post-increment: advance, but return the position held before the move.
	Self operator++(int)
	{
		Self temp(*this);
		Next();
		return temp;
	}
	// Two iterators are equal when they refer to the same node.
	bool operator==(const Self& s)const
	{
		return _pNode == s._pNode;
	}
	bool operator!=(const Self& s)const
	{
		return _pNode != s._pNode;
	}
	// Moves to the next element: the successor in the current chain if there
	// is one, otherwise the head of the next non-empty bucket, otherwise end().
	void Next()
	{
		// Already at end(): nothing to advance to.
		if (_pNode == nullptr)
			return;

		if (_pNode->_pNext)
			_pNode = _pNode->_pNext;
		else
		{
			// Bucket that follows the one holding the current node.
			size_t bucketNo = _pHt->HashFunc(KOFV()(_pNode->_data)) + 1;
			// Scan forward for a non-empty bucket. The bound is size(): only
			// indices below size() are valid elements of the vector, and
			// capacity() may be larger.
			for (; bucketNo < _pHt->_table.size(); ++bucketNo)
			{
				if (_pHt->_table[bucketNo])
				{
					_pNode = _pHt->_table[bucketNo];
					return;
				}
			}
			_pNode = nullptr;
		}
	}
	Node* _pNode;                          // current node, nullptr == end()
	hashbucket<T, K, KOFV, DTOINT>* _pHt;  // table being iterated
};


//T代表节点中的元素
//unordered_map:键值对
//unordered_set:key
template<class T, class K, class KOFV, class DTOINT>
class hashbucket
{
	friend hashbucketIterator<T, K, KOFV, DTOINT>;
	typedef HashNode<T> Node;
	typedef hashbucket<T, K, KOFV, DTOINT> Self;
public:
	typedef hashbucketIterator<T, K, KOFV, DTOINT> iterator;
public:
	hashbucket(size_t capacity = 10)
		:_table(GetNextPrime(capacity))
		, _size(0)
	{}
	~hashbucket()
	{
		Clear();
	}
	iterator begin()
	{
		//找第一个非空桶
		for (size_t i = 0; i < _table.capacity(); ++i)
		{
			if (_table[i])
				return iterator(_table[i], this);
		}
		return end();
	}
	iterator end()
	{
		return iterator(nullptr, this);
	}
	pair<iterator, bool> Insert(const T& data)
	{
		_CheckCapacity();
		//通过哈希函数计算哈希桶号
		size_t bucketNo = HashFunc(KOFV()(data));
		Node* pCur = _table[bucketNo];
		while (pCur)
		{
			if (data == pCur->_data)
				return make_pair(iterator(pCur,this), false);
			pCur = pCur->_pNext;
		}
		pCur = new Node(data);
		pCur->_pNext = _table[bucketNo];
		_table[bucketNo] = pCur;
		_size++;
		return make_pair(iterator(pCur,this), true);
	}
	size_t Erase(const K& key)
	{
		size_t bucketNo = HashFunc(key);
		Node* pCur = _table[bucketNo];
		Node* pPre = nullptr;
		while (pCur)
		{
			if (key == KOFV()(pCur->_data))
			{
				//可以删除
				if (pCur == _table[bucketNo])
					_table[bucketNo] = pCur->_pNext;
				else
					pPre->_pNext = pCur->_pNext;
				delete pCur;
				--_size;
				return 1;
			}
			else
			{
				pPre = pCur;
				pCur = pCur->_pNext;
			}
		}
		return 0;
	}
	iterator Find(const K& key)
	{
		size_t bucketNo = HashFunc(key);
		Node* pCur = _table[bucketNo];
		while (pCur)
		{
			if (KOFV()(pCur->_data) == key)
				return iterator(pCur, this);
			pCur = pCur->_pNext;
		}
		return end();
	}
	size_t Size()const
	{
		return _size;
	}
	bool Empty()const
	{
		return 0 == _size;
	}
	void Clear()
	{
		for (size_t bucketNo = 0; bucketNo < _table.capacity(); bucketNo++)
		{
			Node* pCur = _table[bucketNo];
			while (pCur)
			{
				_table[bucketNo] = pCur->_pNext;
				delete pCur;
				pCur = _table[bucketNo];
			}
		}
		_size = 0;
	}
	void Swap(Self& ht)
	{
		_table.swap(ht._table);
		swap(_size, ht._size);
	}
	void PrintfHashBucket()
	{
		for (size_t i = 0; i < _table.capacity(); ++i)
		{
			Node* pCur = _table[i];
			cout << "table[" << i << "]:";
			while (pCur)
			{
				cout << pCur->_data << "--->";
				pCur = pCur->_pNext;
			}
			cout << "NULL" << endl;
		}
	}
	size_t BucketCount()const
	{
		return _table.capacity();
	}
	size_t BucketSize(size_t bucketNo)const
	{
		assert(bucketNo < BucketCount());
		Node* pCur = _table[bucketNo];
		size_t szcount = 0;
		while (pCur)
		{
			szcount++;
			pCur = pCur->_pNext;
		}
		return szcount;
	}
	size_t Bucket(const K& key)
	{
		return HashFunc(key);
	}
private:
	size_t HashFunc(const K& key)
	{
		//T：可以是任意类型，可能不是整型，string怎么办？
		return DTOINT()(key) % _table.capacity(); //怎么保证每次扩容是两次关系且（%的是素数），有待百度
	}
	void _CheckCapacity()
	{
		if (_size == _table.capacity())
		{
			Self newHT(GetNextPrime(_table.capacity()));
			//将旧哈希桶中的节点往新哈希桶中搬移
			for (size_t i = 0; i < _table.capacity(); ++i)
			{
				Node* pCur = _table[i];
				//将i号桶中的所有节点搬移到新哈希桶中
				while (pCur)
				{
					//将pCur节点从_table的第i号桶移除掉
					_table[i] = pCur->_pNext;
					//将pCur节点插入到新哈希桶中
					size_t bucketNo = newHT.HashFunc(KOFV()(pCur->_data));
					//采用头插法
					pCur->_pNext = newHT._table[bucketNo];
					newHT._table[bucketNo] = pCur;
					newHT._size++;
					_size--;
					pCur = _table[i];
				}
			}
			this->Swap(newHT);
		}
	}
private:
	vector<Node*> _table;//存每一个链表的首地址
	size_t _size;
};

C++11 unordered_系列

1. unordered_map

<key,value>—>key必须唯一

1.1代码实现

unordered_map.hpp

#pragma once

#include "HashBucket.hpp"
namespace bite
{
	template<class K,class V,class KTOINT=DefD2INIT<K>>
	class unordered_map
	{
		typedef pair<K, V> ValueType;
		struct KOFV
		{
			const K& operator()(const ValueType& data)
			{
				return data.first;
			}
		};
		typedef hashbucket<ValueType,K, KOFV, KTOINT> HT;
		typename typedef HT::iterator iterator;
	public:
		unordered_map()
			:_ht()
		{}
		//iterator
		iterator begin()
		{
			return _ht.begin();
		}
		iterator end()
		{
			return _ht.end();
		}
		//capacity
		bool empty()const
		{
			return _ht.Empty();
		}     
		size_t size()const
		{
			return _ht.Size();
		}
		//access
		V& operator[](const K& key)
		{
			return (*_ht.Insert(make_pair(key,V())).first).second;
		}
		//Modifiers
		pair<iterator, bool> insert(const ValueType& data)
		{
			return _ht.Insert(data);
 		}
		size_t erase(const K& key)
		{
			return _ht.Erase(key);
		}
		void clear()
		{
			_ht.Clear();
		}
		void swap(unordered_map<K,V,KTOINT>& m)
		{
			_ht.Swap(m._ht);
		}
		size_t bucket_count()const
		{
			return _ht.BucketCount();
		}
		size_t bucket_size(size_t bucketNo)
		{
			return _ht.BucketSize(bucketNo);
		}
		size_t bucket(const K& key)
		{
			return _ht.Bucket(key); 
		}
		iterator find(const K& key)
		{
			return _ht.Find(key);
		}
	private:
		HT _ht;
	};
}
void  TestUnorderedMap()//测试
{
 	bite::unordered_map<int, int> m;
 	m.insert(make_pair(1, 1));
	m.insert(make_pair(10, 10));
	m.insert(make_pair(21, 21));
	cout << m.size() << endl;
 	for (auto e:m)
 		cout << e.first << "--->" << e.second << endl;
 	cout << endl;
	m.erase(10);
	if (m.find(10) == m.end())
		cout << "10 erase is ok" << endl;
	else
		cout << "10 erase if no" << endl;
	m.clear();
	cout << m.size() << endl;
}

1.2map和unordered_map的区别

map 中的元素按 key 有序排列（关于 key 的有序序列）；unordered_map 中的元素是无序的
查找的时间复杂度：map—>O(logN) unordered_map：平均 O(1)，哈希冲突严重时最坏 O(N)
应用场景：
底层结构：map—>红黑树 unordered_map：哈希桶
存储方式不同：

unordered_map 根据 key 的哈希值决定元素存放在哪个桶中，再用 key 的相等比较判断元素是否相同，因此内部元素是无序的；
而 map 中的元素是按照二叉搜索树（红黑树）存储，进行中序遍历会得到有序序列。
6.插入方式不同、查找方式不同、删除方式不同、接口不同

1.3优缺点以及适用处

map

优点：
有序性，这是map结构最大的优点，其元素的有序性在很多应用中都会简化很多的操作
红黑树：内部实现了一棵红黑树，使得 map 的查找、插入、删除等操作都可以在 $O(\log n)$ 的时间复杂度下完成，因此效率非常高
缺点：
空间占用率高，因为map内部实现了红黑树，虽然提高了运行效率，但是因为每一个节点都需要额外保存父节点，孩子节点以及红/黑性质，使得每一个节点都占用大量的空间
适用处，对于那些有顺序要求的问题，用map会更高效一些

unordered_map

优点：
因为内部实现了哈希表，因此其查找速度非常的快
缺点：
哈希表的建立比较耗费时间
适用处，对于查找问题，unordered_map会更加高效一些，因此遇到查找问题，常会考虑一下用unordered_map

2. unordered_set

key—>key必须唯一

2.1代码实现

unordered_set

#pragma once
#include "HashBucket.hpp"
namespace bite
{
	// Hash set with unique keys, implemented as a thin wrapper over
	// hashbucket in which the stored element is the key itself.
	//   K      : key type
	//   KTOINT : functor converting a key to an integer for hashing
	template<class K, class KTOINT = DefD2INIT<K>>
	class unordered_set
	{
		typedef K ValueType;
		// The element is its own key.
		struct KOFV
		{
			const K& operator()(const ValueType& data)
			{
				return data;
			}
		};
		typedef hashbucket<ValueType, K, KOFV, KTOINT> HT;
	public:
		// `typename` must follow `typedef`; the reverse order is rejected by
		// standard-conforming compilers. Public so callers can name the type.
		typedef typename HT::iterator iterator;
	public:
		unordered_set()
			:_ht()
		{}
		//iterator
		iterator begin()
		{
			return _ht.begin();
		}
		iterator end()
		{
			return _ht.end();
		}
		//capacity
		bool empty()const
		{
			return _ht.Empty();
		}
		size_t size()const
		{
			return _ht.Size();
		}
	
		//Modifiers
		// Inserts `data` unless an equal key is already present.
		// Returns {iterator to the element, whether it was inserted}.
		std::pair<iterator, bool> insert(const ValueType& data)
		{
			return _ht.Insert(data);
		}
		// Removes `key`; returns the number of elements removed.
		size_t erase(const K& key)
		{
			return _ht.Erase(key);
		}
		void clear()
		{
			_ht.Clear();
		}
		void swap(unordered_set<K, KTOINT>& m)
		{
			_ht.Swap(m._ht);
		}
		//buckets
		size_t bucket_count()const
		{
			return _ht.BucketCount();
		}
		size_t bucket_size(size_t bucketNo)const
		{
			return _ht.BucketSize(bucketNo);
		}
		size_t bucket(const K& key)
		{
			return _ht.Bucket(key);
		}
		//lookup
		// Iterator to `key`, or end() when it is not stored.
		iterator find(const K& key)
		{
			return _ht.Find(key);
		}
	private:
		HT _ht;
	};
}
void  TestUnorderedSet()//测试
{
	bite::unordered_set<int> m;
	m.insert(1);
	m.insert(10);
	m.insert(21);
	cout << m.size() << endl;
	for (auto e : m)
	cout << e<< endl;
 	cout << endl;
	m.erase(10);
	if (m.find(10) == m.end())
		cout << "10 erase is ok" << endl;
	else
		cout << "10 erase if no" << endl;
	m.clear();
	cout << m.size() << endl;
}