unorder_map的底层实现方法

最新推荐文章于 2024-09-20 14:47:16 发布

独L无二

最新推荐文章于 2024-09-20 14:47:16 发布

阅读量1.5k

点赞数 1

分类专栏： STL 面试

原文链接：https://blog.csdn.net/WX_East/article/details/56005664?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522159857804919724842942300%2522%252C%2522scm%2522%253A%252220140713.130102334.pc%255Fall.%2522%257D&request_id=159857804919724842942300&biz_id=0&u

版权

面试同时被 2 个专栏收录

49 篇文章 1 订阅

订阅专栏

STL

12 篇文章 1 订阅

订阅专栏

哈希表

哈希表是一个K_V的结构，又被称为是散列表！

它是根据关键字(key)来直接访问这个内存存储数据位置的数据结构；

在构建哈希表的方法，我们这里主要介绍两种方法：

1、直接地址法；

就是直接取key(或者是某种线性函数)作为这个下表来存储数据，key在这里称作是散列地址。

可以理解成这个样子 Hash（Key）= Key 或Hash（Key）= A*Key + B，A、B为常数。

2、除留余数法；

这里方法的说法是：

就是将 key 被某个不大于散列表长m的数p除后的所得的余数为散列地址。

Hash（Key）= Key%P；

但是这两种方法都有很大的缺陷：

比如说：第一种方法来说吧！

如果数据量大的话，那么要建立的哈希表的长度就非常的了，但是也许中间的很多的位置都没有存取数据，这样就会造成很多的浪费。所以这种方法我们经常不是很常用。

如果说是，使用第二种方法的话，也是存在问题的。

如果要是不同的key值通过除留余数法得到的哈希地址是相同的，在这里就会造成我们经常所说的哈希冲突（也就做是哈希碰撞 ）这种东西。

虽然这种除留余数法有这种缺陷，但是我们还是常常使用这种方法。所以我们想了许多的方法来解决这种哈希冲突！

哈希冲突(哈希碰撞)

解决方法:

1、开放地址法

开放地址法，有被称为是闭散列法。

(1)线性探测法：

线性探测的特点就是如果得到的哈希地址冲突(该位置上已存储数据)的话，我们就是将这个数据插到下一个位置，要是下个位置也已存储数据，就继续到下一个直到找到正确的可以插入的数据。

#pragma  once 
#include <assert.h>
#include<vector>
#include<assert.h>
#include<string>
enum Status//表示每个节点的状态
{
	EXIST,
	DELETE,
	EMPTY
};
template<class K,class V>
struct HashTableNode
{
	K  _key;
	V _value;
	Status _status;
	HashTableNode(const K& key = K(),const  V& value = V())
		:_key(key)
		,_value(value)
		,_status(EMPTY)
	{}
};
 
template<class K>
struct __HashFunc
{
	size_t operator()(const K& key)
	{
		return key;
	}
};
 
template<>
struct __HashFunc<string>
{	
	size_t BKDRHash(const char* str)
	{
		register size_t  hash =  0;
		while(*str)
		{
			hash = hash*131 +*str;
			++str;
		}
		return hash;
	}
	size_t operator()(const string & key)
	{
		return BKDRHash(key.c_str());
	}
};
 
 
 
template<class K,class V,class HashFunc = __HashFunc<K>>
class HashTable
{
	typedef  HashTableNode<K,V> Node;
public:
	HashTable(size_t n)
		:_size(0)
	{
		assert(n>0);
		_table.resize(n);
	}
	~HashTable()
	{}
	//插入函数
	pair<Node* , bool>  Insert(const K& key,const  V& value =  V())
	{
		_CheckCapacity();
		Node * node = Find(key);//先查找
		if(node)//如果找到的话就返回插入失败
		{
			return make_pair(node,false);
		}
		size_t idex = _HashFunc(key);//得到哈希地址
		while(_table[idex]._status == EXIST)
		{
			idex++;//找到合适的位置
			if(idex==_table.size())//如果到结尾 
			{
				idex = 0 ;//从头重新来过 
			}
		}//直到找到合适的位置
		_table[idex]._key = key;
		_table[idex]._value  =value//插入数据;
		_table[idex]._status =  EXIST;
		++_size;
		return  make_pair(&_table[idex],true);
	}
	//删除函数 
	bool Remove(const K& key)
	{
		Node*  node  = Find(key);//先查找数据 
		if(node)//找到的话 
		{
			node->_status =  DELETE;//将该位置的状态改为删除后的状态 
			--_size;//存入数据的个数--
			return true;
		}
		return false;
	}
	//查找函数
	Node * Find(const  K& key)
	{
		size_t  idex=  _HashFunc(key);
		while(_table[idex]._status!= EMPTY)
		{
			if(_table[idex]._key == key)
				return  &_table[idex];
 
			++idex;
			if(idex == _table.size())
				idex=  0 ;
		}
		return NULL;
	}
private:
	//检查容量 
	void _CheckCapacity()
	{
		if(_table.size() == 0 )
		{
			_table.resize(7);
			return ;
		}
		//如果负载因子大于 7的话，，就对 该哈希表进行扩容
		if (_size*10 / _table.size() >= 7)
		{
			size_t newsize = _GetNextprime(_table.size());
			HashTable<K,V,HashFunc> tem(newsize);
			for(size_t i = 0 ;i< _table.size();++i)
			{
				if(_table[i]._status == EXIST)
					tem.Insert(_table[i]._key,_table[i]._value);
			}
			_Swap(tem);
		}
	}
	void _Swap(HashTable<K,V,HashFunc>& tem)
	{
		_table.swap(tem._table);
		std::swap(_size,tem._size);
	}
	//得到下个素数，，，，，一般情况下，哈希表的长度都是素数 ，，，
	size_t  _GetNextprime(size_t n)
	{
		const int _PrimeSize= 28;//下面为一个素数表 
		static const unsigned long _PrimeList[_PrimeSize] =
		{
			53ul, 97ul, 193ul, 389ul, 769ul,
			1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
			49157ul, 98317ul, 196613ul, 393241ul,
			786433ul,
			1572869ul, 3145739ul, 6291469ul, 12582917ul,
			25165843ul,
			50331653ul, 100663319ul, 201326611ul, 402653189ul,
			805306457ul,
			1610612741ul, 3221225473ul, 4294967291ul
		};
		for(size_t i = 0 ;i< _PrimeSize;++i)
		{
			if(n < _PrimeList[i])
				return _PrimeList[i];//找到要找的下一个素数 
		}
		assert(false);
	}
	//得到哈希地址 
	size_t  _HashFunc(const K& key)
	{
		HashFunc hf;
		size_t ret = hf(key);
		return ret % _table.size();
	}
private:
	vector<Node> _table;//用一个vector容器来存储 
	size_t _size;//存储数据的个数 
};
void TestHashTable()
{
	HashTable<int,int> ht(10);
	ht.Insert(89);
	ht.Insert(18);
	ht.Insert(49);
	ht.Insert(58);
	ht.Insert(9);
	ht.Insert(1);
	ht.Insert(3);
	ht.Insert(2);
	ht.Insert(4);
	ht.Insert(5);
	ht.Insert(6);
	ht.Insert(7);
	ht.Insert(8);
	ht.Insert(9);
	ht.Insert(10);
	ht.Remove(1);
	HashTable<string,string,__HashFunc<string>> ht1(1);
	ht1.Insert("左边","left");
 
	ht1.Insert("左边","left");
}

除了这种方法之后，，我们还给了一种方法：：：

(2)二次探索法

所谓的二次探索法，，，也就是说的是，，，

当遇到冲突后，下次所找到的位置为当前的位置加上 n的二次方

解决哈希冲突的方法除了这种闭散列法之外，还有一种开链法；

开链法

所谓的开链法，就是说将产生哈希冲撞的数据全部链到当前位置的下面，

看上去就像是在下面链上了一个一个的桶子，因此这样的方法我们有称作是哈希桶

实现的代码：

template<class K>
struct __HashFunc
{
    size_t operator()(const K& key)
    {
        return key;
    }
};
template<>
struct __HashFunc<string>
{
    size_t BKDRHash(const char* str)
    {
        register size_t  hash =  0;
        while(*str)
        {
            hash = hash*131 +*str;
            ++str;
        }
        return hash;
    }
    size_t operator()(const string & key)
    {
        return BKDRHash(key.c_str());
    }
};

template<class K,class V>
struct HashTableNode
{
    pair<K,V> _kv;//存一个pair结构 
    HashTableNode<K,V> * _next;
    HashTableNode(const pair<K,V>& kv)
            :_kv(kv)
            ,_next(NULL)
    {}
};
//实现迭代器
template<class K,class V,class Ref,class Ptr>
struct HashTableIterator
{
    typedef HashTableNode<K,V> Node;
    typedef HashTableIterator<K,V,Ref,Ptr> Self;
    Node* _node;
    HashTable<K,V> * _ht;
    HashTableIterator(Node* node,HashTable<K,V> * ht)
            :_node(node)
            ,_ht(ht)
    {}
    Self& operator++()
    {
        _node =_Next();
        return *this;
    }
    Self operator++(int)
    {
        Node* cur =_node;
        _node =_Next();
        return cur;
    }
    Self& operator--()
    {
        _node = _Prev();
        return *this;
    }
    Self operator--(int)
    {
        Node* cur =_node;
        _node = _Prev();
        return cur;
    }
    Ptr operator->()
    {
        return  &_node->_kv;
    }
    Ref & operator*()
    {
        return  _node->_kv;
    }
    Node* _Next()
    {
        if(_node->_next)
        {
            return _node->_next;
        }
        size_t idex = _ht->_HashFunc((_node->_kv).first)+1;
        for(;idex < _ht->_tables.size();++idex)
        {
            if(_ht->_tables[idex])
            {
                return _ht->_tables[idex];
            }
        }
        return NULL;
    }
    Node* _Prev()
    {

    }
    bool operator!=(const Self& tem)
    {
        return (_node!= tem._node);
    }
};
//HashFunc用来表示不同类型的key，，，求哈希地址
template<class K,class V,class HashFunc = __HashFunc<K>>
class HashTable
{
    typedef HashTableNode<K,V> Node;
public:
    typedef HashTableIterator<K,V,pair<K,V>&,pair<K,V>*> Iterator;
    typedef HashTableIterator<K,V,const pair<K,V>&,const pair<K,V>*> const_Iterator;
    friend struct Iterator;
    friend struct const_Iterator;


    HashTable()
            :_size(0)
    {}
    ~HashTable()
    {
        Destory();
        _tables.clear();
    }
    //清空函数
    void Clear()
    {
        Destory();
        _size =0 ;
    }
    //销毁函数 
    void Destory()
    {
        for(size_t i = 0 ;i< _tables.size();++i)
        {
            Node* cur = _tables[i];
            while(cur)
            {
                Node* del = cur;
                cur=cur->_next;
                delete del;
            }
            _tables[i] = NULL;
        }
    }
    HashTable(const size_t n )
            :_size(0)
    {
        assert(n>0);
        _tables.resize(n);

    }
    //插入函数
    pair<Iterator ,bool> Insert(const pair<K,V>& kv)
    {
        //检查容量
        _CheckCapacity();
        //先查找 
        Node * cur = Find(kv.first)._node;
        if(cur)//找到返回失败
        {
            return make_pair(Iterator(cur,this),false);//返回该节点的迭代器，，并且插入失败 
        }
        Node * node = new Node(kv);//新建节点 
        size_t idex = _HashFunc(kv.first);
        cur = _tables[idex];
        if(cur)
        {
            node->_next =  cur;
        }
        _tables[idex] = node;
        ++_size;
        return  make_pair(Iterator(node,this),true);//返回新增的节点的迭代器，，，插入成功
    }
    //查找函数
    Iterator Find(const K& key)
    {
        size_t idex = _HashFunc(key);//找到对应的哈希地址 
        Node* cur = _tables[idex];
        while(cur)
        {
            if(cur->_kv.first == key)//找到的话
                return Iterator(cur,this);//返回该位置的迭代器
            cur =cur ->_next;
        }
        return Iterator(NULL,this);//没知道的话，，返回一个为NULL的迭代器

    }
    bool Erase(const K& key)
    {
        Node * cur =Find(key)._node;
        if(cur)
        {
            size_t idex = _HashFunc(key);
            if(_tables[idex] == cur)
            {

                delete cur ;
                _tables[idex] = NULL;
            }
            else
            {
                Node* parent = _tables[idex];
                while(parent->_next != cur)
                {
                    parent->_next = cur->_next;
                    delete cur;
                }
            }
            --_size;
            return true;
        }
        return false;
    }
    Iterator  Begin()
    {
        for(size_t i =0 ;i< _tables.size();++i)
        {
            Node* cur = _tables[i];
            if(cur)
            {
                return  Iterator(cur,this);
            }
        }
        return  Iterator(NULL,this);
    }
    const_Iterator  Begin()const
    {
        for(size_t i =0 ;i< _tables.size();++i)
        {
            Node* cur = _tables[i];
            if(cur)
            {
                return  Iterator(cur,this);
            }
        }
        return  Iterator(NULL,this);
    }
    const_Iterator End()const
    {
        return  Iterator(NULL,this);
    }
    Iterator End()
    {
        return  Iterator(NULL,this);
    }
    //使用迭代器输出哈希表内的数据 
    void  Print()
    {
        Iterator it =Begin();
        while(it!= End())
        {
            cout<<it->first<<" ,"<<it->second<<endl;
            ++it;
        }
        cout<<endl;
    }
protected:
    //获取下一个素数 
    size_t _GetNextPrime(const  size_t n)
    {
        const int _PrimeSize= 28;//编译器为我们提供的素数表的长度
        //素数表
        static const unsigned long _PrimeList[_PrimeSize] =
                {
                        53ul, 97ul, 193ul, 389ul, 769ul,
                        1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
                        49157ul, 98317ul, 196613ul, 393241ul,
                        786433ul,
                        1572869ul, 3145739ul, 6291469ul, 12582917ul,
                        25165843ul,
                        50331653ul, 100663319ul, 201326611ul, 402653189ul,
                        805306457ul,
                        1610612741ul, 3221225473ul, 4294967291ul
                };
        for(size_t i = 0 ;i < _PrimeSize;++i)
        {
            if(_PrimeList[i] > n)//找到下一个素数
            {
                return  _PrimeList[i];
            }
        }
        return _PrimeList[_PrimeSize-1];//如果要是已经是最大了，，就直接返回最后的值

    }
    //检查容量 ，，，判断是否需要来扩容 
    void _CheckCapacity()
    {
        if(_tables.size() == _size)//如果负载因子为1的话，就要进行 扩容
        {
            size_t newsize = _GetNextPrime(_tables.size());//得到下一步 扩容 后的哈希表的长度
            HashTable<K,V,HashFunc> tem(newsize);//生成一个新长度的哈希表
            for(size_t i = 0 ;i < _tables.size();++i)//将原哈希表内的数据全部，，，插入到生成的这个对象中 
            {
                Node* cur = _tables[i];
                while(cur)
                {
                    tem.Insert(cur->_kv);
                    cur = cur->_next;
                }
            }
            _Swap(tem);//插入结束后 ，，，我们就将原哈希表的内容与生成的新对象内容 交换  
        }
    }
    //用来交换两个HashTable的对象内容  
    void _Swap(HashTable<K,V>& tem)
    {
        _tables.swap(tem._tables);
        std::swap(_size,tem._size);
    }
    //获得
    size_t _HashFunc(const K& key)
    {
        HashFunc hf;
        size_t ret = hf(key);
        return ret % _tables.size();
    }
protected:
    vector<Node*> _tables;//哈希表的内部不存数据，存储的是所链节点的第一个指针
    size_t _size;//存储节点的数量 
};

关于哈希表的知识就大概这么多了；

那么我们下面就来说说 unorder_map这个东西，在系统库中的unorder_map这个容器就是使用的这个哈希表来实现的，，，并且所谓的哈希桶来实现的。

下面就来实现一下吧！

template<class K,class V,class HashFunc = __HashFunc<K>>
	class UnorderMap
	{
		typedef HashTable<K,V,HashFunc> Hash;
	public:
		typedef   typename Hash::Iterator  Iterator;
		typedef	  typename Hash::const_Iterator  const_Iterator;
		UnorderMap()
		{}
		//插入函数 
		pair<Iterator,bool> Insert(const pair<K,V> & kv)
		{
			return _ht.Insert(kv);//直接调用 hash表的函数
		}
		bool Erase(const K& key)
		{
			return  _ht.Erase(key);//调用 删除函数 
		}
		Iterator  Find(const  K& key)
		{
			return _ht.Find(key);//调用   查找函数 
		}
		Iterator Begin()
		{
			return  _ht.Begin();//直接调用  
		}
		const_Iterator Begin()const 
		{
			return  _ht.Begin();
		}
		Iterator End()
		{
			return _ht.End();
		}
		const_Iterator End()const 
		{
			return _ht.End();
		}
		void Print()
		{
			_ht.Print();
		}
	protected:
		Hash _ht;
	};