哈希表（闭散列、拉链法--哈希桶）

最新推荐文章于 2022-03-07 12:34:24 发布

-Thinker

最新推荐文章于 2022-03-07 12:34:24 发布

阅读量4.9k

点赞数 4

分类专栏：算法文章标签：数据结构散列函数 hash

算法专栏收录该内容

15 篇文章 0 订阅

订阅专栏

哈希表，也称散列表，是一种通过key值来直接访问在内存中的存储的数据结构。它通过一个关键值的函数（被称为散列函数）将所需的数据映射到表中的位置来访问数据。

关于哈希表，主要为以下几个方面：

一、哈希表的几种方法

1、直接定址法：取关键字key的某个线性函数为散列地址，如Hash(key) = key 或 Hash(key) = A*key+B；A,B为常数

2、除留取余法：关键值除以比散列表长度小的素数所得的余数作为散列地址。Hash(key) = key % p;

3、平均取中法：先计算构成关键码的标识符的内码的平方，然后按照散列表的大小取中间的若干位作为散列地址。

4、折叠法：把关键码自左到右分为位数相等的几部分，每一部分的位数应与散列表地址位数相同，只有最后一部分的位数可以短一些。把这些部分的数据叠加起来，就可以得到具有关键码的记录的散列地址。分为移位法和分界法。

5、随机数法：选择一个随机函数，取关键字的随机函数作为它的哈希地址。

6、数学分析法：设有N个d位数，每一位可能有r种不同的符号。这r种不同的符号在各位上出现的频率不一定相同，可能在某些位上分布均匀些，每种符号出现的机会均等；在某些位上分布不均匀，只有某几种符号经常出现。可根据散列表的大小，选取其中各种符号分布均匀的若干位作为散列地址。

在这里，我们建哈希表的方法用除留取余法为例。

尽管有这么多种方法，但是不同的key值可能会映射到同一散列地址上。这样就会造成哈希冲突/哈希碰撞。

那么遇到哈希冲突我们该如何处理呢？

二、处理哈希冲突的闭散列方法

1、线性探测：当不同的key值通过哈希函数映射到同一散列地址上时，检测当前地址的下一个地址是否可以插入，如果可以的话，就存在当前位置的下一个地址，否则，继续向下一个地址寻找，地址++。

2、二次探测：是针对线性探测的一个改进，线性探测后插入的key值太集中，这样造成key值通过散列函数后还是无法正确的映射到地址上，太集中也会造成查找、删除时的效率低下。因此，通过二次探测的方法，取当前地址加上i^2，可以取到的新的地址就会稍微分散开。

如：此时的散列表长度为10：

看到以上的例子之后，我们只是存入int型的数据时，计算余数寻找地址是比较容易的。如果我们存入的是字典值（string类型），那么我们就要通过string类型来转化成数值来计算地址。这里用到了BKDR哈希算法（字符串哈希算法）。

同时，经研究表明，通过素数表作为哈希表的长度可以降低哈希冲突。

三、闭散列实现代码

主要实现如下：

[cpp]view plain copy 
   
 
 #pragma once  
 #include<vector>  
 #include<iostream>  
 using namespace std;  
 #include<assert.h>  
 #include<string>  
   
 enum Status  
 {  
     EXIST,  
     DELETE,  
     EMPTY  
 };  
   
 template<class K,class V>  
 struct HashTableNode  
 {  
     K _key;  
     V _value;  
     Status _status;  
     HashTableNode(const K& key = K(), const V& value = V())  
         :_key(key)  
         ,_value(value)  
         ,_status(EMPTY)  
     {}  
 };  
   
 template<class K>  
 struct __HashFunc  
 {  
     size_t operator()(const K& key)  
     {  
         return key;  
     }  
 };  
   
 template<>  
 struct __HashFunc<string>  
 {  
     size_t BKDRHash(const char* str)  
     {  
         register size_t hash = 0;  
         while(*str)  
         {  
             hash = hash*131 + *str;  
             ++str;  
         }  
         return hash;  
     }  
     size_t operator()(const string& str)  
     {  
         return BKDRHash(str.c_str());  
     }  
 };  
 template<class K,class V,class _HashFunc = __HashFunc<K>>  
 class HashTable  
 {  
     typedef HashTableNode<K,V> Node;  
 public:  
     HashTable(size_t size)  
         :_size(0)  
     {  
         assert(size > 0);  
     //  _tables.resize(size);  
         _tables.resize(GetPrime());  
       
     }  
   
     ~HashTable()  
     {}  
     size_t HashFunc(const K& key)  
     {  
         _HashFunc hf;  
         size_t va = hf(key);  
         return va % _tables.size();  
     }  
   
     void Swap(HashTable<K,V,_HashFunc>& ht)  
     {  
         _tables.swap(ht._tables);  
         swap(_size,ht._size);  
     }  
   
     void _CheckCapacity()  
     {  
         if(_tables.size() == 0 || _size*10 / _tables.size() >= 7)  
         {  
             size_t OldSize = _tables.size();  
         //  size_t NewSize = _tables.size()*2+3;  
             size_t NewSize = GetPrime();  
             HashTable<K,V,_HashFunc> ht(NewSize);  
             for(size_t i = 0; i < OldSize; i++)  
             {  
                 if(_tables[i]._status == EXIST)  
                 {  
                     ht.Insert(_tables[i]._key,_tables[i]._value);  
                 }  
             }  
             this->Swap(ht);  
         }  
     }  
   
     pair<Node*,bool> Insert(const K& key,const V& value)  
     {  
         _CheckCapacity();  
         size_t index = HashFunc(key);  
         //线性探测  
         /*while(_tables[index]._status ==  EXIST ) 
         { 
             if(_tables[index]._key == key) 
                 return make_pair((Node*)NULL,false); 
             ++index; 
  
             if(index == _tables.size()) 
             { 
                 index = 0; 
             } 
         } 
         */  
         //二次探测  
         size_t i = 0;  
         size_t first = index;  
         while(_tables[index]._status ==  EXIST )  
         {  
             if(_tables[index]._key == key)  
                 return make_pair((Node*)NULL,false);  
             ++i;  
             index = first + i*i;  
             index %= _tables.size();  
         }  
         ++_size;  
         _tables[index]._key = key;  
         _tables[index]._value = value;  
         _tables[index]._status = EXIST;  
         return make_pair(&_tables[index],true);  
     }  
   
     Node* Find(const K& key,const V& value)  
     {  
         size_t index = HashFunc(key);  
         while(_tables[index]._status != EMPTY)  
         {  
             if(_tables[index]._key == key && _tables[index]._status == EXIST)  
             {  
                 return &_tables[index];  
             }  
             else  
             {  
                 ++index;  
                 if(index == _tables.size())  
                 {  
                     index = 0;  
                 }  
             }  
         }  
         return NULL;  
     }  
   
     bool Remove(const K& key,const V& value)  
     {  
         Node* tmp = Find(key,value);  
         if(tmp)  
         {  
             tmp->_status = DELETE;  
             return true;  
         }  
         return false;  
     }  
     size_t GetPrime()  
     {  
         // 使用素数表对齐做哈希表的容量，降低哈希冲突  
         const int _PrimeSize = 28;  
         static const unsigned long _PrimeList[_PrimeSize] =  
         {  
             53ul, 97ul, 193ul, 389ul, 769ul,  
             1543ul, 3079ul, 6151ul, 12289ul,   
             24593ul,49157ul, 98317ul, 196613ul,   
             393241ul,786433ul,1572869ul, 3145739ul,  
             6291469ul, 12582917ul,25165843ul,50331653ul,   
             100663319ul, 201326611ul, 402653189ul,  
             805306457ul,1610612741ul, 3221225473ul, 4294967291ul  
         };  
         for(size_t i = 0; i < _PrimeSize; i++)  
         {  
             if(_tables.size() < _PrimeList[i])  
                 return _PrimeList[i];  
         }  
         return 0;  
     }  
 private:  
     vector<Node> _tables;  
     size_t _size;  
 };  
   
 void HashTest()  
 {  
     HashTable<int,int> ht(10);  
     ht.Insert(89,0);  
     ht.Insert(18,0);  
     ht.Insert(49,0);  
     ht.Insert(58,0);  
     ht.Insert(9,0);  
       
     cout<<ht.Remove(58,0)<<endl;  
     if(ht.Find(49,0))  
         cout<<ht.Find(9,0)->_key<<endl;  
   
     HashTable<string,string> ht1(10);  
     ht1.Insert("sort","排序");  
     ht1.Insert("left","左边");  
     ht1.Insert("right","右边");  
     ht1.Insert("up","上边");  
     if(ht1.Find("sort","排序"))  
         cout<<ht1.Find("sort","排序")->_key<<endl;  
     cout<<ht1.Remove("sort","排序")<<endl;  
 }  

四、处理哈希冲突的开链法（哈希桶）

当用线性探测和二次探测时，总是在一个有限的哈希表中存储数据，当数据特别多时，效率就比较低。因此采用拉链法的方式来降低哈希冲突。

还有一种情况是，当一个链上链的数据过多时，我们可以采用红黑树的方式来降低高度，保持平衡且不至于过载。

五、哈希桶的实现方式（考虑到存储整形和字符串型）

[cpp]view plain copy 
   
 
 #pragma once  
 #include<iostream>  
 using namespace std;  
 #include<vector>  
 #include<string>  
   
 namespace HashBucket  
 {  
     template<class K>  
     struct __HashFunc  
     {  
         size_t operator()(const K& key)  
         {  
             return key;  
         }  
     };  
   
     template<>  
     struct __HashFunc<string>  
     {  
         size_t BKDRHash(const char* str)  
         {  
             register size_t hash = 0;  
             while(*str)  
             {  
                 hash = hash*131 + *str;  
                 ++str;  
             }  
             return hash;  
         }  
         size_t operator()(const string& str)  
         {  
             return BKDRHash(str.c_str());  
         }  
     };  
   
     template<class K,class V,class _HashFunc>  
     class HashTable;  
   
     template<class K,class V>  
     struct HashNode  
     {  
         pair<K,V> _kv;  
         HashNode<K,V> *_next;  
   
         HashNode(const pair<K,V>& kv)  
             :_kv(kv)  
             ,_next(NULL)  
         {}  
     };  
     template<class K,class V,class Ref,class Ptr>  
     struct HashIterator  
     {  
         typedef HashNode<K,V> Node;  
         typedef HashIterator<K,V,Ref,Ptr> Self;  
         Node* _node;  
         HashTable<K,V,__HashFunc<K>>* _ht;  
   
         HashIterator(Node* node,HashTable<K,V,__HashFunc<K>>* ht)  
             :_node(node)  
             ,_ht(ht)  
         {}  
   
         Ref operator*()  
         {  
             return _node->_kv;  
         }  
   
         Ptr operator->()  
         {  
             return &_node;  
         }  
   
         bool operator!=(const Self& s) const  
         {  
             return _node != s._node;  
         }  
   
         Self& operator++()  
         {  
             _node = Next(_node);  
             return *this;  
         }  
   
         Node* Next(Node* node)  
         {  
             Node* next = node->_next;  
             if(next)  
                 return next;  
             else  
             {  
                 size_t index = _ht->HashFunc(node->_kv.first)+1;  
                 for(;index < _ht->_tables.size();++index)  
                 {  
                     next = _ht->_tables[index];  
                     if(next)  
                     {  
                         return next;  
                     }  
                 }  
                 return NULL;  
             }  
         }  
     };  
   
     template<class K,class V,class _HashFunc = __HashFunc<K>>  
     class HashTable  
     {  
         typedef HashNode<K,V> Node;  
     public:  
         typedef HashIterator<K,V,pair<K,V>&,pair<K,V>*> Iterator;  
         typedef HashIterator<K,V,const pair<K,V>&,const pair<K,V>*> ConstIterator;  
         friend struct Iterator;  
         friend struct ConstIterator;  
     public:  
         HashTable()  
         :_size(0)  
         {  
             _tables.resize(GetNextPrime());  
         }  
   
         ~HashTable()  
         {  
             Clear();  
         }  
   
         void Clear()  
         {  
             Node* cur = NULL;  
             Node* del = NULL;  
             for(size_t index = 0; index < _tables.size(); ++index)  
             {  
                 cur = _tables[index];  
                 if(cur == NULL)  
                 {  
                     continue;  
                 }  
                 while(cur)  
                 {  
                     del = cur;  
                     cur = cur->_next;  
                     delete del;  
                     del = NULL;  
                 }  
             }  
   
         }  
   
         Iterator Begin()  
         {  
             Node* cur = _tables[0];  
             for(size_t index = 0; index < _tables.size();++index)  
             {  
                 cur = _tables[index];  
                 if(cur)  
                 {  
                     return Iterator(cur,this);  
                 }  
             }  
             return Iterator((Node*)NULL,this);  
         }  
   
         Iterator End()  
         {  
             return Iterator((Node*)NULL,this);  
             /*Node* cur = _tables[_tables.size()-1]; 
             while(cur) 
             { 
                 if(cur == NULL) 
                 { 
                     return Iterator(cur,this); 
                 } 
                 cur = cur->_next; 
             } 
             return Iterator((Node*)NULL,this);*/  
         }  
   
         size_t HashFunc(const K& key)  
         {  
             _HashFunc hf;  
             size_t va = hf(key);  
             return va % _tables.size();  
         }  
   
         void Swap(HashTable<K,V,_HashFunc>& ht)  
         {  
             _tables.swap(ht._tables);  
             swap(_size,ht._size);  
         }  
   
         void _CheckCapacity()  
         {  
             //负载因子为1时，扩容  
             if(_size == _tables.size())  
             {  
                 size_t index = GetNextPrime();  
                 HashTable<K,V> tmp;  
                 tmp._tables.resize(index);  
                 Node* cur = NULL;  
                 for(;index < _tables.size();++index)  
                 {  
                     cur = _tables[index];  
                     while(cur)  
                     {  
                         tmp.Insert(cur->_kv);  
                         cur = cur->_next;  
                     }  
                 }  
                 this->Swap(tmp);  
             }  
         }  
   
         size_t GetNextPrime()  
         {  
             // 使用素数表对齐做哈希表的容量，降低哈希冲突  
             const int _PrimeSize = 28;   
             static const unsigned long _PrimeList[_PrimeSize] =  
             {  
                 53ul, 97ul, 193ul, 389ul, 769ul,  
                 1543ul, 3079ul, 6151ul, 12289ul,   
                 24593ul,49157ul, 98317ul, 196613ul,   
                 393241ul,786433ul,1572869ul, 3145739ul,  
                 6291469ul, 12582917ul,25165843ul,50331653ul,   
                 100663319ul, 201326611ul, 402653189ul,  
                 805306457ul,1610612741ul, 3221225473ul, 4294967291ul  
             };  
             for(size_t i = 0; i < _PrimeSize; i++)  
             {  
                 if(_tables.size() < _PrimeList[i])  
                     return _PrimeList[i];  
             }  
             return 0;  
         }  
     public:  
         pair<Iterator,bool> Insert(pair<K,V> kv)  
         {  
             _CheckCapacity();  
             size_t index = HashFunc(kv.first);  
             Node* cur = _tables[index];  
             while(cur)  
             {  
                 if(cur->_kv.first == kv.first)  
                 {  
                     return make_pair(Iterator(cur,this),false);  
                 }  
                 cur = cur->_next;  
             }  
             Node* tmp = new Node(kv);  
             tmp->_next = _tables[index];  
             _tables[index] = tmp;  
             _size++;  
             return make_pair(Iterator(tmp,this),true);  
         }  
   
         Node* Find(const K& key)  
         {  
             size_t index = HashFunc(key);  
             Node* cur = _tables[index];  
             while(cur)  
             {  
                 if(cur->_kv.first == key)  
                 {  
                     return cur;  
                 }  
                 cur = cur->_next;  
             }  
             return NULL;  
         }  
   
         bool Erase(const K& key)  
         {  
             size_t index = HashFunc(key);  
             Node* prev = NULL;  
             Node* cur = _tables[index];  
             Node* del = NULL;  
             while (cur)  
             {  
                 if(cur->_kv.first == key)  
                 {  
                     if(prev == NULL)  
                     {  
                         _tables[index] = cur->_next;  
                     }  
                     else  
                     {  
                         prev->_next = cur->_next;  
                     }  
                     delete cur;  
                     cur = NULL;  
                     _size--;  
                     return true;  
                 }  
                 prev = cur;  
                 cur = cur->_next;  
             }  
             return false;  
         }  
     private:  
         vector<Node*> _tables;  
         size_t _size;  
     };  
   
     void HashTest()  
     {  
         HashTable<int,int> ht;  
         ht.Insert(make_pair<int,int>(89,0));  
         ht.Insert(make_pair<int,int>(18,0));  
         ht.Insert(make_pair<int,int>(49,0));  
         ht.Insert(make_pair<int,int>(58,0));  
         ht.Insert(make_pair<int,int>(9,0));  
         cout<<ht.Erase(58)<<endl;  
         if(ht.Find(49))  
             cout<<ht.Find(9)->_kv.first<<endl;  
         HashTable<int,int>::Iterator it = ht.Begin();  
         while(it != ht.End())  
         {  
             cout<<(*it).first<<":"<<(*it).second<<endl;  
             ++it;  
         }  
   
         HashTable<string,string> ht1;  
         ht1.Insert(make_pair<string,string>("sort","排序"));  
         ht1.Insert(make_pair<string,string>("left","左边"));  
         ht1.Insert(make_pair<string,string>("right","右边"));  
         ht1.Insert(make_pair<string,string>("up","上边"));  
   
         cout<<ht1.Erase("up")<<endl;  
         cout<<ht1.Find("sort")->_kv.second<<endl;  
         HashTable<string,string>::Iterator it1 = ht1.Begin();  
         while(it1 != ht1.End())  
         {  
             cout<<(*it1).first<<":"<<(*it1).second<<endl;  
             ++it1;  
         }  
     }  
 };