一个高效的hash str map 的实现

最新推荐文章于 2020-11-26 10:51:27 发布

wangsheng8888

最新推荐文章于 2020-11-26 10:51:27 发布

阅读量885

点赞数

分类专栏：算法文章标签： null delete 测试 list hashmap iterator

本文链接：https://blog.csdn.net/wangsheng8888/article/details/6736060

版权

算法专栏收录该内容

1 篇文章 0 订阅

订阅专栏

在使用 hash_map 的过程中，发现 hash_map 对字符串作为键值的支持不是很好，于是特意写了一个新的 StrMap，主要用作字符串型键值的 Map。经过简单的测试，设置和提取键值的速度是 hash_map 的 20 倍左右。现在拿出来和大家分享，也希望大家提出更好的修改意见。谢谢。

下面的代码是 StrMap的实现代码。

 
   view plain 
  
 #pragma once

 #include <cstddef>
 #include <cstring>
   
 /// Hash map keyed by fixed-capacity C strings.
 /**
     \brief Separate-chaining hash table whose keys are NUL-terminated strings
            copied into a fixed char[StrLen] buffer inside every node.

     Keys must be shorter than StrLen characters (one slot is needed for the
     terminating NUL); longer keys are rejected by Set() and never match in
     Find() / Contains().

     \tparam ValueT  mapped type; must be default-constructible and assignable.
     \tparam StrLen  size in bytes of the per-node key buffer.
 */
 template<typename ValueT, int StrLen = 128>
 class StrMap
 {
 protected:
     /// One node of a bucket chain: the key copy, the value and the chain link.
     struct Assoc
     {
         char    m_key[StrLen];
         ValueT  m_data;
         Assoc*  m_pNext;
     };

     typedef Assoc* LPAssoc;
 public:
     /// Forward iterator over the stored values (keys are not exposed).
     /**
         The iterator caches the node pointer and its bucket index, so it must
         not be used after RemoveKey() of the node it points to, after Clear(),
         or after a Set() that grew the table (nodes are relinked into new
         buckets and the cached index becomes stale).
     */
     struct iterator
     {
         friend class StrMap;

         /// Creates an iterator that belongs to no map; it compares unequal to
         /// any End() obtained from a map.
         iterator()
             : m_pIter(NULL), m_nIndex(0), m_pMap(NULL)
         {
         }

         /// Pre-increment. Advancing an end iterator is a no-op.
         iterator& operator ++()
         {
             Advance();
             return *this;
         }

         /// Post-increment; returns the position held before advancing.
         iterator operator ++(int)
         {
             iterator tmp(*this);
             Advance();
             return tmp;
         }

         /// Member access to the current value; must not be used on End().
         ValueT* operator ->()
         {
             return &(m_pIter->m_data);
         }

         /// Reference to the current value; must not be used on End().
         ValueT& operator *()
         {
             return m_pIter->m_data;
         }

         bool operator != (const iterator& obj) const
         {
             return m_pMap != obj.m_pMap || m_pIter != obj.m_pIter;
         }

         bool operator == (const iterator& obj) const
         {
             return m_pMap == obj.m_pMap && m_pIter == obj.m_pIter;
         }

     protected:

         iterator(StrMap* pMap, LPAssoc pAssoc = NULL, unsigned long nIndex = 0)
             : m_pIter(pAssoc), m_nIndex(nIndex), m_pMap(pMap)
         {
         }

         /// Moves to the next node: first along the current chain, then to the
         /// head of the next non-empty bucket, finally to the end position.
         void Advance()
         {
             if (NULL == m_pIter)
             {
                 return;     // already at end (or default-constructed): nothing to dereference
             }

             if (m_pIter->m_pNext)
             {
                 m_pIter = m_pIter->m_pNext;
                 return;
             }

             for (unsigned long i = m_nIndex + 1; i < m_pMap->m_nHashTableSize; ++i)
             {
                 if (m_pMap->m_pHashTable[i])
                 {
                     m_pIter = m_pMap->m_pHashTable[i];
                     m_nIndex = i;
                     return;
                 }
             }

             // Same representation End() uses, so the two compare equal.
             m_pIter = NULL;
             m_nIndex = 0;
         }

         LPAssoc m_pIter;            ///< current node, NULL at the end position
         unsigned long m_nIndex;     ///< bucket index of the current node
         StrMap* m_pMap;             ///< owning map
     };

 public:
     /// Constructor.
     /**
         \param nSize      requested initial bucket count; rounded up to the
                           next entry of the internal bucket-size table.
         \param bAutoIncr  when true, the table grows once the element count
                           exceeds the bucket count.

         `unsigned long` is the type the Windows `ULONG` typedef names; it is
         spelled out so this header does not depend on <windows.h>.
     */
     StrMap(unsigned long nSize = 199, bool bAutoIncr = true)
         : m_pHashTable(NULL)
         , m_nHashTableSize(0)
         , m_nCount(0)
         , m_bAutoIncr(bAutoIncr)
         , m_nMaxLength(StrLen)
     {
         InitMap(nSize);
     }

     /// Destructor: frees every node and the bucket array.
     virtual ~StrMap(void)
     {
         Destory();
     }

     /// Looks up a key and copies its value out.
     /**
         \param pKey  key to search for (may be NULL).
         \param obj   receives a copy of the value when the key is present;
                      left untouched otherwise.
         \return true when the key was found.
     */
     bool Find(const char* pKey, ValueT& obj) const
     {
         const Assoc* pAssoc = Lookup(pKey);
         if (NULL == pAssoc)
         {
             return false;
         }
         obj = pAssoc->m_data;
         return true;
     }

     /// Tests whether a key is present.
     /**
         \param pKey  key to search for (may be NULL).
         \return true when the map contains pKey.
     */
     bool Contains(const char* pKey) const
     {
         return NULL != Lookup(pKey);
     }

     /// Inserts a key or overwrites the value of an existing key.
     /**
         \param pKey  key to set; NULL or strlen(pKey) >= StrLen is rejected.
         \param obj   value to store (copied by assignment).
         \return false when the key was rejected, true otherwise.
     */
     bool Set(const char* pKey, const ValueT& obj)
     {
         if (NULL == pKey || std::strlen(pKey) >= m_nMaxLength)
         {
             return false;
         }

         // Walk the chain through a pointer to the link field, so the bucket
         // head and the interior nodes are handled by the same code.
         LPAssoc* ppLink = &m_pHashTable[HashKey(pKey) % m_nHashTableSize];
         while (*ppLink)
         {
             if (0 == std::strcmp((*ppLink)->m_key, pKey))
             {
                 (*ppLink)->m_data = obj;
                 return true;
             }
             ppLink = &(*ppLink)->m_pNext;
         }

         // Not present: build the node completely before linking it in.
         LPAssoc pNew = new Assoc;
         try
         {
             pNew->m_data = obj;
         }
         catch (...)
         {
             delete pNew;    // do not leak the node if the value's assignment throws
             throw;
         }
         std::strcpy(pNew->m_key, pKey);     // fits: length was checked above
         pNew->m_pNext = NULL;
         *ppLink = pNew;                     // append at the chain tail
         ++m_nCount;

         if (m_bAutoIncr && m_nCount > m_nHashTableSize)
         {
             const unsigned long nNewSize = AdjustSize(m_nCount);
             if (nNewSize != m_nHashTableSize)   // already at the largest size: skip the pointless rehash
             {
                 ReSetTableSize(nNewSize);
             }
         }

         return true;
     }

     /// Removes a key; does nothing when the key is NULL or absent.
     /**
         \param pKey  key to remove.
     */
     void RemoveKey(const char* pKey)
     {
         if (NULL == pKey)
         {
             return;
         }

         LPAssoc* ppLink = &m_pHashTable[HashKey(pKey) % m_nHashTableSize];
         while (*ppLink)
         {
             LPAssoc pAssoc = *ppLink;
             if (0 == std::strcmp(pAssoc->m_key, pKey))
             {
                 *ppLink = pAssoc->m_pNext;  // unlink, whether head or interior node
                 delete pAssoc;
                 --m_nCount;
                 return;
             }
             ppLink = &pAssoc->m_pNext;
         }
     }

     /// Removes all elements.
     /**
         \brief Deletes every node but keeps the bucket array at its current size.
     */
     void Clear()
     {
         for (unsigned long i = 0; i < m_nHashTableSize; ++i)
         {
             LPAssoc pAssoc = m_pHashTable[i];
             while (pAssoc)
             {
                 LPAssoc pDelAssoc = pAssoc;
                 pAssoc = pAssoc->m_pNext;
                 delete pDelAssoc;
             }
             m_pHashTable[i] = NULL;
         }
         m_nCount = 0;
     }

     /// Returns the number of stored elements.
     unsigned long Size() const
     {
         return m_nCount;
     }

     /// Returns an iterator to the first element, or End() when the map is empty.
     iterator Begin()
     {
         for (unsigned long i = 0; i < m_nHashTableSize; ++i)
         {
             if (m_pHashTable[i])
             {
                 return iterator(this, m_pHashTable[i], i);
             }
         }
         return iterator(this, NULL, 0);
     }

     /// Returns the past-the-end iterator.
     iterator End()
     {
         return iterator(this, NULL, 0);
     }
 protected:

     /// Allocates an empty bucket array of AdjustSize(nSize) buckets.
     /**
         Any previous bucket array is released without freeing its nodes, so
         this is only meant to be called on an empty map (the constructor).
     */
     bool InitMap(unsigned long nSize)
     {
         m_nHashTableSize = AdjustSize(nSize);

         if (m_pHashTable)
         {
             delete [] m_pHashTable;
             m_pHashTable = NULL;
         }
         m_pHashTable = new LPAssoc[m_nHashTableSize]();    // () value-initialises every bucket to NULL

         return true;
     }

     /// Returns the node holding pKey, or NULL when it is not stored.
     /**
         Shared by Find() and Contains(). Keys that Set() would reject (NULL or
         too long) are reported as absent.
     */
     LPAssoc Lookup(const char* pKey) const
     {
         if (NULL == pKey || 0 == m_nHashTableSize)
         {
             return NULL;
         }
         if (std::strlen(pKey) >= m_nMaxLength)
         {
             return NULL;    // same limit as Set(): such a key can never have been stored
         }

         for (LPAssoc pAssoc = m_pHashTable[HashKey(pKey) % m_nHashTableSize];
              pAssoc;
              pAssoc = pAssoc->m_pNext)
         {
             if (0 == std::strcmp(pAssoc->m_key, pKey))
             {
                 return pAssoc;
             }
         }
         return NULL;
     }

     /// Rehashes every node into a new bucket array.
     /**
         \param nSize  new bucket count; 0 is rejected.
         \return true on success, false when nSize is 0.

         Existing nodes are relinked, not copied; nodes that land in the same
         new bucket keep their relative order.
     */
     bool ReSetTableSize(unsigned long nSize)
     {
         if (0 == nSize)
         {
             return false;   // would make every "% nSize" a division by zero
         }

         LPAssoc* pNewTable = new LPAssoc[nSize]();

         for (unsigned long i = 0; i < m_nHashTableSize; ++i)
         {
             LPAssoc pNode = m_pHashTable[i];
             while (pNode)
             {
                 LPAssoc pNext = pNode->m_pNext;     // remember before the link is overwritten

                 LPAssoc* ppTail = &pNewTable[HashKey(pNode->m_key) % nSize];
                 while (*ppTail)
                 {
                     ppTail = &(*ppTail)->m_pNext;
                 }
                 pNode->m_pNext = NULL;
                 *ppTail = pNode;

                 pNode = pNext;
             }
         }

         delete [] m_pHashTable;
         m_pHashTable = pNewTable;
         m_nHashTableSize = nSize;
         return true;
     }

     /// Computes the hash of a string (hash = hash * 33 + character).
     /**
         \param key  NUL-terminated string; NULL hashes to 0.
         \return the hash value, before reduction modulo the bucket count.
     */
     static unsigned long HashKey(const char* key)
     {
         if (NULL == key)
             return 0;
         unsigned long nHash = 0;
         while (*key)
             nHash = (nHash << 5) + nHash + *key++;
         return nHash;
     }

     /// Picks a bucket count for a requested size.
     /**
         \param nSize  requested size.
         \return the smallest table entry that is >= nSize, or the largest
                 entry when nSize exceeds all of them.
     */
     static unsigned long AdjustSize(unsigned long nSize)
     {
         // Bucket sizes of the classic SGI STL hashtable, each roughly
         // double the previous one.
         static const unsigned long prime_list[] =
         {
                 53,         97,             193,        389,        769,
                 1543,       3079,           6151,       12289,      24593,
                 49157,      98317,          196613,     393241,     786433,
                 1572869,    3145739,        6291469,    12582917,   25165843,
                 50331653,   100663319,      201326611,  402653189,  805306457,
                 1610612741, 3221225473ul,   4294967291ul
         };

         const int nArrSize = sizeof(prime_list) / sizeof(prime_list[0]);
         int i;
         for (i = 0; i < nArrSize; ++i)
         {
             if (prime_list[i] >= nSize)
                 break;
         }
         if (i == nArrSize)
             --i;

         return prime_list[i];
     }

     /// Destroys the map: frees all nodes and the bucket array itself.
     void Destory()
     {
         Clear();
         delete [] m_pHashTable;
         m_pHashTable = NULL;
         m_nHashTableSize = 0;
     }

 private:
     /// Copying is disabled: declared but intentionally not defined, so an
     /// accidental copy fails at compile or link time instead of producing a
     /// map with uninitialised members.
     StrMap(const StrMap& obj);

     /// Assignment is disabled for the same reason as copying.
     StrMap& operator = (const StrMap& obj);

 protected:
     LPAssoc* m_pHashTable;              ///< bucket array (chain heads)
     unsigned long m_nHashTableSize;     ///< number of buckets
     unsigned long m_nCount;             ///< number of stored elements
     bool    m_bAutoIncr;                ///< grow automatically when the count exceeds the bucket count
     unsigned long m_nMaxLength;         ///< key buffer size; keys must be strictly shorter
 };

下面的代码是 StrMap 的测试以及和hash_map 的性能比较的代码。

 
   view plain 
  
 #include "stdafx.h"  
 #include <iostream>  
 #include "StrMap.h"  
   
 #include <string>  
 #include <list>  
 #include <hash_map>  
 using namespace std ;   
   
 void CompMap(int nItemCount )  
 {  
   
     printf("+++++++++++++++++++++++++++++++++++++++++++++++++++++++/n");   
     printf("开始测试 %ld 个数据的设置。/n", nItemCount);  
   
   
     StrMap<int> map;   
     std::hash_map<string, int> hashMap ;  
   
     list<string> listStr ;  
     char buf[128];  
     int len ;  
       
     ULONG beg = ::GetTickCount();  
     for ( ULONG i=0; i<nItemCount; ++i)  
     {  
         len = rand()%14 + 1;   
         for ( int j=0; j<len; ++j)  
         {  
             buf[j] = rand() % 26 + 'a';  
         }  
         buf[len] = 0 ;   
         listStr.push_back(buf);  
     }  
     printf("构造字符串用时:/t%d/n", ::GetTickCount()- beg);  
   
   
     // test my map   
     list<string>::iterator iter = listStr.begin();  
     beg = ::GetTickCount();   
     while (iter != listStr.end())  
     {  
         map.Set(iter->c_str(), 12);  
         ++iter ;  
     }  
     printf("设置 StrMap 用时:/t%d/n", ::GetTickCount()-beg);      
       
     iter = listStr.begin();  
     beg = ::GetTickCount();   
     while (iter != listStr.end())  
     {  
         hashMap[(*iter) ]  = 12 ;  
         ++iter ;  
     }  
     printf("设置 hash_map 用时:/t%d/n", ::GetTickCount()-beg);    
   
   
     iter = listStr.begin();  
    
     int t;  
     int nIndex = 0;  
     int nFind = 0;  
     int nLost =0;  
   
     beg = GetTickCount();  
     iter = listStr.begin();  
     while (iter != listStr.end())  
     {  
         if (map.Contains(iter->c_str()) )  
             ++nFind;  
         else   
             ++nLost;  
   
         ++nIndex;  
         ++iter ;  
     }  
     printf("遍历查找 strMap Key 用时:/t%d/n", ::GetTickCount() - beg);  
     printf("查询次数 %d /t 查询到 %d次/t 丢失 %d/n", nIndex, nFind, nLost);  
   
   
     nIndex = 0;  
     nFind = 0;  
     nLost =0;  
   
     beg = GetTickCount();  
     iter = listStr.begin();  
     while (iter != listStr.end())  
     {         
         if ( hashMap.find( (*iter)) != hashMap.end() )  
             ++nFind;  
         else   
             ++nLost;  
   
         ++nIndex;  
         ++iter ;  
     }  
     printf("遍历查找 hash_map Key 用时:/t%d/n", ::GetTickCount() - beg);  
     printf("查询次数 %d /t 查询到 %d次/t 丢失 %d/n", nIndex, nFind, nLost);  
   
     beg = GetTickCount();  
     StrMap<int>::iterator iterStrMap = map.Begin();   
     nIndex = 0;  
     while (  iterStrMap != map.End() )  
     {  
         ++ nIndex;  
         ++ iterStrMap;  
     }  
     printf("遍历 strMap 用时:/t%d/t 查找到个数:%ld/n", ::GetTickCount() - beg, nIndex );  
   
   
     beg = GetTickCount();  
     std::hash_map<string, int>::iterator iterHashMap = hashMap.begin();   
     nIndex = 0;  
     while (  iterHashMap != hashMap.end() )  
     {  
         ++ nIndex;  
         ++ iterHashMap;  
     }  
     printf("遍历 hash_map 用时:/t%d/t 查找到个数:%ld/n", ::GetTickCount() - beg, nIndex );  
   
 }  
   
   
 int main(int argc, char* argv[])  
 {  
     CompMap(1000);  
     CompMap(10000);  
     CompMap(20000);  
   
     return 0;  
   
 }  

比较的结果：

 
   view plain 
  
 +++++++++++++++++++++++++++++++++++++++++++++++++++++++  
 开始测试 1000 个数据的设置。  
 构造字符串用时: 15  
 设置 StrMap 用时:       0  
 设置 hash_map 用时:     32  
 遍历查找 strMap Key 用时:       0  
 查询次数 1000    查询到 1000次   丢失 0  
 遍历查找 hash_map Key 用时:     15  
 查询次数 1000    查询到 1000次   丢失 0  
 遍历 strMap 用时:       0        查找到个数:950  
 遍历 hash_map 用时:     0        查找到个数:950  
 +++++++++++++++++++++++++++++++++++++++++++++++++++++++  
 开始测试 10000 个数据的设置。  
 构造字符串用时: 63  
 设置 StrMap 用时:       15  
 设置 hash_map 用时:     344  
 遍历查找 strMap Key 用时:       16  
 查询次数 10000   查询到 10000次  丢失 0  
 遍历查找 hash_map Key 用时:     156  
 查询次数 10000   查询到 10000次  丢失 0  
 遍历 strMap 用时:       0        查找到个数:9011  
 遍历 hash_map 用时:     0        查找到个数:9011  
 +++++++++++++++++++++++++++++++++++++++++++++++++++++++  
 开始测试 20000 个数据的设置。  
 构造字符串用时: 110  
 设置 StrMap 用时:       47  
 设置 hash_map 用时:     1296  
 遍历查找 strMap Key 用时:       32  
 查询次数 20000   查询到 20000次  丢失 0  
 遍历查找 hash_map Key 用时:     468  
 查询次数 20000   查询到 20000次  丢失 0  
 遍历 strMap 用时:       0        查找到个数:17797  
 遍历 hash_map 用时:     16       查找到个数:17797  
 Press any key to continue