在使用 hash_map 的过程中，发现 hash_map 对字符串作为键值的支持不是很好，于是特意写了一个新的 StrMap，主要用作字符串型键值的 Map。经过简单的测试，设置和提取键值的速度是 hash_map 的 20 倍左右。现在拿出来和大家分享，也希望大家提出更多的修改意见。谢谢。
下面是 StrMap 的实现代码。
#pragma once
#include <cstddef>
#include <cstring>
/// Hash map keyed by bounded-length C strings.
/**
 * @brief Separate-chaining hash table that stores each key inline in its node.
 *
 * Every node owns a fixed `char[nKeyCapacity]` buffer, so a key must be
 * strictly shorter than nKeyCapacity characters (one byte is needed for the
 * terminating NUL). Longer keys are rejected by Set() and never found by
 * Find()/Contains().
 *
 * When auto-growth is enabled the bucket array is enlarged (and every node
 * re-linked) as soon as the element count exceeds the bucket count. A rehash
 * invalidates the bucket position cached inside outstanding iterators, so do
 * not keep iterating across a Set() that inserts a new key.
 *
 * The class is not copyable.
 *
 * @tparam TValue        Mapped type; must be default-constructible and
 *                       copy-assignable.
 * @tparam nKeyCapacity  Size in bytes of the per-node key buffer, including
 *                       the terminating NUL.
 */
template<typename TValue, int nKeyCapacity = 128>
class StrMap
{
public:
    /// Unsigned type used for sizes, counts and hash values.
    /// It is `unsigned long`, i.e. the very same type as the Win32 `ULONG`.
    typedef unsigned long size_type;

protected:
    /// One node of a bucket chain.
    struct Assoc
    {
        char   m_key[nKeyCapacity]; ///< NUL-terminated copy of the key
        TValue m_data;              ///< mapped value
        Assoc* m_pNext;             ///< next node in the same bucket, or NULL
    };
    typedef Assoc* LPAssoc;

public:
    /// Forward iterator over the stored values (bucket by bucket).
    struct iterator
    {
        friend class StrMap;

        /// Creates a detached iterator that belongs to no map.
        iterator()
            : m_pIter(NULL), m_nIndex(0), m_pMap(NULL)
        {
        }

        /// Pre-increment: moves to the next element; a no-op at the end.
        iterator& operator ++()
        {
            Advance();
            return *this;
        }

        /// Post-increment: moves to the next element and returns the old position.
        iterator operator ++(int)
        {
            iterator tmp(*this);
            Advance();
            return tmp;
        }

        /// Pointer to the current value. Must not be used on an end iterator.
        TValue* operator ->() const
        {
            return &(m_pIter->m_data);
        }

        /// Reference to the current value. Must not be used on an end iterator.
        TValue& operator *() const
        {
            return m_pIter->m_data;
        }

        /// Key of the current element. Must not be used on an end iterator.
        const char* Key() const
        {
            return m_pIter->m_key;
        }

        /// Two iterators differ when they refer to different maps or nodes.
        bool operator != (const iterator& obj) const
        {
            return m_pMap != obj.m_pMap || m_pIter != obj.m_pIter;
        }

        /// Two iterators are equal when they refer to the same map and node.
        bool operator == (const iterator& obj) const
        {
            return m_pMap == obj.m_pMap && m_pIter == obj.m_pIter;
        }

    protected:
        /// Used by StrMap::Begin()/End() to build positioned iterators.
        iterator(StrMap* pMap, LPAssoc pAssoc = NULL, size_type nIndex = 0)
            : m_pIter(pAssoc), m_nIndex(nIndex), m_pMap(pMap)
        {
        }

        /// Steps to the next node: first along the current chain, then to the
        /// head of the next non-empty bucket. Becomes the end iterator
        /// (node NULL, index 0) when nothing is left.
        void Advance()
        {
            if (NULL == m_pIter)
            {
                return; // already at the end (or detached): stay there
            }
            if (m_pIter->m_pNext)
            {
                m_pIter = m_pIter->m_pNext;
                return;
            }
            for (size_type i = m_nIndex + 1; i < m_pMap->m_nHashTableSize; ++i)
            {
                if (m_pMap->m_pHashTable[i])
                {
                    m_pIter = m_pMap->m_pHashTable[i];
                    m_nIndex = i;
                    return;
                }
            }
            m_pIter = NULL;
            m_nIndex = 0;
        }

        LPAssoc   m_pIter;  ///< current node, NULL at the end
        size_type m_nIndex; ///< bucket that holds m_pIter
        StrMap*   m_pMap;   ///< map being iterated
    };

public:
    /// Constructor.
    /**
     * @param nSize     Requested initial bucket count; rounded up to the next
     *                  entry of the internal prime table.
     * @param bAutoIncr Whether the bucket array grows automatically.
     */
    StrMap(size_type nSize = 199, bool bAutoIncr = true)
        : m_pHashTable(NULL)
        , m_nHashTableSize(0)
        , m_nCount(0)
        , m_bAutoIncr(bAutoIncr)
        , m_nMaxLength(nKeyCapacity)
    {
        InitMap(nSize);
    }

    /// Destructor: frees every node and the bucket array.
    virtual ~StrMap(void)
    {
        Destory();
    }

    /// Looks a key up and copies its value out.
    /**
     * @param pKey Key to search for; NULL is never found.
     * @param obj  Receives a copy of the value; left untouched when not found.
     * @return true when the key exists.
     */
    bool Find(const char* pKey, TValue& obj) const
    {
        const Assoc* pAssoc = Lookup(pKey);
        if (NULL == pAssoc)
        {
            return false;
        }
        obj = pAssoc->m_data;
        return true;
    }

    /// Tests whether a key is present.
    /**
     * @param pKey Key to search for; NULL is never found.
     * @return true when the map contains pKey.
     */
    bool Contains(const char* pKey) const
    {
        return NULL != Lookup(pKey);
    }

    /// Inserts a key or overwrites the value of an existing key.
    /**
     * @param pKey Key to set; must be non-NULL and shorter than nKeyCapacity.
     * @param obj  Value to store.
     * @return false when the key is NULL or too long, true otherwise.
     */
    bool Set(const char* pKey, const TValue& obj)
    {
        if (NULL == pKey || NULL == m_pHashTable || 0 == m_nHashTableSize)
        {
            return false;
        }
        if (std::strlen(pKey) >= m_nMaxLength)
        {
            return false;
        }

        // Walk the chain through the link that points at each node, so that
        // the empty-bucket and append-at-tail cases need no special handling.
        LPAssoc* ppLink = &m_pHashTable[HashKey(pKey) % m_nHashTableSize];
        for (; *ppLink; ppLink = &(*ppLink)->m_pNext)
        {
            if (0 == std::strcmp((*ppLink)->m_key, pKey))
            {
                (*ppLink)->m_data = obj;
                return true;
            }
        }

        *ppLink = NewAssoc(pKey, obj);
        ++m_nCount;

        if (m_bAutoIncr && m_nCount > m_nHashTableSize)
        {
            // AdjustSize() saturates at the largest table entry; skip the
            // pointless same-size rehash once that cap has been reached.
            const size_type nNewSize = AdjustSize(m_nCount);
            if (nNewSize != m_nHashTableSize)
            {
                ReSetTableSize(nNewSize);
            }
        }
        return true;
    }

    /// Removes a key; does nothing when the key is absent or NULL.
    /**
     * @param pKey Key to remove.
     */
    void RemoveKey(const char* pKey)
    {
        if (NULL == pKey || NULL == m_pHashTable || 0 == m_nHashTableSize)
        {
            return;
        }
        LPAssoc* ppLink = &m_pHashTable[HashKey(pKey) % m_nHashTableSize];
        for (; *ppLink; ppLink = &(*ppLink)->m_pNext)
        {
            if (0 == std::strcmp((*ppLink)->m_key, pKey))
            {
                LPAssoc pDead = *ppLink;
                *ppLink = pDead->m_pNext; // unlink before freeing
                delete pDead;
                --m_nCount;
                return;
            }
        }
    }

    /// Removes every element.
    /**
     * @brief Frees all nodes but keeps the bucket array allocated.
     */
    void Clear()
    {
        for (size_type i = 0; i < m_nHashTableSize; ++i)
        {
            LPAssoc pAssoc = m_pHashTable[i];
            while (pAssoc)
            {
                LPAssoc pDead = pAssoc;
                pAssoc = pAssoc->m_pNext;
                delete pDead;
            }
            m_pHashTable[i] = NULL;
        }
        m_nCount = 0;
    }

    /// Returns the number of stored elements.
    size_type Size() const
    {
        return m_nCount;
    }

    /// Returns an iterator to the first element, or End() for an empty map.
    iterator Begin()
    {
        for (size_type i = 0; i < m_nHashTableSize; ++i)
        {
            if (m_pHashTable[i])
            {
                return iterator(this, m_pHashTable[i], i);
            }
        }
        return iterator(this, NULL, 0);
    }

    /// Returns the past-the-end iterator.
    iterator End()
    {
        return iterator(this, NULL, 0);
    }

protected:
    /// Returns the node that stores pKey, or NULL when there is none.
    /// NULL keys and keys too long to be stored are reported as absent.
    LPAssoc Lookup(const char* pKey) const
    {
        if (NULL == pKey || NULL == m_pHashTable || 0 == m_nHashTableSize)
        {
            return NULL;
        }
        if (std::strlen(pKey) >= m_nMaxLength)
        {
            return NULL;
        }
        LPAssoc pAssoc = m_pHashTable[HashKey(pKey) % m_nHashTableSize];
        while (pAssoc && 0 != std::strcmp(pAssoc->m_key, pKey))
        {
            pAssoc = pAssoc->m_pNext;
        }
        return pAssoc;
    }

    /// Allocates a fully initialised, unlinked node.
    /// The caller has already verified that pKey fits into the key buffer.
    LPAssoc NewAssoc(const char* pKey, const TValue& obj)
    {
        LPAssoc pAssoc = new Assoc;
        pAssoc->m_pNext = NULL;
        std::strcpy(pAssoc->m_key, pKey);
        try
        {
            pAssoc->m_data = obj;
        }
        catch (...)
        {
            // Do not leak the node if copying the value fails.
            delete pAssoc;
            throw;
        }
        return pAssoc;
    }

    /// (Re)creates an empty bucket array.
    /**
     * @param nSize Requested bucket count; rounded by AdjustSize().
     * @return Always true.
     */
    bool InitMap(size_type nSize)
    {
        const size_type nBuckets = AdjustSize(nSize);
        LPAssoc* pTable = new LPAssoc[nBuckets](); // value-initialised: all NULL
        if (m_pHashTable)
        {
            Clear(); // free the nodes too, not only the bucket array
            delete [] m_pHashTable;
        }
        m_pHashTable = pTable;
        m_nHashTableSize = nBuckets;
        return true;
    }

    /// Changes the bucket count and re-links every node.
    /**
     * @param nSize New bucket count; must be non-zero.
     * @return false when nSize is zero, true otherwise.
     */
    bool ReSetTableSize(size_type nSize)
    {
        if (0 == nSize)
        {
            return false;
        }
        LPAssoc* pNewTable = new LPAssoc[nSize](); // all NULL
        for (size_type i = 0; i < m_nHashTableSize; ++i)
        {
            LPAssoc pNode = m_pHashTable[i];
            while (pNode)
            {
                LPAssoc pNext = pNode->m_pNext;
                pNode->m_pNext = NULL;
                // Append at the tail of the destination chain.
                LPAssoc* ppLink = &pNewTable[HashKey(pNode->m_key) % nSize];
                while (*ppLink)
                {
                    ppLink = &(*ppLink)->m_pNext;
                }
                *ppLink = pNode;
                pNode = pNext;
            }
        }
        delete [] m_pHashTable;
        m_pHashTable = pNewTable;
        m_nHashTableSize = nSize;
        return true;
    }

    /// Computes the hash of a NUL-terminated string.
    /**
     * @param key String to hash; NULL hashes to 0.
     * @return hash = hash * 33 + character, folded over the whole string.
     */
    size_type HashKey(const char* key) const
    {
        if (NULL == key)
        {
            return 0;
        }
        size_type nHash = 0;
        while (*key)
        {
            nHash = (nHash << 5) + nHash + *key++;
        }
        return nHash;
    }

    /// Rounds a requested size up to a bucket count from the prime table.
    /**
     * @param nSize Requested size.
     * @return The first table entry >= nSize, or the largest entry when
     *         nSize exceeds all of them.
     */
    size_type AdjustSize(size_type nSize) const
    {
        // The prime table popularised by the SGI STL hashtable. The previous
        // revision contained typos that made some entries composite
        // (25165842 is even, 786443 = 7 * 112349).
        static const size_type prime_list[] =
        {
            53,         97,         193,        389,        769,
            1543,       3079,       6151,       12289,      24593,
            49157,      98317,      196613,     393241,     786433,
            1572869,    3145739,    6291469,    12582917,   25165843,
            50331653,   100663319,  201326611,  402653189,  805306457,
            1610612741, 3221225473ul, 4294967291ul
        };
        const size_type nArrSize = sizeof(prime_list) / sizeof(prime_list[0]);
        for (size_type i = 0; i < nArrSize; ++i)
        {
            if (prime_list[i] >= nSize)
            {
                return prime_list[i];
            }
        }
        return prime_list[nArrSize - 1];
    }

    /// Destroys the map, including the bucket array itself.
    void Destory()
    {
        Clear();
        delete [] m_pHashTable;
        m_pHashTable = NULL;
        m_nHashTableSize = 0;
    }

private:
    /// Copying is disabled: declared but intentionally not defined, so that
    /// an accidental copy fails at compile or link time instead of producing
    /// two maps that would both free the same nodes.
    StrMap(const StrMap& obj);
    /// Assignment is disabled for the same reason as copying.
    StrMap& operator = (const StrMap& obj);

protected:
    LPAssoc*  m_pHashTable;     ///< bucket array (heads of the chains)
    size_type m_nHashTableSize; ///< number of buckets
    size_type m_nCount;         ///< number of stored elements
    bool      m_bAutoIncr;      ///< grow the bucket array automatically
    size_type m_nMaxLength;     ///< key buffer size (nKeyCapacity)
};
下面是 StrMap 的测试代码，以及它与 hash_map 的性能比较代码。
- #include "stdafx.h"
- #include <iostream>
- #include "StrMap.h"
- #include <string>
- #include <list>
- #include <hash_map>
- using namespace std ;
- void CompMap(int nItemCount )
- {
- printf("+++++++++++++++++++++++++++++++++++++++++++++++++++++++/n");
- printf("开始测试 %ld 个数据的设置。/n", nItemCount);
- StrMap<int> map;
- std::hash_map<string, int> hashMap ;
- list<string> listStr ;
- char buf[128];
- int len ;
- ULONG beg = ::GetTickCount();
- for ( ULONG i=0; i<nItemCount; ++i)
- {
- len = rand()%14 + 1;
- for ( int j=0; j<len; ++j)
- {
- buf[j] = rand() % 26 + 'a';
- }
- buf[len] = 0 ;
- listStr.push_back(buf);
- }
- printf("构造字符串用时:/t%d/n", ::GetTickCount()- beg);
- // test my map
- list<string>::iterator iter = listStr.begin();
- beg = ::GetTickCount();
- while (iter != listStr.end())
- {
- map.Set(iter->c_str(), 12);
- ++iter ;
- }
- printf("设置 StrMap 用时:/t%d/n", ::GetTickCount()-beg);
- iter = listStr.begin();
- beg = ::GetTickCount();
- while (iter != listStr.end())
- {
- hashMap[(*iter) ] = 12 ;
- ++iter ;
- }
- printf("设置 hash_map 用时:/t%d/n", ::GetTickCount()-beg);
- iter = listStr.begin();
- int t;
- int nIndex = 0;
- int nFind = 0;
- int nLost =0;
- beg = GetTickCount();
- iter = listStr.begin();
- while (iter != listStr.end())
- {
- if (map.Contains(iter->c_str()) )
- ++nFind;
- else
- ++nLost;
- ++nIndex;
- ++iter ;
- }
- printf("遍历查找 strMap Key 用时:/t%d/n", ::GetTickCount() - beg);
- printf("查询次数 %d /t 查询到 %d次/t 丢失 %d/n", nIndex, nFind, nLost);
- nIndex = 0;
- nFind = 0;
- nLost =0;
- beg = GetTickCount();
- iter = listStr.begin();
- while (iter != listStr.end())
- {
- if ( hashMap.find( (*iter)) != hashMap.end() )
- ++nFind;
- else
- ++nLost;
- ++nIndex;
- ++iter ;
- }
- printf("遍历查找 hash_map Key 用时:/t%d/n", ::GetTickCount() - beg);
- printf("查询次数 %d /t 查询到 %d次/t 丢失 %d/n", nIndex, nFind, nLost);
- beg = GetTickCount();
- StrMap<int>::iterator iterStrMap = map.Begin();
- nIndex = 0;
- while ( iterStrMap != map.End() )
- {
- ++ nIndex;
- ++ iterStrMap;
- }
- printf("遍历 strMap 用时:/t%d/t 查找到个数:%ld/n", ::GetTickCount() - beg, nIndex );
- beg = GetTickCount();
- std::hash_map<string, int>::iterator iterHashMap = hashMap.begin();
- nIndex = 0;
- while ( iterHashMap != hashMap.end() )
- {
- ++ nIndex;
- ++ iterHashMap;
- }
- printf("遍历 hash_map 用时:/t%d/t 查找到个数:%ld/n", ::GetTickCount() - beg, nIndex );
- }
- int main(int argc, char* argv[])
- {
- CompMap(1000);
- CompMap(10000);
- CompMap(20000);
- return 0;
- }
比较的结果:
- +++++++++++++++++++++++++++++++++++++++++++++++++++++++
- 开始测试 1000 个数据的设置。
- 构造字符串用时: 15
- 设置 StrMap 用时: 0
- 设置 hash_map 用时: 32
- 遍历查找 strMap Key 用时: 0
- 查询次数 1000 查询到 1000次 丢失 0
- 遍历查找 hash_map Key 用时: 15
- 查询次数 1000 查询到 1000次 丢失 0
- 遍历 strMap 用时: 0 查找到个数:950
- 遍历 hash_map 用时: 0 查找到个数:950
- +++++++++++++++++++++++++++++++++++++++++++++++++++++++
- 开始测试 10000 个数据的设置。
- 构造字符串用时: 63
- 设置 StrMap 用时: 15
- 设置 hash_map 用时: 344
- 遍历查找 strMap Key 用时: 16
- 查询次数 10000 查询到 10000次 丢失 0
- 遍历查找 hash_map Key 用时: 156
- 查询次数 10000 查询到 10000次 丢失 0
- 遍历 strMap 用时: 0 查找到个数:9011
- 遍历 hash_map 用时: 0 查找到个数:9011
- +++++++++++++++++++++++++++++++++++++++++++++++++++++++
- 开始测试 20000 个数据的设置。
- 构造字符串用时: 110
- 设置 StrMap 用时: 47
- 设置 hash_map 用时: 1296
- 遍历查找 strMap Key 用时: 32
- 查询次数 20000 查询到 20000次 丢失 0
- 遍历查找 hash_map Key 用时: 468
- 查询次数 20000 查询到 20000次 丢失 0
- 遍历 strMap 用时: 0 查找到个数:17797
- 遍历 hash_map 用时: 16 查找到个数:17797
- Press any key to continue