GI STL中散列表采用链接法解决冲突。结构中维护了一个vector,vector中每一个元素称为一个桶(bucket),它包含的是一个链表的第一个节点。
下面代码展示了自己编程实现的hash table,C++模板类封装。
如有错误,还请包涵和指正(E-Mail:xiajunhust@gmail.com)!
HashTable.h:
- //《STL源码剖析》5.7 hashtable编程实现
- //Author:江南烟雨
- //E-Mail:xiajunhust@gmail.com
- #include <iostream>
- #include <algorithm>
- #include <vector>
- //hash table中链表节点数据结构定义
- template <class ValueType>
- struct __hashtable_node{
- __hashtable_node *next;
- ValueType val;
- };
- static const int __stl_num_primes = 28;//表格大小(28个质数)
- //28个质数
- static const unsigned long __stl_prime_list[__stl_num_primes] = {
- 53, 97, 193, 389, 769,
- 1543, 3079, 6151, 12289, 24593,
- 49157, 98317, 196613, 393241, 786433,
- 1572869, 3145739, 6291469, 12582917, 25165843,
- 50331653, 100663319, 201326611, 402653189, 805306457,
- 1610612741, 3221225473ul, 4294967291ul
- };
- //得出28个质数中不小于n的那个质数
- inline unsigned long __get_next_prime(unsigned long n)
- {
- const unsigned long *first = __stl_prime_list;
- const unsigned long *last = __stl_prime_list + __stl_num_primes;
- const unsigned long *pos = std::lower_bound(first,last,n);
- return pos == last ? *(last - 1) : *pos;
- }
- template <class T1,class T2>
- void construct(T1 *p,const T2 &value)
- {
- new (p) T1(value);
- }
- template <class T>
- void destroy(T* pointer)
- {
- pointer->~T();//调用析构函数
- }
- //hash函数定义
- //都是仿函数
- template <class KeyType>
- struct hash{};
- //字符串要进行映射
- inline size_t __stl_hash_string(const char *s)
- {
- unsigned long h = 0;
- for (;*s;++s)
- {
- h = 5 * h + *s;
- }
- return size_t(h);
- }
- //类模板显示特化定义
- template <>
- struct hash<int>{
- size_t operator()(int x) const {return x;}
- };
- template <>
- struct hash<char *>{
- size_t operator()(const char *s) const {return __stl_hash_string(s);}
- };
- template <>
- struct hash<const char *>{
- size_t operator()(const char *s) const {return __stl_hash_string(s);}
- };
- template <>
- struct hash<char>{
- size_t operator()(char s) const {return s;}
- };
- template <>
- struct hash<unsigned char>{
- size_t operator()(unsigned char s) const {return s;}
- };
- template <>
- struct hash<signed char>{
- size_t operator()(signed char s) const {return s;}
- };
- // C++ Standard 规定,每一个 Adaptable Unary Function 都必须继承此类别
- template <class Arg,class Result>
- struct unary_function{
- typedef Arg argument_type;
- typedef Result result_type;
- };
- // C++ Standard 规定,每一个 Adaptable Binary Function 都必须继承此类别
- template <class Arg1,class Arg2,class Result>
- struct binary_function{
- typedef Arg1 first_argument_type;
- typedef Arg2 second_argument_type;
- typedef Result result_type;
- };
- //从节点中取出键值的仿函数定义
- //identity function;任何数值通过此函数式后,不会发生任何改变
- template <class T>
- struct identity:public unary_function<T,T>
- {
- const T& operator()(const T& x) const{return x;}
- };
- //判断键值是否相等的仿函数定义
- template <class T>
- struct equal_to:public binary_function<T,T,bool>{
- bool operator()(const T& x,const T& y) const{return x == y;}
- };
- //比较字符串是否相等的仿函数
- struct eqstr{
- bool operator()(const char *s1,const char *s2)const
- {
- return strcmp(s1,s2) == 0;
- }
- };
- //hash table数据结构定义
- //模板参数:
- //ValueType:节点的实值型别
- //KeyType:节点的键值型别
- //HashFcn:hash function的函数型别
- //ExtractKey:从节点中取出键值的方法
- //EqualKey:判断键值是否相同
- template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
- class HashTableClass{
- public:
- typedef struct __hashtable_node<ValueType> node;//hash table内部链表节点定义
- //hash table没有提供默认构造函数
- HashTableClass(size_t n,
- const HashFcn &hf,
- const EqualKey &eql,
- const ExtractKey &ext)
- :hasher(hf),equals(eql),get_key(ext),num_elements(0)
- {
- initialize_buckets(n);
- }
- HashTableClass(size_t n,
- const HashFcn &hf,
- const EqualKey &eql)
- :hasher(hf),equals(eql),get_key(ExtractKey()),num_elements(0)
- {
- initialize_buckets(n);
- }
- HashTableClass(const HashTableClass &ht)
- :hasher(ht.hasher),equals(ht.equals),get_key(ht.get_key),num_elements(0)
- {
- copy_from(&ht);
- }
- ~HashTableClass(){clear();};
- //赋值操作符
- HashTableClass& operator= (const HashTableClass &ht)
- {
- if (&ht != this)
- {
- clear();
- hasher = ht.hasher;
- equals = ht.equals;
- get_key = ht.get_key;
- copy_from(&ht);
- }
- }
- //返回元素个数
- size_t size(){return num_elements;}
- //最大元素数目
- size_t max_size(){return size_t(-1);}
- //返回bucket vector大小
- size_t bucket_count(){return buckets.size();}
- //返回bucket vector可能的最大值
- size_t max_bucket_count(){return __stl_prime_list[__stl_num_primes - 1];}
- //插入元素,不允许重复
- std::pair<node *,bool> insert_unique(const ValueType &obj);
- //插入元素,允许重复
- node* insert_equal(const ValueType &obj);
- //打印所有节点
- void printAllNodes();
- //遍历所有buckets
- void printAllBuckets();
- //查找某一键值的节点
- std::pair<node*,bool> find(const KeyType &key);
- //判断某一值出现的次数
- size_t count(const KeyType &key);
- //整体删除
- void clear();
- //复制hash表
- void copy_from(const HashTableClass *ht);
- private:
- std::vector<node*> buckets;//由桶组成的vector
- size_t num_elements;//总的元素个数
- HashFcn hasher;
- ExtractKey get_key;
- EqualKey equals;
- //节点配置和释放函数
- node* new_node(const ValueType &obj)
- {
- node *tempNode = new node;
- tempNode->next = NULL;
- try
- {
- construct(&tempNode->val,obj);
- }
- catch (...)
- {
- delete tempNode;
- throw;
- return NULL;
- }
- return tempNode;
- }
- void delete_node(node *n)
- {
- destroy(&n->val);
- delete n;
- }
- //初始化buckets vector
- void initialize_buckets(size_t n);
- //返回最接近n并大于等于n的质数
- size_t next_size(size_t n)const{return __get_next_prime(n);}
- //判断是否需要扩充buckets vector,如有需要则进行扩充
- void resize(size_t num_elements_hint);
- //判断元素落在哪个bucket
- //提供两个版本
- //版本一:只接受实值
- size_t bkt_num(const ValueType &obj) const{return bkt_num_key(get_key(obj));}
- //版本二:接受实值和buckets个数
- size_t bkt_num(const ValueType &obj,size_t n) const {return bkt_num_key(get_key(obj),n);}
- //版本一:只接受键值
- size_t bkt_num_key(const KeyType &Key) const{return hasher(Key) % (buckets.size());}
- //版本二:接受键值和buckets个数
- size_t bkt_num_key(const KeyType &Key,size_t n) const {return hasher(Key) % n;}
- //在不需要重新分配bucket vector的情况下插入元素,元素不允许重复
- std::pair<node *,bool> insert_unique_noresize(const ValueType &obj);
- //在不需要重新分配bucket vector的情况下插入元素,元素不允许重复
- node* insert_equal_noresize(const ValueType &obj);
- };
- template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
- std::pair<typename HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::node *,bool>
- HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::insert_unique(const ValueType &obj)
- {
- resize(num_elements + 1);
- return insert_unique_noresize(obj);
- }
- template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
- typename HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::node *
- HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::insert_equal(const ValueType &obj)
- {
- resize(num_elements + 1);
- return insert_equal_noresize(obj);
- }
- template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
- void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::printAllNodes()
- {
- cout << endl;
- cout << "Current node in hash table : " << endl;
- for (size_t i = 0;i < buckets.size();++i)
- {
- typename HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::node* curNode = buckets[i];
- while(curNode)
- {
- cout << curNode->val << " ";
- curNode = curNode->next;
- }
- }
- cout << endl;
- }
- template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
- void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::printAllBuckets()
- {
- cout << endl;
- cout << "Current buckets in hash table : " << endl;
- for (size_t i = 0;i < buckets.size();++i)
- {
- typename HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::node* curNode = buckets[i];
- if(NULL == curNode)
- cout << "buckets[" << i << "] is empty!" << endl;
- else
- {
- size_t count = 0;
- while(curNode)
- {
- ++count;
- curNode = curNode->next;
- }
- cout << "buckets[" << i << "] has " << count << " elements : ";
- curNode = buckets[i];
- while(curNode)
- {
- cout << curNode->val << " ";
- curNode = curNode->next;
- }
- cout << endl;
- }
- }
- }
- template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
- std::pair<typename HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::node *,bool>
- HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::find(const KeyType &key)
- {
- size_t bucket_index = bkt_num_key(key);
- node*first = buckets[bucket_index];
- while(first)
- {
- if (equals(key,get_key(first->val)))
- {
- cout << "find the element " << key << " success" << endl;
- return std::pair<typename HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::node *,bool>(first,true);
- }
- first = first->next;
- }
- cout << "cannot find the element " << key << endl;
- return std::pair<typename HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::node *,bool>(NULL,false);
- }
- template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
- size_t HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::count(const KeyType &key)
- {
- size_t bucket_index = bkt_num_key(key);
- node*first = buckets[bucket_index];
- size_t num = 0;
- while(first)
- {
- if (equals(key,get_key(first->val)))
- {
- ++num;
- }
- first = first->next;
- }
- cout << "The element " << key << " appears " << num << " times" << endl;
- return num;
- }
- template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
- void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::clear()
- {
- for (size_t i = 0;i < buckets.size();++i)
- {
- node* first = buckets[i];
- //删除bucket list中的每个节点
- while(first)
- {
- node *next = first->next;
- delete_node(first);
- first = next;
- }
- buckets[i] = 0;
- }
- //总元素个数置0
- num_elements = 0;
- //vector并未释放掉空间(自动回收)
- }
- template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
- void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::copy_from(const HashTableClass *ht)
- {
- buckets.clear();//清除已有vector
- //使得bucket vector空间和对方相同
- buckets.reserve(ht->buckets.size());
- //插入n个元素,null
- buckets.insert(buckets.end(),ht->buckets.size(),(typename HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::node *)0);
- for (size_t i = 0;i < ht->buckets.size();++i)
- {
- if (const node *cur = ht->buckets[i])
- {
- node *tempNode = new_node(cur->val);
- buckets[i] = tempNode;
- for (node *next = cur->next;next;next = next->next)
- {
- tempNode->next = new_node(next->val);
- tempNode = tempNode->next;
- }
- }
- }
- num_elements = ht->num_elements;
- }
- template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
- void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::initialize_buckets(size_t n)
- {
- const size_t n_buckets = next_size(n);
- buckets.reserve(n_buckets);
- buckets.insert(buckets.end(),n_buckets,(node*)0);
- }
- template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
- void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::resize(size_t num_elements_hint)
- {
- //buckets vector重建与否的标准:
- //比较新的总元素个数和原buckets vector大小
- const size_t old_n_vector = buckets.size();
- //需要重新配置
- if (num_elements_hint > old_n_vector)
- {
- const size_t n = next_size(num_elements_hint);
- if (n > old_n_vector)//有可能vector已到达最大
- {
- vector<node*> tempVec(n,(node *)0);
- for (size_t bucketIndex = 0;bucketIndex < old_n_vector;++bucketIndex)
- {
- node *first = buckets[bucketIndex];//指向节点对应之串行的起始节点
- while(first)
- {
- //计算节点落在哪一个新的bucket内
- size_t new_bucket_index = bkt_num_key(first->val,n);
- buckets[bucketIndex] = first->next;
- first->next = tempVec[new_bucket_index];
- tempVec[new_bucket_index] = first;
- first = buckets[bucketIndex];
- }
- }
- //交换新旧两个bucket vector
- buckets.swap(tempVec);
- }
- }
- }
- template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
- std::pair<typename HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::node *,bool>
- HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::insert_unique_noresize(const ValueType &obj)
- {
- size_t bucket_index = bkt_num(obj);
- node *first = buckets[bucket_index];
- //搜索当前链表
- for (node *curNode = first;curNode;curNode = curNode->next)
- {
- if(equals(get_key(obj),get_key(curNode->val)))
- return pair<typename HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::node*,bool>(curNode,false);
- }
- node *tempNode = new_node(obj);
- tempNode->next = first;
- buckets[bucket_index] = tempNode;
- ++num_elements;
- return pair<typename HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::node*,bool>(tempNode,true);
- }
- template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
- typename HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::node*
- HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::insert_equal_noresize(const ValueType &obj)
- {
- size_t bucket_index = bkt_num(obj);
- node *first = buckets[bucket_index];
- for (node *curNode = first;curNode;curNode = curNode->next)
- {
- //发现与链表中的某键值相等,马上插入,然后返回
- if (equals(get_key(obj),get_key(curNode->val)))
- {
- node *tempNode = new_node(obj);
- tempNode->next = curNode->next;
- curNode->next = tempNode;
- ++num_elements;
- return tempNode;
- }
- }
- //如果没发现键值相等的元素
- node *tempNode = new_node(obj);
- //将其插入链表头部
- tempNode->next = first;
- buckets[bucket_index] = tempNode;
- ++num_elements;
- return tempNode;
- }
HashTable.cpp:
- #include "HashTable.h"
- using namespace std;
- int main()
- {
- HashTableClass<int,int,hash<int>,identity<int>,equal_to<int> > *hashTableObj = new HashTableClass<int,int,hash<int>,identity<int>,equal_to<int> >(50,hash<int>(),equal_to<int>());
- cout << "Hash Table size : " << hashTableObj->size() << endl;
- cout << "Hash Table bucket count : " << hashTableObj->bucket_count() << endl;
- cout << "Hash Table max bucket count : " << hashTableObj->max_bucket_count() << endl;
- hashTableObj->insert_unique(59);
- hashTableObj->insert_unique(63);
- hashTableObj->insert_unique(108);
- hashTableObj->insert_unique(2);
- hashTableObj->insert_unique(53);
- hashTableObj->insert_unique(55);
- cout << "Hash Table size : " << hashTableObj->size() << endl;
- hashTableObj->printAllNodes();
- hashTableObj->printAllBuckets();
- //继续插入元素,使总元素个数达到54个
- for(int i = 0;i <= 47;++i)
- hashTableObj->insert_equal(i);
- cout << endl;
- cout << "Hash Table size : " << hashTableObj->size() << endl;
- cout << "Hash Table bucket count : " << hashTableObj->bucket_count() << endl;
- hashTableObj->printAllNodes();
- hashTableObj->printAllBuckets();
- hashTableObj->find(2);
- hashTableObj->count(2);
- HashTableClass<int,int,hash<int>,identity<int>,equal_to<int> > *hashTableObj2 = new HashTableClass<int,int,hash<int>,identity<int>,equal_to<int> >(20,hash<int>(),equal_to<int>());
- cout << "Hash Table 2 size : " << hashTableObj2->size() << endl;
- cout << "Hash Table 2 bucket count : " << hashTableObj2->bucket_count() << endl;
- cout << "Hash Table 2 max bucket count : " << hashTableObj2->max_bucket_count() << endl;
- hashTableObj2->copy_from(hashTableObj);
- cout << "Hash Table 2 size : " << hashTableObj2->size() << endl;
- cout << "Hash Table 2 bucket count : " << hashTableObj2->bucket_count() << endl;
- cout << "Hash Table 2 max bucket count : " << hashTableObj2->max_bucket_count() << endl;
- HashTableClass<int,int,hash<int>,identity<int>,equal_to<int> > hashTableObj3(*hashTableObj2);
- cout << "Hash Table 3 size : " << hashTableObj3.size() << endl;
- cout << "Hash Table 3 bucket count : " << hashTableObj3.bucket_count() << endl;
- cout << "Hash Table 3 max bucket count : " << hashTableObj3.max_bucket_count() << endl;
- HashTableClass<char *,char *,hash<char *>,identity<char*>,eqstr > *hashTableObjString = new HashTableClass<char *,char *,hash<char *>,identity<char*>,eqstr >(20,hash<char *>(),eqstr());
- hashTableObjString->insert_unique("jun");
- hashTableObjString->insert_unique("hust");
- cout << "Hash Table hashTableObjString size : " << hashTableObjString->size() << endl;
- cout << "Hash Table hashTableObjString bucket count : " << hashTableObjString->bucket_count() << endl;
- cout << "Hash Table hashTableObjString max bucket count : " << hashTableObjString->max_bucket_count() << endl;
- hashTableObjString->printAllNodes();
- //hashTableObjString->printAllBuckets();
- hashTableObjString->find("juu");
- delete hashTableObj;
- delete hashTableObj2;
- delete hashTableObjString;
- return 0;
- }
运行结果(VS2008+Win7):