STL源码剖析（四）：容器（8） hashtable

最新推荐文章于 2023-10-11 12:00:43 发布

曾格0

最新推荐文章于 2023-10-11 12:00:43 发布

阅读量1k

点赞数

分类专栏： C++ 文章标签： c++ 容器

本文链接：https://blog.csdn.net/youaremyalllove/article/details/124085270

版权

C++ 专栏收录该内容

25 篇文章 1 订阅

订阅专栏

本文详细解析了哈希表如何通过哈希链表实现，探讨了set和map基于hashtable的无序性，以及为何选择hashtable作为高效查找的数据结构。同时介绍了hash函数、迭代器特性和hashtable的扩容策略，以及如何在实际应用中权衡性能与排序需求。

摘要由CSDN通过智能技术生成

set和map也可以基于hashtable实现，但是这样set和map是无序的，因为hashtable本身就是无序的。

hashtable节点结构

以哈希链表（开链法）为例，其结构就是一个 buckets vector + 多个 list：vector 的每个元素代表一个 bucket（桶），list 则是哈希到同一个 bucket 的节点集合（这些节点的 key 不一定相同，只是哈希值对 bucket 数量取余的结果相同）。

采用 vector 是为了便于扩充 bucket 的数量：当元素总数超过 bucket 数量时（此时链表往往变长、哈希冲突变多），就重建一个更大的 vector，并把所有节点重新分配到新的 bucket 中（见下文 resize()）。

hash table节点定义<hashtable.h>

// One link of a bucket chain: the stored value plus a pointer to the
// following node of the same chain (null when this node is the last one).
template <typename _Val>
struct _Hashtable_node {
  _Hashtable_node* _M_next;  // successor in the chain
  _Val             _M_val;   // payload held by this node
};

hashtable并不是采用 stl 的list 或 slist，而是自己维护上述的 hash table node

hashtable迭代器

// Forward iterator over the elements of a hashtable.
// Template parameters: value type, key type, hash functor applied to a key,
// functor that extracts the key from a value, key-equality functor, allocator.
// Original note: _ExtractKey is usually identity<T> (the value is its own key)
// — NOTE(review): confirm for map-like containers.
template <class _Val, class _Key, class _HashFcn,
          class _ExtractKey, class _EqualKey, class _Alloc>
struct _Hashtable_iterator {
  typedef hashtable<_Val,_Key,_HashFcn,_ExtractKey,_EqualKey,_Alloc>
          _Hashtable;
  typedef _Hashtable_iterator<_Val, _Key, _HashFcn, 
                              _ExtractKey, _EqualKey, _Alloc>
          iterator;
  typedef _Hashtable_const_iterator<_Val, _Key, _HashFcn, 
                                    _ExtractKey, _EqualKey, _Alloc>
          const_iterator;
  typedef _Hashtable_node<_Val> _Node;

  // Forward-only: no operator-- is declared in this struct.
  typedef forward_iterator_tag iterator_category;
  typedef _Val value_type;
  typedef ptrdiff_t difference_type;
  typedef size_t size_type;
  typedef _Val& reference;
  typedef _Val* pointer;

  _Node* _M_cur;     // node the iterator currently refers to
  _Hashtable* _M_ht;   // the hashtable being iterated (not a vector slot);
                       // presumably used by operator++ to jump to the next bucket

  _Hashtable_iterator(_Node* __n, _Hashtable* __tab) 
    : _M_cur(__n), _M_ht(__tab) {}
  // Default construction leaves both pointers uninitialized.
  _Hashtable_iterator() {}
  // Yields the value stored in the current node.
  reference operator*() const { return _M_cur->_M_val; }
#ifndef __SGI_STL_NO_ARROW_OPERATOR
  pointer operator->() const { return &(operator*()); }
#endif /* __SGI_STL_NO_ARROW_OPERATOR */
  // Increment operators are only declared here; their definitions are not
  // part of this excerpt.
  iterator& operator++();
  iterator operator++(int);
  // Two iterators compare equal when they refer to the same node; _M_ht is
  // not part of the comparison.
  bool operator==(const iterator& __it) const
    { return _M_cur == __it._M_cur; }
  bool operator!=(const iterator& __it) const
    { return _M_cur != __it._M_cur; }
};

hashtable的迭代器没有后退操作，所以没有重载 -- 操作符，也没有定义逆向迭代器 reverse iterator。

hashtable结构

// Forward declaration of hashtable; this is also where the default allocator
// argument is supplied. Presumably it is kept apart from the definition so
// that the iterator templates can name hashtable<...> before the class body
// has been seen — TODO confirm against the full header.
template <class _Val, class _Key, class _HashFcn,
          class _ExtractKey, class _EqualKey, class _Alloc = alloc>  // default allocator
class hashtable;

// ....

// Hash table storing values of type _Val in per-bucket node chains.
// Template parameters: the same list as _Hashtable_iterator (value, key,
// hash functor, key extractor, key equality, allocator).
template <class _Val, class _Key, class _HashFcn,
          class _ExtractKey, class _EqualKey, class _Alloc>
class hashtable {
public:
  typedef _Key key_type;
  typedef _Val value_type;
  typedef _HashFcn hasher;
  typedef _EqualKey key_equal;

  typedef size_t            size_type;   
  typedef ptrdiff_t         difference_type;
  typedef value_type*       pointer;
  typedef const value_type* const_pointer;
  typedef value_type&       reference;
  typedef const value_type& const_reference;

  // Accessors returning copies of the stored hash and key-equality functors.
  hasher hash_funct() const { return _M_hash; }
  key_equal key_eq() const { return _M_equals; }

private:
  typedef _Hashtable_node<_Val> _Node;  // chain node type

private:
  hasher                _M_hash;      // note: these three members are function objects
  key_equal             _M_equals;
  _ExtractKey           _M_get_key;  
  vector<_Node*,_Alloc> _M_buckets;     // bucket array: each slot holds the head node pointer of one chain
  size_type             _M_num_elements;   // element count; original note: compared with bucket_count() to decide when to grow

public:
  typedef _Hashtable_iterator<_Val,_Key,_HashFcn,_ExtractKey,_EqualKey,_Alloc>
          iterator;
  typedef _Hashtable_const_iterator<_Val,_Key,_HashFcn,_ExtractKey,_EqualKey,
                                    _Alloc>
          const_iterator;
//  ....... (remaining members are omitted from this excerpt)
};

resize()：在插入元素时，可能会触发这个函数（hashtable 同样有 unique 和 equal 两个插入版本）。

// Grows the bucket array when __num_elements_hint exceeds the current number
// of buckets, then relinks every existing node into the new array. Only
// pointers are rewired; no node is allocated or copied here. Does nothing
// when the hint fits in the current bucket count.
template <class _Val, class _Key, class _HF, class _Ex, class _Eq, class _All>
void hashtable<_Val,_Key,_HF,_Ex,_Eq,_All>
  ::resize(size_type __num_elements_hint)
{
  const size_type __old_n = _M_buckets.size();
  if (__num_elements_hint > __old_n) {  // more elements than buckets: consider growing
    // Pick the new bucket count (original note: a prime, so that keys spread
    // evenly under modulo); _M_next_size is defined outside this excerpt.
    const size_type __n = _M_next_size(__num_elements_hint);
    if (__n > __old_n) {
      vector<_Node*, _All> __tmp(__n, (_Node*)(0),   // new bucket array, all slots null
                                 _M_buckets.get_allocator());
      __STL_TRY {
        for (size_type __bucket = 0; __bucket < __old_n; ++__bucket) {
          _Node* __first = _M_buckets[__bucket];
          // Drain each old bucket from its head and push every node onto the
          // head of its new bucket's chain.
          while (__first) {
            // Bucket index of this node's value for a table of __n buckets.
            size_type __new_bucket = _M_bkt_num(__first->_M_val, __n);
            _M_buckets[__bucket] = __first->_M_next;   // unlink from the old chain
            __first->_M_next = __tmp[__new_bucket];    // link in front of the new chain
            __tmp[__new_bucket] = __first;
            __first = _M_buckets[__bucket];          
          }
        }
        // Exchange the arrays: _M_buckets now owns the new array, and __tmp
        // holds the old, fully drained one, which is released when __tmp
        // goes out of scope.
        _M_buckets.swap(__tmp);
      }
      // If the try block throws, delete the nodes that were already moved
      // into __tmp and rethrow; nodes still linked in _M_buckets are left alone.
#         ifdef __STL_USE_EXCEPTIONS
      catch(...) {
        for (size_type __bucket = 0; __bucket < __tmp.size(); ++__bucket) {
          while (__tmp[__bucket]) {
            _Node* __next = __tmp[__bucket]->_M_next;
            _M_delete_node(__tmp[__bucket]);
            __tmp[__bucket] = __next;
          }
        }
        throw;
      }
#         endif /* __STL_USE_EXCEPTIONS */
    }
  }
}

hash function:

SGI STL 只为 char*、const char* 以及各种整数类型提供了哈希函数；double、string 这些类型没有现成的哈希函数，需要自己写。

基于hashtable的（multi-）set和map：

基于 hashtable 的 set 和 map 查找速度更快：元素个数一旦大于 bucket 的数量就会扩容，因此每个 bucket 上的链表通常很短，查找一个元素一般只需比较 1~2 次，比红黑树快；但红黑树有优秀的排序功能，能保持元素有序。所以在使用 set 和 map 时，追求高效的执行速度就选基于 hashtable 的版本，需要排序就选基于红黑树的版本。