GCC中libstdc++源码剖析之Hashtable

参考:
https://github.com/Light-City/CPlusPlusThings/blob/master/src_analysis/stl/hashtable.md

基于最新的github上的gcc源码来剖析Hashtable
https://github.com/gcc-mirror/gcc/tree/master/libstdc%2B%2B-v3/include/bits

Hashtable源码部分

https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/hashtable.h

169行,可知_Hashtable的模板声明如下:

template<typename _Key, typename _Value, typename _Alloc,
	   typename _ExtractKey, typename _Equal,
	   typename _H1, typename _H2, typename _Hash,
	   typename _RehashPolicy, typename _Traits>
    class _Hashtable
    : public __detail::_Hashtable_base<_Key, _Value, _ExtractKey, _Equal,
				       _H1, _H2, _Hash, _Traits>,
      public __detail::_Map_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
				 _H1, _H2, _Hash, _RehashPolicy, _Traits>,
      public __detail::_Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal,
			       _H1, _H2, _Hash, _RehashPolicy, _Traits>,
      public __detail::_Rehash_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
				    _H1, _H2, _Hash, _RehashPolicy, _Traits>,
      public __detail::_Equality<_Key, _Value, _Alloc, _ExtractKey, _Equal,
				 _H1, _H2, _Hash, _RehashPolicy, _Traits>,
      private __detail::_Hashtable_alloc<
	__alloc_rebind<_Alloc,
		       __detail::_Hash_node<_Value,
					    _Traits::__hash_cached::value>>>
    {
    ...
    };

_Hashtable的基类

https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/hashtable_policy.h
1735行,_Hashtable_base的模板定义如下

  1. _Hashtable_base
/**
   *  Primary class template _Hashtable_base.
   *
   *  Helper class adding management of _Equal functor to
   *  _Hash_code_base type.
   *
   *  Base class templates are:
   *    - __detail::_Hash_code_base
   *    - __detail::_Hashtable_ebo_helper
   */

  template<typename _Key, typename _Value,
	   typename _ExtractKey, typename _Equal,
	   typename _H1, typename _H2, typename _Hash, typename _Traits>
  struct _Hashtable_base
  : public _Hash_code_base<_Key, _Value, _ExtractKey, _H1, _H2, _Hash,
			   _Traits::__hash_cached::value>,
    private _Hashtable_ebo_helper<0, _Equal>
    {
    ...
    };
template<typename _Key, typename _Value, typename _ExtractKey,
	   typename _H1, typename _H2, typename _Hash,
	   bool __cache_hash_code>
    struct _Hash_code_base;

根据是否缓存,得到其偏特化版本( __cache_hash_code=false/true)

  • 使用范围哈希(实际上就是我们通常说的除留余数法),不缓存hash code。
  /// Specialization: ranged hash function, no caching hash codes.  H1
  /// and H2 are provided but ignored.  We define a dummy hash code type.
  template<typename _Key, typename _Value, typename _ExtractKey,
	   typename _H1, typename _H2, typename _Hash>
    struct _Hash_code_base<_Key, _Value, _ExtractKey, _H1, _H2, _Hash, false>
    : private _Hashtable_ebo_helper<0, _ExtractKey>,
      private _Hashtable_ebo_helper<1, _Hash>
    {
    ...
    };
  • 使用范围哈希(实际上就是我们通常说的除留余数法),缓存hash code。
    从注释看到,这个偏特化没有用,所以只有声明没有定义
    // No specialization for ranged hash function while caching hash codes.
  // That combination is meaningless, and trying to do it is an error.

  /// Specialization: ranged hash function, cache hash codes.  This
  /// combination is meaningless, so we provide only a declaration
  /// and no definition.
  template<typename _Key, typename _Value, typename _ExtractKey,
	   typename _H1, typename _H2, typename _Hash>
    struct _Hash_code_base<_Key, _Value, _ExtractKey, _H1, _H2, _Hash, true>;
  • 有哈希函数以及范围哈希函数,不缓存hash code
  /// Specialization: hash function and range-hashing function, no
  /// caching of hash codes.
  /// Provides typedef and accessor required by C++ 11.
  template<typename _Key, typename _Value, typename _ExtractKey,
	   typename _H1, typename _H2>
    struct _Hash_code_base<_Key, _Value, _ExtractKey, _H1, _H2,
			   _Default_ranged_hash, false>
    : private _Hashtable_ebo_helper<0, _ExtractKey>,
      private _Hashtable_ebo_helper<1, _H1>,
      private _Hashtable_ebo_helper<2, _H2>
    {
    ...
    };
  • 有哈希函数以及范围哈希函数,缓存hash code
 /// Specialization: hash function and range-hashing function,
  /// caching hash codes.  H is provided but ignored.  Provides
  /// typedef and accessor required by C++ 11.
  template<typename _Key, typename _Value, typename _ExtractKey,
	   typename _H1, typename _H2>
    struct _Hash_code_base<_Key, _Value, _ExtractKey, _H1, _H2,
			   _Default_ranged_hash, true>
    : private _Hashtable_ebo_helper<0, _ExtractKey>,
      private _Hashtable_ebo_helper<1, _H1>,
      private _Hashtable_ebo_helper<2, _H2>
    {
    ...
    };

_Hashtable_ebo_helper
https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/hashtable_policy.h
1119行,_Hashtable_ebo_helper的模板定义如下:

备注:EBO(Empty Base Optimization),即空基类优化。
当派生类继承自一个Empty类时,借助EBO,这个基类子对象可以不额外占用空间,从而减小对象的内存占用。
Empty类不是真正的“空”类,只是不包含non-static成员变量,这个类里面往往还包含有enums、typedefs、static成员、non-virtual函数。

/**
   *  Primary class template _Hashtable_ebo_helper.
   *
   *  Helper class using EBO when it is not forbidden (the type is not
   *  final) and when it is worth it (the type is empty.)
   */
  template<int _Nm, typename _Tp,
	   bool __use_ebo = !__is_final(_Tp) && __is_empty(_Tp)>
    struct _Hashtable_ebo_helper;

  /// Specialization using EBO.
  template<int _Nm, typename _Tp>
    struct _Hashtable_ebo_helper<_Nm, _Tp, true>
    : private _Tp
    {
     ...
     };

 /// Specialization not using EBO.
  template<int _Nm, typename _Tp>
    struct _Hashtable_ebo_helper<_Nm, _Tp, false>
    {
    ...
    };

关于上面的H1和H2

https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/hashtable_policy.h
422行,默认的范围哈希函数定义如下:

默认哈希函数:除留余数法

 h(k, N) = h2(h1(k), N)
/// Default range hashing function: use division to fold a large number
  /// into the range [0, N).
  struct _Mod_range_hashing
  {
    typedef std::size_t first_argument_type;
    typedef std::size_t second_argument_type;
    typedef std::size_t result_type;

    result_type
    operator()(first_argument_type __num,
	       second_argument_type __den) const noexcept
    { return __num % __den; }
  };

/// Default ranged hash function H.  In principle it should be a
  /// function object composed from objects of type H1 and H2 such that
  /// h(k, N) = h2(h1(k), N), but that would mean making extra copies of
  /// h1 and h2.  So instead we'll just use a tag to tell class template
  /// hashtable to do that composition.
  struct _Default_ranged_hash { };

上面的_Mod_range_hashing重载了()操作符,就是个仿函数;而_Default_ranged_hash只是一个空的标记(tag)类型,没有重载()操作符。

/// Range hashing function assuming that second arg is a power of 2.
  struct _Mask_range_hashing
  {
    typedef std::size_t first_argument_type;
    typedef std::size_t second_argument_type;
    typedef std::size_t result_type;

    result_type
    operator()(first_argument_type __num,
	       second_argument_type __den) const noexcept
    { return __num & (__den - 1); }
  };

第二个参数为2的幂时的范围哈希函数

rehash操作

散列表为防止碰撞导致效率下降,在存入数据过多时就必须扩容。

先说结论:在最大加载因子取默认值1.0时,当元素(节点)总数超过桶的个数就会扩容(也就是平均每个桶最多存放1个节点;“节点总数超过桶数就扩容”这一点跟侯捷老师的《STL源码剖析》中的hashtable是一致的),每次扩容后桶的个数都取自素数表__prime_list,从而保证桶的个数是素数
这里声明了_Prime_rehash_policy结构体:
https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/hashtable_policy.h
445行,

/// Default value for rehash policy.
/// Bucket size is (usually) the smallest prime that keeps the load factor
/// small enough.
/// Load factor: ratio of the number of stored elements to the number of
/// buckets.  The maximum load factor defaults to 1.0.
  struct _Prime_rehash_policy
  {
    using __has_load_factor = true_type;

    // __z is the maximum load factor; the cached resize threshold
    // _M_next_resize starts at 0.
    _Prime_rehash_policy(float __z = 1.0) noexcept
    : _M_max_load_factor(__z), _M_next_resize(0) { }

    // Accessor for the maximum load factor.
    float
    max_load_factor() const noexcept
    { return _M_max_load_factor; }

    // Return a bucket size no smaller than n.
    // Only declared here; the definition lives outside the struct.
    std::size_t
    _M_next_bkt(std::size_t __n) const;

    // Return a bucket count appropriate for n elements,
    // i.e. ceil(n / max load factor).
    std::size_t
    _M_bkt_for_elements(std::size_t __n) const
    { return __builtin_ceill(__n / (long double)_M_max_load_factor); }

    // __n_bkt is current bucket count, __n_elt is current element count,
    // and __n_ins is number of elements to be inserted.  Do we need to
    // increase bucket count?  If so, return make_pair(true, n), where n
    // is the new bucket count.  If not, return make_pair(false, 0).
    // Only declared here; the definition lives outside the struct.
    std::pair<bool, std::size_t>
    _M_need_rehash(std::size_t __n_bkt, std::size_t __n_elt,
		   std::size_t __n_ins) const;

    // The policy state that can be saved and restored is the cached
    // resize threshold.
    typedef std::size_t _State;

    // Return the current cached resize threshold.
    _State
    _M_state() const
    { return _M_next_resize; }

    // Reset the cached resize threshold to 0.
    void
    _M_reset() noexcept
    { _M_next_resize = 0; }

    // Restore a previously saved resize threshold.
    void
    _M_reset(_State __state)
    { _M_next_resize = __state; }

    static const std::size_t _S_growth_factor = 2;

    float		_M_max_load_factor;
    // mutable: the member functions above that are declared const still
    // need to update this cached value.
    mutable std::size_t	_M_next_resize;
  };

_Prime_rehash_policy结构体中_M_next_bkt和_M_need_rehash这两个成员函数的定义在:gcc/libstdc++-v3/src/c++11/hashtable_c++0x.cc文件中

https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/src/c%2B%2B11/hashtable_c%2B%2B0x.cc
43行,

namespace __detail
{
  // Return a prime no smaller than n.
  // (For __n == 0 the function returns 1, see below.)
  // As a side effect the cached resize threshold _M_next_resize is
  // updated to floor(returned bucket count * max load factor).
  std::size_t
  _Prime_rehash_policy::_M_next_bkt(std::size_t __n) const
  {
    // Optimize lookups involving the first elements of __prime_list.
    // (useful to speed-up, eg, constructors)
    // __fast_bkt[i] is the smallest prime >= i for i in [1, 13]; index 0
    // is never read because __n == 0 is handled separately.
    static const unsigned char __fast_bkt[]
      = { 2, 2, 2, 3, 5, 5, 7, 7, 11, 11, 11, 11, 13, 13 };

    // The table has 14 one-byte entries, so this branch handles __n < 14.
    if (__n < sizeof(__fast_bkt))
      {
	if (__n == 0)
	  return 1;

	// __builtin_floorl is GCC's built-in floor for long double.
	_M_next_resize =
	  __builtin_floorl(__fast_bkt[__n] * (long double)_M_max_load_factor);
	return __fast_bkt[__n];
      }

    // Number of primes (without sentinel).
    // Per the __prime_list table this is 256, or 256 + 48 when
    // unsigned long is 8 bytes.
    constexpr auto __n_primes
      = sizeof(__prime_list) / sizeof(unsigned long) - 1;

    // Don't include the last prime in the search, so that anything
    // higher than the second-to-last prime returns a past-the-end
    // iterator that can be dereferenced to get the last prime.
    constexpr auto __last_prime = __prime_list + __n_primes - 1;

    // __n < 14 was handled above, so here __n >= 14.
    // Find the first entry >= __n in [__prime_list + 6, __last_prime);
    // __prime_list[6] is 17, the smallest prime that is >= 14.
    const unsigned long* __next_bkt =
      std::lower_bound(__prime_list + 6, __last_prime, __n);

    // When the search hits __last_prime the result is the largest prime
    // of the table (not the maximum value of unsigned long):
    //   4294967291ul            when unsigned long is 4 bytes,
    //   18446744073709551557ul  when unsigned long is 8 bytes.
    if (__next_bkt == __last_prime)
      // Set next resize to the max value so that we never try to rehash again
      // as we already reach the biggest possible bucket number.
      // Note that it might result in max_load_factor not being respected.
      _M_next_resize = numeric_limits<size_t>::max();
    else
      _M_next_resize =
	__builtin_floorl(*__next_bkt * (long double)_M_max_load_factor);

    return *__next_bkt;
  }
  // Finds the smallest prime p such that alpha p > __n_elt + __n_ins.
  // If p > __n_bkt, return make_pair(true, p); otherwise return
  // make_pair(false, 0).  In principle this isn't very different from
  // _M_bkt_for_elements.

  // The only tricky part is that we're caching the element count at
  // which we need to rehash, so we don't have to do a floating-point
  // multiply for every insertion.
  //
  // Parameters: __n_bkt is the current bucket count, __n_elt the current
  // element count and __n_ins the number of elements about to be inserted.
  // Returns { true, n } with n the new bucket count when a rehash is
  // needed, { false, 0 } otherwise.
  std::pair<bool, std::size_t>
  _Prime_rehash_policy::
  _M_need_rehash(std::size_t __n_bkt, std::size_t __n_elt,
		 std::size_t __n_ins) const
  {
    // Only look closer when the element count after the insertion would
    // exceed the cached threshold.
    if (__n_elt + __n_ins > _M_next_resize)
      {
	// If _M_next_resize is 0 it means that we have nothing allocated so
	// far and that we start inserting elements. In this case we start
	// with an initial bucket size of 11.
	//
	// _M_next_resize == 0:
	//   __min_bkts = max(__n_elt + __n_ins, 11) / max load factor
	// _M_next_resize != 0:
	//   __min_bkts = (__n_elt + __n_ins) / max load factor
	long double __min_bkts
	  = std::max<std::size_t>(__n_elt + __n_ins, _M_next_resize ? 0 : 11)
	  / (long double)_M_max_load_factor;
	if (__min_bkts >= __n_bkt)
	  // The current buckets are not enough.  Take the larger of
	  // floor(__min_bkts) + 1 and __n_bkt * _S_growth_factor (the
	  // factor is 2) and let _M_next_bkt pick the bucket count for it.
	  // So a rehash at least doubles the bucket count, and grows further
	  // when doubling would still be too small.
	  return { true,
	    _M_next_bkt(std::max<std::size_t>(__builtin_floorl(__min_bkts) + 1,
					      __n_bkt * _S_growth_factor)) };

	// The current buckets are sufficient: refresh the cached threshold
	// from the current bucket count and report that no rehash is needed.
	_M_next_resize
	  = __builtin_floorl(__n_bkt * (long double)_M_max_load_factor);
	return { false, 0 };
      }
    else
      return { false, 0 };
  }
} // namespace __detail

其中的__prime_list为素数表
https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/src/shared/hashtable-aux.cc
里面总共有256+1或者256+49个。 如果sizeof(unsigned long)!=8 就是256+1个,否则就是256+49个。

namespace __detail

{

  // The sentinel value is kept only for abi backward compatibility.

  extern const unsigned long __prime_list[] = // 256 + 1 or 256 + 48 + 1

  {

    2ul, 3ul, 5ul, 7ul, 11ul, 13ul, 17ul, 19ul, 23ul, 29ul, 31ul,

    37ul, 41ul, 43ul, 47ul, 53ul, 59ul, 61ul, 67ul, 71ul, 73ul, 79ul,

    83ul, 89ul, 97ul, 103ul, 109ul, 113ul, 127ul, 137ul, 139ul, 149ul,

    157ul, 167ul, 179ul, 193ul, 199ul, 211ul, 227ul, 241ul, 257ul,

    277ul, 293ul, 313ul, 337ul, 359ul, 383ul, 409ul, 439ul, 467ul,

    503ul, 541ul, 577ul, 619ul, 661ul, 709ul, 761ul, 823ul, 887ul,

    953ul, 1031ul, 1109ul, 1193ul, 1289ul, 1381ul, 1493ul, 1613ul,

    1741ul, 1879ul, 2029ul, 2179ul, 2357ul, 2549ul, 2753ul, 2971ul,

    3209ul, 3469ul, 3739ul, 4027ul, 4349ul, 4703ul, 5087ul, 5503ul,

    5953ul, 6427ul, 6949ul, 7517ul, 8123ul, 8783ul, 9497ul, 10273ul,

    11113ul, 12011ul, 12983ul, 14033ul, 15173ul, 16411ul, 17749ul,

    19183ul, 20753ul, 22447ul, 24281ul, 26267ul, 28411ul, 30727ul,

    33223ul, 35933ul, 38873ul, 42043ul, 45481ul, 49201ul, 53201ul,

    57557ul, 62233ul, 67307ul, 72817ul, 78779ul, 85229ul, 92203ul,

    99733ul, 107897ul, 116731ul, 126271ul, 136607ul, 147793ul,

    159871ul, 172933ul, 187091ul, 202409ul, 218971ul, 236897ul,

    256279ul, 277261ul, 299951ul, 324503ul, 351061ul, 379787ul,

    410857ul, 444487ul, 480881ul, 520241ul, 562841ul, 608903ul,

    658753ul, 712697ul, 771049ul, 834181ul, 902483ul, 976369ul,

    1056323ul, 1142821ul, 1236397ul, 1337629ul, 1447153ul, 1565659ul,

    1693859ul, 1832561ul, 1982627ul, 2144977ul, 2320627ul, 2510653ul,

    2716249ul, 2938679ul, 3179303ul, 3439651ul, 3721303ul, 4026031ul,

    4355707ul, 4712381ul, 5098259ul, 5515729ul, 5967347ul, 6456007ul,

    6984629ul, 7556579ul, 8175383ul, 8844859ul, 9569143ul, 10352717ul,

    11200489ul, 12117689ul, 13109983ul, 14183539ul, 15345007ul,

    16601593ul, 17961079ul, 19431899ul, 21023161ul, 22744717ul,

    24607243ul, 26622317ul, 28802401ul, 31160981ul, 33712729ul,

    36473443ul, 39460231ul, 42691603ul, 46187573ul, 49969847ul,

    54061849ul, 58488943ul, 63278561ul, 68460391ul, 74066549ul,

    80131819ul, 86693767ul, 93793069ul, 101473717ul, 109783337ul,

    118773397ul, 128499677ul, 139022417ul, 150406843ul, 162723577ul,

    176048909ul, 190465427ul, 206062531ul, 222936881ul, 241193053ul,

    260944219ul, 282312799ul, 305431229ul, 330442829ul, 357502601ul,

    386778277ul, 418451333ul, 452718089ul, 489790921ul, 529899637ul,

    573292817ul, 620239453ul, 671030513ul, 725980837ul, 785430967ul,

    849749479ul, 919334987ul, 994618837ul, 1076067617ul, 1164186217ul,

    1259520799ul, 1362662261ul, 1474249943ul, 1594975441ul, 1725587117ul,

    1866894511ul, 2019773507ul, 2185171673ul, 2364114217ul, 2557710269ul,

    2767159799ul, 2993761039ul, 3238918481ul, 3504151727ul, 3791104843ul,

    4101556399ul, 4294967291ul,

    // Sentinel, so we don't have to test the result of lower_bound,

    // or, on 64-bit machines, rest of the table.

#if __SIZEOF_LONG__ != 8

    4294967291ul

#else

    6442450933ul, 8589934583ul, 12884901857ul, 17179869143ul,

    25769803693ul, 34359738337ul, 51539607367ul, 68719476731ul,

    103079215087ul, 137438953447ul, 206158430123ul, 274877906899ul,

    412316860387ul, 549755813881ul, 824633720731ul, 1099511627689ul,

    1649267441579ul, 2199023255531ul, 3298534883309ul, 4398046511093ul,

    6597069766607ul, 8796093022151ul, 13194139533241ul, 17592186044399ul,

    26388279066581ul, 35184372088777ul, 52776558133177ul, 70368744177643ul,

    105553116266399ul, 140737488355213ul, 211106232532861ul, 281474976710597ul,

    562949953421231ul, 1125899906842597ul, 2251799813685119ul,

    4503599627370449ul, 9007199254740881ul, 18014398509481951ul,

    36028797018963913ul, 72057594037927931ul, 144115188075855859ul,

    288230376151711717ul, 576460752303423433ul,

    1152921504606846883ul, 2305843009213693951ul,

    4611686018427387847ul, 9223372036854775783ul,

    18446744073709551557ul, 18446744073709551557ul

#endif

  };

} // namespace __detail

_Hashtable_ebo_helper是借助EBO(空基类优化)来存放仿函数的辅助类;而_Hashtable_base这个Helper class则是在_Hash_code_base的基础上增加了对相等比较仿函数_Equal(_Equal functor)的管理

_Map_base主要是通过偏特化,实现重载操作符[]与at。

_Insert主要完成插入相关。

_Rehash_base主要完成上述rehash中的最大加载因子值的传递。

_Equality_base主要是为类_Equality提供公共类型与函数。

hashtable中链表的节点结构

_Hash_node_base类:

/**
   *  struct _Hash_node_base
   *
   *  Nodes, used to wrap elements stored in the hash table.  A policy
   *  template parameter of class template _Hashtable controls whether
   *  nodes also store a hash code. In some cases (e.g. strings) this
   *  may be a performance win.
   *
   *  This base part holds only the link to the next node, so it carries
   *  no value and no hash code.
   */
  struct _Hash_node_base
  {
    // Next node in the singly linked list; null when there is none.
    _Hash_node_base* _M_nxt;

    // Default construction value-initializes the link, i.e. sets it to null.
    _Hash_node_base() noexcept : _M_nxt() { }

    // Construct a node whose link already points at __next.
    _Hash_node_base(_Hash_node_base* __next) noexcept : _M_nxt(__next) { }
  };

继承自_Hash_node_base的类 _Hash_node_value_base :

/**
   *  struct _Hash_node_value_base
   *
   *  Node type with the value to store.
   *
   *  Adds storage for one _Value on top of the link inherited from
   *  _Hash_node_base, plus pointer and reference accessors to it.
   */
  template<typename _Value>
    struct _Hash_node_value_base : _Hash_node_base
    {
      typedef _Value value_type;
      // NOTE(review): __aligned_buffer is defined in ext/aligned_buffer.h,
      // which is not shown here.  Presumably it is raw storage that is
      // large enough and suitably aligned for one _Value - confirm.
      __gnu_cxx::__aligned_buffer<_Value> _M_storage;

      // Pointer to the stored value (mutable access).
      _Value*
      _M_valptr() noexcept
      { return _M_storage._M_ptr(); }

      // Pointer to the stored value (read-only access).
      const _Value*
      _M_valptr() const noexcept
      { return _M_storage._M_ptr(); }

      // Reference to the stored value (mutable access).
      _Value&
      _M_v() noexcept
      { return *_M_valptr(); }

      // Reference to the stored value (read-only access).
      const _Value&
      _M_v() const noexcept
      { return *_M_valptr(); }
    };

是否带有hash code的相关类_Hash_node:

  /**
   *  Primary template struct _Hash_node.
   */
  template<typename _Value, bool _Cache_hash_code>
    struct _Hash_node;

  /**
   *  Specialization for nodes with caches, struct _Hash_node.
   *
   *  Base class is __detail::_Hash_node_value_base.
   */
  template<typename _Value>
    struct _Hash_node<_Value, true> : _Hash_node_value_base<_Value>
    {
      std::size_t  _M_hash_code;

      _Hash_node*
      _M_next() const noexcept
      { return static_cast<_Hash_node*>(this->_M_nxt); }
    };

  /**
   *  Specialization for nodes without caches, struct _Hash_node.
   *
   *  Base class is __detail::_Hash_node_value_base.
   */
  template<typename _Value>
    struct _Hash_node<_Value, false> : _Hash_node_value_base<_Value>
    {
      _Hash_node*
      _M_next() const noexcept
      { return static_cast<_Hash_node*>(this->_M_nxt); }
    };

关于

//返回一个能容纳_Value类型值的一个unsigned char数组,并且保证数组是按_Value类型类型字节对齐的
__gnu_cxx::__aligned_buffer<_Value> _M_storage;

__gnu_cxx::__aligned_buffer相关文件定义在:
https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/ext/aligned_buffer.h

迭代器

https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/hashtable_policy.h

  • 节点迭代器基类_Node_iterator_base:
 /// Base class for node iterators.
  template<typename _Value, bool _Cache_hash_code>
    struct _Node_iterator_base
    {
      using __node_type = _Hash_node<_Value, _Cache_hash_code>;

      __node_type*  _M_cur;

      _Node_iterator_base(__node_type* __p) noexcept
      : _M_cur(__p) { }

      void
      _M_incr() noexcept
      { _M_cur = _M_cur->_M_next(); }
    };
  • //operator==和operator!=
  template<typename _Value, bool _Cache_hash_code>
    inline bool
    operator==(const _Node_iterator_base<_Value, _Cache_hash_code>& __x,
	       const _Node_iterator_base<_Value, _Cache_hash_code >& __y)
    noexcept
    { return __x._M_cur == __y._M_cur; }

  template<typename _Value, bool _Cache_hash_code>
    inline bool
    operator!=(const _Node_iterator_base<_Value, _Cache_hash_code>& __x,
	       const _Node_iterator_base<_Value, _Cache_hash_code>& __y)
    noexcept
    { return __x._M_cur != __y._M_cur; }
  • 继承自_Node_iterator_base的类_Node_iterator:
    重载了*、->、前置++、后置++这四个操作符(它是前向迭代器,只能前进,没有重载--)

类成员访问运算符( -> )可以被重载,但它较为麻烦。它被定义用于为一个类赋予"指针"行为。运算符 -> 必须是一个成员函数。如果使用了 -> 运算符,返回类型必须是指针或者是类的对象。

示例:
class Ptr{
//…
X * operator->();
};

类 Ptr 的对象可用于访问类 X 的成员,使用方式与指针的用法十分相似。例如:
void f(Ptr p )
{
p->m = 10 ; // (p.operator->())->m = 10
}
语句 p->m 被解释为 (p.operator->())->m

  /// Node iterators, used to iterate through all the hashtable.
  /// Forward iterator only: it provides operator*, operator-> and both
  /// forms of operator++, but no operator--.
  template<typename _Value, bool __constant_iterators, bool __cache>
    struct _Node_iterator
    : public _Node_iterator_base<_Value, __cache>
    {
    private:
      using __base_type = _Node_iterator_base<_Value, __cache>;
      using __node_type = typename __base_type::__node_type;

    public:
      typedef _Value					value_type;
      typedef std::ptrdiff_t				difference_type;
      typedef std::forward_iterator_tag			iterator_category;

      // pointer/reference are const-qualified when __constant_iterators
      // is true.
      using pointer = typename std::conditional<__constant_iterators,
						const _Value*, _Value*>::type;

      using reference = typename std::conditional<__constant_iterators,
						  const _Value&, _Value&>::type;

      // Default construction yields an iterator holding a null node pointer.
      _Node_iterator() noexcept
      : __base_type(0) { }

      // Construct an iterator positioned on node __p.
      explicit
      _Node_iterator(__node_type* __p) noexcept
      : __base_type(__p) { }

      // Reference to the value stored in the current node.
      reference
      operator*() const noexcept
      { return this->_M_cur->_M_v(); }

      // Pointer to the value stored in the current node.
      pointer
      operator->() const noexcept
      { return this->_M_cur->_M_valptr(); }

      // Prefix form (++it): advance, then return *this.
      _Node_iterator&
      operator++() noexcept
      {
	this->_M_incr();
	return *this;
      }

      // Postfix form (it++): the unused int parameter distinguishes it
      // from the prefix form declared above.
      _Node_iterator
      operator++(int) noexcept
      {
	_Node_iterator __tmp(*this);
	this->_M_incr();
	return __tmp; // the iterator as it was before the increment
      }
    };
  • 继承自_Node_iterator_base的类_Node_const_iterator:
  /// Node const_iterators, used to iterate through all the hashtable.
  template<typename _Value, bool __constant_iterators, bool __cache>
    struct _Node_const_iterator
    : public _Node_iterator_base<_Value, __cache>
    {
    private:
      using __base_type = _Node_iterator_base<_Value, __cache>;
      using __node_type = typename __base_type::__node_type;

    public:
      typedef _Value					value_type;
      typedef std::ptrdiff_t				difference_type;
      typedef std::forward_iterator_tag			iterator_category;

      typedef const _Value*				pointer;
      typedef const _Value&				reference;

      _Node_const_iterator() noexcept
      : __base_type(0) { }

      explicit
      _Node_const_iterator(__node_type* __p) noexcept
      : __base_type(__p) { }

      _Node_const_iterator(const _Node_iterator<_Value, __constant_iterators,
			   __cache>& __x) noexcept
      : __base_type(__x._M_cur) { }

      reference
      operator*() const noexcept
      { return this->_M_cur->_M_v(); }

      pointer
      operator->() const noexcept
      { return this->_M_cur->_M_valptr(); }

      _Node_const_iterator&
      operator++() noexcept
      {
	this->_M_incr();
	return *this;
      }

      _Node_const_iterator
      operator++(int) noexcept
      {
	_Node_const_iterator __tmp(*this);
	this->_M_incr();
	return __tmp;
      }
    };

hashtable的内部结构

hashtable定义如下:
去掉了很多内容,只关注于里面少数有助于理解的部分

template<typename _Key, typename _Value, typename _Alloc,
	   typename _ExtractKey, typename _Equal,
	   typename _H1, typename _H2, typename _Hash,
	   typename _RehashPolicy, typename _Traits>
    class _Hashtable
    : public __detail::_Hashtable_base<_Key, _Value, _ExtractKey, _Equal,
				       _H1, _H2, _Hash, _Traits>,
      public __detail::_Map_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
				 _H1, _H2, _Hash, _RehashPolicy, _Traits>,
      public __detail::_Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal,
			       _H1, _H2, _Hash, _RehashPolicy, _Traits>,
      public __detail::_Rehash_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
				    _H1, _H2, _Hash, _RehashPolicy, _Traits>,
      public __detail::_Equality<_Key, _Value, _Alloc, _ExtractKey, _Equal,
				 _H1, _H2, _Hash, _RehashPolicy, _Traits>,
      private __detail::_Hashtable_alloc<
	__alloc_rebind<_Alloc,
		       __detail::_Hash_node<_Value,
					    _Traits::__hash_cached::value>>>
    {
 ...
       using __node_base = typename __hashtable_alloc::__node_base;
       using __bucket_type = typename __hashtable_alloc::__bucket_type;
....//hashtable_policy.h中1998行可以看出
    //  using __node_base = __detail::_Hash_node_base;
    //  using __bucket_type = __node_base*;      
      
    private:
 //这里的二重指针_M_buckets要理解为指向动态指针数组的指针,就跟vector<__node_base*>差不多了
      __bucket_type*		_M_buckets		= &_M_single_bucket;//__node_base** _M_buckets是指向__node_base节点指针的指针
      size_type			_M_bucket_count		= 1;//桶的个数
      __node_base		_M_before_begin;
      size_type			_M_element_count	= 0;//元素个数
      _RehashPolicy		_M_rehash_policy; //rehash策略

      // A single bucket used when only need for 1 bucket. Especially
      // interesting in move semantic to leave hashtable with only 1 bucket
      // which is not allocated so that we can have those operations noexcept
      // qualified.
      // Note that we can't leave hashtable with 0 bucket without adding
      // numerous checks in the code to avoid 0 modulus.
      __bucket_type		_M_single_bucket	= nullptr;//__node_base* _M_single_bucket;指向__node_base的指针
     
 ....

}
  • 构造函数_Hashtable() 与析构函数~_Hashtable()
   // Constructor, destructor, assignment, swap
      _Hashtable() = default;
      _Hashtable(size_type __bkt_count_hint,
		 const _H1&, const _H2&, const _Hash&,
		 const _Equal&, const _ExtractKey&,
		 const allocator_type&);

      template<typename _InputIterator>
	_Hashtable(_InputIterator __first, _InputIterator __last,
		   size_type __bkt_count_hint,
		   const _H1&, const _H2&, const _Hash&,
		   const _Equal&, const _ExtractKey&,
		   const allocator_type&);

      _Hashtable(const _Hashtable&);

      _Hashtable(_Hashtable&&) noexcept;

      _Hashtable(const _Hashtable&, const allocator_type&);

      _Hashtable(_Hashtable&&, const allocator_type&);

      // Use delegating constructors.
      explicit
      _Hashtable(const allocator_type& __a)
      : __hashtable_alloc(__node_alloc_type(__a))
      { }

      explicit
      _Hashtable(size_type __bkt_count_hint,
		 const _H1& __hf = _H1(),
		 const key_equal& __eql = key_equal(),
		 const allocator_type& __a = allocator_type())
      : _Hashtable(__bkt_count_hint, __hf, _H2(), _Hash(), __eql,
		   __key_extract(), __a)
      { }

      template<typename _InputIterator>
	_Hashtable(_InputIterator __f, _InputIterator __l,
		   size_type __bkt_count_hint = 0,
		   const _H1& __hf = _H1(),
		   const key_equal& __eql = key_equal(),
		   const allocator_type& __a = allocator_type())
	: _Hashtable(__f, __l, __bkt_count_hint, __hf, _H2(), _Hash(), __eql,
		     __key_extract(), __a)
	{ }

      _Hashtable(initializer_list<value_type> __l,
		 size_type __bkt_count_hint = 0,
		 const _H1& __hf = _H1(),
		 const key_equal& __eql = key_equal(),
		 const allocator_type& __a = allocator_type())
      : _Hashtable(__l.begin(), __l.end(), __bkt_count_hint,
		   __hf, _H2(), _Hash(), __eql,
		   __key_extract(), __a)
      { }

      ~_Hashtable() noexcept;

分析其中一个构造函数
https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/hashtable.h
990行,

// Constructor taking a bucket count hint: delegates to the functor/allocator
// constructor, then sizes the bucket array from the hint.
template<typename _Key, typename _Value,
	   typename _Alloc, typename _ExtractKey, typename _Equal,
	   typename _H1, typename _H2, typename _Hash, typename _RehashPolicy,
	   typename _Traits>
    _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
	       _H1, _H2, _Hash, _RehashPolicy, _Traits>::
    _Hashtable(size_type __bkt_count_hint,
	       const _H1& __h1, const _H2& __h2, const _Hash& __h,
	       const _Equal& __eq, const _ExtractKey& __exk,
	       const allocator_type& __a)
    : _Hashtable(__h1, __h2, __h, __eq, __exk, __a)
    {
      // Ask the rehash policy for a bucket count no smaller than the hint.
      auto __bkt_count = _M_rehash_policy._M_next_bkt(__bkt_count_hint);
      // Only replace the bucket array when more buckets are wanted than
      // the table currently has.  This is the initial allocation for a
      // table under construction rather than a growth of an existing one.
      if (__bkt_count > _M_bucket_count)
	{
	  // NOTE(review): _M_allocate_buckets is not shown here; presumably
	  // it returns a new array of __bkt_count buckets - confirm.
	  _M_buckets = _M_allocate_buckets(__bkt_count);
	  _M_bucket_count = __bkt_count; // record the new bucket count
	}
    }

分析其中另外一个基于迭代器范围的构造函数
https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/hashtable.h
1010行,

  // Constructor from an iterator range [__f, __l): sizes the bucket array
  // for the range and the hint, then inserts the elements one by one.
  template<typename _Key, typename _Value,
	   typename _Alloc, typename _ExtractKey, typename _Equal,
	   typename _H1, typename _H2, typename _Hash, typename _RehashPolicy,
	   typename _Traits>
    template<typename _InputIterator>
      _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
		 _H1, _H2, _Hash, _RehashPolicy, _Traits>::
      _Hashtable(_InputIterator __f, _InputIterator __l,
		 size_type __bkt_count_hint,
		 const _H1& __h1, const _H2& __h2, const _Hash& __h,
		 const _Equal& __eq, const _ExtractKey& __exk,
		 const allocator_type& __a)
      : _Hashtable(__h1, __h2, __h, __eq, __exk, __a)
      {
	// NOTE(review): __distance_fw is not shown here; presumably it yields
	// the number of elements in [__f, __l) - confirm.
	auto __nb_elems = __detail::__distance_fw(__f, __l);
	// _M_bkt_for_elements(__nb_elems) evaluates
	// ceil(__nb_elems / max load factor), i.e. it scales the element
	// count by the load factor.  The larger of that value and the
	// caller's hint is handed to _M_next_bkt, which returns a bucket
	// count no smaller than it.
	auto __bkt_count =
	  _M_rehash_policy._M_next_bkt(
	    std::max(_M_rehash_policy._M_bkt_for_elements(__nb_elems),
		     __bkt_count_hint));

	// Replace the bucket array only when more buckets are wanted than
	// the table currently has.
	if (__bkt_count > _M_bucket_count)
	  {
	    // NOTE(review): _M_allocate_buckets is not shown here; presumably
	    // it returns a new array of __bkt_count buckets - confirm.
	    _M_buckets = _M_allocate_buckets(__bkt_count);
	    _M_bucket_count = __bkt_count; // record the new bucket count
	  }

	// Insert the elements of the range one after another.
	for (; __f != __l; ++__f)
	  this->insert(*__f);
      }
  • 插入节点的函数
    _M_insert_bucket_begin(size_type __bkt, __node_type* __node);
    将指针__node指向的节点插入到__bkt号桶中
// Insert the node pointed to by __node at the beginning of bucket __bkt.
//
// How to read this function: the code treats all nodes of the table as ONE
// singly linked list whose head is _M_before_begin (a link-only node of type
// _Hash_node_base), and _M_buckets[i] does not point at the first node of
// bucket i but at the node that comes right BEFORE it in that list.  This is
// visible below: a node is always linked in after *_M_buckets[__bkt].
// For the bucket whose first node is also the first node of the whole list,
// the "node before" is &_M_before_begin.
template<typename _Key, typename _Value,
	   typename _Alloc, typename _ExtractKey, typename _Equal,
	   typename _H1, typename _H2, typename _Hash, typename _RehashPolicy,
	   typename _Traits>
    void
    _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
	       _H1, _H2, _Hash, _RehashPolicy, _Traits>::
    _M_insert_bucket_begin(size_type __bkt, __node_type* __node)
    {
      // Bucket __bkt is not empty.
      if (_M_buckets[__bkt])
	{
	  // Link __node in right after the bucket's "before" node, so that
	  // __node becomes the new first node of this bucket.
	  __node->_M_nxt = _M_buckets[__bkt]->_M_nxt;
	  _M_buckets[__bkt]->_M_nxt = __node;
	}
      else
	{
	  // The bucket is empty, the new node is inserted at the
	  // beginning of the singly-linked list and the bucket will
	  // contain _M_before_begin pointer.

	  // An empty bucket has no "before" node to link after, so __node is
	  // pushed to the very front of the list instead, directly behind
	  // _M_before_begin.
	  __node->_M_nxt = _M_before_begin._M_nxt;
	  _M_before_begin._M_nxt = __node;
	  if (__node->_M_nxt)
	    // We must update former begin bucket that is pointing to  _M_before_begin.
	    // The node that used to be first in the list now comes after
	    // __node, so the bucket owning it must store __node as its
	    // "before" node.  _M_next() is _M_nxt cast to the node type.
	    // NOTE(review): the one-argument _M_bucket_index overload used
	    // here is not shown; presumably it maps a node to its bucket
	    // index using the current bucket count - confirm.
	    _M_buckets[_M_bucket_index(__node->_M_next())] = __node;
	  // Bucket __bkt now starts at the front of the list, so its "before"
	  // node is _M_before_begin itself.
	  _M_buckets[__bkt] = &_M_before_begin;
	}
    }

那下面看看_M_bucket_index函数

  • _M_bucket_index
    根据参数获取桶号(index)
std::size_t
      _M_bucket_index(const _Key&, __hash_code __c,
		      std::size_t __bkt_count) const
      { return _M_h2()(__c, __bkt_count); }

传入_Key的 __hash_code和std::size_t __bkt_count后直接用除留余数法算桶号,

_Mod_range_hashing,定义如下:

 /// Default range hashing function: use division to fold a large number
  /// into the range [0, N).
  struct _Mod_range_hashing
  {
    typedef std::size_t first_argument_type;
    typedef std::size_t second_argument_type;
    typedef std::size_t result_type;

    result_type
    operator()(first_argument_type __num,
	       second_argument_type __den) const noexcept
    { return __num % __den; }
  };
      // Bucket index of node __p for a table with __bkt_count buckets:
      // passes the hash code stored in the node and the bucket count to
      // the functor returned by _M_h2().
      std::size_t
      _M_bucket_index(const __node_type* __p, std::size_t __bkt_count) const
	noexcept( noexcept(declval<const _H2&>()((__hash_code)0,
						 (std::size_t)0)) )
	// NOTE(review): of the two node specializations, only
	// _Hash_node<_Value, true> declares a _M_hash_code member, so this
	// overload cannot work with __node_type = _Hash_node<_Value, false>.
	// Presumably it belongs to the hash-code-caching specialization of
	// _Hash_code_base, whose __node_type would be
	// _Hash_node<_Value, true> - confirm against hashtable_policy.h.
      { return _M_h2()(__p->_M_hash_code, __bkt_count); }

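//上面的_M_bucket_index用到了__p->_M_hash_code,应当属于缓存哈希值(节点类型为_Hash_node<_Value, true>)的那个_Hash_code_base特化版本;下面还找到了另一份_M_bucket_index函数定义,它不读取缓存的哈希值,而是通过_M_ranged_hash()由key重新计算桶号。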
     // Bucket index of an existing node without reading a stored hash code:
     // extracts the key from the node's value via _M_extract() and passes it,
     // together with the bucket count, to the functor returned by
     // _M_ranged_hash().  The noexcept specification is conditional on whether
     // invoking a const _Hash with (const _Key&, std::size_t) is noexcept.
     std::size_t
      _M_bucket_index(const __node_type* __p, std::size_t __bkt_count) const
	noexcept( noexcept(declval<const _Hash&>()(declval<const _Key&>(),
						   (std::size_t)0)) )
      { return _M_ranged_hash()(_M_extract()(__p->_M_v()), __bkt_count); }
      // __p->_M_v() presumably returns a reference to the value stored in the
      // node (its definition is not quoted here -- confirm).
      /*
      Excerpt of _M_extract(), which returns the key-extraction functor:
      const _ExtractKey&
          _M_extract() const { return __ebo_extract_key::_M_cget(); }
     */

这里没咋看懂,有兴趣的看看参考博文https://github.com/Light-City/CPlusPlusThings/blob/master/src_analysis/stl/hashtable.md
反正上述获取桶的index的过程,对应的数学表达式就是:

index(k) = h2(h1(k), n) = hash(k) % n   (其中h1是计算key哈希值的hash函数,h2是范围哈希函数_Mod_range_hashing,n是桶的数量)

就是先由给定的hash函数去计算key的hash值,然后用除留余数法将hash值约束在[0, n)范围内,
实际上就是最终的:

hash(k)%n

这个就是桶的index计算。

  • 下面摘录的是_Hash_node<_Value, true>和_Hash_node<_Value, false>这两个特化的定义
 /**
   *  Specialization for nodes with caches, struct _Hash_node.
   *
   *  Base class is __detail::_Hash_node_value_base.
   *
   *  Compared with the <_Value, false> specialization, this one adds the
   *  _M_hash_code data member.
   */
  template<typename _Value>
    struct _Hash_node<_Value, true> : _Hash_node_value_base<_Value>
    {
      // Hash code kept inside the node (the "cache" this specialization is
      // named for); read by the _M_bucket_index overload that uses
      // __p->_M_hash_code.
      std::size_t  _M_hash_code;

      // Returns the _M_nxt link downcast to this node type.  _M_nxt is
      // presumably inherited through the base class -- confirm.
      _Hash_node*
      _M_next() const noexcept
      { return static_cast<_Hash_node*>(this->_M_nxt); }
    };


/**
   *  Specialization for nodes without caches, struct _Hash_node.
   *
   *  Base class is __detail::_Hash_node_value_base.
   *
   *  This specialization declares no _M_hash_code member.
   */
  template<typename _Value>
    struct _Hash_node<_Value, false> : _Hash_node_value_base<_Value>
    {
      // Returns the _M_nxt link downcast to this node type.  _M_nxt is
      // presumably inherited through the base class -- confirm.
      _Hash_node*
      _M_next() const noexcept
      { return static_cast<_Hash_node*>(this->_M_nxt); }
    };

待续

  • 其他的成员函数

      // hash_function, if present, comes from _Hash_code_base.

      // Bucket operations
      // Returns the stored bucket count, _M_bucket_count.
      size_type
      bucket_count() const noexcept
      { return _M_bucket_count; }
      
      // Reports the maximum bucket count as the same value as max_size().
      size_type
      max_bucket_count() const noexcept
      { return max_size(); }
      
      // Returns the distance between begin(__bkt) and end(__bkt), i.e. the
      // length of the range those two per-bucket iterators delimit.
      size_type
      bucket_size(size_type __bkt) const
      { return std::distance(begin(__bkt), end(__bkt)); }
 
      // Returns size() divided by bucket_count(); both operands are converted
      // to float before the division.
      float
      load_factor() const noexcept
      {
	const float __elements = static_cast<float>(size());
	const float __buckets = static_cast<float>(bucket_count());
	return __elements / __buckets;
      }

    protected:
      // Bucket index computation helpers.
      // Bucket index of node __n: forwards to the __hash_code_base overload,
      // supplying this table's _M_bucket_count as the bucket count.
      size_type
      _M_bucket_index(__node_type* __n) const noexcept
      { return __hash_code_base::_M_bucket_index(__n, _M_bucket_count); }

      // Bucket index for key __k with hash code __c: forwards to the
      // __hash_code_base overload, supplying this table's _M_bucket_count as
      // the bucket count.
      size_type
      _M_bucket_index(const key_type& __k, __hash_code __c) const
      { return __hash_code_base::_M_bucket_index(__k, __c, _M_bucket_count); }

      // Find and insert helper functions and types
      // Find the node before the one matching the criteria.
      __node_base*
      _M_find_before_node(size_type, const key_type&, __hash_code) const;

      // Hinted insert, unique-keys overload (selected by the true_type tag).
      // The hint iterator is unnamed and ignored: the call is forwarded to the
      // non-hinted _M_insert and only the `first` member of its result is
      // returned.
      template<typename _Arg, typename _NodeGenerator>
	iterator
	_M_insert(const_iterator, _Arg&& __arg,
		  const _NodeGenerator& __node_gen, true_type __uk)
	{
	  auto __result
	    = _M_insert(std::forward<_Arg>(__arg), __node_gen, __uk);
	  return __result.first;
	}
	
      // Erase
      iterator
      erase(const_iterator);

总结

待续

  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值