glib GHashTable 哈希表代码研究

最新推荐文章于 2022-03-13 00:12:45 发布

steadfast

最新推荐文章于 2022-03-13 00:12:45 发布

阅读量2.3k

点赞数

分类专栏：数据结构文章标签： table null insert structure struct 数据结构

本文链接：https://blog.csdn.net/steadfast/article/details/5442600

版权

数据结构专栏收录该内容

0 篇文章 0 订阅

订阅专栏

一、哈希表介绍
哈希表与线性表的区别在于查找效率。线性表查找的时间复杂度通常为O(n)，二分查找则达到了O(logn)，哈希表可达到O(1)，速度大为提高。这是一种以空间换时间的方法。那么哈希表是怎么做到的呢？对于线性表的查找通常是使用遍历的方法，使用二分查找构造时要麻烦一些。哈希表则使用了哈希值（或哈希值求模）来作为数组下标来直接定位，从而实现了高效率。

二、glib哈希表的实现
glib的哈希表的所有实现代码在源码目录下glib/ghash.c文件中，它实现了对任意类型key和任意类型value的存取。这里参考了glib-2.16的代码。

1、数据结构
/* A single key/value entry of the hash table; entries that share a
 * bucket are chained together through `next`. */
struct _GHashNode
{
gpointer   key;         // the key; may be of any type
gpointer   value;       // the value associated with the key
GHashNode *next;    // pointer to the next node
guint      key_hash;   // the guint obtained from hash_func(key)
};

/* The hash table itself: an array of node chains plus the callbacks
 * used to hash, compare and release keys and values. */
struct _GHashTable
{
gint             size;                   // size of the hash table
gint             nnodes;              // number of nodes, i.e. how many key/value pairs are stored
GHashNode      **nodes;      // array of pointers to node lists
GHashFunc        hash_func;   // hash function, supplied by the user
GEqualFunc       key_equal_func;    // used to decide whether two keys are equal when their key_hash matches
volatile gint    ref_count;
#ifndef G_DISABLE_ASSERT
/*
   * Tracks the structure of the hash table, not its contents: is only
   * incremented when a node is added or removed (is not incremented
   * when the key or data of a node is modified).
   */
int              version;
#endif
GDestroyNotify   key_destroy_func;        // function that releases a key
GDestroyNotify   value_destroy_func;      // function that releases a value
};

2、新建
hash_func和key_equal_func由调用者提供，也可以为空。如果hash_func为空会使用g_direct_hash方法来替代，实际上是直接把key指针的值当作哈希值；如果key_equal_func为空则使用“==”来代替，实质上是指针比较，例如把整型值保存为指针作为key时，就可以不提供key_equal_func方法。
key_destroy_func和value_destroy_func通常是需要的，g_hash_table_destroy或g_hash_table_remove时会调用这两个函数。当然，如果哈希表中的key和value指向的值不需要释放或在其他地方管理就不需要了。
/*
 * Allocates a new, empty hash table holding a single reference.
 *
 * hash_func:          hashes a key; g_direct_hash is used when NULL.
 * key_equal_func:     compares two keys; stored as given (may be NULL).
 * key_destroy_func:   called to release a key; may be NULL.
 * value_destroy_func: called to release a value; may be NULL.
 *
 * Returns the new table, with HASH_TABLE_MIN_SIZE zeroed bucket slots.
 */
GHashTable*
g_hash_table_new_full (GHashFunc       hash_func,
                       GEqualFunc      key_equal_func,
                       GDestroyNotify key_destroy_func,
                       GDestroyNotify value_destroy_func)
{
  GHashTable *table = g_slice_new (GHashTable);

  /* Without a caller-supplied hash function, fall back to g_direct_hash. */
  if (!hash_func)
    hash_func = g_direct_hash;

  /* Bookkeeping: start at the minimum size (noted as 11 in the original). */
  table->size     = HASH_TABLE_MIN_SIZE;
  table->nnodes   = 0;
  table->ref_count = 1;
#ifndef G_DISABLE_ASSERT
  table->version  = 0;
#endif

  /* Callbacks. */
  table->hash_func          = hash_func;
  table->key_equal_func     = key_equal_func;
  table->key_destroy_func   = key_destroy_func;
  table->value_destroy_func = value_destroy_func;

  /* Bucket array, every slot initially NULL. */
  table->nodes = g_new0 (GHashNode*, table->size);

  return table;
}

3、插入
哈希表是不允许key值有重复的。如果插入时遇到已存在的key，value总是会被替换为新值（旧value交给value_destroy_func释放），而keep_new_key决定保留哪一个key：为TRUE时销毁旧key并保存新key，为FALSE时保留旧key并销毁传入的新key。在g_hash_table_insert()里keep_new_key为FALSE，g_hash_table_replace()里则keep_new_key为TRUE。
/*
 * Stores a key/value pair in hash_table.
 *
 * If no node matches key, a new node is linked in, the node count and
 * (when assertions are enabled) the version are bumped, and the table
 * is given a chance to resize.
 *
 * If a node already matches key, its value is always replaced (the old
 * value goes to value_destroy_func when one is set).  keep_new_key
 * selects which key survives: when TRUE the node's old key is destroyed
 * and the passed key is stored; when FALSE the passed key is destroyed
 * and the node keeps its old key.
 */
static void
g_hash_table_insert_internal (GHashTable *hash_table,
                              gpointer    key,
                              gpointer    value,
                              gboolean    keep_new_key)
{
  GHashNode **slot;
  GHashNode *existing;
  guint hash;

  g_return_if_fail (hash_table != NULL);
  g_return_if_fail (hash_table->ref_count > 0);

  slot = g_hash_table_lookup_node (hash_table, key, &hash);
  existing = *slot;

  if (existing == NULL)
    {
      /* No match: slot is the place where the new node gets linked. */
      GHashNode *fresh = g_slice_new (GHashNode);

      fresh->key = key;
      fresh->value = value;
      fresh->key_hash = hash;
      fresh->next = NULL;

      *slot = fresh;
      hash_table->nnodes++;
      g_hash_table_maybe_resize (hash_table);

#ifndef G_DISABLE_ASSERT
      hash_table->version++;
#endif
      return;
    }

  /* Match: release whichever key is being discarded, then swap if asked. */
  if (hash_table->key_destroy_func)
    hash_table->key_destroy_func (keep_new_key ? existing->key : key);
  if (keep_new_key)
    existing->key = key;

  /* The stored value is replaced unconditionally. */
  if (hash_table->value_destroy_func)
    hash_table->value_destroy_func (existing->value);
  existing->value = value;
}

4、删除
先查找key值对应的节点，如果未找到则直接返回，找到则删除该节点。
/*
 * Removes the node matching key from hash_table, if there is one.
 *
 * notify is forwarded unchanged to g_hash_table_remove_node.
 *
 * Returns TRUE when a node was found and removed, FALSE when no node
 * matched (or hash_table was NULL).
 */
static gboolean
g_hash_table_remove_internal (GHashTable    *hash_table,
                              gconstpointer key,
                              gboolean       notify)
{
  GHashNode **slot;

  g_return_val_if_fail (hash_table != NULL, FALSE);

  slot = g_hash_table_lookup_node (hash_table, key, NULL);

  if (*slot != NULL)
    {
      g_hash_table_remove_node (hash_table, &slot, notify);
      g_hash_table_maybe_resize (hash_table);

#ifndef G_DISABLE_ASSERT
      /* The table's structure changed, so bump the version. */
      hash_table->version++;
#endif

      return TRUE;
    }

  return FALSE;
}

5、查找
先用hash_func(key)得到key的哈希值，用这个哈希值对size求模来找到对应哈希值的节点链表头。然后再用key_equal_func比较key后找到节点位置。
/*
 * Locates the slot belonging to key.
 *
 * The key is hashed with the table's hash_func and the result, modulo
 * the table size, selects a bucket; that bucket's chain is then walked.
 *
 * Returns the address of the pointer that refers to the matching node.
 * When nothing matches, the returned address is that of the NULL
 * pointer terminating the chain, so *result == NULL means "not found".
 *
 * If hash_return is non-NULL, the full hash value of key is stored
 * through it.
 */
static inline GHashNode **
g_hash_table_lookup_node (GHashTable    *hash_table,
                          gconstpointer key,
                          guint         *hash_return)
{
  GHashNode **slot;
  GHashNode *cur;
  guint hash;

  hash = (* hash_table->hash_func) (key);
  slot = &hash_table->nodes[hash % hash_table->size];

  if (hash_return != NULL)
    *hash_return = hash;

  /* Lookups must be fast, so the test for whether a key_equal_func
   * exists is done once here rather than on every step of the walk.
   *
   * As a further shortcut, the cached full hash values are compared
   * first, which avoids most calls to the key equality function.
   */
  if (hash_table->key_equal_func != NULL)
    {
      for (cur = *slot; cur != NULL; cur = *slot)
        {
          if (cur->key_hash == hash &&
              hash_table->key_equal_func (cur->key, key))
            break;

          slot = &(*slot)->next;
        }
    }
  else
    {
      /* No equality callback: keys match only if the pointers are equal. */
      for (cur = *slot; cur != NULL; cur = *slot)
        {
          if (cur->key == key)
            break;

          slot = &(*slot)->next;
        }
    }

  return slot;
}

6、调整大小
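GHashTable结构中的size是哈希表的大小，nnodes是节点数量（即哈希表中有多少对值）。如果size大于等于3倍nnodes（且size大于HASH_TABLE_MIN_SIZE），或nnodes大于等于3倍size（且size小于HASH_TABLE_MAX_SIZE），哈希表就会重新构造。重构会带来不小的开销，所有节点都要重新分配到新的链表数组中；不过每个节点已经缓存了key_hash，重构时只需用它对新的size求模，不必重新调用hash_func。尽管如此，选择一个好的hash_func仍然很重要，它决定了节点在各条链表上分布得是否均匀。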
/*
 * Triggers g_hash_table_resize when the table has become too sparse
 * or too crowded:
 *   - size is at least three times nnodes and still above
 *     HASH_TABLE_MIN_SIZE, or
 *   - nnodes is at least three times size and size is still below
 *     HASH_TABLE_MAX_SIZE.
 * Otherwise the table is left untouched.
 */
static inline void
g_hash_table_maybe_resize (GHashTable *hash_table)
{
  gint buckets = hash_table->size;
  gint entries = hash_table->nnodes;

  /* Too sparse. */
  if (buckets >= 3 * entries && buckets > HASH_TABLE_MIN_SIZE)
    {
      g_hash_table_resize (hash_table);
      return;
    }

  /* Too crowded. */
  if (3 * buckets <= entries && buckets < HASH_TABLE_MAX_SIZE)
    g_hash_table_resize (hash_table);
}

steadfast

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
glib GHashTable 哈希表代码研究

一、哈希表介绍哈希表与线性表的区别在于查找效率。线性表查找的时间复杂度通常为O(n)，二分查找则达到了O(logn)，哈希表可达到O(1)，速度大为提高。这是一种以空间换时间的方法。那么哈希表是怎么做到的呢？对于线性表的查找通常是使用遍历的方法，使用二分查找构造时要麻烦一些。哈希表则使用了哈希值（或哈希值求模）来作为数组下标来直接定位，从而实现了高效率。
复制链接

扫一扫