深入了解哈希表操作（开放定址法-拉链法）

最新推荐文章于 2024-08-16 09:16:07 发布

置顶 Triorwy

最新推荐文章于 2024-08-16 09:16:07 发布

阅读量1.1k

点赞数

分类专栏：数据结构与算法文章标签：哈希表开放地址法拉链法基本操作

本文链接：https://blog.csdn.net/triorwy/article/details/79465406

版权

数据结构与算法专栏收录该内容

9 篇文章 0 订阅

订阅专栏

解决哈希冲突两种常见的方法是：闭散列和开散列

闭散列

闭散列：也叫开放地址法，当发生哈希冲突时，如果哈希表未被装满，说明在哈希表中必然还有空位置，那么可以把key存放到表中
“下一个” 空位中去
如何用线性探测来寻找下一个空余位置？
设关键码集合为{37, 25, 14, 36, 49, 68, 57, 11}，散列表的大小初始值 m 定为11，假设哈希函数为：Hash(x) = x % p（其中 p = 11，取不超过表长 m 且最接近 m 的质数），就有：
Hash(37) = 4
Hash(25) = 3
Hash(14) = 3
Hash(36) = 3
Hash(49) = 5
Hash(68) = 2
Hash(57) = 2
Hash(11) = 0
其中25，14，36以及68，57发生哈希冲突，一旦冲突必须要找出下一个空余位置
线性探测的处理方法为：从发生冲突的位置开始，依次向后探测；当探测到表尾而表中前面还有位置没有探测时，就回到表头继续探测，直到找到空位置为止
这里写图片描述

负载因子：
散列表的负载因子（也称载荷因子）定义为：α = 填入表中的元素个数 / 散列表的长度
α是散列表装满程度的标志因子。由于表长是定值，α与“填入表中的
元素个数”成正比，所以，α越大，表明填入表中的元素越多，
产生冲突的可能性就越大；反之，α越小，表明填入表中的元素越少，
产生冲突的可能性就越小。实际上，散列表的平均查找长度是负载因子α的函数，
只是不同处理冲突的方法有不同的函数。对于开放定址法，负载因子
是特别重要的因素，应严格限制在0.7-0.8以下。超过0.8时，
查表时的CPU缓存不命中（cache missing）按照指数曲线上升。
因此，一些hash库会限制负载因子，如Java系统库中的HashMap默认负载因子为0.75，
超过此值将resize散列表。

/*
 * Initialize an open-addressing hash table.
 *
 * ht   - table to set up; its previous contents are overwritten, not freed.
 * size - number of slots to allocate.
 *
 * The element count is reset to 0 and every slot is marked Empty. Slot keys
 * and values are left unset. Allocation failure is caught by assert().
 */
void HashTableInit(HashTable *ht,size_t size)
{
    ht->N = size;
    ht->_size = 0;

    HashNode *slots = (HashNode *)malloc(sizeof(HashNode)*(ht->N));
    ht->_table = slots;
    assert(ht->_table);

    /* Walk the fresh array once and flag every slot as unused. */
    size_t pos = 0;
    while (pos < ht->N)
    {
        slots[pos]._status = Empty;
        pos++;
    }
}
/*
 * Return the smallest entry of a fixed table of 28 primes that is strictly
 * greater than `cur`. Prime capacities are used to reduce hash collisions.
 * If `cur` is not smaller than any entry, the largest entry is returned.
 * (The `ul` suffix makes each literal an unsigned long.)
 */
size_t GetNextPrimeNum(size_t cur)
{
    static const unsigned long primes[28] =
    {
        53ul, 97ul, 193ul, 389ul, 769ul,
        1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
        49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
        1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
        50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
        1610612741ul, 3221225473ul, 4294967291ul
    };
    const size_t count = sizeof(primes) / sizeof(primes[0]);

    /* Skip every prime that is not larger than the current capacity. */
    size_t pos = 0;
    while (pos < count && !(cur < primes[pos]))
    {
        pos++;
    }

    if (pos == count)
    {
        /* Already at or beyond the largest prime: stay there. */
        return primes[count - 1];
    }
    return primes[pos];
}
/*
 * Map a key to a slot index of a table with N slots by taking key modulo N.
 * N must be non-zero.
 */
size_t HashFunc(KeyType key, size_t N)
{
    size_t slot = key % N;
    return slot;
}
/*
 * Insert a key/value pair into an open-addressing (linear probing) table.
 *
 * ht    - initialized table (ht->N must be non-zero).
 * key   - key to insert.
 * value - value stored with the key.
 *
 * Returns 0 when the pair was stored, -1 when the key is already present
 * or no free slot could be found.
 *
 * When the load-factor check fires, the table is first rebuilt at the next
 * prime capacity. Only live (Exits) slots are carried over, so tombstones
 * are dropped and the element count is recomputed by the rebuild.
 */
int HashTableInsert(HashTable *ht, KeyType key, ValueType value)
{
    if (10 * ht->_size / ht->N > 7)
    {
        size_t newN = GetNextPrimeNum(ht->N);
        /* Rebuilding at the same capacity would gain nothing. */
        if (newN > ht->N)
        {
            HashTable Newht;
            HashTableInit(&Newht, newN);
            /* Visit every slot of the old table, not just the first _size. */
            for (size_t i = 0; i < ht->N; i++)
            {
                if (ht->_table[i]._status == Exits)
                {
                    HashTableInsert(&Newht, ht->_table[i]._key, ht->_table[i]._value);
                }
            }
            free(ht->_table);
            ht->_table = Newht._table;
            ht->N = Newht.N;
            ht->_size = Newht._size;
        }
    }

    size_t Index = HashFunc(key, ht->N);
    size_t Target = ht->N; /* ht->N means "no free slot seen yet" */

    /* Probe at most N slots so a table without free slots cannot loop forever. */
    for (size_t probes = 0; probes < ht->N; probes++)
    {
        if (ht->_table[Index]._status == Exits)
        {
            if (ht->_table[Index]._key == key)
            {
                return -1; /* duplicate key */
            }
        }
        else
        {
            /* Remember the first reusable slot (Empty or Delete). */
            if (Target == ht->N)
            {
                Target = Index;
            }
            /* An Empty slot ends the probe chain; the key cannot lie beyond it.
               A Delete slot does not, so keep scanning for a duplicate. */
            if (ht->_table[Index]._status == Empty)
            {
                break;
            }
        }
        Index++;
        if (Index == ht->N) /* wrap before stepping past the last slot */
        {
            Index = 0;
        }
    }

    if (Target == ht->N)
    {
        return -1; /* every slot is occupied */
    }

    ht->_table[Target]._key = key;
    ht->_table[Target]._value = value;
    ht->_table[Target]._status = Exits;
    ht->_size++;
    return 0;
}
/*
 * Print one line per slot of an open-addressing table: the slot index, its
 * status and, for occupied slots, the key.
 *
 * The slot index is a size_t and is printed with %zu; passing it to %d is
 * undefined behavior. The key is printed with %d, which assumes KeyType is
 * int -- TODO confirm against the header.
 */
void HashTablePrint(HashTable *ht)
{
    assert(ht);
    for (size_t i = 0; i < ht->N; i++)
    {
        if (ht->_table[i]._status == Exits)
        {
            printf("[%zu]->Exits->%d\n", i,ht->_table[i]._key);
        }
        else if (ht->_table[i]._status == Delete)
        {
            printf("[%zu]->Delte\n", i);
        }
        else
        {
            printf("[%zu]->Empty\n", i);
        }
    }
    printf("\n");

}
/*
 * Remove a key from an open-addressing table by marking its slot Delete
 * (a tombstone), so probe chains running through the slot stay intact.
 *
 * Returns 0 when a live entry was removed, -1 when the key is not stored.
 */
int HashTableRemove(HashTable *ht, KeyType key)
{
    assert(ht);

    /* Look the key up once and reuse the result. */
    HashNode *node = HashTableFind(ht, key);

    /* Only a live entry may be removed; this also rejects a lookup result
       that points at a slot which is no longer marked Exits. */
    if (node == NULL || node->_status != Exits)
    {
        return -1;
    }

    node->_status = Delete;
    if (ht->_size > 0)
    {
        ht->_size--; /* keep the element count in step with live entries */
    }
    return 0;
}
/*
 * Look up a key in an open-addressing (linear probing) table.
 *
 * ht  - initialized table (ht->N must be non-zero).
 * key - key to search for.
 *
 * Returns a pointer to the live slot holding the key, or NULL when the key
 * is not stored. The search starts at the key's home slot, wraps around at
 * the end of the array and stops at the first Empty slot or after every
 * slot has been examined once.
 */
HashNode* HashTableFind(HashTable *ht, KeyType key)
{
    assert(ht);
    size_t Index = HashFunc(key, ht->N);

    for (size_t probes = 0; probes < ht->N; probes++)
    {
        HashNode *slot = &(ht->_table[Index]);

        /* An Empty slot ends the probe chain: the key was never placed beyond it. */
        if (slot->_status == Empty)
        {
            return NULL;
        }
        /* Compare keys only on live slots; Delete slots keep stale keys. */
        if (slot->_status == Exits && slot->_key == key)
        {
            return slot;
        }

        Index++;
        if (Index == ht->N) /* wrap before stepping past the last slot */
        {
            Index = 0;
        }
    }
    return NULL;
}
/*
 * Release the slot array of an open-addressing table and reset the table
 * to an empty state (no storage, capacity 0, element count 0).
 * Always returns 0.
 */
int HashTableDestory(HashTable *ht)
{
    HashNode *slots = ht->_table;

    /* Clear the bookkeeping first, then hand the storage back. */
    ht->_table = NULL;
    ht->_size = 0;
    ht->N = 0;
    free(slots);

    return 0;
}

开散列
开散列法又叫拉链地址法(开链法)。
开散列方法的一种简单形式是把散列表中的每个桶定义为一个链表的表头。散列到一个特定桶的所有记录都放到这个桶的链表中。
例：一个开散列的散列表，这个表中每一个桶存储一个记录和一个指向链表其余部分的指针，设元素的关键码为37, 25, 14, 36, 49, 68, 57, 11, 散列函数为Hash(x) = x % 11
Hash(37)=4
Hash(25)=3
Hash(14)=3
Hash(36)=3
Hash(49)=5
Hash(68)=2
Hash(57)=2
Hash(11)=0
使用哈希函数计算出每个元素所在的桶号，同一个桶的链表中存放哈希冲突的元素

通常，每个桶对应的链表结点都很少，将n个关键码通过某一个散列函数，存放到散列表中的m个桶中，那么每一个桶中链表的平均
长度为 $n/m$。以搜索平均长度为 $n/m$ 的链表代替了搜索长度为 n 的顺序表，搜索效率快得多。
应用拉链地址法处理溢出，需要增设链接指针，似乎增加了存储开销。事实上：由于开地址法必须保持大量的空闲空间以确保搜索效率，如二次探查法要求装载因子 α <= 0.7，而表项所占空间又比指针大得多，
所以使用链地址法反而比开地址法节省存储空间

#include"Hashtable.h"

/*
 * Pick the next bucket count from a fixed ascending table of 28 primes:
 * the smallest entry strictly greater than `cur`, or the largest entry when
 * no entry is greater. Prime bucket counts reduce hash collisions.
 * (The `ul` suffix makes each literal an unsigned long.)
 */
size_t GetNextPrimeNum(size_t cur)
{
    static const unsigned long table[28] =
    {
        53ul, 97ul, 193ul, 389ul, 769ul,
        1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
        49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
        1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
        50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
        1610612741ul, 3221225473ul, 4294967291ul
    };

    /* Start from the fallback (largest prime) and walk downwards; because the
       table is ascending, the last overwrite is the smallest prime above cur. */
    size_t answer = table[27];
    for (int k = 27; k >= 0; k--)
    {
        if (cur < table[k])
        {
            answer = table[k];
        }
    }
    return answer;
}
/*
 * Compute the bucket index for a key in a table of N buckets as key modulo N.
 * N must be non-zero.
 */
size_t HashFunc(KeyType key, size_t N)
{
    const size_t bucket = key % N;
    return bucket;
}
/*
 * Initialize a separate-chaining hash table with 11 empty buckets.
 *
 * The bucket array holds one chain-head pointer per bucket (the struct
 * field is a HashNode**), and every head starts out as NULL. The element
 * count is reset to 0. Allocation failure is caught by assert().
 */
void HashTableInit(HashTable *ht)
{
    ht->N = 11;
    ht->_size = 0;

    ht->_tables = (HashNode**)malloc(sizeof(HashNode*)*ht->N);
    assert(ht->_tables);

    /* Sweep the array with a pointer and clear every chain head. */
    HashNode **head = ht->_tables;
    HashNode **stop = ht->_tables + ht->N;
    while (head != stop)
    {
        *head = NULL;
        head++;
    }
}
/*
 * Allocate a chain node holding the given key and value.
 *
 * The node is returned unlinked: its _next pointer is NULL, so it is safe
 * to traverse or free even before the caller links it into a bucket.
 * Allocation failure is caught by assert(). The caller owns the node.
 */
HashNode* BuyHashNode(KeyType key, ValueType value)
{
    HashNode *node = (HashNode*)malloc(sizeof(HashNode));
    assert(node);
    node->_key = key;
    node->_value = value;
    node->_next = NULL; /* never hand out an indeterminate link */
    return node;
}
/*
 * Insert a key/value pair into a separate-chaining hash table.
 *
 * Returns 0 when a new node was added, -1 when the key is already present.
 *
 * When the load-factor check fires, a bucket array of the next prime size
 * is allocated and the existing nodes are relinked into it (nodes are
 * moved, not copied) before the insertion itself takes place.
 */
int HashTableInsert(HashTable *ht, KeyType key, ValueType value)
{
    if (10 * ht->_size / ht->N > 7)
    {
        size_t grown = GetNextPrimeNum(ht->N);
        HashNode **fresh = (HashNode**)malloc(sizeof(HashNode*)*grown);
        assert(fresh);
        memset(fresh, 0, sizeof(HashNode*)*grown); /* all chains start empty */

        for (size_t b = 0; b < ht->N; b++)
        {
            HashNode *node = ht->_tables[b];
            while (node != NULL)
            {
                /* Save the rest of the old chain before relinking this node. */
                HashNode *rest = node->_next;

                /* Push the node onto the front of its chain in the new array. */
                size_t dest = HashFunc(node->_key, grown);
                node->_next = fresh[dest];
                fresh[dest] = node;

                node = rest;
            }
        }

        free(ht->_tables); /* only the old bucket array; the nodes were moved */
        ht->_tables = fresh;
        ht->N = grown;
    }

    size_t Index = HashFunc(key,ht->N);

    /* Reject the insertion if the key already lives in this bucket's chain. */
    for (HashNode *scan = ht->_tables[Index]; scan != NULL; scan = scan->_next)
    {
        if (scan->_key == key)
        {
            return -1;
        }
    }

    /* Head insertion: the new node becomes the first node of the chain. */
    HashNode *added = BuyHashNode(key, value);
    added->_next = ht->_tables[Index];
    ht->_tables[Index] = added;
    ht->_size++;
    return 0;
}
/*
 * Print every bucket of a separate-chaining table on its own line: the
 * bucket index followed by the keys in its chain, terminated by "NULL".
 *
 * The bucket index is a size_t and is printed with %zu; passing it to %d is
 * undefined behavior. Keys are printed with %d, which assumes KeyType is
 * int -- TODO confirm against the header.
 */
void HashtablePrint(HashTable *ht)
{
    for (size_t i = 0; i < ht->N; i++)
    {
        printf("[%zu]->",i);
        HashNode* node = ht->_tables[i];
        while (node)
        {
            printf("%d->", node->_key);
            node = node->_next;
        }
        printf("NULL\n");
    }

}
/*
 * Look up a key in a separate-chaining hash table.
 *
 * Returns the node holding the key, or NULL when the key's bucket chain
 * does not contain it.
 */
HashNode* HashTableFind(HashTable* ht, KeyType key)
{
    assert(ht);

    /* Only the chain of the key's own bucket needs to be walked. */
    HashNode *node;
    for (node = ht->_tables[HashFunc(key, ht->N)]; node != NULL; node = node->_next)
    {
        if (node->_key == key)
        {
            break;
        }
    }
    return node; /* NULL when the loop ran off the end of the chain */
}
/*
 * Remove a key from a separate-chaining hash table and free its node.
 *
 * Returns 0 when the key was found and removed, -1 when it is not stored.
 *
 * The key's bucket chain is walked once with a trailing `prev` pointer.
 * The matching node is unlinked by pointing its predecessor (or the bucket
 * head, when it is the first node) at the node's own successor.
 */
int HashTableRemove(HashTable* ht, KeyType key)
{
    assert(ht);

    size_t Index = HashFunc(key, ht->N);
    HashNode *prev = NULL;
    HashNode *cur = ht->_tables[Index];

    while (cur)
    {
        if (key == cur->_key)
        {
            if (prev == NULL)
            {
                /* Removing the first node: the bucket head moves forward. */
                ht->_tables[Index] = cur->_next;
            }
            else
            {
                /* Bypass exactly this node; its successor stays in the chain. */
                prev->_next = cur->_next;
            }
            free(cur);
            ht->_size--;
            return 0;
        }
        prev = cur;
        cur = cur->_next;
    }
    return -1;
}
/*
 * Destroy a separate-chaining hash table: free every node of every chain,
 * then the bucket array, and reset the table to an empty state.
 *
 * Safe to call on a table whose bucket array is already NULL.
 */
void HashTableDestory(HashTable* ht)
{
    if (ht->_tables != NULL)
    {
        for (size_t i = 0; i < ht->N; i++)
        {
            HashNode *cur = ht->_tables[i];
            while (cur)
            {
                /* Read the link before the node is released. */
                HashNode *next = cur->_next;
                free(cur);
                cur = next;
            }
            ht->_tables[i] = NULL;
        }
    }
    free(ht->_tables);
    ht->_tables = NULL;
    ht->_size = 0;
    ht->N = 0;
}