算法学习Day1——C之哈希表

最新推荐文章于 2024-05-06 03:09:53 发布

郑烯烃快去学习

最新推荐文章于 2024-05-06 03:09:53 发布

阅读量1.4k

点赞数 3

文章标签：散列表算法哈希算法 c语言数据结构

本文链接：https://blog.csdn.net/Alkaid2000/article/details/123266012

版权

思想：使用哈希函数将键映射到存储桶。

想要插入一个新的键，哈希函数将决定该键该分配到哪个桶中，并将该键存储在相应的桶中。

想要搜索一个键的时候，哈希表使用哈希函数来查找相应的桶，并在特定的桶中进行搜索。

设计哈希函数：取决于键值的范围和桶的数量。完美的哈希函数可以使键和桶之间的一对一映射。在大多数情况下，哈希函数并不完美，会产生哈希冲突。
冲突解决：在同一个桶中存在了多个键。
如何解决冲突？
1. 链表：将相同的哈希值的元素都放入这一链表当中。
2. 开放地址：在h处产生冲突，则要从h出发找到下一个不冲突的位置。不断地检测h+1、h+2、h+3...
3. 再哈希：再使用另外一个哈希函数产生一个新的地址。

什么地方适用哈希表？

二分法；快速删除、添加、查找....

一、链地址法

设计哈希表的大小为base。开辟一个大小为base的数组，数组的每一个位置是一个链表。

为了尽可能避免冲突，应该将base取为一个质数。

在这里取base为769。

先声明一个链表：

struct List{
	int val;				//保存键值使用
	struct List *next;		//指向下一个
};

声明一个哈希函数，作用是求得余数后将其放入对应的数组中:

const int base = 769;
int hash(int key)
{
	return key%base;
}

写出一些方便调用的子函数：

添加值到数组元素的函数：

//@explain: struct传进来前面定义的结构体，x为要添加的变量
void listPush(struct List *head,int x)
{
    //分配空间
    struct List* tmp = malloc(sizeof(struct List));
    //将读取进来的值，存放在这个结构体的变量中
    tmp -> val = x;
    //指向下一个值
    tmp -> next = head -> next;
    //保存头指针使用
    head -> next = tmp;
}

删除链表中的值函数：

//@explain:struct要操作的结构体类型，x为要删除的变量
void listDelete(struct List*head,int x)
{
    //遍历数组中某一项的结构体
    for(struct List* it=head;it->next;it=it->next)
    {
        if(it->next->val==x)
        {
            //修改其指向的节点
            //保存当前指向的那个节点，为了下面的删除
            struct List*tmp = it->next;
            it->next = tmp->next;
            //释放其所在的内存，相当于删除元素
            free(tmp);
            break;
        }
    }
}

释放单个数组下标的空间：

void listFree(struct List*head)
{
	//当头头指向NULL时，说明已经释放完毕
	while(head->next)
	{
		struct List*tmp = head->next;
		head->next = tmp->next;
		free(tmp);
	}
}

检验键值是否在当前的数组元素中：

//@explain:struct 用来操作的结构体，x将要检验的值
bool listContains(struct List*head,int x)
{
    //遍历
	for(struct List*it = head;it->next;it = it->next)
	{
		if(it->next->val==x)
		{
			return true;
		}
	}
	return false;
}

以上的函数即可满足：添加、删除、检测，其实已经够哈希表的一些使用了。接下来构造结构体数组（相当于初始化哈希表）：

typedef struct{
	struct List*data;	//构造哈希结构体数组
}MyHashSet;

初始化：

为哈希表申请空间内存。
初始化数组，为数组申请base（数组长度）的空间。
将里面的val值初始化为0，指针next指向null值。

MyHashSet* myHashInit()
{
    //使用结构体，申请内存
	MyHashSet* ret = malloc(sizeof(MyHashSet));
    //为数组申请内存，array[base]
	ret -> data = malloc(sizeof(struct List)*base);
    //对数组中的所有元素进行初始化，指针的下一个元素需要为空指针
	for(int i=0;i<base;i++)
	{
		ret->data[i].val=0;
		ret->data[i].next=NULL;
	} 
	return ret;
}

构造哈希添加元素函数：

void myHashSetAdd(MyHashSet* obj,int key)
{
	int keyvalue = hash(key);
    //检测该行中是否存在与之重复的数字，防止重复
	if(!listContains(&(obj->data[h]),key))
	{
		listPush(&(obj->data[h]),key);
	}
}

构造哈希删除函数：

void myHashSetRemove(MyHashSet* obj,int key)
{
	int h=hash(key);
	listDelete(&(obj->data[h]),key);
}

构造检测函数：

bool myHashContains(MyHashSet* obj,int key)
{
	int h=hash(key);
	return listContains(&(obj->data[h]),key);
}

构造释放整个哈希表的函数，为了防止一直在占用内存：

void myHashSetFree(MyHashSet* obj)
{
	for(int i=0;i<base;i++)
	{
		listFree(&(obj->data[i]));
	}
	free(obj->data);
}

时间复杂度：O(n/b)。其中n为哈希表中的元素数量，b为链表数量。
空间复杂度：O(n+b)

对于整个函数：

struct List{
	int val;				//保存键值使用
	struct List *next;		//指向下一个
};


typedef struct {
    struct List*data;	//构造哈希结构体数组
} MyHashSet;

const int base = 769;
int hash(int key)
{
	return key%base;
}

//@explain: struct传进来前面定义的结构体，x为要添加的变量
void listPush(struct List *head,int x)
{
    //分配空间
    struct List* tmp = malloc(sizeof(struct List));
    //将读取进来的值，存放在这个结构体的变量中
    tmp -> val = x;
    //指向下一个值
    tmp -> next = head -> next;
    //保存头指针使用
    head -> next = tmp;
}

//@explain:struct要操作的结构体类型，x为要删除的变量
void listDelete(struct List*head,int x)
{
    //遍历数组中某一项的结构体
    for(struct List* it=head;it->next;it=it->next)
    {
        if(it->next->val==x)
        {
            //修改其指向的节点
            //保存当前指向的那个节点，为了下面的删除
            struct List*tmp = it->next;
            it->next = tmp-> next;
            //释放其所在的内存，相当于删除元素
            free(tmp);
            break;
        }
    }
}

//@explain:struct 用来操作的结构体，x将要检验的值
bool listContains(struct List*head,int x)
{
    //遍历
	for(struct List*it = head;it->next;it = it->next)
	{
		if(it->next->val==x)
		{
			return true;
		}
	}
	return false;
}

void listFree(struct List*head)
{
	//当头头指向NULL时，说明已经释放完毕
	while(head->next)
	{
		struct List*tmp = head->next;
		head->next = tmp->next;
		free(tmp);
	}
}

MyHashSet* myHashSetCreate() {
    //使用结构体，申请内存
	MyHashSet* ret = malloc(sizeof(MyHashSet));
    //为数组申请内存，array[base]
	ret -> data = malloc(sizeof(struct List)*base);
    //对数组中的所有元素进行初始化，指针的下一个元素需要为空指针
	for(int i=0;i<base;i++)
	{
		ret->data[i].val=0;
		ret->data[i].next=NULL;
	} 
	return ret;
}

void myHashSetAdd(MyHashSet* obj, int key) {
    int keyvalue = hash(key);
    //检测该行中是否存在与之重复的数字，防止重复
	if(!listContains(&(obj->data[keyvalue]),key))
	{
		listPush(&(obj->data[keyvalue]),key);
	}
}

void myHashSetRemove(MyHashSet* obj, int key) {
    int h=hash(key);
	listDelete(&(obj->data[h]),key);
}

bool myHashSetContains(MyHashSet* obj, int key) {
    int h=hash(key);
	return listContains(&(obj->data[h]),key);
}

void myHashSetFree(MyHashSet* obj) {
    for(int i=0;i<base;i++)
	{
		listFree(&(obj->data[i]));
	}
	free(obj->data);
}

/**
 * Your MyHashSet struct will be instantiated and called as such:
 * MyHashSet* obj = myHashSetCreate();
 * myHashSetAdd(obj, key);
 
 * myHashSetRemove(obj, key);
 
 * bool param_3 = myHashSetContains(obj, key);
 
 * myHashSetFree(obj);
*/

二、开放地址

其实本质就是申请一个很大很大空间的数组，当存在这个值的时候，返回1，如果没有就返回-1。

这种方法时间复杂度较低，但是看见复杂度很高。而链地址方法则时间复杂度较高，而空间复杂度较低。

申请一个存放了1000001元素的数组：

#define SIZE 1000001
typedef struct{
	int val[SIZE];
}MyHashSet;

初始化：

MyHashSet* myHashSetCreate()
{
	MyHashSet *ret;
	//申请内存空间
	ret = (MyHashSet*)malloc(sizeof(MyHashSet));
	//将所有元素初始化为0
	memset(ret->val,0,sizeof(int)*SIZE);
	return ret;
}

当需要添加元素时，就在相应的元素值的坐标中，置一。

void myHashSetAdd(MyHashSet *obj,int key)
{
	obj->val[key]=1;
}

如果要移出该值，那就将其置为-1。检测其是否存在，那就检测该位的值是1还是-1就可以了。

以上建立的即为哈希集合。它有什么用？

向哈希表中插入一个值key
返回是否表中是否存在这个值？
可以快速在数组中将该值删除，如果没有这个值那么什么都不做。

接下来看看哈希映射：

为什么要使用哈希映射？

当我们需要更多的信息时，而不仅仅是键。然后通过哈希映射建立密钥与信息之间的映射关系。

用在哪？

给定一个数组，要求返回两个数字的索引，让他们相加可以得到特定的目标值。

对于这道题，可以使用暴力解法可以解决：

int* twoSum(int* nums, int numsSize, int target, int* returnSize){

        for(int i=0;i<numsSize;++i)
        {
            for(int j=i+1;j<numsSize;++j)
            {
                if(nums[i]+nums[j]==target)
                {
                    int *ret = malloc(sizeof(int)*2);
                    ret[0]=i;
                    ret[1]=j;
                    //要注意函数块定义域的问题
                    *returnSize = 2;
                    return ret;
                    
                }
            }
        }
        * returnSize = 0;
        return NULL;

}

这种方法方便快捷，但是时间复杂度过高。所以可以使用哈希表来解决这个问题，能够快速寻找数组是否存在目标元素，如果存在那么就找出索引，找到target-x的时间复杂度从O(N)降低到O(1)。

思想：对于每一个x，首先先查询哈希表中是否存在target-x，然后将x插入到哈希表中，即可保证不会让x和自己匹配。

库<uthash.h>

关于这个库的知识可以在这篇博客中学到：开源库uthash第一弹uthash.h_小银子-CSDN博客_uthash.h

这个库所实现的hash表中可以提供类似于双向链表的操作，可以通过结构体hh.prev和hh.next当前节点的上一个节点或者下一个节点。

声明结构体hashTable：

struct hashTable{
	int key;	//键值
	int val;	//所在位置
	UT_hash_handle hh;
}

关于UT_hash_handle hh：

hh是内部使用hash处理句柄，在使用过程中，只需要在结构体中定义一个UT_hash_handle类型的变量即可，不需要为该句柄变量赋值，但必须在该结构体中定义该变量。

UT_hash_handle源码：

struct UT_hash_handle {
   struct UT_hash_table *tbl;
   void *prev;                       /* prev element in app order      */
   void *next;                       /* next element in app order      */
   struct UT_hash_handle *hh_prev;   /* previous hh in bucket order    */
   struct UT_hash_handle *hh_next;   /* next hh in bucket order        */
   const void *key;                  /* ptr to enclosing struct's key  */
   unsigned keylen;                  /* enclosing struct's key len     */
   unsigned hashv;                   /* result of hash-fcn(key)        */
} UT_hash_handle;

全局定义一个结构体数组：

struct hashTable* hashtable;

查找元素使用：HASH_FIND_INT():

//声明了一个返回值为hashTable类型指针的函数find，find函数的返回值为一个地址
struct hashTable* find(int key)
{
	struct hashTable* tmp;
	HASH_FIND_INT(hashtable,&key,tmp);
	return tmp;
}

HASH_FIND_INT用法：参数1：哈希表；参数2：要查找的键值的地址；参数3：哈希表结构的结构体指针。
返回值：输入一个键值，哈希表则返回对应键的结构体，没有就返回NULL。

哈希表思路：

数组第一个元素肯定不在哈希表中，所以将数组的第一个元素存入哈希表。
第二个元素使用y=target-x，如果检测到y的值是否在map中，如果没有，则将此值加入到哈希表中，后面的元素也是一样的操作。

举例：数组[6 , 3 , 8 , 2 , 1]、target=8

编写寻找函数：

int* twoSum(int* nums,int numSize,int target,int* returnSize)
{
	hashtable =NULL;
	for(int i=0;i<numsSize;i++)
	{
		struct hashTable* it = find(target-nums[i]);
		if(it != NULL)
		{
			int* ret = malloc(sizeof(int)*2);
            //返回当前找到的坐标，还有正在遍历的坐标就可以啦
			ret[0] = it->val,ret[1]=i;
			*returnSize = 2;
			return ret;
		}
		//如果没有那就插入
		insert(nums[i],i);
	}
	*returnSize = 0;
	return NULL;
}

编写插入函数：

void insert(int ikey,int ival)
{
	struct hashTable* it=fine(ikey);
	if(it==NULL)
	{
		//申请空间，将数组再扩大
		struct hashTable* tmp = malloc(sizeof(struct hasTable));
		tmp->key=ikey;
		tmp->val=ival;
		HASH_ADD_INT(hashtable,key,tmp);
	}
	else
	{
		it->val = ival;
	}
}

时间复杂度：O(N)，其中N是数组中的元素数量。对于每一个元素x，可以O(1)的寻址target-x。
空间复杂度：O(N)，其中N是数组中的元素数量，主要为哈希表的开销。

代码：

struct hashTable {
    int key;
    int val;
    UT_hash_handle hh;
};

struct hashTable* hashtable;

struct hashTable* find(int ikey) {
    struct hashTable* tmp;
    HASH_FIND_INT(hashtable, &ikey, tmp);
    return tmp;
}

void insert(int ikey, int ival) {
    struct hashTable* it = find(ikey);
    if (it == NULL) {
        struct hashTable* tmp = malloc(sizeof(struct hashTable));
        tmp->key = ikey, tmp->val = ival;
        HASH_ADD_INT(hashtable, key, tmp);
    } else {
        it->val = ival;
    }
}

int* twoSum(int* nums, int numsSize, int target, int* returnSize) {
    hashtable = NULL;
    for (int i = 0; i < numsSize; i++) {
        struct hashTable* it = find(target - nums[i]);
        if (it != NULL) {
            int* ret = malloc(sizeof(int) * 2);
            ret[0] = it->val, ret[1] = i;
            *returnSize = 2;
            return ret;
        }
        insert(nums[i], i);
    }
    *returnSize = 0;
    return NULL;
}

好了，经历了这么多，该写一个哈希映射了，这是传统的哈希映射的写法，其实也跟前面的是差不多的，只不过多了几个功能：

//声明一节哈希表
struct List{
    int key;
    int val;
    struct List* next;
};

//结构体数组的声明
typedef struct{
    struct List* data;
}MyHashMap;

//添加元素到对应的下标
void listPush(struct List*head,int key,int val)
{
    //申请空间
    struct List* tmp = malloc(sizeof(struct List));
    tmp->key = key;
    tmp->val = val;
    tmp->next = head->next;
    head->next = tmp;
}

//删除对应下标元素
void listDelete(struct List*head,int key)
{
    for(struct List* it=head;it->next;it=it->next)
    {
        if(it->next->key==key)
        {
            struct List* tmp = it->next;
            it->next = tmp->next;
            free(tmp);
            break;
        }
    }
}

//查找元素子函数
struct List* listFind(struct List*head,int key)
{
    for(struct List*it = head;it->next;it=it->next)
    {
        if(it->next->key==key)
        return it->next;
    }
    return NULL;
}

//删除子函数
void listFree(struct List* head)
{
    while(head->next)
    {
        struct List*tmp = head->next;
        head->next = tmp->next;
        free(tmp);
    }
}

//规定数组大小
const int base = 769;

//规定哈希函数
int hash(int key)
{
    return key%base;
}

//哈希表的初始化
MyHashMap* myHashMapCreate() {
    MyHashMap* ret = malloc(sizeof(MyHashMap));
    ret -> data = malloc(sizeof(struct List)*base);
    for(int i=0;i<base;i++)
    {
        ret->data[i].key = 0;
        ret->data[i].val = 0;
        ret->data[i].next = NULL;
    }
    return ret;
}

void myHashMapPut(MyHashMap* obj, int key, int value) {
    int h = hash(key);
    //根据哈希函数分配内容
    struct List* rec = listFind(&(obj->data[h]), key);
    //创建链表
    if (rec == NULL) {
        listPush(&(obj->data[h]), key, value);
    } else {
        rec->val = value;
    }
}

int myHashMapGet(MyHashMap* obj, int key) {
    int h = hash(key);
    //得到下标后去查找
    struct List* rec = listFind(&(obj->data[h]), key);
    if (rec == NULL) {
        return -1;
    } else {
        return rec->val;
    }
}

void myHashMapRemove(MyHashMap* obj, int key) {
    int h = hash(key);
    //得到对应下标后删除
    listDelete(&(obj->data[h]), key);
}

void myHashMapFree(MyHashMap* obj) {
    //逐个释放
     for (int i = 0; i < base; i++) {
        listFree(&(obj->data[i]));
    }
    free(obj->data);
}

/**
 * Your MyHashMap struct will be instantiated and called as such:
 * MyHashMap* obj = myHashMapCreate();
 * myHashMapPut(obj, key, value);
 
 * int param_2 = myHashMapGet(obj, key);
 
 * myHashMapRemove(obj, key);
 
 * myHashMapFree(obj);
*/

上面的程序的原题目为：