数据结构---hash表

L7256

已于 2022-11-02 19:15:13 修改

阅读量1k

点赞数 1

分类专栏：数据结构文章标签：哈希算法算法

于 2022-08-13 11:00:00 首次发布

本文链接：https://blog.csdn.net/m0_37844072/article/details/126277137

版权

数据结构专栏收录该内容

8 篇文章 0 订阅

订阅专栏

1.基本概念

给定表M，存在函数f(key)，对任意给定的关键字值key，代入函数后若能得到包含该关键字的记录在表中的地址，则称表M为哈希(Hash）表，函数f(key)为哈希(Hash) 函数。

2.hash冲突

比如

f(x) = x % 5; //哈希函数

int hashTable[5]; //哈希表

int value[5]; //存放数据的数组

先往value[0]存入 'A' [对应ascii码为65] ， value[1]存入'F' [对应ascii码为70]
f(65) = 0; 那么hashTable[0] = 0;
f(70) = 0; 那么hashTable[1] = 0;

这就产生了冲突（hashTable的0和1指向的是同一个位置）

解决方法：让存入hash表的值不一样

3.hash的简单实现

需要实现以下几个接口：

struct hash *hash_create(void); //创建一个hash表
void hash_destroy(struct hash *ht, void (*func)(void *)); //销毁hash表；func不为NULL时，会对每个节点的data调用一次func
int hash_insert(struct hash *ht, const char* key, void *data); //在hash表ht插入一个元素（key, data）
void *hash_lookup(struct hash *ht, const char* key); //在hash表ht中，通过key得到value的指针

以下摘自linuxptp的hash实现

/**
 * @file hash.c
 * @brief Implements a simple hash table.
 * @note Copyright (C) 2015 Richard Cochran <richardcochran@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
#include <stdlib.h>
#include <string.h>

#include "hash.h"

#define HASH_TABLE_SIZE 200

/* One entry in a bucket's singly linked collision chain. */
struct node {
	/* Key string; hash_insert stores a strdup() copy and hash_destroy frees it. */
	char *key;
	/* Caller-supplied payload pointer, stored as given by hash_insert. */
	void *data;
	/* Next entry in the same bucket, or NULL at the end of the chain. */
	struct node *next;
};

/* The hash table: a fixed array of HASH_TABLE_SIZE bucket heads. */
struct hash {
	/* table[i] points to the first node of bucket i, or is NULL when the bucket is empty. */
	struct node *table[HASH_TABLE_SIZE];
};

/**
 * Map a NUL-terminated string to a bucket index.
 *
 * Each character is folded into an unsigned accumulator as
 * acc = acc * 131 + c; unsigned arithmetic wraps on overflow.
 *
 * @param s  NUL-terminated key string.
 * @return   Index in the range [0, HASH_TABLE_SIZE).
 */
static unsigned int hash_function(const char* s)
{
	unsigned int acc = 0;
	const char *p = s;

	while (*p) {
		acc = acc * 131 + *p;
		p++;
	}
	return acc % HASH_TABLE_SIZE;
}

/**
 * Allocate a new, empty hash table.
 *
 * @return Pointer to a zero-filled table obtained from calloc(), or NULL
 *         if the allocation fails.
 */
struct hash *hash_create(void)
{
	return calloc(1, sizeof(struct hash));
}

/**
 * Release a hash table together with every node it holds.
 *
 * For each node the optional callback is invoked on the stored data
 * pointer, then the node's key copy and the node itself are freed.
 * Finally the table is freed.
 *
 * @param ht    Table to destroy. NULL is accepted and ignored, so the
 *              result of a failed hash_create() can be passed safely.
 * @param func  Called once per node with that node's data pointer;
 *              may be NULL when the data needs no cleanup.
 */
void hash_destroy(struct hash *ht, void (*func)(void *))
{
	unsigned int i;
	struct node *n, *next;

	if (!ht) {
		return;
	}

	for (i = 0; i < HASH_TABLE_SIZE; i++) {
		for (n = ht->table[i]; n; n = next) {
			/* Save the successor before the node is freed. */
			next = n->next;
			if (func) {
				func(n->data);
			}
			free(n->key);
			free(n);
		}
	}

	free(ht);
}

/**
 * Insert a (key, data) pair into the table.
 *
 * The key string is copied with strdup(); the data pointer is stored as is.
 *
 * @param ht    Table to insert into.
 * @param key   NUL-terminated key string.
 * @param data  Pointer to associate with the key.
 * @return 0 on success; -1 if the key is already present or if memory
 *         allocation fails.
 */
int hash_insert(struct hash *ht, const char* key, void *data)
{
	/* The hash of the key selects the bucket (slot in the table array). */
	struct node **bucket = &ht->table[hash_function(key)];
	struct node *cur = *bucket;
	struct node *fresh;

	/* Walk the bucket's chain first: duplicate keys are rejected. */
	while (cur) {
		if (strcmp(cur->key, key) == 0) {
			return -1;
		}
		cur = cur->next;
	}

	/* Allocate the list node for the new entry. */
	fresh = calloc(1, sizeof(*fresh));
	if (!fresh) {
		return -1;
	}

	/* The node keeps its own copy of the key. */
	fresh->key = strdup(key);
	if (!fresh->key) {
		free(fresh);
		return -1;
	}
	fresh->data = data;

	/* Link the new node in at the head of the bucket's chain. */
	fresh->next = *bucket;
	*bucket = fresh;
	return 0;
}
/**
 * Find the data pointer stored under a key.
 *
 * @param ht   Table to search.
 * @param key  NUL-terminated key string.
 * @return The data pointer that was passed to hash_insert() for this key,
 *         or NULL when the key is not present.
 */
void *hash_lookup(struct hash *ht, const char* key)
{
	struct node *cur = ht->table[hash_function(key)];

	/* Only the chain of the key's own bucket has to be scanned. */
	while (cur) {
		if (strcmp(cur->key, key) == 0) {
			return cur->data;
		}
		cur = cur->next;
	}
	return NULL;
}

比较关键的逻辑就是:hash_insert函数，主要分为几步

通过hash_function()得到key对应的value，即在hashTable的位置
把传入的key,data存入一个node
把hashTable[value]赋值为node的内存地址

通过hash_lookup()找到key对应的data

先通过hash_function()算出value,即在hashTable的位置
通过hashTable[value]，找到key对应的那个node
通过node可找到key对应的data
返回data作为hash_lookup()的返回值

hash表的实现

uthash: uthash简介_Fan Upward的博客-CSDN博客_uthash

4. hash表的应用场景

C语言实现的数据结构之------哈希表_smstong的博客-CSDN博客_c语言实现哈希表

如果需要从链表中找到一个元素，那么就需要遍历链表。
为了解决根据关键字快速找到元素的存放地址，哈希表应运而生。它通过某种算法（哈希函数）直接根据关键字计算出元素的存放地址。
但不同的关键字可能产生同样的hash值（即hash冲突）。解决方法可以是链式扩展：即把hash值相同的元素放到同一个链表中。

5.hash表和链表扩展的实现

参考实现：C语言实现的数据结构之------哈希表_smstong的博客-CSDN博客_c语言实现哈希表
HashTable中的Head存储了tablesize个Node（示例中为10个，每个都是一条链表的头结点），根据hash函数得到访问下标
hh->Head[i].next来访问链表的第一个节点，根据next依次访问链表下一个节点
查找：通过key得到hash值，通过hash值遍历对应的链表。【即可实现链表的查找由遍历所有数据--->只需要遍历一部分数据！】

代码实现


#include <stdio.h>
#include <stdlib.h>
#include <string.h>


/* A key/value pair stored in the hash table. */
typedef struct Element
{
	/* Integer key; hash() reduces it modulo the table size to pick a bucket. */
	int key;
	int value;  // payload; could be replaced by data of any type
} Element;

// Singly linked list node holding one element by value.
// The same type is also used for the per-bucket head entries in HashTable.Head.
typedef struct LinkTable
{
	Element data;
    // Next node in the same bucket's chain, or NULL at the end.
    struct LinkTable* next;
}Node;

/* Chained hash table keyed by int. */
typedef struct HashTable {
    /* Array of tablesize head nodes; only each head's next field is followed by the functions in this file. */
    Node* Head;
    /* Number of buckets (entries in Head). */
    int tablesize;
	/* Number of elements currently stored; updated by Insert, delete and freehashtable. */
	int elementsize; 
} HashTable;


/*
 * Dump the table to stdout.
 *
 * Output is framed by "=========" lines; each bucket produces one line
 * listing its chain as "[key-value] " entries in chain order (an empty
 * bucket prints an empty line).
 */
void PrintTable(HashTable *hh)
{
	printf("=========\n");

	for (int bucket = 0; bucket < hh->tablesize; bucket++)
	{
		/* Head[bucket] itself is only a list head; the elements start at its next. */
		for (Node *cur = hh->Head[bucket].next; cur != NULL; cur = cur->next)
		{
			printf("[%d-%d] ", cur->data.key, cur->data.value);
		}

		printf("\n");
	}

	printf("=========\n");
}

/*
 * Create an empty hash table with `size` buckets.
 *
 * One head node per bucket is allocated zero-initialised, so every chain
 * starts out empty.
 *
 * Returns the new table, or NULL if size is not positive or an allocation
 * fails. The caller owns the result and releases it with freehashtable().
 */
HashTable* InitHashTable(int size)
{
	if (size <= 0) {
		return NULL;
	}

	HashTable* hh = malloc(sizeof(*hh));
	if (hh == NULL) {
		return NULL;
	}

	/* calloc zero-fills the bucket heads and checks count * size for overflow. */
	hh->Head = calloc((size_t)size, sizeof(*hh->Head));
	if (hh->Head == NULL) {
		free(hh);
		return NULL;
	}

	hh->tablesize = size;
	hh->elementsize = 0;
	return hh;
}

/*
 * Compute the bucket index for a key: key modulo the table size.
 *
 * In C the remainder of a negative key is negative, which would be an
 * out-of-range index once callers store it in an int; it is shifted back
 * into range here so the result is always in [0, hh->tablesize).
 *
 * Requires hh->tablesize > 0.
 */
unsigned int hash(HashTable *hh, int key)
{
	int rem = key % hh->tablesize;

	if (rem < 0) {
		rem += hh->tablesize;
	}
	return (unsigned int)rem;
}

/*
 * Search the table for a key.
 *
 * Only the chain of the bucket selected by hash() is scanned. When the
 * key is found, "has key [<key>]" is printed to stdout.
 *
 * Returns the node holding the key, or NULL if it is not in the table.
 */
Node* lookup(HashTable* hh, int key)
{
	int bucket = hash(hh, key);

	for (Node* cur = hh->Head[bucket].next; cur != NULL; cur = cur->next) {
		if (cur->data.key != key) {
			continue;
		}
		printf("has key [%d]\n", key);
		return cur;
	}
	return NULL;
}

/*
 * Insert a copy of *lt into the table.
 *
 * If the key is already present the table is left unchanged (lookup()
 * reports the existing key on stdout). Otherwise a new node is allocated,
 * filled from *lt and linked in at the front of its bucket's chain, and
 * the element count is incremented. The computed bucket index is printed
 * to stdout.
 *
 * If the node cannot be allocated, an error is written to stderr and the
 * table is left unchanged.
 */
void Insert(HashTable* hh, Element* lt)
{
	/* Duplicate keys are not stored a second time. */
	if (lookup(hh, lt->key) != NULL) {
		return;
	}

	int hashvalue = hash(hh, lt->key);
	printf("hashvalue = [%d]\n", hashvalue);

	Node* nd = malloc(sizeof(*nd));
	if (nd == NULL) {
		fprintf(stderr, "Insert: out of memory for key [%d]\n", lt->key);
		return;
	}
	nd->data.key = lt->key;
	nd->data.value = lt->value;

	/* Push the new node in right after the bucket's head entry. */
	nd->next = hh->Head[hashvalue].next;
	hh->Head[hashvalue].next = nd;

	hh->elementsize++;
}

/*
 * Remove the element with the given key, if present.
 *
 * The node is unlinked from its bucket's chain before it is freed, so the
 * predecessor (the bucket head entry when the match is the first node)
 * never keeps a pointer to freed memory. On success "delete key [<key>]"
 * is printed to stdout and the element count is decremented. If the key
 * is not in the table nothing happens.
 */
void delete(HashTable* hh, int key)
{
	int hashvalue = hash(hh, key);

	/* Start at the head entry so the first real node has a predecessor too. */
	Node* prev = &hh->Head[hashvalue];
	Node* cur = prev->next;

	while (cur != NULL) {
		if (cur->data.key == key)
		{
			printf("delete key [%d]\n", key);
			prev->next = cur->next;  /* unlink first ... */
			free(cur);               /* ... then release */
			hh->elementsize--;
			return;
		}
		prev = cur;
		cur = cur->next;
	}
}

/*
 * Destroy a hash table: free every stored node, the bucket-head array and
 * the table object itself. hh must not be used after this call.
 * A NULL hh is ignored.
 *
 * Progress is traced to stdout: the bucket position, every element as it
 * is released, the table contents after each bucket has been emptied, and
 * finally the remaining element count.
 */
void freehashtable(HashTable *hh)
{
	if (hh == NULL)	return;

	/* Release each bucket's chain in turn. */
	for (int i = 0; i < hh->tablesize; i++)
	{
		printf("hash pos[%d]\n", i);
		Node* cur = hh->Head[i].next;

		/* Detach the chain first so the table never exposes freed nodes. */
		hh->Head[i].next = NULL;

		while (cur != NULL) {
			Node* following = cur->next;
			printf("key %d val = %d\n", cur->data.key, cur->data.value);
			free(cur);
			cur = following;

			hh->elementsize--;
		}
		PrintTable(hh);
	}
	printf("hash count[%d]\n", hh->elementsize);

	/* The bucket array and the table object are heap allocations too. */
	free(hh->Head);
	free(hh);
}

int main()
{
	// 初始化哈希表。
	HashTable *hh = InitHashTable(10);


	Element ee;

	{
		ee.key = 10; ee.value = 110; Insert(hh, &ee);
		ee.key = 10; ee.value = 110; Insert(hh, &ee);
		printf("count=%d\n", hh->elementsize);
		PrintTable(hh);    // 打印哈希表 
	}

	{
		ee.key = 12; ee.value = 112; Insert(hh, &ee);
		PrintTable(hh);    // 打印哈希表 

		delete(hh, 12);     // 删除哈希表中关键字为12的数据元素。
		PrintTable(hh);    // 打印哈希表 
	}

	// 插入数据元素，关键字从10到20。
	ee.key = 10; ee.value = 110; Insert(hh, &ee);
	ee.key = 11; ee.value = 111; Insert(hh, &ee);
	ee.key = 13; ee.value = 113; Insert(hh, &ee);
	ee.key = 14; ee.value = 114; Insert(hh, &ee);
	ee.key = 15; ee.value = 115; Insert(hh, &ee);
	ee.key = 16; ee.value = 116; Insert(hh, &ee);
	ee.key = 17; ee.value = 117; Insert(hh, &ee);
	ee.key = 18; ee.value = 118; Insert(hh, &ee);
	ee.key = 19; ee.value = 119; Insert(hh, &ee);

	// // 插入数据元素，关键字从20到30。
	ee.key = 20; ee.value = 120; Insert(hh, &ee);
	ee.key = 21; ee.value = 121; Insert(hh, &ee);
	ee.key = 22; ee.value = 122; Insert(hh, &ee);
	ee.key = 23; ee.value = 123; Insert(hh, &ee);
	ee.key = 24; ee.value = 124; Insert(hh, &ee);
	ee.key = 25; ee.value = 125; Insert(hh, &ee);
	ee.key = 26; ee.value = 126; Insert(hh, &ee);
	ee.key = 27; ee.value = 127; Insert(hh, &ee);
	ee.key = 28; ee.value = 128; Insert(hh, &ee);
	ee.key = 29; ee.value = 129; Insert(hh, &ee);

	// // 插入数据元素，关键字从30到32。
	ee.key = 30; ee.value = 130; Insert(hh, &ee);
	ee.key = 31; ee.value = 131; Insert(hh, &ee);
	ee.key = 32; ee.value = 132; Insert(hh, &ee);


	// delete(hh, 12);     // 删除哈希表中关键字为12的数据元素。

	printf("count=%d\n", hh->elementsize);
	PrintTable(hh);    // 打印哈希表 

	// 在哈希表中查找关键字18。
	Node *pp = lookup(hh, 18);
	if (pp == 0) printf("LookUp(18) failed.\n");
	else printf("key=18,value=%d.\n", pp->data.value);

	freehashtable(hh);  // 销毁哈希表 

	return 0;
}

斯顿