哈希函数:使元素的存储位置与它的关键码 key 之间建立一一映射关系的函数,查找时通过该函数可以很快找到该数据。例如除留余数法:
Hash(key) = key % m (m:存储单元的个数)。
哈希冲突:对于两个数据元素的关键字 K1 和 K2 (K1 != K2),却有 HashFunc(K1) == HashFunc(K2),即不同关键字通过相
同的哈希函数计算出相同的哈希地址,这种现象称为哈希冲突或哈希碰撞。把具有不同关键码而具有相同哈希地址
的数据元素称为“同义词”。
负载因子:α = 填入表中的元素个数 / 散列表的长度
α 是散列表装满程度的标志因子。由于表长是定值,所以 α 与“填入表中的元素个数”成正比;实际上,散列表的平均查找长
度是负载因子 α 的函数。对于闭散列,负载因子特别重要,应严格控制在 0.7 ~ 0.8 以下。
哈希函数的构造方法:1. 直接定址法 2. 除留余数法 3. 数字分析法 4. 折叠法
5. 平方取中法 6. 随机数法
常见哈希冲突的解决办法:1.闭散列(也称开放地址法)
1). 线性探测:从发生冲突的位置开始,依次继续向后探测,直到找到空位置为止。
2).二次探测(其实是二次方探测)
发生冲突时,二次探测法查找“下一个”空位置的公式为:$H_i = (H_0 + i^2) \bmod m$ 或 $H_i = (H_0 - i^2) \bmod m$,其中 $i = 1, 2, 3, \ldots, (m-1)/2$;$H_0$ 是通过散列函数 Hash(x) 对元素的关键码 key 进行计算得到的位置,$m$ 是表的大小。
2.开散列(也称拉链法)。
3.伪随机探测法
4.再散列 (双重散列,多重散列)
5. 建立一个公共溢出区
哈希表:
#include<stdio.h>
#include<assert.h>
#include<stdlib.h>
// Key type stored in the open-addressing table (a plain int here).
typedef int Key;
// Hash callback. The table calls it as hashFunc(key, capacity) and uses the
// returned value directly as an index into the slot array.
typedef int(*HashFunc)(Key, int);
// Occupancy state of a single slot.
typedef enum{
// Never-used slot; HTInit puts every slot in this state and probing in
// Search/Remove stops when it reaches one.
EMPTY,
// Slot currently holds a key written by Insert.
EXIST,
// Slot whose key was removed by Remove; Search/Remove keep probing past it.
DELETED,
}State;
// One slot of the open-addressing table.
typedef struct Element{
// Stored key; HTInit leaves it uninitialised, Insert writes it.
Key key;
// Whether the slot is EMPTY, EXIST or DELETED.
State state;
}Element;
// Open-addressing (closed hashing) hash table.
typedef struct HashTable{
// Slot array allocated by HTInit and released by HTDestroy.
Element *array;
// Number of keys currently stored (incremented by Insert, decremented by Remove).
int size;
// Number of slots in array.
int capacity;
// Maps a key to its home slot; called as hashFunc(key, capacity).
HashFunc hashFunc;
}HashTable;
// Prepare *pHT as an empty table with `capacity` slots that hashes keys with
// `hashFunc`.
//
// pHT      - table to initialise; must not be NULL (asserted).
// capacity - number of slots to allocate.
// hashFunc - callback stored in the table for later lookups.
//
// Allocation failure is reported through assert().
void HTInit(HashTable *pHT, int capacity, HashFunc hashFunc)
{
    assert(pHT);

    Element *slots = (Element *)malloc(sizeof(Element) * capacity);
    pHT->array = slots;
    assert(pHT->array);

    pHT->hashFunc = hashFunc;
    pHT->capacity = capacity;
    pHT->size = 0;

    // Only the state is initialised; the key field is left untouched.
    for (int slot = capacity - 1; slot >= 0; slot--) {
        slots[slot].state = EMPTY;
    }
}
// Look up `key` with linear probing, starting at its home slot.
//
// Returns the number of slots examined (1 when the key sits in its home
// slot) if the key is present, or -1 if it is not.
//
// Probing stops at the first EMPTY slot. It is also capped at `capacity`
// probes so that a table with no EMPTY slot left (every slot EXIST or
// DELETED) cannot make the loop spin forever.
int Search(HashTable *pHT, Key key)
{
    assert(pHT);
    if (pHT->capacity <= 0) {
        return -1;
    }

    int index = pHT->hashFunc(key, pHT->capacity);
    for (int count = 1; count <= pHT->capacity; count++) {
        const Element *slot = &pHT->array[index];
        if (slot->state == EMPTY) {
            break;
        }
        // DELETED slots still carry their old key, so the state must be checked too.
        if (slot->state == EXIST && slot->key == key) {
            return count;
        }
        // Wrap around to the start of the array.
        index = (index + 1) % pHT->capacity;
    }
    return -1;
}
// Insert `key` into the table.
//
// Returns 1 on success. Returns -1 if the key is already present or if no
// free slot could be found (table completely full).
//
// The probe sequence is linear (home, home+1, home+2, ... modulo capacity),
// the same sequence Search and Remove walk, so a key stored here is always
// reachable by them. The whole chain up to the first EMPTY slot is scanned
// before inserting: a DELETED slot may be reused, but only after making sure
// the key does not already live further down the chain.
int Insert(HashTable *pHT, Key key)
{
    assert(pHT);
    if (pHT->capacity <= 0) {
        return -1;
    }

    // TODO: grow the table first (ExpandIfRequired is defined later in the file).
    //ExpandIfRequired(pHT);

    int index = pHT->hashFunc(key, pHT->capacity);
    int target = -1; // first reusable slot (DELETED or EMPTY) seen so far

    // At most `capacity` probes: a full lap without a free slot means the table is full.
    for (int probes = 0; probes < pHT->capacity; probes++) {
        const Element *slot = &pHT->array[index];
        if (slot->state == EXIST) {
            if (slot->key == key) {
                // Already stored.
                return -1;
            }
        } else {
            if (target < 0) {
                target = index;
            }
            if (slot->state == EMPTY) {
                // End of the probe chain: the key cannot be any further.
                break;
            }
        }
        index = (index + 1) % pHT->capacity;
    }

    if (target < 0) {
        // Every slot is occupied by another key.
        return -1;
    }

    pHT->array[target].key = key;
    pHT->array[target].state = EXIST;
    pHT->size++;
    return 1;
}
// Double the table when the load factor (size / capacity) has reached 0.7.
//
// All EXIST keys are re-inserted into a freshly initialised table of twice
// the capacity, then the old slot array is freed and replaced. pHT->size is
// left as it is.
void ExpandIfRequired(HashTable *pHT)
{
    // Load factor expressed in tenths, using integer arithmetic.
    const int loadTenths = pHT->size * 10 / pHT->capacity;
    if (loadTenths < 7) {
        return;
    }

    HashTable grown;
    HTInit(&grown, pHT->capacity * 2, pHT->hashFunc);

    int slot = 0;
    while (slot < pHT->capacity) {
        const Element *cur = &pHT->array[slot++];
        if (cur->state != EXIST) {
            continue; // EMPTY and DELETED slots are not carried over
        }
        Insert(&grown, cur->key);
    }

    free(pHT->array);
    pHT->capacity = grown.capacity;
    pHT->array = grown.array;
}
// Remove `key` from the table.
//
// Returns 1 if the key was found and removed, -1 if it was not present.
//
// The slot is only marked DELETED (not EMPTY) so that probe chains running
// through it stay intact. Probing is linear, stops at the first EMPTY slot,
// and is capped at `capacity` probes so a table without any EMPTY slot
// cannot cause an endless loop.
int Remove(HashTable *pHT, Key key)
{
    assert(pHT);
    if (pHT->capacity <= 0) {
        return -1;
    }

    int index = pHT->hashFunc(key, pHT->capacity);
    for (int probes = 0; probes < pHT->capacity; probes++) {
        Element *slot = &pHT->array[index];
        if (slot->state == EMPTY) {
            break;
        }
        if (slot->state == EXIST && slot->key == key) {
            slot->state = DELETED;
            pHT->size--;
            return 1;
        }
        // Wrap around to the start of the array.
        index = (index + 1) % pHT->capacity;
    }
    return -1;
}
// Release the slot array owned by *pHT.
//
// The table is reset to an empty, unallocated state afterwards, so a second
// HTDestroy is harmless and a stale pointer is never left behind. A NULL
// pHT is ignored. The table must be re-initialised with HTInit before reuse.
void HTDestroy(HashTable *pHT)
{
    if (pHT == NULL) {
        return;
    }
    free(pHT->array);
    pHT->array = NULL;
    pHT->size = 0;
    pHT->capacity = 0;
}
// Division-method hash: maps `key` to a slot index in [0, capacity).
//
// key      - key to hash; may be negative.
// capacity - number of slots; must be positive (asserted).
//
// In C the result of % takes the sign of the dividend, so a negative key
// would give a negative remainder and therefore an out-of-range index. The
// remainder is shifted back into range in that case.
int 除留余数法(Key key, int capacity)
{
    assert(capacity > 0);
    int remainder = key % capacity;
    if (remainder < 0) {
        remainder += capacity;
    }
    return remainder;
}
void Test()
{
HashTable ht;
HTInit(&ht, 13, 除留余数法);
Insert(&ht, 3);
Insert(&ht, 7);
Insert(&ht, 19);
Insert(&ht, 25);
Insert(&ht, 26);
Insert(&ht, 6);
Insert(&ht, 12);
Insert(&ht, 39);
Insert(&ht, 41);
Insert(&ht, 32);
Insert(&ht, 45);
}
哈希桶:
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
// Key type of the hash bucket: a pointer to a C string. ListPushFront stores
// the pointer itself, not a copy of the characters.
// NOTE(review): presumably the caller must keep the string alive while it is
// in the table - confirm against callers.
typedef char * Key;
// Node of a singly linked collision chain.
typedef struct ListNode {
// Key stored in this node.
Key key;
// Next node in the same bucket, or NULL at the end of the chain.
struct ListNode *pNext;
} ListNode;
// One bucket of the table: the head of its collision chain.
typedef struct Element {
// First node of the chain; zeroed by HBInit, which the code treats as an empty (NULL) chain.
ListNode * pFirst;
} Element;
// Hash callback. Called as hashFunc(key, capacity); the result is used
// directly as the bucket index.
typedef int(*HashFunc)(Key, int);
// Hash table with separate chaining (open hashing).
typedef struct HashBucket {
// Bucket array allocated by HBInit.
Element *array;
// Number of keys stored (incremented by Insert, decremented by Remove).
int size;
// Number of buckets in array.
int capacity;
// Maps a key to its bucket; called as hashFunc(key, capacity).
HashFunc hashFunc;
} HashBucket;
// Set up *pHB as an empty table with `capacity` buckets hashed by `hashFunc`.
//
// The bucket array is heap-allocated and zero-filled, so every chain starts
// out empty. Allocation failure is reported through assert().
void HBInit(HashBucket *pHB, int capacity, HashFunc hashFunc)
{
    const size_t bytes = sizeof(Element) * capacity;

    pHB->array = (Element *)malloc(bytes);
    assert(pHB->array);

    // Zero every bucket head so that all chains begin as NULL.
    memset(pHB->array, 0x0, bytes);

    pHB->hashFunc = hashFunc;
    pHB->capacity = capacity;
    pHB->size = 0;
}
// Insert a key. Returns -1 if the key is already present (insertion fails),
// otherwise inserts it and returns 1.
// Declared ahead of its definition because ExpandIfRequired calls it.
int Insert(HashBucket *pHB, Key key);
// Look up `key` in the table.
//
// Returns a positive number - the count of chain nodes examined, 1 for the
// head of the bucket - when the key is found, or -1 when it is not.
//
// Keys are C strings, so they are compared by content with strcmp(); two
// different buffers holding the same text denote the same key. Identical
// pointers (including two NULL keys) match without being dereferenced.
int Search(HashBucket *pHB, Key key)
{
    int index = pHB->hashFunc(key, pHB->capacity);
    int count = 1;

    for (ListNode *pNode = pHB->array[index].pFirst; pNode != NULL; pNode = pNode->pNext) {
        if (pNode->key == key
            || (pNode->key != NULL && key != NULL && strcmp(pNode->key, key) == 0)) {
            return count;
        }
        count++;
    }
    return -1;
}
// Returns 1 if `key` is found in the chain starting at pFirst, -1 otherwise.
int ListSearch(ListNode *pFirst, Key key);
// Allocates a node holding `key` and makes it the new head of *ppFirst.
void ListPushFront(ListNode **ppFirst, Key key);
// Double the number of buckets once there are at least as many keys as
// buckets (chained tables tolerate a load factor of about 1).
//
// The existing nodes are unlinked from the old buckets and relinked into the
// new bucket array, so nothing is allocated per key and no node is leaked;
// the old bucket array is freed afterwards. pHB->size is unchanged. Nodes are
// pushed to the front of their new chain in old-bucket order.
//
// If the new bucket array cannot be allocated the table is left as it was:
// it keeps working, only with longer chains.
void ExpandIfRequired(HashBucket *pHB)
{
    if (pHB->size < pHB->capacity) {
        return;
    }

    // A table created with zero buckets grows to one bucket instead of staying at zero.
    int newCapacity = pHB->capacity > 0 ? pHB->capacity * 2 : 1;

    // calloc zero-fills the array, which the code treats as all-NULL chain heads.
    Element *newArray = (Element *)calloc((size_t)newCapacity, sizeof(Element));
    if (newArray == NULL) {
        return;
    }

    // Walk every old bucket and move each node of its chain.
    for (int i = 0; i < pHB->capacity; i++) {
        ListNode *pNode = pHB->array[i].pFirst;
        while (pNode != NULL) {
            // Save the successor first: relinking overwrites pNode->pNext.
            ListNode *pNext = pNode->pNext;
            int index = pHB->hashFunc(pNode->key, newCapacity);
            pNode->pNext = newArray[index].pFirst;
            newArray[index].pFirst = pNode;
            pNode = pNext;
        }
    }

    free(pHB->array);
    pHB->array = newArray;
    pHB->capacity = newCapacity;
}
// Add `key` to the table.
//
// Returns 1 when the key was inserted, -1 when it was already present.
// The table is given the chance to grow before the bucket is chosen, so the
// bucket index is always computed against the current capacity.
int Insert(HashBucket *pHB, Key key)
{
    ExpandIfRequired(pHB);

    Element *bucket = &pHB->array[pHB->hashFunc(key, pHB->capacity)];
    const int alreadyPresent = ListSearch(bucket->pFirst, key) != -1;

    if (!alreadyPresent) {
        ListPushFront(&bucket->pFirst, key);
        pHB->size++;
        return 1;
    }
    return -1;
}
// Search the chain starting at `pFirst` for `key`.
//
// Returns 1 if a node with an equal key exists, -1 otherwise (including for
// an empty chain).
//
// Keys are C strings and are compared by content with strcmp(), so equal
// text in different buffers counts as the same key. Identical pointers
// (including two NULL keys) match without being dereferenced.
int ListSearch(ListNode *pFirst, Key key)
{
    for (ListNode *pNode = pFirst; pNode != NULL; pNode = pNode->pNext) {
        if (pNode->key == key
            || (pNode->key != NULL && key != NULL && strcmp(pNode->key, key) == 0)) {
            return 1;
        }
    }
    return -1;
}
// Prepend a new node carrying `key` to the chain whose head pointer is
// *ppFirst; *ppFirst is updated to point at the new node.
//
// Only the key pointer is stored. Allocation failure is reported through
// assert().
void ListPushFront(ListNode **ppFirst, Key key)
{
    ListNode *node = (ListNode *)malloc(sizeof(ListNode));
    assert(node);

    // The previous head becomes the successor of the new node.
    node->pNext = *ppFirst;
    node->key = key;

    *ppFirst = node;
}
// Remove `key` from the table.
//
// Returns 1 if a matching node was found, unlinked and freed; returns -1 if
// the key is not present.
//
// Keys are C strings and are compared by content with strcmp(); identical
// pointers (including two NULL keys) match without being dereferenced.
int Remove(HashBucket *pHB, Key key)
{
    int index = pHB->hashFunc(key, pHB->capacity);

    // ppLink always points at the pointer that leads to the current node
    // (the bucket head or the previous node's pNext), so unlinking the head
    // and unlinking an inner node are the same operation.
    ListNode **ppLink = &pHB->array[index].pFirst;
    while (*ppLink != NULL) {
        ListNode *pNode = *ppLink;
        if (pNode->key == key
            || (pNode->key != NULL && key != NULL && strcmp(pNode->key, key) == 0)) {
            *ppLink = pNode->pNext;
            free(pNode);
            pHB->size--;
            return 1;
        }
        ppLink = &pNode->pNext;
    }
    return -1;
}
// Division-method hash for string keys: folds the characters of `key` into
// an unsigned number and reduces it modulo `capacity`.
//
// key      - NUL-terminated string; a NULL key hashes to bucket 0.
// capacity - number of buckets; must be positive (asserted).
//
// The result is always in [0, capacity), and strings with equal content
// always land in the same bucket. Unsigned arithmetic is used so that
// overflow wraps instead of being undefined.
int 除留余数法(Key key, int capacity)
{
    assert(capacity > 0);

    unsigned int hash = 0;
    if (key != NULL) {
        for (const unsigned char *p = (const unsigned char *)key; *p != '\0'; p++) {
            // Multiply-and-add polynomial hash with multiplier 131.
            hash = hash * 131u + *p;
        }
    }
    return (int)(hash % (unsigned int)capacity);
}
位图:
#pragma once
#include<stdio.h>
#include<stdlib.h>
#include<assert.h>
// Local fixed-width aliases.
// NOTE(review): these assume unsigned char is 8 bits and unsigned int is 32
// bits, and they reuse names that <stdint.h> also defines - confirm this
// cannot clash on the target toolchain.
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
// Bitmap backed by an array of 32-bit words; bit `which` lives in word
// which / 32 at bit position which % 32.
typedef struct BitArray{
// Word storage, allocated zeroed by BitArrayInit.
uint32_t *array;
unsigned int capacity;// number of uint32_t words in array
// Number of bits requested at initialisation.
unsigned int size;
}BitArray;
// Initialise *pBA as a bitmap able to hold `size` bits, all cleared.
//
// pBA  - bitmap to initialise; must not be NULL (asserted).
// size - number of bits required.
//
// The word count is size / 32 rounded up. The remainder is taken modulo 32
// (bits per word), not modulo sizeof(uint32_t): with the latter a size such
// as 36 would get a single word and bits 32..35 would fall outside it.
// Allocation failure is reported through assert().
void BitArrayInit(BitArray *pBA, unsigned int size)
{
    assert(pBA);

    unsigned int capacity = size / 32;
    if (size % 32 != 0) {
        capacity += 1;
    }

    // calloc returns zeroed memory, so every bit starts at 0. At least one
    // word is requested because calloc(0, ...) may legitimately return NULL.
    pBA->array = (uint32_t *)calloc(capacity != 0 ? capacity : 1, sizeof(uint32_t));
    assert(pBA->array);
    pBA->capacity = capacity;
    pBA->size = size;
}
// Release the word storage owned by *pBA.
//
// The bitmap is reset to an empty, unallocated state afterwards, so a
// second BitArrayDestroy is harmless and no stale pointer is left behind.
// A NULL pBA is ignored.
void BitArrayDestroy(BitArray *pBA)
{
    if (pBA == NULL) {
        return;
    }
    free(pBA->array);
    pBA->array = NULL;
    pBA->capacity = 0;
    pBA->size = 0;
}
// Set bit number `which` to 1.
//
// A bit whose word lies outside the allocated storage is ignored instead of
// writing out of bounds.
void Set1(BitArray *pBA, unsigned int which)
{
    unsigned int index = which / 32;
    unsigned int shift = which % 32;

    if (index >= pBA->capacity) {
        return;
    }
    // The mask is built from an unsigned 32-bit 1: shifting a signed int 1
    // left by 31 is undefined behaviour.
    pBA->array[index] |= (uint32_t)1 << shift;
}
// Clear bit number `which` (set it to 0).
//
// A bit whose word lies outside the allocated storage is ignored instead of
// writing out of bounds.
void Set0(BitArray *pBA, unsigned int which)
{
    unsigned int index = which / 32;
    unsigned int shift = which % 32;

    if (index >= pBA->capacity) {
        return;
    }
    // The mask is built from an unsigned 32-bit 1: shifting a signed int 1
    // left by 31 is undefined behaviour.
    pBA->array[index] &= ~((uint32_t)1 << shift);
}
// Report whether bit number `which` is set.
//
// Returns 1 if the bit is 1 and 0 if it is 0. A bit whose word lies outside
// the allocated storage is reported as 0 instead of reading out of bounds.
int IsSet(BitArray *pBA, unsigned int which)
{
    unsigned int index = which / 32;
    unsigned int shift = which % 32;

    if (index >= pBA->capacity) {
        return 0;
    }
    // Shift the wanted bit down to position 0 and mask it. Everything stays
    // unsigned, which avoids the undefined signed shift of 1 << 31.
    return (int)((pBA->array[index] >> shift) & (uint32_t)1);
}
// Smoke test for the bitmap: creates a bitmap sized for (unsigned int)-1
// bits, sets bits 3, 5, 7 and 9, prints the state of bit 8 (expected 0) and
// bit 9 (expected 1), then releases the bitmap so the storage is not leaked.
void Test()
{
    BitArray ba;
    BitArrayInit(&ba, (unsigned int)-1);

    unsigned int array[] = { 3, 5, 7, 9 };
    for (unsigned int i = 0; i < sizeof(array) / sizeof(array[0]); i++) {
        Set1(&ba, array[i]);
    }

    unsigned int n = 8;
    printf("%d\n", IsSet(&ba, n));
    n = 9;
    printf("%d\n", IsSet(&ba, n));

    BitArrayDestroy(&ba);
}