哈希概念
如果构造一种存储结构,通过某种函数(hashFunc)使元素的存储位置与它的关键码之间能够建立一一映射的关系,那么在查找时通过该函数可以很快找到该元素。当向该结构中:
- 插入元素时:根据待插入元素的关键码,以此函数计算出该元素的存储位置并按此位置进行存放
- 搜索元素时:对元素的关键码进行同样的计算,把求得的函数值当做元素的存储位置,在结构中按此位置取元素比较,若关键码相等,则搜索成功。
该方式即为哈希(散列)方法。哈希方法中使用的转换函数称为哈希(散列)函数,构造出来的结构称为哈希表(散列表)。
哈希冲突
对于两个数据元素的关键字Ki和Kj(i != j),有Ki != Kj,但有:HashFunc(Ki) == HashFunc(Kj),即不同关键字通过相同哈希函数计算出相同的哈希地址,该种现象称为哈希冲突或哈希碰撞。把具有不同关键码而具有相同哈希地址的数据元素称为“同义词”。
常见的哈希函数
- 直接定址法
取关键字的某个线性函数为散列地址:Hash(key)=A*key+B
- 除留余数法
设散列表中允许的地址数为m,取一个不大于m,但最接近或者等于m的质数p作为除数,按照哈希函数:Hash(key)=key%p(p<=m),将关键码转换成哈希地址。
- 平方取中法
假设关键字为1234,对它平方就是1522756,抽取中间的3位227作为哈希地址。
- 折叠法
折叠法是将关键字从左到右分割成位数相等的几部分(最后一部分位数可以短些),然后将这几部分叠加求和,并按散列表表长,取后几位作为散列地址。
- 随机数法
选择一个随机函数,取关键字的随机函数值为它的哈希地址,即H(key)=random(key),其中random为随机数函数。
- 数字分析法
设有n个d位数,每一位可能有r种不同的符号,这r种不同的符号在各位上出现频率不一定相同,可能在某些位上分布较均匀,每种符号出现机会均等,在某些位上分布不均匀只有某几种符号经常出现。根据散列表大小,选择其中各种符号分布均匀的若干位作为散列地址。
处理哈希冲突
闭散列–线性探测相关代码:
// Type of the keys stored in the hash table.
typedef int Key;
// Pointer to a hash function. The table code calls it as hashFunc(key, capacity)
// and uses the result directly as an index into the slot array.
typedef int(*HashFunc)(Key, int);
// State of one slot in the closed-hashing (linear probing) table.
typedef enum
{
EMPTY, // probe loops in Search/Remove stop when they reach a slot in this state
EXIST, // slot currently holds a key (set by Insert)
DELETED // set by Remove on a slot that held a key; probing continues past it
}State;
// One slot of the closed-hashing table: a key plus the slot's state.
typedef struct Element
{
Key key; // HashInit does not initialize this; it is only compared while state == EXIST
State state; // EMPTY, EXIST or DELETED
}Element;
// Closed-hashing table whose collisions are resolved by linear probing.
typedef struct HashTable
{
Element *array; // slot array allocated by HashInit and released by HashDestroy
int size; // incremented by Insert, decremented by Remove
int capacity; // number of slots in array
HashFunc hashFunc; // maps a key to the slot index where probing starts
}HashTable;
// Set up a closed-hashing table with `capacity` slots, every slot marked EMPTY.
// pHT      : table to initialize; asserted to be non-NULL.
// capacity : number of slots to allocate.
// hashFunc : hash function stored in the table for later lookups.
// The allocation result is checked with assert only.
void HashInit(HashTable *pHT, int capacity, HashFunc hashFunc)
{
    assert(pHT);

    Element *slots = (Element*)malloc(sizeof(Element)*capacity);
    pHT->array = slots;
    assert(pHT->array);

    pHT->hashFunc = hashFunc;
    pHT->capacity = capacity;
    pHT->size = 0;

    // Only the state is initialized; keys stay indeterminate until Insert fills them.
    int slot = 0;
    while (slot < capacity)
    {
        pHT->array[slot].state = EMPTY;
        ++slot;
    }
}
// Release the slot array owned by the table.
// After the call the table holds no storage: array is NULL and size/capacity
// are 0, so a repeated HashDestroy is harmless instead of a double free.
// A NULL pHT is ignored.
void HashDestroy(HashTable *pHT)
{
    if (pHT == NULL)
    {
        return;
    }
    free(pHT->array); // free(NULL) is a no-op, so no guard is needed
    pHT->array = NULL;
    pHT->size = 0;
    pHT->capacity = 0;
}
// Look up `key` with linear probing.
// Returns the number of slots examined (>= 1) when the key is found,
// or -1 when it is not in the table.
int Search(HashTable *pHT, Key key)
{
    if (pHT == NULL || pHT->array == NULL || pHT->capacity <= 0)
    {
        return -1;
    }

    int capacity = pHT->capacity;
    int index = pHT->hashFunc(key, capacity); // slot where probing starts

    // Examine at most `capacity` slots. Remove leaves DELETED markers behind,
    // so a table can end up without any EMPTY slot; stopping only on EMPTY
    // would then loop forever.
    for (int count = 1; count <= capacity; count++)
    {
        const Element *slot = &pHT->array[index];
        if (slot->state == EMPTY)
        {
            break; // end of the probe chain
        }
        if (slot->state == EXIST && slot->key == key)
        {
            return count;
        }
        index = (index + 1) % capacity;
    }
    return -1;
}
// Forward declaration: ExpandIfRequired re-inserts keys through Insert,
// and Insert calls ExpandIfRequired.
int Insert(HashTable *pHT, Key key);
// Grow the table to twice its capacity once size * 10 / capacity reaches 7.
// Every EXIST key is re-inserted into the new slot array; DELETED slots are
// not carried over. The old array is freed and pHT->size is left unchanged.
void ExpandIfRequired(HashTable *pHT)
{
    if (pHT->size * 10 / pHT->capacity >= 7)
    {
        HashTable grown;
        HashInit(&grown, pHT->capacity * 2, pHT->hashFunc);

        int slot = 0;
        while (slot < pHT->capacity)
        {
            const Element *entry = &pHT->array[slot];
            if (entry->state == EXIST)
            {
                Insert(&grown, entry->key);
            }
            ++slot;
        }

        // Adopt the new storage; only the array and capacity change.
        free(pHT->array);
        pHT->capacity = grown.capacity;
        pHT->array = grown.array;
    }
}
// Insert `key` using linear probing.
// Returns 1 when the key was stored, -1 when the key is already present
// (or when no reusable slot exists).
int Insert(HashTable *pHT, Key key)
{
    ExpandIfRequired(pHT); // may replace the array and capacity

    int capacity = pHT->capacity;
    int index = pHT->hashFunc(key, capacity);
    int target = -1; // first reusable (DELETED or EMPTY) slot on the probe path

    // Walk the whole probe chain before storing. Stopping at the first
    // DELETED slot would miss a copy of `key` stored further along the chain
    // and create a duplicate.
    for (int probes = 0; probes < capacity; probes++)
    {
        const Element *slot = &pHT->array[index];
        if (slot->state == EMPTY)
        {
            if (target < 0)
            {
                target = index;
            }
            break; // end of the chain: the key is not in the table
        }
        if (slot->state == DELETED)
        {
            if (target < 0)
            {
                target = index; // remember it, but keep looking for a duplicate
            }
        }
        else if (slot->key == key)
        {
            return -1; // live slot already holds this key
        }
        index = (index + 1) % capacity;
    }

    if (target < 0)
    {
        return -1; // every slot holds a live key
    }

    pHT->array[target].key = key;
    pHT->array[target].state = EXIST;
    pHT->size++;
    return 1;
}
// Remove `key` from the table.
// The slot is marked DELETED rather than EMPTY so probe chains passing
// through it stay intact.
// Returns 1 when the key was removed, -1 when it was not found.
int Remove(HashTable *pHT, Key key)
{
    if (pHT == NULL || pHT->array == NULL || pHT->capacity <= 0)
    {
        return -1;
    }

    int capacity = pHT->capacity;
    int index = pHT->hashFunc(key, capacity);

    // Bounded by `capacity`: a table without any EMPTY slot (only live keys
    // and DELETED markers) would otherwise never end the loop.
    for (int probes = 0; probes < capacity; probes++)
    {
        Element *slot = &pHT->array[index];
        if (slot->state == EMPTY)
        {
            break; // end of the probe chain
        }
        if (slot->state == EXIST && slot->key == key)
        {
            slot->state = DELETED;
            pHT->size--;
            return 1;
        }
        index = (index + 1) % capacity;
    }
    return -1;
}
// Division-remainder hash: maps `key` to an index in [0, capacity).
// `capacity` must be greater than 0.
int 除留余数法(Key key, int capacity)
{
    int index = key % capacity;
    // In C the remainder takes the sign of the dividend, so a negative key
    // yields a negative remainder; shift it into the valid index range.
    if (index < 0)
    {
        index += capacity;
    }
    return index;
}
开散列相关代码:
// Type of the keys stored in the bucket table.
typedef int Key;
// Pointer to a hash function. The bucket code calls it as hashFunc(key, capacity)
// and uses the result directly as a bucket index.
typedef int(*HashFunc)(Key, int);
// Node of a bucket's singly linked chain.
typedef struct ListNode
{
Key key; // key stored in this node
struct ListNode *pNext; // next node in the same bucket, NULL at the end of the chain
}ListNode;
// One bucket: the head pointer of a chain of ListNode.
// NOTE(review): the closed-hashing listing also defines Element, Search, Insert,
// Remove and ExpandIfRequired with different types, so the two listings cannot
// share one translation unit as written.
typedef struct Element
{
ListNode *pFirst; // first node of the chain, NULL when the bucket is empty
}Element;
// Open-hashing table: an array of buckets, each holding a linked chain of keys.
typedef struct HashBucket
{
Element *array; // bucket array allocated by HashBucketInit
int size; // presumably the number of stored keys; compared with capacity by ExpandIfRequired and decremented by Remove
int capacity; // number of buckets in array
HashFunc hashFunc; // maps a key to a bucket index
}HashBucket;
// Set up a bucket table with `capacity` empty buckets.
// pHB      : table to initialize; asserted to be non-NULL.
// capacity : number of buckets to allocate.
// hashFunc : hash function stored in the table for later lookups.
// The allocation result is checked with assert before it is touched, matching
// the policy used by HashInit in the closed-hashing listing.
void HashBucketInit(HashBucket *pHB, int capacity, HashFunc hashFunc)
{
    assert(pHB);
    pHB->array = (Element *)malloc(sizeof(Element)*capacity);
    assert(pHB->array);
    pHB->size = 0;
    pHB->capacity = capacity;
    pHB->hashFunc = hashFunc;
    // Every bucket starts as an empty chain.
    for (int i = 0; i < capacity; i++)
    {
        pHB->array[i].pFirst = NULL;
    }
}
// Look up `key` in its bucket chain.
// Returns the number of nodes examined (>= 1) when the key is found,
// or -1 when it is not in the table.
int Search(HashBucket *pHB, Key key)
{
    int index = pHB->hashFunc(key, pHB->capacity);
    int count = 1;
    const ListNode *pNode;
    // The loop step must assign the next node; a bare `pNode->pNext`
    // expression never advances the traversal.
    for (pNode = pHB->array[index].pFirst; pNode != NULL; pNode = pNode->pNext)
    {
        if (pNode->key == key)
        {
            return count;
        }
        count++;
    }
    return -1;
}
// Double the number of buckets once the table holds at least as many keys
// as it has buckets.
// Existing nodes are relinked into the new bucket array, so nothing is
// allocated per key and no node is leaked; the old bucket array is freed.
// pHB->size is left unchanged.
void ExpandIfRequired(HashBucket *pHB)
{
    if (pHB->size < pHB->capacity)
    {
        return; // still below one key per bucket
    }

    HashBucket newHB;
    int newCapacity = pHB->capacity > 0 ? pHB->capacity * 2 : 1;
    HashBucketInit(&newHB, newCapacity, pHB->hashFunc);

    for (int i = 0; i < pHB->capacity; i++)
    {
        ListNode *pNode = pHB->array[i].pFirst;
        while (pNode != NULL)
        {
            ListNode *pNext = pNode->pNext; // save before the link is overwritten
            int index = pHB->hashFunc(pNode->key, newCapacity);
            // Push the node onto the front of its new bucket.
            pNode->pNext = newHB.array[index].pFirst;
            newHB.array[index].pFirst = pNode;
            pNode = pNext;
        }
    }

    free(pHB->array);
    pHB->array = newHB.array;
    pHB->capacity = newHB.capacity;
}
// Insert `key` into its bucket chain.
// Returns 1 when the key was stored, -1 when it is already present or the
// node could not be allocated.
int Insert(HashBucket *pHB, Key key)
{
    ExpandIfRequired(pHB); // may replace the bucket array and capacity

    int index = pHB->hashFunc(key, pHB->capacity);
    ListNode **ppFirst = &(pHB->array[index].pFirst);

    // Scan the whole chain for a duplicate before inserting anything.
    for (const ListNode *pNode = *ppFirst; pNode != NULL; pNode = pNode->pNext)
    {
        if (pNode->key == key)
        {
            return -1;
        }
    }

    ListNode *pNewNode = (ListNode *)malloc(sizeof(ListNode));
    if (pNewNode == NULL)
    {
        return -1;
    }

    // Push onto the front of the chain; this also covers an empty bucket.
    pNewNode->key = key;
    pNewNode->pNext = *ppFirst;
    *ppFirst = pNewNode;
    pHB->size++;
    return 1;
}
// Remove `key` from its bucket chain and free its node.
// Returns 1 when the key was removed, -1 when it was not found.
int Remove(HashBucket *pHB, Key key)
{
    int index = pHB->hashFunc(key, pHB->capacity);
    // ppLink points at the link that leads to the current node: the bucket
    // head first, then each node's pNext. Unlinking is therefore the same
    // operation for the first node and for any later node.
    ListNode **ppLink = &(pHB->array[index].pFirst);
    while (*ppLink != NULL)
    {
        ListNode *pNode = *ppLink;
        if (pNode->key == key)
        {
            *ppLink = pNode->pNext;
            free(pNode);
            pHB->size--;
            return 1;
        }
        ppLink = &pNode->pNext;
    }
    return -1;
}
哈希变形—位图
// Bitmap storage: N words, each holding sizeof(unsigned int) * 8 bits.
// N is not defined in this listing.
unsigned int bit[N];
在这个数组里面,可以存储 N * sizeof(int) * 8个数据,但是最大的数只能是N * sizeof(int) * 8 - 1。假如,我们要存储的数据范围为0-15,则我们只需要使得N=1,这样就可以把数据存进去。如下图:
位图的相关操作:
// Local fixed-width aliases; uint8_t is not used by the visible bitmap code.
// NOTE(review): <stdint.h> provides both names; using that header instead would
// avoid a clash where its definitions differ from these. The bitmap code
// assumes uint32_t is 32 bits wide (it divides bit positions by 32).
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
// Bitmap stored as an array of 32-bit words.
typedef struct BitArray {
uint32_t *array; // word storage allocated by BitArrayInit
unsigned int capacity; // number of words in array
unsigned int size; // number of valid bits
} BitArray;
// Initialize a bitmap that can hold `size` bits, all cleared to 0.
// pBA  : bitmap to initialize.
// size : number of valid bits (valid positions are 0 .. size - 1).
// pBA->array is NULL when the allocation fails, and may be NULL when size is 0.
void BitArrayInit(BitArray *pBA, unsigned int size)
{
    // One 32-bit word per 32 bits, rounded up. The remainder must be taken
    // modulo 32 (bits per word), not modulo sizeof(uint32_t), which is the
    // byte count and would under-allocate for sizes such as 36.
    unsigned int capacity = size / 32;
    if (size % 32 != 0)
    {
        capacity += 1;
    }
    pBA->capacity = capacity;
    pBA->array = (uint32_t *)calloc(capacity, sizeof(uint32_t));
    pBA->size = size;
}
// Release the bitmap's word storage.
// After the call array is NULL and capacity/size are 0, so a repeated
// Destroy is harmless instead of a double free. A NULL pBA is ignored.
void Destroy(BitArray *pBA)
{
    if (pBA == NULL)
    {
        return;
    }
    free(pBA->array); // free(NULL) is a no-op, so no guard is needed
    pBA->array = NULL;
    pBA->capacity = 0;
    pBA->size = 0;
}
// Set bit `which` to 1.
// Positions outside [0, pBA->size) are ignored so the word array is never
// written out of bounds.
void Set1(BitArray *pBA, unsigned int which)
{
    if (pBA == NULL || pBA->array == NULL || which >= pBA->size)
    {
        return;
    }
    unsigned int index = which / 32;
    unsigned int shift = which % 32;
    // The mask is built from an unsigned 1: shifting a signed int 1 left by 31
    // is undefined behavior.
    pBA->array[index] |= ((uint32_t)1 << shift);
}
// Clear bit `which` to 0.
// Positions outside [0, pBA->size) are ignored so the word array is never
// written out of bounds.
void Set0(BitArray *pBA, unsigned int which)
{
    if (pBA == NULL || pBA->array == NULL || which >= pBA->size)
    {
        return;
    }
    unsigned int index = which / 32;
    unsigned int shift = which % 32;
    // The mask is built from an unsigned 1: shifting a signed int 1 left by 31
    // is undefined behavior.
    pBA->array[index] &= ~((uint32_t)1 << shift);
}
// Test bit `which`.
// Returns 0 when the bit is 0 and a non-zero value (1) when the bit is 1.
// Positions outside [0, pBA->size) are reported as 0.
int IsSet(BitArray *pBA, unsigned int which)
{
    if (pBA == NULL || pBA->array == NULL || which >= pBA->size)
    {
        return 0;
    }
    unsigned int index = which / 32;
    unsigned int shift = which % 32;
    // Shift the bit down to position 0 instead of returning the masked word:
    // for bit 31 the masked word does not fit in an int.
    return (int)((pBA->array[index] >> shift) & 1u);
}