搜索结构之哈希
哈希概念
顺序搜索以及二叉搜索树中,元素存储位置和元素各关键码之间没有对应的关系,因此在查找一个元素时,必须要经过关键码的多次比较。搜索的效率取决于搜索过程中元素的比较次数。
理想的搜索方法:可以不经过任何比较,一次直接从表中得到要搜索的元素。 如果构造一种存储结构,通过某种函数(hashFunc)使元素的存储位置与它的关键码之间能够建立一一映射的关系,那么在查找时通过该函数可以很快找到该元素。
当向该结构中:
插入元素时:根据待插入元素的关键码,以此函数计算出该元素的存储位置并按此位置进行存放
搜索元素时:对元素的关键码进行同样的计算,把求得的函数值当做元素的存储位置,在结构中按此位置取元素比较,若关键码相等,则搜索成功
该方式即为哈希(散列)方法,哈希方法中使用的转换函数称为哈希(散列)函数,构造出来的结构称为哈希表(Hash Table)(或者称散列表)。
常见哈希函数
直接定址法 取关键字的某个线性函数为散列地址:Hash(Key)= A*Key + B 优点:简单、均匀 缺点:需要事先知道关键字的分布情况 适合查找比较小且连续的情况 面试题:找出一个字符串中第一个只出现一次的字符,要求:时间复杂度O(N),空间复杂度O(1)
除留余数法 设散列表中允许的地址数为m,取一个不大于m,但接近或者等于m的质数p作为除数,按照哈希函数:Hash(key) = key % p(p<=m),将关键码转换成哈希地址
平方取中法 假设关键字为1234,对它平方就是1522756,抽取中间的3位227作为哈希地址; 再比如关键字为4321,对它平方就是18671041,抽取中间的3位671(或710)作为哈希地址 平方取中法比较适合:不知道关键字的分布,而位数又不是很大的情况
折叠法 折叠法是将关键字从左到右分割成位数相等的几部分(后一部分位数可以短些),然后将这几部分叠加求和,并按散列表表长,取后几位作为散列地址 折叠法适合事先不需要知道关键字的分布,适合关键字位数比较多的情况
随机数法 选择一个随机函数,取关键字的随机函数值为它的哈希地址,即H(key) = random(key),其中random为随机数函数 通常在关键字长度不等时采用此法
数学分析法 设有n个d位数,每一位可能有r种不同的符号,这r种不同的符号在各位上出现的频率不一定相同,可能在某些位上分布比较均匀,每种符号出现的机会均等,在某些位上分布不均匀只有某几种符号经常出现。可根据散列表的大小,选择其中各种符号分布均匀的若干位作为散列地址。
代码实现:
common.h
------
#pragma once
#include <stdlib.h>
// Pointer to a function that converts an int key into a size_t value; the
// hash tables reduce that value modulo their capacity to pick a slot.
typedef size_t (*PDTInt)(int data);
// Returns the first entry of the capacity table that is greater than
// `capacity`, or the largest entry when none is greater.
size_t GetNextPrime(size_t capacity);
// Hashes a NUL-terminated string into a value that fits in 31 bits.
size_t DataToIntStr(const char * str);
// Returns `data` converted to size_t (identity conversion for int keys).
size_t DataToIntInt(int data);
common.c
--------
#define _CRT_SECURE_NO_WARNINGS 1
#include "Common.h"
// Number of entries in _PrimeList.
#define _PrimeSize 28
// Ascending table of candidate capacities consulted by GetNextPrime.
// NOTE(review): presumably all primes (this looks like the prime list used by
// STL hash tables) — not verified here. The largest entries exceed INT_MAX
// where int is 32 bits, while the tables in this project store their
// capacity in an int.
static const unsigned long _PrimeList[_PrimeSize] =
{
53ul, 97ul, 193ul, 389ul, 769ul,
1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
1610612741ul, 3221225473ul, 4294967291ul
};
// Picks a capacity from _PrimeList.
// Returns the first entry that is strictly greater than `capacity`; when no
// entry is greater, the last (largest) entry is returned.
size_t GetNextPrime(size_t capacity)
{
    const unsigned long* pCandidate = _PrimeList;
    const unsigned long* const pLast = _PrimeList + (_PrimeSize - 1);

    // Stop at the first entry above `capacity`, or at the last entry.
    while (pCandidate < pLast && !(*pCandidate > capacity))
        ++pCandidate;
    return *pCandidate;
}
// Hashes a NUL-terminated string: for each character the accumulator is
// multiplied by 131 and the character is added. The accumulator is an
// unsigned int, so it wraps on overflow; the low 31 bits are returned.
size_t DataToIntStr(const char * str)
{
    const unsigned int multiplier = 131; // alternatives: 31 131 1313 13131 131313
    unsigned int acc = 0;
    const char* pCh;

    for (pCh = str; *pCh != '\0'; ++pCh)
        acc = acc * multiplier + *pCh;
    return (acc & 0x7FFFFFFF);
}
// Identity key conversion: returns `data` converted to size_t, so a negative
// value comes back as a large unsigned value.
size_t DataToIntInt(int data)
{
    size_t key = data;
    return key;
}
HashBucket.h
------
#pragma once
#include "Common.h"
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <assert.h>
// Type of the values stored in the hash bucket table.
typedef int DataType;
// One node of a bucket's singly linked chain.
typedef struct HashBucketElem
{
// Next node in the same bucket, or NULL at the end of the chain.
struct HashBucketElem* _pNext;
// Value stored in this node.
DataType _data;
}HBE;
// Hash table that resolves collisions by chaining nodes inside each bucket.
typedef struct HashBucket
{
// Array of _capacity bucket heads; each entry is NULL or the first node of a chain.
HBE** _table;
// Number of buckets in _table.
int _capacity;
// Number of stored nodes; incremented by the insert functions.
int _size;
// Converts a value to the size_t that is reduced modulo _capacity.
PDTInt _pDTInt;
}HashBucket;
// Sets up ht with GetNextPrime(capacity) empty buckets and the given key converter.
void HashBucketInit(HashBucket* ht,int capacity, PDTInt _pDTInt);
// Inserts data unless an equal value is already present in its bucket.
void HashBucketInsertUnique(HashBucket* ht, DataType data);
// Removes the first node equal to data from its bucket, if there is one.
void HashBucketDeleteUnique(HashBucket* ht, DataType data);
// Inserts data at the head of its bucket without checking for duplicates.
void HashBucketInsertEqual(HashBucket* ht, DataType data);
// Deletion counterpart of HashBucketInsertEqual for values equal to data.
void HashBucketDeleteEqual(HashBucket* ht, DataType data);
// Returns the number of stored values.
int HashBucketSize(HashBucket* ht);
// Returns non-zero when no value is stored.
int HashBucketEmpty(HashBucket* ht);
// Returns the number of buckets.
int HashBucketBucketCount(HashBucket* ht);
// Returns the number of nodes chained in bucket bucketNo.
int HashBucketBucketSize(HashBucket* ht, int bucketNo);
// Frees the bucket array and zeroes capacity and size.
void HashBucketDestroy(HashBucket* ht);
// Small demo: inserts a few values and prints the resulting size.
void TESTHBT();
HashBucket.c
------------
#define _CRT_SECURE_NO_WARNINGS 1
#include "HashBucket.h"
// Maps `data` to a bucket index: the table's key converter turns it into a
// size_t, which is then reduced modulo the bucket count.
int HashFunc1(HashBucket* ht, DataType data)
{
    size_t key = ht->_pDTInt(data);
    return key % ht->_capacity;
}
// Allocates a chain node holding `data` with no successor.
// Returns the new node; on allocation failure it asserts and, when asserts
// are compiled out, returns NULL.
HBE* BuyHashBucketNode(DataType data)
{
    HBE* pNode = (HBE*)malloc(sizeof(HBE));

    if (pNode != NULL)
    {
        pNode->_pNext = NULL;
        pNode->_data = data;
        return pNode;
    }
    assert(0);
    return NULL;
}
// Initializes `ht` as an empty chained hash table.
//   ht       - table to initialize (must not be NULL)
//   capacity - requested bucket count; rounded up via GetNextPrime
//   pDTInt   - converts a value into the size_t used to select a bucket
// If the bucket array cannot be allocated the function asserts; with asserts
// compiled out it returns with _table == NULL and _capacity == 0.
void HashBucketInit(HashBucket* ht, int capacity, PDTInt pDTInt)
{
    assert(ht);

    // Put every field into a defined state first, so a failed allocation
    // does not leave indeterminate values behind.
    ht->_table = NULL;
    ht->_capacity = 0;
    ht->_size = 0;
    ht->_pDTInt = pDTInt;

    capacity = GetNextPrime(capacity);
    // calloc zero-fills, so every bucket starts as an empty (NULL) chain.
    ht->_table = (HBE**)calloc(capacity, sizeof(HBE*));
    if (NULL == ht->_table)
    {
        assert(0);
        return;
    }
    ht->_capacity = capacity;
}
// Inserts `data` into `ht` unless an equal value is already stored.
// The new node is pushed at the head of its bucket. If the node cannot be
// allocated the table is left unchanged.
void HashBucketInsertUnique(HashBucket* ht, DataType data)
{
    int bucketNo = -1;
    HBE* pCur = NULL;
    assert(ht);
    bucketNo = HashFunc1(ht, data);

    // Reject duplicates: scan the whole chain of the target bucket.
    for (pCur = ht->_table[bucketNo]; pCur; pCur = pCur->_pNext)
    {
        if (data == pCur->_data)
            return;
    }

    // Insert the element at the head of the chain.
    pCur = BuyHashBucketNode(data);
    if (NULL == pCur)
        return; // allocation failed; do not dereference NULL
    pCur->_pNext = ht->_table[bucketNo];
    ht->_table[bucketNo] = pCur;
    ht->_size++;
}
// Removes the first node equal to `data` from its bucket and decrements the
// size; does nothing when no such node exists.
void HashBucketDeleteUnique(HashBucket* ht, DataType data)
{
    HBE** ppLink = NULL;
    assert(ht);

    // ppLink always addresses the pointer that leads to the current node
    // (the bucket head or a predecessor's _pNext), so unlinking is uniform.
    for (ppLink = &ht->_table[HashFunc1(ht, data)]; *ppLink != NULL; ppLink = &(*ppLink)->_pNext)
    {
        HBE* pVictim = *ppLink;
        if (pVictim->_data != data)
            continue;
        *ppLink = pVictim->_pNext;
        free(pVictim);
        ht->_size--;
        return;
    }
}
// Inserts `data` into `ht` without checking for duplicates.
// The new node is pushed at the head of its bucket. If the node cannot be
// allocated the table is left unchanged.
void HashBucketInsertEqual(HashBucket* ht, DataType data)
{
    int bucketNo = -1;
    HBE* pNew = NULL;
    assert(ht);
    bucketNo = HashFunc1(ht, data);

    // Insert the element at the head of the chain.
    pNew = BuyHashBucketNode(data);
    if (NULL == pNew)
        return; // allocation failed; do not dereference NULL
    pNew->_pNext = ht->_table[bucketNo];
    ht->_table[bucketNo] = pNew;
    ht->_size++;
}
// Removes every node equal to `data` from its bucket.
// The size is decremented once per removed node; the table is left untouched
// when the bucket is empty or holds no matching value.
void HashBucketDeleteEqual(HashBucket* ht, DataType data)
{
    HBE** ppLink = NULL;
    assert(ht);

    // ppLink addresses the pointer that leads to the current node (the bucket
    // head or a predecessor's _pNext), so head and interior removals are the
    // same operation.
    ppLink = &ht->_table[HashFunc1(ht, data)];
    while (*ppLink)
    {
        HBE* pCur = *ppLink;
        if (data == pCur->_data)
        {
            // Unlink and free; ppLink stays put because it now leads to the
            // successor, which still has to be examined.
            *ppLink = pCur->_pNext;
            free(pCur);
            ht->_size--;
        }
        else
        {
            ppLink = &pCur->_pNext;
        }
    }
}
// Returns the number of values currently stored in `ht`.
int HashBucketSize(HashBucket* ht)
{
    int stored;
    assert(ht);
    stored = ht->_size;
    return stored;
}
// Returns 1 when `ht` stores no values, 0 otherwise.
int HashBucketEmpty(HashBucket* ht)
{
    assert(ht);
    if (ht->_size == 0)
        return 1;
    return 0;
}
// Returns the number of buckets in `ht`.
int HashBucketBucketCount(HashBucket* ht)
{
    // Same NULL-pointer contract check as the other accessors.
    assert(ht);
    return ht->_capacity;
}
// Returns the number of nodes chained in bucket `bucketNo`.
// A bucket number outside [0, _capacity) addresses no bucket and yields 0
// instead of reading outside the bucket array.
int HashBucketBucketSize(HashBucket* ht, int bucketNo)
{
    HBE* pCur = NULL;
    int count = 0;
    assert(ht);
    if (bucketNo < 0 || bucketNo >= ht->_capacity)
        return 0;
    for (pCur = ht->_table[bucketNo]; pCur; pCur = pCur->_pNext)
        ++count;
    return count;
}
// Releases everything owned by `ht`: every chained node and the bucket array.
// Afterwards the table is empty with _table == NULL, so calling this function
// again is harmless.
void HashBucketDestroy(HashBucket* ht)
{
    assert(ht);
    if (ht->_table)
    {
        int bucketNo;
        // The nodes are separate allocations; freeing only the bucket array
        // would leak them.
        for (bucketNo = 0; bucketNo < ht->_capacity; ++bucketNo)
        {
            HBE* pCur = ht->_table[bucketNo];
            while (pCur)
            {
                HBE* pNext = pCur->_pNext;
                free(pCur);
                pCur = pNext;
            }
        }
        free(ht->_table);
        ht->_table = NULL; // prevents a dangling pointer and a double free
    }
    ht->_capacity = 0;
    ht->_size = 0;
}
void TESTHBT()
{
HashBucket ht;
HashBucketInit(&ht, 10, DataToIntInt);
HashBucketInsertUnique(&ht, 1);
HashBucketInsertUnique(&ht, 2);
HashBucketInsertUnique(&ht, 12);
printf("size= %d \n", HashBucketSize(&ht));
HashBucketDestroy(&ht);
}
HashTable.h
-----------
#pragma once
#include "Common.h"
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <assert.h>
// Type of the values stored in the open-addressing hash table.
typedef int DataType;
// Slot state: never used, holding a value ("EXSIT" is the spelling used
// throughout this code), or previously used and since deleted.
typedef enum{EMPTY, EXSIT, DELETE}State;
// Probing strategy: LINE selects DetectiveLine (step to the next slot);
// any other value makes the table probe with Detective2.
typedef enum{LINE, DOUBLE}DETECTIVE;
// One slot of the table.
typedef struct Elem
{
// Stored value; only compared by the table when _state is EXSIT.
DataType _data;
// Current state of the slot.
State _state;
}Elem;
// Open-addressing hash table.
typedef struct HashTable
{
// Slot array with _capacity usable entries.
Elem* _table;
// Number of live values: incremented on insert, decremented on delete.
int _size;
// Number of slots in use for hashing and probing.
int _capacity;
// Incremented on every insert and not decremented by HashTableDelete;
// CheckCapacity compares it with _capacity to decide when to grow.
int _total;
// Converts a value to the size_t that is reduced modulo _capacity.
PDTInt _pDTInt;
// Probing strategy used on collisions.
DETECTIVE _IsLine;
}HashTable;
// Sets up ht with GetNextPrime(capacity) empty slots, the probing strategy and the key converter.
void HashTableInit(HashTable* ht, int capacity, DETECTIVE IsLine,PDTInt pDTInt);
// Inserts data unless an equal live value is found along its probe sequence.
void HashTableInsert(HashTable* ht, DataType data);
// Marks the slot holding data as DELETE, if data is present.
void HashTableDelete(HashTable* ht, DataType data);
// Returns the slot index holding data, or -1 when it is not found.
int HashTableFind(HashTable* ht, DataType data);
// Returns the number of live values.
int HashTableSize(HashTable* ht);
// Returns non-zero when no live value is stored.
int HashTableEmpty(HashTable* ht);
// Frees the slot array and zeroes the counters.
void HashTableDestroy(HashTable* ht);
// Demo: inserts ten values, prints the size and looks up 12.
void TestHashTable();
// Demo: inserts nine values, deletes four and prints the size before and after.
void TestHashTable1();
HashTable.c
-----------
#define _CRT_SECURE_NO_WARNINGS 1
#include "HashTable.h"
// Maps `data` to its home slot: the table's key converter turns it into a
// size_t, which is then reduced modulo the table capacity.
int HashFunc(HashTable* ht,DataType data)
{
    size_t key = ht->_pDTInt(data);
    return key % ht->_capacity;
}
// Linear probing step: returns the slot after `hashAddr`, wrapping to 0 when
// the step lands exactly on the capacity.
int DetectiveLine(HashTable* ht,int hashAddr)
{
    int next;
    assert(ht);
    next = hashAddr + 1;
    return (next == ht->_capacity) ? 0 : next;
}
// Quadratic-style probing step: advances `hashAddr` by 2*i + 1, where `i` is
// the probe count supplied by the caller, and wraps the result back into
// [0, _capacity).
// The step can overshoot the capacity by more than one slot, so the wrap must
// trigger for every index >= _capacity, not only for index == _capacity.
int Detective2(HashTable* ht,int hashAddr, int i)
{
    assert(ht);
    hashAddr = hashAddr + 2 * i + 1;
    if (hashAddr >= ht->_capacity)
        hashAddr = hashAddr % ht->_capacity;
    return hashAddr;
}
// Exchanges the two ints addressed by `left` and `right`; both pointers must
// be non-NULL.
void _Swap(int* left, int* right)
{
    int saved;
    assert(left);
    assert(right);
    saved = *right;
    *right = *left;
    *left = saved;
}
// Exchanges the complete contents of two hash tables.
// The structs are swapped as whole objects: routing the table pointer or the
// enum through an int* would exchange only sizeof(int) bytes (truncating
// pointers where they are wider than int) and would access the objects
// through an incompatible type. Copying the whole struct also carries over
// the key-converter pointer.
void Swap(HashTable* lht, HashTable* rht)
{
    HashTable temp;
    assert(lht);
    assert(rht);
    temp = *lht;
    *lht = *rht;
    *rht = temp;
}
// Earlier version of CheckCapacity, compiled out by #if 0: it rebuilds the
// table directly into a buffer twice the old capacity.
// NOTE(review): as written it looks unfinished — the copy loop runs to
// newCapacity while indexing the old table, the probe loop tests ht->_table
// rather than pTemp, and the ++i passed to Detective2 advances the copy
// loop's own index.
#if 0
int CheckCapacity(HashTable* ht)
{
assert(ht);
// Grow once the used-slot count reaches 70% of the capacity.
if (ht->_total * 10 / ht->_capacity >= 7)
{
int newCapacity = ht->_capacity * 2;
Elem* pTemp = (Elem*)malloc(sizeof(Elem) * newCapacity);
if (NULL == pTemp)
{
assert(0);
return 0;
}
// Mark every slot of the new buffer as unused.
for (int i = 0; i < newCapacity; ++i)
pTemp[i]._state = EMPTY;
// Re-insert the live values into the new buffer.
for (int i = 0; i < newCapacity; ++i)
{
if (EXSIT == ht->_table[i]._state)
{
int hashAddr = ht->_table[i]._data % newCapacity;
while (EMPTY != ht->_table[hashAddr]._state)
{
if (ht->_IsLine)
hashAddr = DetectiveLine(ht, hashAddr);
else
hashAddr = Detective2(ht, hashAddr, ++i);
}
pTemp[hashAddr]._data = ht->_table[i]._data;
pTemp[hashAddr]._state = EXSIT;
}
}
free(ht->_table);
ht->_table = pTemp;
ht->_capacity = newCapacity;
// Deleted slots are not copied, so only live values count as used.
ht->_total = ht->_size;
}
return 1;
}
#endif
// Grows `ht` when its used-slot count (_total, which includes deleted slots)
// reaches 70% of the capacity.
// Growth builds a larger table, re-inserts every live value into it, swaps
// the two tables and destroys the old storage; deleted slots are dropped.
// Returns 1 when the table can accept an insertion, 0 when it has no
// capacity or the larger table could not be allocated.
int CheckCapacity(HashTable* ht)
{
    assert(ht);

    // A table without capacity (failed init or already destroyed) cannot be
    // probed, and dividing by its capacity would be a division by zero.
    if (ht->_capacity <= 0)
        return 0;

    if (ht->_total * 10 / ht->_capacity >= 7)
    {
        int i;
        // 1. Allocate the new, larger table.
        HashTable newHt;
        HashTableInit(&newHt, GetNextPrime(ht->_capacity), ht->_IsLine, ht->_pDTInt);
        if (NULL == newHt._table)
            return 0; // allocation failed; keep the old table intact

        // 2. Copy the live elements.
        for (i = 0; i < ht->_capacity; ++i)
        {
            if (EXSIT == ht->_table[i]._state)
                HashTableInsert(&newHt, ht->_table[i]._data);
        }

        // 3. Adopt the new storage and release the old one.
        Swap(ht, &newHt);
        HashTableDestroy(&newHt);
    }
    return 1;
}
// Initializes `ht` as an empty open-addressing table.
//   ht       - table to initialize (must not be NULL)
//   capacity - requested slot count; rounded up via GetNextPrime
//   IsLine   - probing strategy used on collisions
//   pDTInt   - converts a value into the size_t used to select a slot
// If the slot array cannot be allocated the function asserts; with asserts
// compiled out it returns with _table == NULL and _capacity == 0.
void HashTableInit(HashTable* ht, int capacity, DETECTIVE IsLine, PDTInt pDTInt)
{
    int i = 0;
    assert(ht);

    // Put every field into a defined state before the allocation can fail.
    ht->_total = 0;
    ht->_size = 0;
    ht->_capacity = 0;
    ht->_IsLine = IsLine;
    ht->_pDTInt = pDTInt;

    capacity = GetNextPrime(capacity);
    // Allocate exactly the slots that will be used; rounding the already
    // rounded capacity a second time only over-allocates.
    ht->_table = (Elem*)malloc(sizeof(Elem) * capacity);
    if (NULL == ht->_table)
    {
        assert(0);
        return;
    }
    ht->_capacity = capacity;
    for (; i < capacity; ++i)
    {
        ht->_table[i]._state = EMPTY;
    }
}
// Inserts `data` into `ht`.
// After CheckCapacity succeeds, the probe sequence is followed from the home
// slot until an EMPTY slot is reached; the insert is abandoned when a live
// slot already holds `data`. DELETE slots are skipped rather than reused.
void HashTableInsert(HashTable* ht, DataType data)
{
    int slot;
    int probes = 0;

    if (0 == CheckCapacity(ht))
        return;

    for (slot = HashFunc(ht, data); ht->_table[slot]._state != EMPTY; )
    {
        const Elem* pSlot = &ht->_table[slot];
        if (pSlot->_state == EXSIT && pSlot->_data == data)
            return;
        slot = (ht->_IsLine == LINE) ? DetectiveLine(ht, slot)
                                     : Detective2(ht, slot, ++probes);
    }

    ht->_table[slot]._state = EXSIT;
    ht->_table[slot]._data = data;
    ht->_total++;
    ht->_size++;
}
// Removes `data` from `ht` when present: its slot is marked DELETE (not
// EMPTY) and the live count is reduced by one. _total is left unchanged.
void HashTableDelete(HashTable* ht, DataType data)
{
    const int slot = HashTableFind(ht, data);
    if (-1 == slot)
        return;
    ht->_table[slot]._state = DELETE;
    ht->_size -= 1;
}
// Looks `data` up in `ht`.
// Follows the probe sequence from the home slot until an EMPTY slot ends the
// search; with linear probing the search also ends once it has wrapped back
// to the home slot.
// Returns the index of the live slot holding `data`, or -1 when not found.
int HashTableFind(HashTable* ht, DataType data)
{
    const int home = HashFunc(ht, data);
    int slot = home;
    int probes = 0;

    for (;;)
    {
        const Elem* pSlot = &ht->_table[slot];
        if (EMPTY == pSlot->_state)
            break;
        if (EXSIT == pSlot->_state && data == pSlot->_data)
            return slot;
        if (LINE != ht->_IsLine)
        {
            slot = Detective2(ht, slot, ++probes);
            continue;
        }
        slot = DetectiveLine(ht, slot);
        if (slot == home)
            break;
    }
    return -1;
}
// Returns the number of live values stored in `ht`.
int HashTableSize(HashTable* ht)
{
    int live;
    assert(ht);
    live = ht->_size;
    return live;
}
// Returns 1 when `ht` holds no live values, 0 otherwise.
int HashTableEmpty(HashTable* ht)
{
    assert(ht);
    if (ht->_size == 0)
        return 1;
    return 0;
}
// Releases the slot array of `ht` and resets it to an empty table with no
// storage. _table is set to NULL afterwards, so a second call is harmless
// instead of freeing the same block twice.
void HashTableDestroy(HashTable* ht)
{
    assert(ht);
    free(ht->_table); // free(NULL) is a no-op, so no guard is needed
    ht->_table = NULL;
    ht->_capacity = 0;
    ht->_size = 0;
    ht->_total = 0;
}
void TestHashTable()
{
HashTable ht;
HashTableInit(&ht, 10, LINE, DataToIntInt);
HashTableInsert(&ht, 1);
HashTableInsert(&ht, 2);
HashTableInsert(&ht, 3);
HashTableInsert(&ht, 4);
HashTableInsert(&ht, 5);
HashTableInsert(&ht, 6);
HashTableInsert(&ht, 7);
HashTableInsert(&ht, 8);
HashTableInsert(&ht, 12);
HashTableInsert(&ht, 9);
printf("size= %d \n", HashTableSize(&ht));
if (-1 != HashTableFind(&ht, 12))
{
printf("12 is in hash table \n");
}
else
printf("12 is not in hash table \n");
HashTableDestroy(&ht);
}
void TestHashTable1()
{
HashTable ht;
HashTableInit(&ht, 10, LINE, DataToIntInt);
HashTableInsert(&ht, 1);
HashTableInsert(&ht, 2);
HashTableInsert(&ht, 3);
HashTableInsert(&ht, 4);
HashTableInsert(&ht, 5);
HashTableInsert(&ht, 6);
HashTableInsert(&ht, 7);
HashTableInsert(&ht, 8);
HashTableInsert(&ht, 9);
printf("size= %d \n", HashTableSize(&ht));
HashTableDelete(&ht, 9);
HashTableDelete(&ht, 8);
HashTableDelete(&ht, 7);
HashTableDelete(&ht, 6);
printf("size= %d \n", HashTableSize(&ht));
HashTableDestroy(&ht);
}
test.c
------
//#include "HashBucket.h"
//int main()
//{
//
// TESTHBT();
// system("pause");
//}
#include "HashTable.h"
// Entry point: runs the deletion demo, then invokes the shell command
// "pause" (presumably to keep a Windows console window open — confirm for
// other platforms).
int main()
{
    // TestHashTable() is the alternative demo; it is currently not run.
    TestHashTable1();
    system("pause");
    return 0;
}
程序运行图:
1、TestHashTable():
2、TestHashTable1():
3、TESTHBT():