一、概述
散列表(hash table)ADT只支持二叉查找树所允许的一部分操作,散列表的实现常常叫做散列(hashing)。散列是一种以常数平均时间执行插入、删除和查找的技术。但是,那些需要元素间任何排序信息的操作将不会得到有效的支持。
二、实现
理想的散列表数据结构只不过是一个包含有关关键字的具有固定大小的数组。典型情况下,一个关键字就是一个带有相关值的字符串。我们把表的大小记作TableSize,并将其理解为散列数据结构的一部分而不仅仅是浮动于全局的某个变量。通常的习惯是让表从0到TableSize - 1变化。每个关键字被映射到从0到TableSize - 1这个范围中的某个数,并且被放到适当的单元中。这个映射就叫做散列函数(hash function),理想情况下它应该运算简单并且应该保证任何两个不同的关键字映射到不同的单元。不过,这是不可能的,因为单元的数目是有限的,而关键字实际上是用不完的。因此,我们寻找一个散列函数,该函数要在单元之间均匀地分配关键字。
这就是散列的基本想法。剩下的问题则是要选择一个函数,决定当两个关键字散列到同一个值的时候(称为冲突(collision))应该做什么以及如何确定散列表的大小。
如果输入的关键字是整数,则一般合理的方法就是直接返回“Key mod Tablesize”的结果,除非Key碰巧具有某些不理想的性质。在这种情况下,散列函数的选择需要仔细考虑。
解决冲突的方法有几种,我们将讨论其中最简单的两种:分离链接法和开放定址法。
1. 分离链接法
其做法是将散列到同一个值的所有元素保留到一个表中,为方便起见,这些表都有表头(表的大小不是素数,用在这里是为了简单),如图1所示。
图1 分离链接散列表
文件名:hashsep.h
#ifndef _HashSep_H
typedef int ElementType;
typedef unsigned int Index;
struct ListNode;
typedef struct ListNode *Position;
struct HashTbl;
typedef struct HashTbl *HashTable;
Index Hash( const int Key, int TableSize );
HashTable InitializeTable( int TableSize );
void DestroyTable( HashTable H );
Position Find( ElementType Key, HashTable H );
void Insert( ElementType Key, HashTable H );
ElementType Retrieve( Position P );
/* Routines such as Delete and MakeEmpty are omitted */
#endif /* _HashSep_H */
文件名:hashsep.c
#include "fatal.h"
#include "hashsep.h"
#define MinTableSize (10)
typedef Position List;
Index Hash( const int Key, int TableSize )
{
return Key % TableSize;
}
struct ListNode
{
ElementType Element;
Position Next;
};
/* List *TheList will be an array of lists,allocated later */
/* The lists use headers (for simplicity), */
/* though this wastes space */
struct HashTbl
{
int TableSize;
List *TheLists;
};
/* Return next prime; assume N >= 10 */
static int
NextPrime( int N )
{
int i;
if( N % 2 == 0 )
N++;
for( ; ; N += 2 )
{
for( i = 3; i * i <= N; i += 2 )
if( N % i == 0 )
goto ContOuter; /* Sorry about this! */
return N;
ContOuter: ;
}
}
HashTable
InitializeTable( int TableSize )
{
HashTable H;
int i;
if ( TableSize < MinTableSize )
{
Error( "Table size too small" );
return NULL;
}
/* Allocate table */
H = malloc( sizeof( struct HashTbl ) );
if ( H == NULL)
FatalError( "Out of space!!!" );
H->TableSize = NextPrime( TableSize );
/* Allocate array of lists */
H->TheLists = malloc( sizeof( List ) * H->TableSize );
if( H->TheLists == NULL )
FatalError( "Out of space!!!" );
/* Allocate list headers */
for( i = 0; i < H->TableSize; i++ )
{
H->TheLists[ i ] = malloc( sizeof( struct ListNode ) );
if ( H->TheLists[ i ] == NULL )
FatalError( "Out of space!!!" );
else
H->TheLists[ i ]->Next = NULL;
}
return H;
}
Position
Find( ElementType Key, HashTable H )
{
Position P;
List L;
L = H->TheLists[ Hash( Key, H->TableSize )];
P = L->Next;
while( P != NULL && P->Element != Key )
/* Probably need strcmp!! */
P = P->Next;
return P;
}
void
Insert( ElementType Key, HashTable H )
{
Position Pos, NewCell;
List L;
Pos = Find( Key, H );
if ( Pos == NULL ) /* Key is not found */
{
NewCell = malloc( sizeof( struct ListNode ) );
if ( NewCell == NULL )
FatalError( "Out of space!!!" );
else
{
L = H->TheLists[ Hash( Key, H->TableSize ) ];
NewCell->Next = L->Next;
NewCell->Element = Key; /* Probably need strcpy!! */
L->Next = NewCell;
}
}
}
void
DestroyTable( HashTable H )
{
int i;
for( i = 0; i < H->TableSize; i++ )
{
Position P = H->TheLists[ i ];
Position Tmp;
while( P != NULL )
{
Tmp = P->Next;
free( P );
P = Tmp;
}
}
free( H->TheLists );
free( H );
}
ElementType Retrieve( Position P )
{
return P->Element;
}
文件名:main.c
#include "hashsep.h"
#include <stdio.h>
int main()
{
HashTable H = InitializeTable( 10 );
int i;
printf( "HashTable:\n" );
for ( i = 1; i < 11; i++ )
{
Insert( i * i, H );
printf( "%d:%d\n", i*i, Hash( i * i, 10 ) );
}
return 0;
}
2. 开放定址法
分离链接散列算法的缺点是需要指针,由于给新单元分配地址需要时间,因此就导致算法的速度多少有些减慢,同时算法实际上还要求对另一种数据结构的实现。除使用链表解决冲突外,开放定址散列法(Open addressing hashing)是另外一种不用链表解决冲突的方法。在开放定址散列算法系统中,如果有冲突发生,那么就要尝试选择另外的单元,直到找出空的单元为止。
文件名:hashquad.h
#ifndef _HashQuad_H
typedef int ElementType;
typedef unsigned int Index;
typedef Index Position;
struct HashTbl;
typedef struct HashTbl *HashTable;
static int NextPrime( int N );
Index Hash( ElementType Key, int TableSize );
HashTable InitializeTable( int TableSize );
void DestroyTable( HashTable H );
Position Find( ElementType Key, HashTable H );
void Insert( ElementType Key, HashTable H );
ElementType Retrieve( Position P, HashTable H );
HashTable Rehash( HashTable H );
/* Routines such as Delete and MakeEmpty are omitted */
#endif /* _HashQuad_H */
文件名:hashquad.c
#include "hashquad.h"
#include "fatal.h"
#define MinTableSize (10)
enum KindOfEntry { Legitimate, Empty, Deleted };
struct HashEntry
{
ElementType Element;
enum KindOfEntry Info;
};
typedef struct HashEntry Cell;
/* Cell *TheCells will be an array of */
/* HashEntry cells, allocated later */
struct HashTbl
{
int TableSize;
Cell *TheCells;
};
/* Return next prime; assume N >= 10 */
static int
NextPrime( int N )
{
int i;
if( N % 2 == 0 )
N++;
for( ; ; N += 2 )
{
for( i = 3; i * i <= N; i += 2 )
if( N % i == 0 )
goto ContOuter; /* Sorry about this! */
return N;
ContOuter: ;
}
}
/* Hash function for ints */
Index
Hash( ElementType Key, int TableSize )
{
return Key % TableSize;
}
HashTable
InitializeTable( int TableSize )
{
HashTable H;
int i;
if ( TableSize < MinTableSize )
{
Error( "Table size too small!" );
return NULL;
}
/* Allocate table */
H = malloc( sizeof( struct HashTbl ) );
if ( H == NULL )
FatalError( "Out of space!!!" );
H->TableSize = NextPrime( TableSize );
/* Allocate array of Cells */
H->TheCells = malloc( sizeof( Cell ) * H->TableSize );
if ( H->TheCells == NULL )
FatalError( "Out of space!!!" );
for ( i = 0; i < H->TableSize; i++ )
H->TheCells[ i ].Info = Empty;
return H;
}
Position
Find( ElementType Key, HashTable H )
{
Position CurrentPos;
int CollisionNum;
CollisionNum = 0;
CurrentPos = Hash( Key, H->TableSize );
while ( H->TheCells[ CurrentPos ].Info != Empty &&
H->TheCells[ CurrentPos ].Element != Key )
/* Probably need strcpy! */
{
CurrentPos += 2 * ++CollisionNum - 1;
if ( CurrentPos >= H->TableSize )
CurrentPos -= H->TableSize;
}
return CurrentPos;
}
void
Insert( ElementType Key, HashTable H )
{
Position Pos;
Pos = Find( Key, H );
if ( H->TheCells[ Pos ].Info != Legitimate )
{
/* Ok to insert here */
H->TheCells[ Pos ].Info = Legitimate;
H->TheCells[ Pos ].Element = Key; /* Probably need strcpy! */
}
}
HashTable
Rehash( HashTable H )
{
int i, OldSize;
Cell *OldCells;
OldCells = H->TheCells;
OldSize = H->TableSize;
/* Get a new, empty table */
H = InitializeTable( 2 * OldSize );
/* Scan through old table, reinserting into new */
for( i = 0; i < OldSize; i++ )
if ( OldCells[ i ].Info == Legitimate )
Insert( OldCells[ i ].Element, H );
free( OldCells );
return H;
}
ElementType
Retrieve( Position P, HashTable H )
{
return H->TheCells[ P ].Element;
}
void
DestroyTable( HashTable H )
{
free( H->TheCells );
free( H );
}
文件名:main.c
#include "hashquad.h"
#include <stdio.h>
int main()
{
HashTable H = InitializeTable( 10 );
int i;
printf( "Hash Table: \n" );
for ( i = 1; i < 11; i++ )
{
Insert( i * i, H );
printf( "%d:%d\n", i*i, Hash( i * i, 10 ) );
}
return 0;
}
附录:上述代码中用到了Error、FatalError等函数,其实现如下(即fatal.h文件):
#include <stdio.h>
#include <stdlib.h>
#define Error( Str ) FatalError( Str )
#define FatalError( Str ) fprintf( stderr, "%s\n", Str ), exit( 1 )
备注:本文摘自《数据结构与算法分析 C语言描述 Mark Allen Weiss著》,代码经gcc编译测试通过。
附件下载:http://download.csdn.net/detail/shuxiao9058/4212416#hashsep_20120406.tar.gz ,http://download.csdn.net/detail/shuxiao9058/4212417# hashquad_20120406.tar.gz