Hashes
问题:你有一个很大的字符串数组。需要知道另外一个字符串是否在这个字符串数组中。你可能会将这个字符串与数组中的字符串依次作比较。但是实际中,你会发现这种方法太慢。必须找其它的方法。但是除了依次比较字符串外,还有没有其它方法来知道某个字符串是否存在呢?
解决方案:Hashes。Hash是用较小的数据类型(如数字)来表示其它较大的数据类型(通常是字符串)。在这种情形下,你可以预先计算数组中每个字符串的hash值,并把这些hash值存储在一个hash数组中。查找时,先计算待查找字符串的hash值,再用它与hash数组中的hash值进行比较。如果hash数组中有一个hash值与待查询的hash值相等,则可以认为这个字符串存在(不同的字符串有可能得到相同的hash值,因此下面的代码还会再比较另外两个hash值来确认)。这种方法称为索引(indexing)。
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
//#include "hash.h"
/*
 * Lookup table of 0x500 values. It is filled by prepareCryptTable() and read
 * by HashString(), which indexes it with (hash type << 8) + character.
 */
unsigned long cryptTable[0x500];
/* One slot of the hash table probed by InsertHashTable() and GetHashTablePos(). */
typedef struct hash_s {
/* Hash of the string with hash type 0; the same value selects the starting slot. */
unsigned long nHash;
/* Hash of the string with hash type 1; compared to recognise a stored string. */
unsigned long nHashA;
/* Hash of the string with hash type 2; compared together with nHashA. */
unsigned long nHashB;
/* Non-zero once the slot holds an entry; zero means the slot is free. */
int bExists;
}hash_t;
#define FALSE 0
#define TRUE 1
/* Number of slots in the global table below. */
#define HASH_TABLE_ARRAY_SIZE 1024
/*
 * The global hash table. As a file-scope object it starts zero-initialised,
 * so every slot begins with bExists == 0 (free).
 */
hash_t lpTable[HASH_TABLE_ARRAY_SIZE];
/*
 * Hash a NUL-terminated string, ignoring letter case.
 *
 * lstring     string to hash; every byte is passed through toupper() first.
 * dwHashType  selects which 0x100-entry section of cryptTable is used, so
 *             different hash types give independent hashes of one string.
 *             It must be smaller than the number of sections in cryptTable.
 *
 * Returns the hash as a 32-bit value stored in an unsigned long. Returns 0
 * when lstring is NULL or dwHashType is out of range (instead of reading
 * outside cryptTable).
 *
 * cryptTable must have been filled by prepareCryptTable() beforehand.
 */
unsigned long HashString(char *lstring, unsigned long dwHashType)
{
    const unsigned char *key = (const unsigned char *)lstring;
    const unsigned long sections = sizeof cryptTable / sizeof cryptTable[0] / 0x100;
    unsigned long seed1 = 0x7FED7FED;
    unsigned long seed2 = 0xEEEEEEEE;

    if (key == NULL || dwHashType >= sections) {
        return 0;
    }

    while (*key != 0) {
        /* key is unsigned char, so the argument is always valid for toupper(). */
        int ch = toupper(*key);

        key++;
        seed1 = cryptTable[(dwHashType << 8) + ch] ^ (seed1 + seed2);
        seed2 = ch + seed1 + seed2 + (seed2 << 5) + 3;
    }

    /*
     * Where unsigned long is wider than 32 bits the additions and the shift
     * above carry into the upper bits. Those operations never move
     * information downwards, so the low 32 bits are the same on every
     * platform; masking makes the returned value platform independent.
     */
    return seed1 & 0xFFFFFFFFUL;
}
/*
 * Insert a string into an open-addressed hash table using linear probing.
 *
 * lpszString  NUL-terminated string to insert.
 * lpTable     table of nTableSize slots.
 * nTableSize  number of slots in lpTable; must be positive.
 *
 * The starting slot is the type-0 hash modulo nTableSize. Slots are probed
 * one after another, wrapping at nTableSize, until a free slot is found or
 * a slot whose nHashA/nHashB both match (the string is already stored).
 * The chosen slot index is printed to stdout on insertion.
 *
 * Returns 0 when the string was inserted or was already present, and -1
 * when an argument is invalid or every slot is occupied by other entries.
 */
int InsertHashTable(char *lpszString, hash_t *lpTable, int nTableSize)
{
    const unsigned long HASH_OFFSET = 0, HASH_A = 1, HASH_B = 2;
    unsigned long nHash, nHashA, nHashB;
    int nHashStart;
    int nHashPos;

    if (lpszString == NULL || lpTable == NULL || nTableSize <= 0) {
        return -1;
    }

    nHash = HashString(lpszString, HASH_OFFSET);
    nHashA = HashString(lpszString, HASH_A);
    nHashB = HashString(lpszString, HASH_B);

    /* The remainder is below nTableSize, so it always fits in an int. */
    nHashStart = (int)(nHash % (unsigned long)nTableSize);
    nHashPos = nHashStart;

    do {
        hash_t *slot = &lpTable[nHashPos];

        if (!slot->bExists) {
            slot->nHash = nHash;
            slot->nHashA = nHashA;
            slot->nHashB = nHashB;
            slot->bExists = TRUE;
            printf("nHashInsertPos = %d\n", nHashPos);
            return 0;
        }
        if (slot->nHashA == nHashA && slot->nHashB == nHashB) {
            return 0; /* the same element has already been inserted */
        }

        /* Wrap with the size of the table we were given, not a global constant. */
        nHashPos = (nHashPos + 1) % nTableSize;
    } while (nHashPos != nHashStart);

    /* Probed every slot without finding room. */
    return -1;
}
/*
 * Look up a string in an open-addressed hash table using linear probing.
 *
 * lpszString  NUL-terminated string to search for.
 * lpTable     table of nTableSize slots.
 * nTableSize  number of slots in lpTable; must be positive.
 *
 * Probing starts at the type-0 hash modulo nTableSize (printed to stdout)
 * and continues slot by slot, wrapping at nTableSize. It stops at the first
 * free slot or after one full pass over the table.
 *
 * Returns the index of the slot whose nHashA and nHashB both match the
 * string, or -1 when the string is not found or an argument is invalid.
 */
int GetHashTablePos(char *lpszString, hash_t *lpTable, int nTableSize)
{
    const unsigned long HASH_OFFSET = 0, HASH_A = 1, HASH_B = 2;
    unsigned long nHash, nHashA, nHashB;
    int nHashStart;
    int nHashPos;

    if (lpszString == NULL || lpTable == NULL || nTableSize <= 0) {
        return -1; //Error value
    }

    nHash = HashString(lpszString, HASH_OFFSET);
    nHashA = HashString(lpszString, HASH_A);
    nHashB = HashString(lpszString, HASH_B);

    /* The remainder is below nTableSize, so it always fits in an int. */
    nHashStart = (int)(nHash % (unsigned long)nTableSize);
    nHashPos = nHashStart;
    printf("nHashStart = %d\n", nHashStart);

    while (lpTable[nHashPos].bExists) {
        if (lpTable[nHashPos].nHashA == nHashA && lpTable[nHashPos].nHashB == nHashB) {
            return nHashPos;
        }

        nHashPos = (nHashPos + 1) % nTableSize;
        if (nHashPos == nHashStart) {
            break; /* back at the start: every slot has been examined */
        }
    }

    return -1; //Error value
}
/*
 * Fill all 0x500 entries of cryptTable.
 *
 * A generator state is advanced twice per entry; the low 16 bits of the
 * first step become the upper half of the entry and the low 16 bits of the
 * second step become the lower half. Entries are written in the order
 * column, column + 0x100, ..., column + 0x400 for column = 0 .. 0xFF, and
 * the generator runs through that order, so the order must not be changed.
 */
void prepareCryptTable()
{
    unsigned long state = 0x00100001;
    unsigned long column;
    unsigned long section;

    for (column = 0; column < 0x100; column++) {
        for (section = 0; section < 5; section++) {
            unsigned long upper_half;
            unsigned long lower_half;

            state = (state * 125 + 3) % 0x2AAAAB;
            upper_half = (state & 0xFFFF) << 0x10;

            state = (state * 125 + 3) % 0x2AAAAB;
            lower_half = state & 0xFFFF;

            cryptTable[column + section * 0x100] = upper_half | lower_half;
        }
    }
}
/*
 * Demo driver: builds the crypt table, prints the type-0 hash of
 * "arr\units.dat", inserts that string into the global table and then
 * looks it up again, printing the slot index that was found.
 */
int main(int argc, char *argv[])
{
    int ret;

    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    prepareCryptTable();

    /* HashString() returns unsigned long, so the conversion must be %lx. */
    printf("the hash value of arr\\units.dat is 0x%lx\n", HashString("arr\\units.dat", 0));

    ret = InsertHashTable("arr\\units.dat", lpTable, HASH_TABLE_ARRAY_SIZE);
    if (ret != 0) {
        printf("InsertHashTable error\n");
    }

    ret = GetHashTablePos("arr\\units.dat", lpTable, HASH_TABLE_ARRAY_SIZE);
    printf("ret = %d\n", ret);

    return 0;
}
参考文档:
1.http://blog.csdn.net/v_JULY_v/article/details/6256463
2.http://sfsrealm.hopto.org/inside_mopaq/chapter2.htm