魔兽哈希算法封装和测试
Jluzc2012.12.14
近期由于需要,研究了魔兽文件打包管理器的相关算法,重点对其文件索引表的生成和查找进行了研究:采用哈希表进行,在冲突方面的处理方面,采用线性探测再散列。在添加和查找过程中进行了三次哈希,第一个哈希值用来查找,后两个哈希值用来校验,这样可以大大减少冲突的几率。
这里对其进行了简单的封装,扩展时,仅仅需要对结构体进行扩展即可。更为详细的说明,参考代码:
一、类声明头文件
1. /
2. // Name: HashAlgo.h
3. // Purpose: 使用魔兽Hash算法,实现索引表的填充和查找功能。
4. // Author: jluzc
5. // Modified by:
6. // Created: 07/30/09
7. // RCS-ID: $Id: treetest.h 43021 2012-07-30 16:36:51Z VZ $
8. // Copyright: (C) Copyright 2009, TSong Corporation, All Rights Reserved.
9. // Licence:
10. /
11.
12. #define MAXFILENAME 255 // 最大文件名长度
13. #define MAXTABLELEN 1024 // 默认哈希索引表大小
14.
15. //
16. // 测试宏定义,正式使用时关闭
17. #define DEBUGTEST 1
18.
19. //
20. // 哈希索引表定义
21. typedef struct
22. {
23. long nHashA;
24. long nHashB;
25. bool bExists;
26. char test_filename[MAXFILENAME];
27. // ......
28. } MPQHASHTABLE;
29.
30. //
31. // 对哈希索引表的算法进行封装
32. class CHashAlgo
33. {
34. public:
35.
36. #if DEBUGTEST
37. long testid; // 测试之用
38. #endif
39.
40. CHashAlgo( const long nTableLength = MAXTABLELEN )// 创建指定大小的哈希索引表,不带参数的构造函数创建默认大小的哈希索引表
41. {
42. prepareCryptTable();
43. m_tablelength = nTableLength;
44.
45. m_HashIndexTable = new MPQHASHTABLE[nTableLength];
46. for ( int i = 0; i < nTableLength; i++ )
47. {
48. m_HashIndexTable[i].nHashA = -1;
49. m_HashIndexTable[i].nHashB = -1;
50. m_HashIndexTable[i].bExists = false;
51. m_HashIndexTable[i].test_filename[0] = '/0';
52. }
53. }
54.
55. void prepareCryptTable(); // 对哈希索引表预处理
56.
57. unsigned long HashString(char *lpszFileName, unsigned long dwHashType); // 求取哈希值
58. long GetHashTablePos( char *lpszString ); // 得到在定长表中的位置
59. bool SetHashTable( char *lpszString ); // 将字符串散列到哈希表中
60.
61. unsigned long GetTableLength(void);
62. void SetTableLength( const unsigned long nLength );
63.
64. ~CHashAlgo()
65. {
66. if ( NULL != m_HashIndexTable )
67. {
68. delete []m_HashIndexTable;
69. m_HashIndexTable = NULL;
70. m_tablelength = 0;
71. }
72. }
73. protected:
74.
75. private:
76. unsigned long cryptTable[0x500];
77. unsigned long m_tablelength; // 哈希索引表长度
78. MPQHASHTABLE *m_HashIndexTable;
79. };
二、类实现文件
[cpp:nogutter] view plaincopyprint?
1. /
2. // Name: HashAlgo.cpp
3. // Purpose: 使用魔兽Hash算法,实现索引表的填充和查找功能。
4. // Author: 陈相礼
5. // Modified by:
6. // Created: 07/30/09
7. // RCS-ID: $Id: treetest.h 43021 2009-07-30 16:36:51Z VZ $
8. // Copyright: (C) Copyright 2009, TSong Corporation, All Rights Reserved.
9. // Licence:
10. /
11.
12. #include "windows.h"
13. #include "HashAlgo.h"
14.
15. //
16. // 预处理
17. void CHashAlgo::prepareCryptTable()
18. {
19. unsigned long seed = 0x00100001, index1 = 0, index2 = 0, i;
20.
21. for( index1 = 0; index1 < 0x100; index1++ )
22. {
23. for( index2 = index1, i = 0; i < 5; i++, index2 += 0x100 )
24. {
25. unsigned long temp1, temp2;
26. seed = (seed * 125 + 3) % 0x2AAAAB;
27. temp1 = (seed & 0xFFFF) << 0x10;
28. seed = (seed * 125 + 3) % 0x2AAAAB;
29. temp2 = (seed & 0xFFFF);
30. cryptTable[index2] = ( temp1 | temp2 );
31. }
32. }
33. }
34.
35. //
36. // 求取哈希值
37. unsigned long CHashAlgo::HashString(char *lpszFileName, unsigned long dwHashType)
38. {
39. unsigned char *key = (unsigned char *)lpszFileName;
40. unsigned long seed1 = 0x7FED7FED, seed2 = 0xEEEEEEEE;
41. int ch;
42.
43. while(*key != 0)
44. {
45. ch = toupper(*key++);
46.
47. seed1 = cryptTable[(dwHashType << 8) + ch] ^ (seed1 + seed2);
48. seed2 = ch + seed1 + seed2 + (seed2 << 5) + 3;
49. }
50. return seed1;
51. }
52.
53. //
54. // 得到在定长表中的位置
55. long CHashAlgo::GetHashTablePos(char *lpszString)
56.
57. {
58. const unsigned long HASH_OFFSET = 0, HASH_A = 1, HASH_B = 2;
59. unsigned long nHash = HashString(lpszString, HASH_OFFSET);
60. unsigned long nHashA = HashString(lpszString, HASH_A);
61. unsigned long nHashB = HashString(lpszString, HASH_B);
62. unsigned long nHashStart = nHash % m_tablelength,
63. nHashPos = nHashStart;
64.
65. while ( m_HashIndexTable[nHashPos].bExists)
66. {
67. if (m_HashIndexTable[nHashPos].nHashA == nHashA && m_HashIndexTable[nHashPos].nHashB == nHashB)
68. return nHashPos;
69. else
70. nHashPos = (nHashPos + 1) % m_tablelength;
71.
72. if (nHashPos == nHashStart)
73. break;
74. }
75.
76. return -1; //没有找到
77. }
78. //
79. // 通过传入字符串,将相应的表项散列到索引表相应位置中去
80. bool CHashAlgo::SetHashTable( char *lpszString )
81. {
82. const unsigned long HASH_OFFSET = 0, HASH_A = 1, HASH_B = 2;
83. unsigned long nHash = HashString(lpszString, HASH_OFFSET);
84. unsigned long nHashA = HashString(lpszString, HASH_A);
85. unsigned long nHashB = HashString(lpszString, HASH_B);
86. unsigned long nHashStart = nHash % m_tablelength,
87. nHashPos = nHashStart;
88.
89. while ( m_HashIndexTable[nHashPos].bExists)
90. {
91. nHashPos = (nHashPos + 1) % m_tablelength;
92. if (nHashPos == nHashStart)
93. {
94.
95. #if DEBUGTEST
96. testid = -1;
97. #endif
98.
99. return false;
100. }
101. }
102. m_HashIndexTable[nHashPos].bExists = true;
103. m_HashIndexTable[nHashPos].nHashA = nHashA;
104. m_HashIndexTable[nHashPos].nHashB = nHashB;
105. strcpy( m_HashIndexTable[nHashPos].test_filename, lpszString );
106.
107. #if DEBUGTEST
108. testid = nHashPos;
109. #endif
110.
111. return true;
112. }
113.
114. //
115. // 取得哈希索引表长
116. unsigned long CHashAlgo::GetTableLength(void)
117. {
118. return m_tablelength;
119. }
120.
121. //
122. // 设置哈希索引表长
123. void CHashAlgo::SetTableLength( const unsigned long nLength )
124. {
125. m_tablelength = nLength;
126. return;
127. }
三、测试主文件
1. /
2. // Name: DebugMain.cpp
3. // Purpose: 测试Hash算法封装的类,完成索引表的填充和查找功能的测试。
4. // Author: 陈相礼
5. // Modified by:
6. // Created: 07/30/09
7. // RCS-ID: $Id: treetest.h 43021 2009-07-30 16:36:51Z VZ $
8. // Copyright: (C) Copyright 2009, TSong Corporation, All Rights Reserved.
9. // Licence:
10. /
11.
12. //
13. // 测试参数设定宏
14. #define TESTNUM 32
15.
16. #include <iostream>
17. #include <fstream>
18. #include "HashAlgo.h"
19.
20. using namespace std;
21.
22. //
23. // 测试主函数开始
24. int main( int argc, char **argv )
25. {
26. CHashAlgo hash_test( TESTNUM );
27.
28. cout << "取得初始化散列索引表长为:" << hash_test.GetTableLength() << endl;
29.
30. bool is_success = hash_test.SetHashTable( "test" );
31. if ( is_success )
32. {
33. cout << "散列结果一:成功!" << endl;
34. }
35. else
36. {
37. cout << "散列结果一:失败!" << endl;
38. }
39.
40. is_success = hash_test.SetHashTable( "测试" );
41. if ( is_success )
42. {
43. cout << "散列结果二:成功!" << endl;
44. }
45. else
46. {
47. cout << "散列结果二:失败!" << endl;
48. }
49.
50. long pos = hash_test.GetHashTablePos( "test" );
51. cout << "查找测试字符串:/"test/" 的散列位置:" << pos << endl;
52. pos = hash_test.GetHashTablePos( "测试" );
53. cout << "查找测试字符串:“测试” 的散列位置:" << pos << endl;
54.
55. //
56. // 散列测试
57. for ( int i = 0; i < TESTNUM; i++ )
58. {
59. char buff[32];
60. sprintf(buff, "abcdefg%d.", i);
61. is_success = hash_test.SetHashTable(buff);
62. is_success ? cout << buff << "散列结果:成功!位置:" << hash_test.testid << endl : cout << buff << "散列结果:失败!" << endl;
63. }
64. system( "pause" );
65. //
66. // 查找测试
67. for ( int i = 0; i < TESTNUM; i++ )
68. {
69. char buff[32];
70. sprintf(buff, "abcdefg%d.", i);
71. pos = hash_test.GetHashTablePos( buff );
72. pos != -1 ? cout << "查找测试字符串:"<< buff <<" 的散列位置:" << pos << endl : cout << buff << "存在冲突!" << endl;
73. }
74.
75. system( "pause" );
76. return 0;
77. }