昨晚看了一篇关于暴雪三重hash算法的文章,有意实践一下。
以前做字符串hash的时候,想到过用另一个hash_key去区别落在同一位置的字符串(用strcmp来比较两个字符串是否相等真的好慢),但是因为理论上不可能有hash_key可以唯一地让一个string区别于另一个string,所以一直认为这么做不可行,一直用开地址链表的方法做hash。
看完暴雪三重hash这个文章之后,感觉只要选择两个(或者更多)好的hash函数,就可以在很低很低的错误概率下(如果程序要求绝对精确,则此法失效),通过两个key值来区别不同的string.
poj3297 open source
题目大意:
一个公告板,上面可以写开源项目的标题(大写字母),然后想参与的人可以写上自己的名字(小写字母),最后统计每个项目参与的人数。
如果有的人在不同的项目下面都写了名,则这个人名不计数。
poj3297 open source
我的程序:
/*
* =====================================================================================
*
* Filename: 3297.c
*
* Description:
*
* Version: 1.0
* Created: 2012年03月13日 18时08分33秒
* Revision: none
* Compiler: gcc
*
* Author: MaZheng (blog.csdn.net/mazheng1989), mazheng19891019@gmail.com
* Company: Dalian University Of Technology
*
* =====================================================================================
*/
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#define LEN 20 /* size in bytes of the input line buffer and of Project.name, terminator included */
#define HASH_TABLE_LEN 9999 /* number of slots in HashTable */
#define PROJECT_NUM 100 /* number of entries in the projects array */
//please declare parameters here.
/* One project title from the board together with its tally of students. */
struct Project{
char name[LEN]; /* title line exactly as copied from the input buffer by main() */
int num; /* students currently counted for this project; adjusted by count_student() */
}projects[PROJECT_NUM];
/*
 * One slot of the student hash table. The name itself is not stored: the
 * pair (key_A, key_B) stands in for it, so two different names that agree
 * on both keys are treated as the same student.
 */
struct HashNode{
unsigned int key_A; /* JSHash of the name, stored by count_student() */
unsigned int key_B; /* ELFHash of the name, stored by count_student() */
int project_ID; /* index of the project the name was first seen under; */
/* -1 marks an empty slot, -2 marks a name seen under more than one project */
}HashTable[HASH_TABLE_LEN];
int project_index; /* index in projects[] of the most recently read project; main() resets it to 0 for each test case */
//please declare functions here.
void init_hashtable()
{
int i=0;
for(i=0;i<HASH_TABLE_LEN;i++)
{
HashTable[i].project_ID=-1;
}
}
/*
 * RS hash of a NUL-terminated string.
 *
 * Each character is folded in as acc = acc * multiplier + c, and the
 * multiplier itself is scaled by 378551 after every character. Arithmetic
 * wraps in unsigned int; the top bit of the result is cleared before it is
 * returned. An empty string hashes to 0.
 */
unsigned int RSHash(char *str)
{
    const unsigned int step = 378551;
    unsigned int multiplier = 63689;
    unsigned int acc = 0;
    const char *p;

    for (p = str; *p != '\0'; ++p) {
        acc = acc * multiplier + *p;
        multiplier *= step;
    }
    return acc & 0x7FFFFFFF;
}
/*
 * JS hash of a NUL-terminated string.
 *
 * Starts from the seed 1315423911 and, for every character c, XORs the
 * accumulator with (acc << 5) + c + (acc >> 2). The top bit of the result
 * is cleared before it is returned.
 */
unsigned int JSHash(char *str)
{
    unsigned int acc = 1315423911;
    size_t i;

    for (i = 0; str[i] != '\0'; i++) {
        unsigned int mixed = (acc << 5) + str[i] + (acc >> 2);
        acc ^= mixed;
    }
    return acc & 0x7FFFFFFF;
}
/*
 * ELF hash of a NUL-terminated string.
 *
 * For every character the accumulator is shifted left by four bits and the
 * character is added. Whenever any of the top four bits becomes set, those
 * bits are folded back in (shifted right by 24 and XORed) and then cleared.
 * The top bit of the result is cleared before it is returned.
 */
unsigned int ELFHash(char *str)
{
    unsigned int acc = 0;

    for (; *str != '\0'; str++) {
        unsigned int top;

        acc = (acc << 4) + *str;
        top = acc & 0xF0000000u;
        if (top == 0) {
            continue;
        }
        acc ^= top >> 24;
        acc &= ~top;
    }
    return acc & 0x7FFFFFFF;
}
void count_student(char str[20],int project_ID)
{
unsigned int key=RSHash(str)%HASH_TABLE_LEN;
unsigned int key_A=JSHash(str);
unsigned int key_B=ELFHash(str);
while(HashTable[key].project_ID!=-1)
{
if(key_A==HashTable[key].key_A&&key_B==HashTable[key].key_B)
{
if(HashTable[key].project_ID==project_ID)
{
return;
}
else if(HashTable[key].project_ID==-2)
{
return;
}
else
{
projects[HashTable[key].project_ID].num--;
HashTable[key].project_ID=-2;
}
return;
}
key=(key+1)%HASH_TABLE_LEN;
}
HashTable[key].key_A=key_A;
HashTable[key].key_B=key_B;
HashTable[key].project_ID=project_ID;
projects[project_ID].num++;
}
int compare(const void *a,const void *b)
{
struct Project *p1=(struct Project *)a;
struct Project *p2=(struct Project *)b;
if(p1->num!=p2->num)
return p2->num-p1->num;
return strcmp(p1->name,p2->name);
}
/*
 * Sort projects[0..project_index] with compare() and print one line per
 * project in the form "<name> <num>".
 *
 * Names are stored as read by fgets(), so they normally end in a newline.
 * Each name is cut at its first line terminator ('\r' or '\n') before
 * printing; a name with no terminator (empty, truncated, or last line of
 * the file) is printed unchanged instead of losing its final character.
 */
void output()
{
    int i;

    qsort(projects, project_index + 1, sizeof(struct Project), compare);

    for (i = 0; i <= project_index; i++) {
        /* strcspn() returns the name length when no terminator is present,
         * in which case this just rewrites the existing NUL. */
        projects[i].name[strcspn(projects[i].name, "\r\n")] = '\0';
        printf("%s %d\n", projects[i].name, projects[i].num);
    }
}
int main()
{
if(freopen("input.txt","r",stdin)==NULL) perror("Can not open the input file!");
//input your ...
char input[LEN];
while(fgets(input,LEN,stdin)&&input[0]!='0')
{
init_hashtable();
strcpy(projects[0].name,input);
// printf("project name:%s\n",input);
projects[0].num=0;
project_index=0;
while(fgets(input,LEN,stdin)&&input[0]!='1')
{
if(input[0]>='A'&&input[0]<='Z')//project name
{
project_index++;
strcpy(projects[project_index].name,input);
// printf("project name:%s\n",input);
projects[project_index].num=0;
}
else
{
count_student(input,project_index);
// printf("stduent name:%s\n",input);
}
}
output();
}
return 0;
}
这个程序刷到了头一页上(本人很少有程序能到第一页),很高兴。
不要说你改进了那个算法,把效率提高了几倍!好的算法是可以几十倍,几百倍的提高效率的! ---杨老师。