GNU工具-gperf详解(完美哈希函数生成器)

1.gperf是干什么的?

GNU gperf is a perfect hash function generator. For a given list of strings,it produces a hash function and hash table, in form of C or C++ code, forlooking up a value depending on the input string. The hash function isperfect, which means that the hash table has no collisions, and the hash table lookup needs a single string comparison only.

GNU gperf是完美哈希函数生成器。对于一个给定的字符串集,它会生成哈希函数和哈希表。以c或cpp代码的形式,通过输入的字符串来实现查询操作。产生的哈希函数是完美的,意味着哈希表没有冲突,并且查询操作仅仅需要一次比较操作。


2.下载安装(windows版)

下载地址:点击此处

下载后直接双击安装

设置环境变量 把安装的bin文件目录设置到path中



3.使用方法

  gperf可以根据输入文件中的字符串集合生成hash函数和哈希表(生成完美哈希),一般根据字符串计算出无冲突hash值,然后进行字符串比较

 1.配置输入文件

输入文件格式实例c.gperf

if
do
int
for
case
char
auto
goto
else
long
void
enum
float
short
union
break
while
const
double
static
extern
struct
return
sizeof
switch
signed
typedef
default
unsigned
continue
register
volatile

jscript.h:(需要的头文件)

 enum
 {
    TK_ABSTRACT = 255,
    TK_BOOLEAN,
    TK_BREAK,
    TK_BYTE,
    TK_CASE,
    TK_CATCH,
    TK_CHAR,
    TK_CLASS,
    TK_CONST,
    TK_CONTINUE,
    TK_DEFAULT,
    TK_DO,
    TK_DOUBLE,
    TK_ELSE,
    TK_EXTENDS,
    TK_FALSE,
    TK_FINAL,
    TK_FINALLY,
    TK_FLOAT,
    TK_FOR,
    TK_FUNCTION,
    TK_GOTO,
    TK_IF,
    TK_IMPLEMENTS,
    TK_IMPORT,
    TK_IN,
    TK_INSTANCEOF,
    TK_INT,
    TK_INTERFACE,
    TK_LONG,
    TK_NATIVE,
    TK_NEW,
    TK_NULL,
    TK_PACKAGE,
    TK_PRIVATE,
    TK_PROTECTED,
    TK_PUBLIC,
    TK_RETURN,
    TK_SHORT,
    TK_STATIC,
    TK_SUPER,
    TK_SWITCH,
    TK_SYNCHRONIZED,
    TK_THIS,
    TK_THROW,
    TK_THROWS,
    TK_TRANSIENT,
    TK_TRUE,
    TK_TRY,
    TK_VAR,
    TK_VOID,
    TK_WHILE,
    TK_WITH,
};



jscript.gperf文件实例:

%{
/* Command-line: gperf -k"1,2,$" -t -K "name" -H "js_kw_hash" -N "js_kw_lookup" jscript.gperf */
/* -g -o -j1 -t -p */
#include <stdio.h>
#include "jscript.h"
%}
struct js_keyword {
  char * name;
  int token;
};

%%
# Javascript reserved words, see "keywords.html"
abstract, TK_ABSTRACT
boolean, TK_BOOLEAN
break, TK_BREAK
byte, TK_BYTE
case, TK_CASE
catch, TK_CATCH
char, TK_CHAR
class, TK_CLASS
const, TK_CONST
continue, TK_CONTINUE
default, TK_DEFAULT
do, TK_DO
double, TK_DOUBLE
else, TK_ELSE
extends, TK_EXTENDS
false, TK_FALSE
final, TK_FINAL
finally, TK_FINALLY
float, TK_FLOAT
for, TK_FOR
function, TK_FUNCTION
goto, TK_GOTO
if, TK_IF
implements, TK_IMPLEMENTS
import, TK_IMPORT
in, TK_IN
instanceof, TK_INSTANCEOF
int, TK_INT
interface, TK_INTERFACE
long, TK_LONG
native, TK_NATIVE
new, TK_NEW
null, TK_NULL
package, TK_PACKAGE
private, TK_PRIVATE
protected, TK_PROTECTED
public, TK_PUBLIC
return, TK_RETURN
short, TK_SHORT
static, TK_STATIC
super, TK_SUPER
switch, TK_SWITCH
synchronized, TK_SYNCHRONIZED
this, TK_THIS
throw, TK_THROW
throws, TK_THROWS
transient, TK_TRANSIENT
true, TK_TRUE
try, TK_TRY
var, TK_VAR
void, TK_VOID
while, TK_WHILE
with, TK_WITH
%%

int main(void)  //要写的程序也可以写到文件中,从而后来生成真正的程序
{
    char *js_word[] = {"protected", "throws", "with", 0};
    char *no_js_word[] = {"protectef", "throwd", "witp", 0};

    char ** cp;

    cp = &js_word[0];
    while (*cp != NULL)
    {
        struct js_keyword* p = js_kw_lookup(*cp, strlen(*cp));
        printf("%s is %s javascript expression!\n", *cp,p? "in":"not in");
        printf("%s is %s javascript expression!\n", *cp,p ? "in":"not in");
        printf("%s->%d\n",p->name,p->token);
        cp++;
    }

    cp = &no_js_word[0];
    while (*cp != NULL)
    {
        printf("%s is %s javascript expression!\n", *cp, js_kw_lookup(*cp, strlen(*cp))? "in":"not in");
        cp++;
    }
    return 0;
}

2.用配置文件生成程序源文件(注意,gperf哈希函数和哈希表都是硬编码到此生成文件中)

D:\study\gperf\example>gperf -k"1,2,$" -t -K "name" -H "js_kw_hash" -N "js_kw_lookup" jscript.gperf  > jscript.c

切换到当前目录,通过设置的jscript.gperf文件生成 jscript.c源文件

/* C code produced by gperf version 3.0.1 */
/* Command-line: gperf -k'1,2,$' -t -K name -H js_kw_hash -N js_kw_lookup jscript.gperf  */

#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
/* The character set is not based on ISO-646.  */
error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
#endif

#line 1 "jscript.gperf"

/* Command-line: gperf -k"1,2,$" -t -K "name" -H "js_kw_hash" -N "js_kw_lookup" jscript.gperf */
/* -g -o -j1 -t -p */
#include <stdio.h>
#include "jscript.h"
#line 7 "jscript.gperf"
struct js_keyword {
  char * name;
  int token;
};

#define TOTAL_KEYWORDS 53  //统计记录总数
#define MIN_WORD_LENGTH 2  //字符串最小长度
#define MAX_WORD_LENGTH 12 //字符串最大长度
#define MIN_HASH_VALUE 2   //最小哈希值,对应于wordlist的索引值
#define MAX_HASH_VALUE 79  //最大哈希值,对应于wordlist的索引值
/* maximum key range = 78, duplicates = 0 */

#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
js_kw_hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =       /*定义包含255个字符的数组,用MAX_HASH_VALUE+1初始化*/
    {
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 25, 35, 25,
      20,  0, 10, 50,  0,  0, 80, 15, 15, 45,
       0, 10,  5, 80, 15,  5,  5, 40,  5,  0,
       0, 30, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80, 80, 80, 80, 80,
      80, 80, 80, 80, 80, 80
    };
  return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]] + asso_values[(unsigned char)str[len - 1]];  //最重要的hash函数,计算wordlist索引
}

#ifdef __GNUC__
__inline
#endif
struct js_keyword *
js_kw_lookup (str, len)
     register const char *str;
     register unsigned int len;
{
  static struct js_keyword wordlist[] =     //定义hash表
    {
      {""}, {""},
#line 39 "jscript.gperf"
      {"in", TK_IN},
#line 45 "jscript.gperf"
      {"new", TK_NEW},
#line 66 "jscript.gperf"
      {"with", TK_WITH},
#line 65 "jscript.gperf"
      {"while", TK_WHILE},
      {""}, {""},
#line 41 "jscript.gperf"
      {"int", TK_INT},
#line 42 "jscript.gperf"
      {"interface", TK_INTERFACE},
#line 58 "jscript.gperf"
      {"throw", TK_THROW},
#line 55 "jscript.gperf"
      {"switch", TK_SWITCH},
#line 28 "jscript.gperf"
      {"extends", TK_EXTENDS},
      {""},
#line 57 "jscript.gperf"
      {"this", TK_THIS},
#line 52 "jscript.gperf"
      {"short", TK_SHORT},
#line 59 "jscript.gperf"
      {"throws", TK_THROWS},
      {""}, {""},
#line 27 "jscript.gperf"
      {"else", TK_ELSE},
#line 40 "jscript.gperf"
      {"instanceof", TK_INSTANCEOF},
#line 51 "jscript.gperf"
      {"return", TK_RETURN},
#line 36 "jscript.gperf"
      {"if", TK_IF},
      {""},
#line 61 "jscript.gperf"
      {"true", TK_TRUE},
      {""}, {""},
#line 48 "jscript.gperf"
      {"private", TK_PRIVATE},
      {""}, {""},
#line 30 "jscript.gperf"
      {"final", TK_FINAL},
#line 44 "jscript.gperf"
      {"native", TK_NATIVE},
#line 24 "jscript.gperf"
      {"default", TK_DEFAULT},
      {""},
#line 60 "jscript.gperf"
      {"transient", TK_TRANSIENT},
#line 32 "jscript.gperf"
      {"float", TK_FLOAT},
#line 26 "jscript.gperf"
      {"double", TK_DOUBLE},
#line 47 "jscript.gperf"
      {"package", TK_PACKAGE},
#line 33 "jscript.gperf"
      {"for", TK_FOR},
#line 64 "jscript.gperf"
      {"void", TK_VOID},
#line 29 "jscript.gperf"
      {"false", TK_FALSE},
#line 53 "jscript.gperf"
      {"static", TK_STATIC},
#line 25 "jscript.gperf"
      {"do", TK_DO},
#line 23 "jscript.gperf"
      {"continue", TK_CONTINUE},
#line 20 "jscript.gperf"
      {"char", TK_CHAR},
#line 22 "jscript.gperf"
      {"const", TK_CONST},
      {""},
#line 31 "jscript.gperf"
      {"finally", TK_FINALLY},
#line 63 "jscript.gperf"
      {"var", TK_VAR},
#line 49 "jscript.gperf"
      {"protected", TK_PROTECTED},
#line 21 "jscript.gperf"
      {"class", TK_CLASS},
      {""},
#line 15 "jscript.gperf"
      {"boolean", TK_BOOLEAN},
#line 62 "jscript.gperf"
      {"try", TK_TRY},
#line 18 "jscript.gperf"
      {"case", TK_CASE},
#line 19 "jscript.gperf"
      {"catch", TK_CATCH},
#line 38 "jscript.gperf"
      {"import", TK_IMPORT},
      {""},
#line 34 "jscript.gperf"
      {"function", TK_FUNCTION},
#line 46 "jscript.gperf"
      {"null", TK_NULL},
#line 37 "jscript.gperf"
      {"implements", TK_IMPLEMENTS},
      {""}, {""}, {""}, {""},
#line 54 "jscript.gperf"
      {"super", TK_SUPER},
      {""},
#line 56 "jscript.gperf"
      {"synchronized", TK_SYNCHRONIZED},
      {""},
#line 17 "jscript.gperf"
      {"byte", TK_BYTE},
#line 16 "jscript.gperf"
      {"break", TK_BREAK},
      {""}, {""},
#line 14 "jscript.gperf"
      {"abstract", TK_ABSTRACT},
#line 35 "jscript.gperf"
      {"goto", TK_GOTO},
      {""},
#line 50 "jscript.gperf"
      {"public", TK_PUBLIC},
      {""}, {""},
#line 43 "jscript.gperf"
      {"long", TK_LONG}
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      register int key = js_kw_hash (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          register const char *s = wordlist[key].name;

          if (*str == *s && !strcmp (str + 1, s + 1))
            return &wordlist[key];
        }
    }
  return 0;
}
#line 67 "jscript.gperf"


int main(void)
{
    char *js_word[] = {"protected", "throws", "with", 0};
    char *no_js_word[] = {"protectef", "throwd", "witp", 0};

    char ** cp;

    cp = &js_word[0];
   while (*cp != NULL)
    {
        struct js_keyword* p = js_kw_lookup(*cp, strlen(*cp));
        printf("%s is %s javascript expression!\n", *cp,p? "in":"not in");
        printf("%s is %s javascript expression!\n", *cp,p ? "in":"not in");
        printf("%s->%d\n",p->name,p->token);
        cp++;
    }

    cp = &no_js_word[0];
    while (*cp != NULL)
    {
        printf("%s is %s javascript expression!\n", *cp, js_kw_lookup(*cp, strlen(*cp))? "in":"not in");
        cp++;
    }
 return 0;
}

我们主要关注两个函数:

static unsigned int js_kw_hash (str, len):根据传入的字符串生成哈希值

struct js_keyword * js_kw_lookup (str, len):根据传入的字符串进行查找操作


编译



运行:




详解参见:https://www.ibm.com/developerworks/cn/linux/l-gperf.html

  • 2
    点赞
  • 13
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

五癫

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值