GNU hsearch (I)

用JNI封装使用了STL容器(map和vector)的C++代码时,触发了glibc 2.12之前的版本中存在的一个bug,因此需要寻找STL map的简单替代方案。这里基于GNU hsearch(参见 man 3 hsearch)封装了一个简单的hash table类。

关于glibc的那个bug,可参见:

https://bugzilla.redhat.com/show_bug.cgi?id=752122

https://bugzilla.redhat.com/show_bug.cgi?id=751331

https://lists.debian.org/debian-glibc/2011/01/msg00074.html

http://mail.openjdk.java.net/pipermail/hotspot-compiler-dev/2013-March/010002.html


关于一些现成的hash table的简单对比,可参见:

http://preshing.com/20110603/hash-table-performance-tests/


简单示例(g++ -g -W -Wall simple_ht_example.cpp SimpleHashtab.o):

#include "SimpleHashtab.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>


int main()
{
    Hashtab ht(16);

    printf("size[%u] filled[%u]\n", ht.getSize(), ht.getFilled());

    ht.insert("rtest", "111.111.111.111:9092,112.112.112.112:9092");
    ht.insert("test", "222.222.222.222:9092,221.221.221.221:9092");

    printf("size[%u] filled[%u]\n", ht.getSize(), ht.getFilled());

    ENTRY *ep = ht.find("rtest");

    printf("key[%s] data[%s]\n", ht.getEntryKey(ep), (char *)ht.getEntryData(ep));

    ht.replace("rtest", "999.999.999.999:9092,998.998.998.998:9092", GC_NONE);

    printf("size[%u] filled[%u]\n", ht.getSize(), ht.getFilled());

    ep = ht.find("rtest");

    printf("key[%s] data[%s]\n", ht.getEntryKey(ep), (char *)ht.getEntryData(ep));

    //ht.erase("test", GC_NONE);

    //printf("size[%u] filled[%u]\n", ht.getSize(), ht.getFilled());

    char **keys = (char **)malloc((ht.getFilled() + 4) * sizeof(char *));

    ht.getKeys(keys, ht.getFilled() + 4);

    for (unsigned int i = 0; i < ht.getFilled() + 4; i++) {
        printf("[%s]\n", keys[i] ? keys[i] : "nil");
    }

    free(keys);

    return 0;
}



封装代码如下(g++ -g -W -Wall -c SimpleHashtab.c):

#ifndef _SIMPLEHASHTAB_H
#define _SIMPLEHASHTAB_H


#ifndef _GNU_SOURCE
#define _GNU_SOURCE 1
#endif

#include <search.h>


#ifdef __cplusplus
extern "C" {
#endif

// Operation selector for hsearch2_r().
typedef enum {
    FIND2,   // look the key up; the table is not modified
    ENTER2,  // store the item unless the key is already present
    REPLACE, // overwrite the data pointer of an existing entry
    RELEASE  // remove an existing entry and mark its slot as reusable
} ACTION2;

// from glibc/misc
// One slot of the table pointed to by hsearch_data::table.
// `used` is 0 for a slot that was never occupied, (unsigned int)-1 for a slot
// whose entry was released by hsearch2_r(), and otherwise holds the hash value
// of the key stored in `entry`.
typedef struct _ENTRY
{
  unsigned int used;
  ENTRY entry;
}
_ENTRY;

// Bit flags telling hsearch2_r()/hrelease_r() which pointers of an entry they
// should release on behalf of the caller.
typedef enum {
    GC_NONE = 0,              // release nothing
    GC_KEY_BY_FREE = 0x01,    // release the key with free()
    GC_KEY_BY_DELETE = 0x02,  // release the key with delete (only checked when GC_KEY_BY_FREE is not set)
    GC_DATA_BY_FREE = 0x04,   // release the data with free()
    GC_DATA_BY_DELETE = 0x08 // not meaningful: the code handling this flag is commented out (FIXME), so it is currently ignored
} GC_MANUAL ;

// Hashtab-specific error codes stored in Hashtab::errno_ (see getErrno()).
enum {
    HT_ERR_BAD_ARG = -100,     // a required pointer argument was NULL (or an entry key was empty)
    HT_ERR_SHORT_SPACE = -101  // the array passed to getKeys() has fewer slots than stored entries
};

#ifdef __cplusplus
}
#endif


#if 0
// TODO
// Callback type for the (still disabled) Hashtab::traverse(): receives the
// entry's key and data plus a caller-supplied argument.
typedef int (*ht_walk_fn)(const char *key, void *data, void *uarg);
#endif


// Low-level helpers implemented next to the class.
//
// They used to be declared only as friends inside Hashtab, with default
// arguments.  A friend declaration that specifies default arguments must be a
// definition and the only declaration of the function, so that form is
// rejected by conforming compilers.  Neither function touches Hashtab's
// private members (both operate on a hsearch_data pointer), so they are plain
// namespace-scope declarations now; the default arguments are unchanged.

// Reset every slot of `htab`, releasing keys/data as selected by `manual`.
int hrelease_r(struct hsearch_data *htab, GC_MANUAL manual = GC_KEY_BY_FREE);

// hsearch_r() variant that additionally supports REPLACE and RELEASE.
int hsearch2_r(ENTRY item, ACTION2 action, ENTRY **retval,
               struct hsearch_data *htab, GC_MANUAL manual = GC_NONE);


// Minimal string-keyed hash table built on top of glibc's hsearch_data.
//
// Keys are copied on insert() and owned by the table; data pointers are
// stored as-is and stay owned by the caller unless a GC_MANUAL flag asks the
// table to free() them.  The capacity is fixed at construction time.
class Hashtab {
private:
    struct hsearch_data htab_;
    int errno_; // last error: an errno value or one of the HT_ERR_* codes

    // Not copyable: a copy would share htab_.table and the key strings, and
    // both destructors would then release them.  Declared, never defined.
    Hashtab(const Hashtab &);
    Hashtab &operator=(const Hashtab &);

public:
    // Creates a table able to hold at least `size` entries.  On failure the
    // reason is available through getErrno().
    Hashtab(unsigned int size);

    // Releases the table and the keys it owns (with free()); data pointers
    // are left alone.
    ~Hashtab();

    // Stores `data` under a private copy of `key`.  Both must be non-NULL.
    // Returns false on bad arguments, allocation failure or a full table.
    bool insert(const char *key, const void *data);

    // Replaces the data stored under `key`.  By default the previous data
    // pointer is released with free(); pass GC_NONE when it was not obtained
    // from malloc().  Returns false if the key is absent.
    bool replace(const char *key, const void *data,
                 GC_MANUAL manual = GC_DATA_BY_FREE);

    // Returns the entry stored under `key`, or NULL if there is none.
    ENTRY *find(const char *key);

    // Removes the entry stored under `key`; `manual` selects what is released.
    // Returns false if the key is absent.
    bool erase(const char *key, GC_MANUAL manual = GC_KEY_BY_FREE);
#if 0
    // TODO
    bool traverse(ht_walk_fn fn, int on_error);
#endif

    // Capacity of the underlying table / number of entries currently stored.
    unsigned int getSize() const;
    unsigned int getFilled() const;

    // Fills `keys` (an array of `num` slots) with pointers to the stored keys
    // and pads the remainder with NULL.  Fails with HT_ERR_SHORT_SPACE when
    // `num` is smaller than getFilled().
    bool getKeys(char *keys[], unsigned int num);

    // Accessors for an entry returned by find().  getEntryKey() returns NULL
    // for a NULL entry and for a NULL or empty key; getEntryData() returns
    // NULL for a NULL entry.
    const char *getEntryKey(const ENTRY *ep);
    void *getEntryData(const ENTRY *ep);

    // Error recorded by the most recent failing operation.
    int getErrno() const;
};


#endif

#include "SimpleHashtab.h"

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>


// from glibc/misc
#define __set_errno(val) (errno = (val))


int hrelease_r(struct hsearch_data *htab, GC_MANUAL manual)
{
    unsigned int i;

    if (htab->filled == 0) return 1;

    for (i = 0; i < htab->size; i++) {
        switch (htab->table[i].used) {
        case (unsigned int)-1:
            htab->table[i].used = 0;
        case 0:
            break;
        default:
            htab->table[i].used = 0;
            if (manual & 0x01) free(htab->table[i].entry.key);
            else if (manual & 0x02) delete htab->table[i].entry.key;
            if (manual & 0x04) free(htab->table[i].entry.data);
            // FIXME:
            //else if (manual & 0x08) delete htab->table[i].entry.data;
            htab->table[i].entry.key = NULL;
            htab->table[i].entry.data = NULL;
            break;
        }
    }

    htab->filled = 0;

    return 1;
}


// modified from glibc-2.19 misc/hsearch_r.c
int hsearch2_r(ENTRY item, ACTION2 action, ENTRY **retval,
               struct hsearch_data *htab, GC_MANUAL manual)
{
  unsigned int hval;
  unsigned int count;
  unsigned int len = strlen (item.key);
  unsigned int idx;

  /* Compute an value for the given string. Perhaps use a better method. */
  hval = len;
  count = len;
  while (count-- > 0)
    {
      hval <<= 4;
      hval += item.key[count];
    }
  if (hval == 0)
    ++hval;

  /* First hash function: simply take the modul but prevent zero. */
  idx = hval % htab->size + 1;

  // n.b. open-address (re-hash) to deal with collision
  // used == (unsigned int)-1 means
  // this slot was ever released and could be reused now
  if (htab->table[idx].used)
    {
      /* Further action might be required according to the action value. */
      if (htab->table[idx].used == hval
          && htab->table[idx].entry.key != NULL
	  && strcmp (item.key, htab->table[idx].entry.key) == 0)
	{
        switch (action) {
        case REPLACE:
            if (manual & 0x04) free(htab->table[idx].entry.data);
            // FIXME:
            //else if (manual & 0x08) delete htab->table[idx].entry.data;
            htab->table[idx].entry.data = item.data;
            break;
        case RELEASE:
            if (manual & 0x01) free(htab->table[idx].entry.key);
            else if (manual & 0x02) delete htab->table[idx].entry.key;
            if (manual & 0x04) free(htab->table[idx].entry.data);
            // FIXME:
            //else if (manual & 0x08) delete htab->table[idx].entry.data;
            htab->table[idx].entry.key = NULL;
            htab->table[idx].used = (unsigned int)-1;
            --htab->filled;
            break;
        default:
            break;
        }

	  *retval = &htab->table[idx].entry;
	  return 1;
	} else if (htab->table[idx].used == (unsigned int)-1) {
        if (action == ENTER2) {
            htab->table[idx].used  = hval;
            htab->table[idx].entry = item;

            ++htab->filled;

            *retval = &htab->table[idx].entry;
            return 1;
        }
    }

      /* Second hash function, as suggested in [Knuth] */
      unsigned int hval2 = 1 + hval % (htab->size - 2);
      unsigned int first_idx = idx;

      do
	{
	  /* Because SIZE is prime this guarantees to step through all
             available indeces.  */
          if (idx <= hval2)
	    idx = htab->size + idx - hval2;
	  else
	    idx -= hval2;

	  /* If we visited all entries leave the loop unsuccessfully.  */
	  if (idx == first_idx)
	    break;

            /* If entry is found use it. */
          if (htab->table[idx].used == hval
              && htab->table[idx].entry.key != NULL
	      && strcmp (item.key, htab->table[idx].entry.key) == 0)
	    {
            switch (action) {
            case REPLACE:
                if (manual & 0x04) free(htab->table[idx].entry.data);
                // FIXME:
                //else if (manual & 0x08) delete htab->table[idx].entry.data;
	        htab->table[idx].entry.data = item.data;
                break;
            case RELEASE:
                if (manual & 0x01) free(htab->table[idx].entry.key);
                else if (manual & 0x02) delete htab->table[idx].entry.key;
                if (manual & 0x04) free(htab->table[idx].entry.data);
                // FIXME:
                //else if (manual & 0x08) delete htab->table[idx].entry.data;
                htab->table[idx].entry.key = NULL;
                htab->table[idx].used = 0;
                --htab->filled;
                break;
            default:
                break;
            }

	      *retval = &htab->table[idx].entry;
	      return 1;
	    } else if (htab->table[idx].used == (unsigned int)-1) {
            if (action == ENTER2) {
                htab->table[idx].used  = hval;
                htab->table[idx].entry = item;

                ++htab->filled;

                *retval = &htab->table[idx].entry;
                return 1;
            }
        }
	}
      while (htab->table[idx].used);
    }

  /* An empty bucket has been found. */
  if (action == ENTER2)
    {
      /* If table is full and another entry should be entered return
	 with error.  */
      if (htab->filled == htab->size)
	{
	  __set_errno (ENOMEM);
	  *retval = NULL;
	  return 0;
	}

      htab->table[idx].used  = hval;
      htab->table[idx].entry = item;

      ++htab->filled;

      *retval = &htab->table[idx].entry;
      return 1;
    }

  __set_errno (ESRCH);
  *retval = NULL;
  return 0;
}


// Builds the underlying hsearch table for at least `size` entries.
// hcreate_r() requires a zeroed hsearch_data, hence the memset.  A failure is
// reported on stderr and its errno value is kept for getErrno().
Hashtab::Hashtab(unsigned int size): errno_(0)
{
    memset(&htab_, 0, sizeof htab_);

    const int created = hcreate_r(size, &htab_);
    if (created != 0) {
        return;
    }

    errno_ = errno;
    fprintf(stderr, "hcreate_r failed\n");
}


// Releases the keys copied by insert() and then the table itself.
// Data pointers are never released here; they belong to the caller.
Hashtab::~Hashtab()
{
    // The table is NULL when hcreate_r() failed in the constructor.
    if (htab_.table != NULL) {
        // Entries live at indices 1..size (the table has size + 1 slots), so
        // the upper bound is inclusive; stopping at size - 1 would leak the
        // key stored in the last slot.
        for (unsigned int i = 0; i <= htab_.size; i++) {
            const unsigned int used = htab_.table[i].used;

            // 0: never used, (unsigned int)-1: released -- no key to free.
            if (used == 0 || used == (unsigned int)-1) {
                continue;
            }

            free(htab_.table[i].entry.key);
            // XXX: what about entry.data?
        }
    }

    hdestroy_r(&htab_);
}


// Stores `data` under a private, malloc()-ed copy of `key`.
//
// Ownership: the table owns the key copy (released by erase() with
// GC_KEY_BY_FREE or by the destructor); `data` is stored as-is and stays
// owned by the caller.
//
// If `key` is already present the existing entry is left unchanged and true
// is returned (use replace() to change its data).
//
// Returns false and records the reason (see getErrno()) when an argument is
// NULL, the key copy cannot be allocated, or the table is full.
bool Hashtab::insert(const char *key, const void *data)
{
    if (key == NULL || data == NULL) {
        errno_ = HT_ERR_BAD_ARG;
        return false;
    }

    ENTRY e, *ep = NULL;
    size_t len = strlen(key);

    if ((e.key = (char *)malloc(len + 1)) == NULL) {
        errno_ = ENOMEM;
        fprintf(stderr, "malloc failed for ENTRY.key\n");
        return false;
    }

    memcpy(e.key, key, len);
    e.key[len] = '\0';

    e.data = const_cast<void *>(data);

    if (hsearch2_r(e, ENTER2, &ep, &htab_, GC_NONE) == 0) {
        errno_ = errno;
        free(e.key);
        fprintf(stderr, "hsearch_r ENTER failed\n");
        return false;
    }

    // ENTER2 hands back the already stored entry when the key exists.  In
    // that case the table never took our copy of the key, so release it here
    // instead of leaking it.
    if (ep != NULL && ep->key != e.key) {
        free(e.key);
    }

    return true;
}


// Swaps the data pointer stored under `key` for `data`.
// `manual` is forwarded to hsearch2_r() and decides whether the previous data
// pointer is released.  Returns false (recording the reason for getErrno())
// when an argument is NULL or the lookup fails.
bool Hashtab::replace(const char *key, const void *data, GC_MANUAL manual)
{
    const bool args_ok = (key != NULL) && (data != NULL);
    if (!args_ok) {
        errno_ = HT_ERR_BAD_ARG;
        return false;
    }

    // The table only reads the probe's key; the const is cast away solely to
    // fit the ENTRY layout.
    ENTRY probe;
    probe.key = const_cast<char *>(key);
    probe.data = const_cast<void *>(data);

    ENTRY *hit = NULL;
    const int rc = hsearch2_r(probe, REPLACE, &hit, &htab_, manual);

    if (rc != 0) {
        return true;
    }

    errno_ = errno;
    fprintf(stderr, "hsearch2_r REPLACE failed\n");
    return false;
}


// Looks `key` up and returns its entry, or NULL when `key` is NULL or not
// stored (the reason is recorded for getErrno(); a miss is also reported on
// stderr).  The returned entry points into the table and is invalidated by
// erase() of the same key and by destruction of the table.
ENTRY *Hashtab::find(const char *key)
{
    if (key == NULL) {
        errno_ = HT_ERR_BAD_ARG;
        return NULL;
    }

    ENTRY e, *ep = NULL;

    e.key = const_cast<char *>(key);
    // The probe is passed by value; give data a defined value so that no
    // indeterminate pointer is copied.
    e.data = NULL;

    if (hsearch2_r(e, FIND2, &ep, &htab_, GC_NONE) == 0) {
        errno_ = errno;
        fprintf(stderr, "hsearch_r FIND failed\n");
        return NULL;
    }

    return ep;
}


// Removes the entry stored under `key`.  `manual` is forwarded to
// hsearch2_r() and selects which of the entry's pointers are released.
// Returns false (recording the reason for getErrno()) when `key` is NULL or
// not stored.
bool Hashtab::erase(const char *key, GC_MANUAL manual)
{
    if (key == NULL) {
        errno_ = HT_ERR_BAD_ARG;
        return false;
    }

    ENTRY e, *ep = NULL;

    e.key = const_cast<char *>(key);
    // The probe is passed by value; give data a defined value so that no
    // indeterminate pointer is copied.
    e.data = NULL;

    if (hsearch2_r(e, RELEASE, &ep, &htab_, manual) == 0) {
        errno_ = errno;
        fprintf(stderr, "hsearch2_r RELEASE failed\n");
        return false;
    }

    return true;
}


unsigned int Hashtab::getSize() const
{
    return htab_.size;
}


unsigned int Hashtab::getFilled() const
{
    return htab_.filled;
}


int Hashtab::getErrno() const
{
    return errno_;
}


bool Hashtab::getKeys(char *keys[], unsigned int num)
{
    if (num < getFilled()) {
        errno_ = HT_ERR_SHORT_SPACE;
        return false;
    }

    unsigned n = 0;

    for (unsigned int i = 0, j = getSize(); i < j; i++) {
        if (htab_.table[i].used == 0
            || htab_.table[i].used == (unsigned int)-1)
        {
            continue;
        }

        keys[n++] = htab_.table[i].entry.key;
    }

    while (n < num) keys[n++] = NULL;

    return true;
}


// Returns the key of `ep`.  A NULL entry, a NULL key and an empty key are all
// rejected: NULL is returned and HT_ERR_BAD_ARG is recorded.
const char *Hashtab::getEntryKey(const ENTRY *ep)
{
    const char *key = (ep != NULL) ? ep->key : NULL;

    if (key != NULL && key[0] != '\0') {
        return key;
    }

    errno_ = HT_ERR_BAD_ARG;
    return NULL;
}


// Returns the data pointer of `ep`, or NULL (recording HT_ERR_BAD_ARG) when
// `ep` itself is NULL.
void *Hashtab::getEntryData(const ENTRY *ep)
{
    if (ep != NULL) {
        return ep->data;
    }

    errno_ = HT_ERR_BAD_ARG;
    return NULL;
}


#if 0
// TODO
// Disabled draft: calls `fn` for every live entry of the table.
// When `fn` returns a negative value and `on_error` is 1 the walk stops and
// false is returned; any other `on_error` value ignores the failure and
// continues.  Returns true once all slots were visited.
//
// NOTE(review): before enabling this code,
//  - ht_walk_fn is declared with three parameters (key, data, uarg) but the
//    call below passes only two, so this does not compile as written;
//  - the loop covers slots 0..size-1, while hsearch2_r() computes its first
//    index as `hval % size + 1`, i.e. up to `size` -- confirm the intended
//    upper bound.
bool Hashtab::traverse(ht_walk_fn fn, int on_error)
{
    if (fn == NULL) {
        errno_ = HT_ERR_BAD_ARG;
        return false;
    }

    for (unsigned int i = 0; i < htab_.size; i++) {
        // skip slots that were never used (0) or were released ((unsigned int)-1)
        if (htab_.table[i].used == 0
            || htab_.table[i].used == (unsigned int)-1)
        {
            continue;
        }

        if ((*fn)(htab_.table[i].entry.key, htab_.table[i].entry.data) < 0) {
            switch (on_error) {
            case 1:
                // stop at the first failing callback
                return false;
            default:
                // ignore the failure and keep walking
                break;
            }
        }
    }

    return true;
}
#endif




评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值