在用JNI封装使用了STL容器(map和vector)的C++代码时,触发了glibc 2.12之前的版本中存在的一个bug,因此需要寻找STL map的简单替代方案。这里基于GNU hsearch(参见 man 3 hsearch)封装了一个简单的hash table class。
关于glibc的那个bug,可参见:
https://bugzilla.redhat.com/show_bug.cgi?id=752122
https://bugzilla.redhat.com/show_bug.cgi?id=751331
https://lists.debian.org/debian-glibc/2011/01/msg00074.html
http://mail.openjdk.java.net/pipermail/hotspot-compiler-dev/2013-March/010002.html
关于一些现成的hash table的简单对比,可参见:
http://preshing.com/20110603/hash-table-performance-tests/
简单示例(g++ -g -W -Wall simple_ht_example.cpp SimpleHashtab.o):
#include "SimpleHashtab.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main()
{
Hashtab ht(16);
printf("size[%u] filled[%u]\n", ht.getSize(), ht.getFilled());
ht.insert("rtest", "111.111.111.111:9092,112.112.112.112:9092");
ht.insert("test", "222.222.222.222:9092,221.221.221.221:9092");
printf("size[%u] filled[%u]\n", ht.getSize(), ht.getFilled());
ENTRY *ep = ht.find("rtest");
printf("key[%s] data[%s]\n", ht.getEntryKey(ep), (char *)ht.getEntryData(ep));
ht.replace("rtest", "999.999.999.999:9092,998.998.998.998:9092", GC_NONE);
printf("size[%u] filled[%u]\n", ht.getSize(), ht.getFilled());
ep = ht.find("rtest");
printf("key[%s] data[%s]\n", ht.getEntryKey(ep), (char *)ht.getEntryData(ep));
//ht.erase("test", GC_NONE);
//printf("size[%u] filled[%u]\n", ht.getSize(), ht.getFilled());
char **keys = (char **)malloc((ht.getFilled() + 4) * sizeof(char *));
ht.getKeys(keys, ht.getFilled() + 4);
for (unsigned int i = 0; i < ht.getFilled() + 4; i++) {
printf("[%s]\n", keys[i] ? keys[i] : "nil");
}
free(keys);
return 0;
}
封装代码如下(g++ -g -W -Wall -c SimpleHashtab.c):
#ifndef _SIMPLEHASHTAB_H
#define _SIMPLEHASHTAB_H
#ifndef _GNU_SOURCE
#define _GNU_SOURCE 1
#endif
#include <search.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Operations understood by hsearch2_r(). */
typedef enum {
    FIND2   = 0,  /* look a key up without modifying the table */
    ENTER2  = 1,  /* add the item unless the key is already present */
    REPLACE = 2,  /* overwrite the data pointer of an existing key */
    RELEASE = 3   /* remove an existing key and mark its slot as released */
} ACTION2;
// from glibc/misc
// One slot of the table array that struct hsearch_data points to.
// NOTE(review): this is presumably a copy of glibc's private definition and
// must keep the same layout as the one used by hcreate_r() -- confirm against
// the glibc version in use.
typedef struct _ENTRY
{
// 0: slot never used; (unsigned int)-1: slot was released and may be reused;
// any other value: hash of the key stored in `entry`.
unsigned int used;
// The user-visible key/data pair.
ENTRY entry;
}
_ENTRY;
/* Bit flags selecting how a key / data pointer is disposed of when an entry
 * is replaced or released. GC_NONE leaves both pointers to the caller. */
typedef enum {
    GC_NONE           = 0,
    GC_KEY_BY_FREE    = 1 << 0,  /* free() the key */
    GC_KEY_BY_DELETE  = 1 << 1,  /* delete the key */
    GC_DATA_BY_FREE   = 1 << 2,  /* free() the data */
    GC_DATA_BY_DELETE = 1 << 3   /* not implemented: data is a void * */
} GC_MANUAL;
/* Hashtab-specific error codes stored in Hashtab::errno_; negative so they
 * cannot be confused with the (positive) system errno values also kept there. */
enum {
    HT_ERR_BAD_ARG     = -100,  /* a required argument was NULL */
    HT_ERR_SHORT_SPACE = -101   /* caller-supplied array is too small */
};
#ifdef __cplusplus
}
#endif
#if 0
// TODO
// Callback type for the (not yet enabled) Hashtab::traverse(): receives an
// entry's key and data plus an opaque user argument, and returns an int
// status (traverse() treats a negative value as an error).
typedef int (*ht_walk_fn)(const char *key, void *data, void *uarg);
#endif
// Low-level table primitives implemented next to the class. Their default
// arguments live on these namespace-scope declarations because a friend
// declaration that is not a definition may not specify default arguments.
int hrelease_r(struct hsearch_data *htab, GC_MANUAL manual = GC_KEY_BY_FREE);
int hsearch2_r(ENTRY item, ACTION2 action, ENTRY **retval,
               struct hsearch_data *htab, GC_MANUAL manual = GC_NONE);

// Fixed-capacity string-keyed hash table built on GNU hcreate_r()/hsearch_r()
// storage. Keys are copied on insert; data pointers are stored as given.
class Hashtab {
private:
    struct hsearch_data htab_;  // underlying glibc table
    int errno_;                 // last error: a system errno or an HT_ERR_* code

    friend int hrelease_r(struct hsearch_data *, GC_MANUAL);
    friend int hsearch2_r(ENTRY, ACTION2, ENTRY **, struct hsearch_data *,
                          GC_MANUAL);

    // The table owns malloc()ed keys that the destructor frees, so a
    // memberwise copy would free them twice. Copying is therefore disabled
    // (declared, never defined).
    Hashtab(const Hashtab &);
    Hashtab &operator=(const Hashtab &);

public:
    // Creates a table for about `size` entries; on failure getErrno() is set.
    Hashtab(unsigned int size);
    // Frees the stored key copies and the table itself (never the data).
    ~Hashtab();

    // Stores a private copy of `key` together with the `data` pointer.
    // Returns false (see getErrno()) on NULL arguments or a full table.
    bool insert(const char *key, const void *data);
    // Points an existing key at new `data`; `manual` selects whether the old
    // data pointer is free()d first (the default) or left to the caller.
    bool replace(const char *key, const void *data,
                 GC_MANUAL manual = GC_DATA_BY_FREE);
    // Returns the entry stored under `key`, or NULL when it is absent.
    ENTRY *find(const char *key);
    // Removes `key`; `manual` selects how the stored key/data are disposed of.
    bool erase(const char *key, GC_MANUAL manual = GC_KEY_BY_FREE);
#if 0
    // TODO
    bool traverse(ht_walk_fn fn, int on_error);
#endif
    // Capacity of the underlying table.
    unsigned int getSize() const;
    // Number of entries currently stored.
    unsigned int getFilled() const;
    // Writes the stored key pointers into keys[0..num) and pads the rest with
    // NULL; fails with HT_ERR_SHORT_SPACE when num < getFilled().
    bool getKeys(char *keys[], unsigned int num);
    // Accessors for an entry returned by find(); NULL-safe.
    const char *getEntryKey(const ENTRY *ep);
    void *getEntryData(const ENTRY *ep);
    // Last error recorded by a failing member function.
    int getErrno() const;
};
#endif
#include "SimpleHashtab.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
// from glibc/misc
#define __set_errno(val) (errno = (val))
int hrelease_r(struct hsearch_data *htab, GC_MANUAL manual)
{
unsigned int i;
if (htab->filled == 0) return 1;
for (i = 0; i < htab->size; i++) {
switch (htab->table[i].used) {
case (unsigned int)-1:
htab->table[i].used = 0;
case 0:
break;
default:
htab->table[i].used = 0;
if (manual & 0x01) free(htab->table[i].entry.key);
else if (manual & 0x02) delete htab->table[i].entry.key;
if (manual & 0x04) free(htab->table[i].entry.data);
// FIXME:
//else if (manual & 0x08) delete htab->table[i].entry.data;
htab->table[i].entry.key = NULL;
htab->table[i].entry.data = NULL;
break;
}
}
htab->filled = 0;
return 1;
}
// modified from glibc-2.19 misc/hsearch_r.c
int hsearch2_r(ENTRY item, ACTION2 action, ENTRY **retval,
struct hsearch_data *htab, GC_MANUAL manual)
{
unsigned int hval;
unsigned int count;
unsigned int len = strlen (item.key);
unsigned int idx;
/* Compute an value for the given string. Perhaps use a better method. */
hval = len;
count = len;
while (count-- > 0)
{
hval <<= 4;
hval += item.key[count];
}
if (hval == 0)
++hval;
/* First hash function: simply take the modul but prevent zero. */
idx = hval % htab->size + 1;
// n.b. open-address (re-hash) to deal with collision
// used == (unsigned int)-1 means
// this slot was ever released and could be reused now
if (htab->table[idx].used)
{
/* Further action might be required according to the action value. */
if (htab->table[idx].used == hval
&& htab->table[idx].entry.key != NULL
&& strcmp (item.key, htab->table[idx].entry.key) == 0)
{
switch (action) {
case REPLACE:
if (manual & 0x04) free(htab->table[idx].entry.data);
// FIXME:
//else if (manual & 0x08) delete htab->table[idx].entry.data;
htab->table[idx].entry.data = item.data;
break;
case RELEASE:
if (manual & 0x01) free(htab->table[idx].entry.key);
else if (manual & 0x02) delete htab->table[idx].entry.key;
if (manual & 0x04) free(htab->table[idx].entry.data);
// FIXME:
//else if (manual & 0x08) delete htab->table[idx].entry.data;
htab->table[idx].entry.key = NULL;
htab->table[idx].used = (unsigned int)-1;
--htab->filled;
break;
default:
break;
}
*retval = &htab->table[idx].entry;
return 1;
} else if (htab->table[idx].used == (unsigned int)-1) {
if (action == ENTER2) {
htab->table[idx].used = hval;
htab->table[idx].entry = item;
++htab->filled;
*retval = &htab->table[idx].entry;
return 1;
}
}
/* Second hash function, as suggested in [Knuth] */
unsigned int hval2 = 1 + hval % (htab->size - 2);
unsigned int first_idx = idx;
do
{
/* Because SIZE is prime this guarantees to step through all
available indeces. */
if (idx <= hval2)
idx = htab->size + idx - hval2;
else
idx -= hval2;
/* If we visited all entries leave the loop unsuccessfully. */
if (idx == first_idx)
break;
/* If entry is found use it. */
if (htab->table[idx].used == hval
&& htab->table[idx].entry.key != NULL
&& strcmp (item.key, htab->table[idx].entry.key) == 0)
{
switch (action) {
case REPLACE:
if (manual & 0x04) free(htab->table[idx].entry.data);
// FIXME:
//else if (manual & 0x08) delete htab->table[idx].entry.data;
htab->table[idx].entry.data = item.data;
break;
case RELEASE:
if (manual & 0x01) free(htab->table[idx].entry.key);
else if (manual & 0x02) delete htab->table[idx].entry.key;
if (manual & 0x04) free(htab->table[idx].entry.data);
// FIXME:
//else if (manual & 0x08) delete htab->table[idx].entry.data;
htab->table[idx].entry.key = NULL;
htab->table[idx].used = 0;
--htab->filled;
break;
default:
break;
}
*retval = &htab->table[idx].entry;
return 1;
} else if (htab->table[idx].used == (unsigned int)-1) {
if (action == ENTER2) {
htab->table[idx].used = hval;
htab->table[idx].entry = item;
++htab->filled;
*retval = &htab->table[idx].entry;
return 1;
}
}
}
while (htab->table[idx].used);
}
/* An empty bucket has been found. */
if (action == ENTER2)
{
/* If table is full and another entry should be entered return
with error. */
if (htab->filled == htab->size)
{
__set_errno (ENOMEM);
*retval = NULL;
return 0;
}
htab->table[idx].used = hval;
htab->table[idx].entry = item;
++htab->filled;
*retval = &htab->table[idx].entry;
return 1;
}
__set_errno (ESRCH);
*retval = NULL;
return 0;
}
// Builds the underlying table through hcreate_r(). The hsearch_data block is
// zeroed first; if creation fails the system errno is kept for getErrno()
// and a diagnostic is written to stderr.
Hashtab::Hashtab(unsigned int size)
    : errno_(0)
{
    memset(&htab_, 0, sizeof htab_);

    const int created = hcreate_r(size, &htab_);
    if (created != 0) {
        return;
    }

    errno_ = errno;
    fprintf(stderr, "hcreate_r failed\n");
}
// Frees the key copies made by insert() and then the table storage.
// Data pointers are never owned by the table and are left untouched.
Hashtab::~Hashtab()
{
    // htab_.table stays NULL when hcreate_r() failed in the constructor.
    if (htab_.table != NULL) {
        // Entries live in slots 1..size (slot 0 is never used), so the scan
        // has to include index `size`; stopping one short leaks that key.
        for (unsigned int i = 1; i <= htab_.size; i++) {
            const unsigned int state = htab_.table[i].used;
            if (state == 0 || state == (unsigned int)-1) {
                continue;  // never used, or already released
            }
            free(htab_.table[i].entry.key);
            // XXX: what about entry.data?
        }
    }
    hdestroy_r(&htab_);
}
// Adds `key` -> `data` to the table.
//
// The key is duplicated with malloc(), so the caller's buffer may go away
// after the call; the copy is released by erase() (GC_KEY_BY_FREE) or by the
// destructor. `data` is stored as a bare pointer and stays owned by the
// caller.
//
// If `key` is already present the existing entry is kept unchanged and true
// is still returned; use replace() to change its data.
//
// \return false with getErrno() set to HT_ERR_BAD_ARG (NULL argument),
//         ENOMEM (allocation failed) or the errno reported by hsearch2_r().
bool Hashtab::insert(const char *key, const void *data)
{
    if (key == NULL || data == NULL) {
        errno_ = HT_ERR_BAD_ARG;
        return false;
    }

    const size_t len = strlen(key);
    char *key_copy = static_cast<char *>(malloc(len + 1));
    if (key_copy == NULL) {
        errno_ = ENOMEM;
        fprintf(stderr, "malloc failed for ENTRY.key\n");
        return false;
    }
    memcpy(key_copy, key, len + 1);  // includes the terminating '\0'

    ENTRY e;
    e.key = key_copy;
    e.data = const_cast<void *>(data);

    ENTRY *ep = NULL;
    if (hsearch2_r(e, ENTER2, &ep, &htab_, GC_NONE) == 0) {
        errno_ = errno;
        free(key_copy);
        fprintf(stderr, "hsearch_r ENTER failed\n");
        return false;
    }

    // ENTER2 hands back the existing entry when the key was already stored;
    // our fresh copy was not adopted in that case and must not leak.
    if (ep == NULL || ep->key != key_copy) {
        free(key_copy);
    }
    return true;
}
// Re-points an existing key at `data`. `manual` is forwarded to hsearch2_r()
// and decides whether the previous data pointer is free()d first.
// Fails with HT_ERR_BAD_ARG on NULL arguments, or with the errno set by
// hsearch2_r() when the key cannot be found.
bool Hashtab::replace(const char *key, const void *data, GC_MANUAL manual)
{
    if (!key || !data) {
        errno_ = HT_ERR_BAD_ARG;
        return false;
    }

    ENTRY wanted;
    wanted.key = const_cast<char *>(key);
    wanted.data = const_cast<void *>(data);

    ENTRY *slot;
    const int ok = hsearch2_r(wanted, REPLACE, &slot, &htab_, manual);
    if (ok != 0) {
        return true;
    }

    errno_ = errno;
    fprintf(stderr, "hsearch2_r REPLACE failed\n");
    return false;
}
// Looks `key` up and returns its entry, or NULL when the key is NULL
// (HT_ERR_BAD_ARG) or not stored (errno from hsearch2_r()). A miss is also
// reported on stderr.
ENTRY *Hashtab::find(const char *key)
{
    if (key == NULL) {
        errno_ = HT_ERR_BAD_ARG;
        return NULL;
    }

    ENTRY probe;
    probe.key = const_cast<char *>(key);
    // The ENTRY is passed by value, so every member must hold a determinate
    // value even though FIND2 never looks at the data pointer.
    probe.data = NULL;

    ENTRY *hit = NULL;
    if (hsearch2_r(probe, FIND2, &hit, &htab_, GC_NONE) == 0) {
        errno_ = errno;
        fprintf(stderr, "hsearch_r FIND failed\n");
        return NULL;
    }
    return hit;
}
// Removes `key` from the table. `manual` is forwarded to hsearch2_r() and
// selects how the stored key and data pointers are released.
// Returns false with HT_ERR_BAD_ARG for a NULL key, or with the errno set by
// hsearch2_r() when the key is not stored.
bool Hashtab::erase(const char *key, GC_MANUAL manual)
{
    if (key == NULL) {
        errno_ = HT_ERR_BAD_ARG;
        return false;
    }

    ENTRY probe;
    probe.key = const_cast<char *>(key);
    // The ENTRY is passed by value, so every member must hold a determinate
    // value even though RELEASE never looks at the probe's data pointer.
    probe.data = NULL;

    ENTRY *released = NULL;
    if (hsearch2_r(probe, RELEASE, &released, &htab_, manual) == 0) {
        errno_ = errno;
        fprintf(stderr, "hsearch2_r RELEASE failed\n");
        return false;
    }
    return true;
}
// Reports the `size` field of the underlying hsearch_data.
unsigned int Hashtab::getSize() const
{
    const unsigned int capacity = htab_.size;
    return capacity;
}
// Reports the `filled` field of the underlying hsearch_data, i.e. the number
// of entries currently stored.
unsigned int Hashtab::getFilled() const
{
    const unsigned int stored = htab_.filled;
    return stored;
}
// Last error code recorded by a failing member function: either a system
// errno value or one of the HT_ERR_* codes.
int Hashtab::getErrno() const
{
    const int last_error = errno_;
    return last_error;
}
bool Hashtab::getKeys(char *keys[], unsigned int num)
{
if (num < getFilled()) {
errno_ = HT_ERR_SHORT_SPACE;
return false;
}
unsigned n = 0;
for (unsigned int i = 0, j = getSize(); i < j; i++) {
if (htab_.table[i].used == 0
|| htab_.table[i].used == (unsigned int)-1)
{
continue;
}
keys[n++] = htab_.table[i].entry.key;
}
while (n < num) keys[n++] = NULL;
return true;
}
// Returns the key of `ep`. A NULL entry, a NULL key and an empty key are all
// rejected: HT_ERR_BAD_ARG is recorded and NULL is returned.
const char *Hashtab::getEntryKey(const ENTRY *ep)
{
    const bool usable = (ep != NULL)
                     && (ep->key != NULL)
                     && (ep->key[0] != '\0');
    if (usable) {
        return ep->key;
    }

    errno_ = HT_ERR_BAD_ARG;
    return NULL;
}
// Returns the data pointer of `ep`; for a NULL entry HT_ERR_BAD_ARG is
// recorded and NULL is returned.
void *Hashtab::getEntryData(const ENTRY *ep)
{
    if (ep != NULL) {
        return ep->data;
    }

    errno_ = HT_ERR_BAD_ARG;
    return NULL;
}
#if 0
// TODO
// Disabled draft: calls `fn` for every live entry of the table.
// \param fn        callback invoked per entry; NULL is rejected with
//                  HT_ERR_BAD_ARG.
// \param on_error  1: stop and return false as soon as `fn` returns a
//                  negative value; any other value: ignore the failure and
//                  continue with the next entry.
// \return true when the walk ran to the end.
// NOTE(review): ht_walk_fn is declared with three parameters (key, data,
// uarg) but is called here with two, and there is no way to pass a user
// argument in -- this must be reconciled before enabling the code.
// NOTE(review): the loop covers slots 0..size-1; confirm this against the
// slot indices hsearch2_r() produces before enabling the code.
bool Hashtab::traverse(ht_walk_fn fn, int on_error)
{
if (fn == NULL) {
errno_ = HT_ERR_BAD_ARG;
return false;
}
for (unsigned int i = 0; i < htab_.size; i++) {
// Skip slots that were never used or have been released.
if (htab_.table[i].used == 0
|| htab_.table[i].used == (unsigned int)-1)
{
continue;
}
if ((*fn)(htab_.table[i].entry.key, htab_.table[i].entry.data) < 0) {
switch (on_error) {
case 1:
return false;
default:
break;
}
}
}
return true;
}
#endif