传统的字典树,如果节点按8位的ASCII字符来分支,那么每个节点必须带一个长度为256的子节点指针数组,最差情况下(256个槽位只用到1个),空间浪费接近99.6%。所以网上有很多把节点合并的压缩算法,但我总觉得太繁琐,一直避而远之。之前也想过比如稀疏数组之类的办法,但那样查找子节点的时间复杂度就不是O(1)了。
刚才突发奇想,一个8位的ASCII字符我可以拆成2个4位的,那么不就是2层长度为16的数组?再进一步,我可以拆成8个二进制位啊,每位只有0和1,每个节点只需要长度为2的数组,不就解决了吗?太完美了。
这不是B树,两码事,我愿称之为只有两个字符(0和1)的字典树。
经过测试,结果有点惨不忍睹,这是和哈希表的时间对比(单位:秒,各重复10000轮,每轮插入6个键后清空):
trie: 0.515625
hash: 0.015625
分析原因,应该是内存分配和释放的动作实在太多(每个11字节的键要走88层节点,每个新节点都要单独分配一次),也别管时间复杂度是O几了,架不住基数多啊,而我的哈希表是优化过内存分配的。下面是测试代码:
#include <var.h>
#include <locale.h>
#include <windows.h>
/*
 * One node of the bitwise trie.
 *
 * A node branches on a single key bit, so it has exactly two child slots:
 * children[0] is followed for a 0 bit and children[1] for a 1 bit.
 * `value` is the payload pointer that trieput_s() stores on the node
 * reached after the last bit of a key.
 */
struct trienode {
    struct trienode *children[2];
    struct var *value;
};

/* The single, statically allocated root of the trie; it starts out empty. */
struct trienode root = {
    .children = {NULL, NULL},
    .value = NULL,
};
/*
 * Recursively prints the subtree rooted at `node`.
 *
 * For every existing child (slot 0 first, then slot 1) the slot index is
 * printed on its own line, preceded by `level` spaces, and the child is
 * dumped one level deeper. After the children, if the node carries a value,
 * the value is printed via vdump() with the same `level` spaces in front.
 *
 * level: current depth, used as the indentation width.
 * node:  subtree to print; must not be NULL.
 */
static void _dump(int level, struct trienode *node) {
    for (int bit = 0; bit < 2; bit++) {
        struct trienode *next = node->children[bit];
        if (next == NULL) {
            continue;
        }
        printf("%*s", level, "");
        printf("%d\n", bit);
        _dump(level + 1, next);
    }

    if (node->value == NULL) {
        return;
    }
    printf("%*s", level, "");
    /* vdump() comes from the project; presumably (prefix, value, suffix). */
    vdump("", node->value, "\n");
}
/* Prints the entire trie, starting at the global root with no indentation. */
void triedump() {
    struct trienode *top = &root;
    _dump(0, top);
}
/*
 * Recursively empties the subtree below `node`.
 *
 * Each existing child is cleared first and then handed to free_s() through
 * the address of its slot in the parent. Finally the node's own value
 * pointer is reset to NULL. `node` itself is not released here; that is the
 * caller's job (the root is never released).
 *
 * level: current depth; only forwarded to the recursive call.
 * node:  subtree to empty; must not be NULL.
 */
static void _clear(int level, struct trienode *node) {
    struct trienode **slot = node->children;
    struct trienode **const end = slot + 2;

    for (; slot != end; slot++) {
        if (*slot == NULL) {
            continue;
        }
        _clear(level + 1, *slot);
        /* free_s() is a project helper; presumably frees *slot and NULLs it. */
        free_s((void **)slot);
    }
    node->value = NULL;
}
/* Removes every entry from the trie by clearing everything under the root. */
void trieclear() {
    struct trienode *top = &root;
    _clear(0, top);
}
/*
 * Stores `value` under the first `klen` bytes of `key`.
 *
 * The key is consumed one bit at a time, least-significant bit of each byte
 * first, descending into children[bit] and creating missing nodes on the
 * way. The node reached after the final bit receives `value`, overwriting
 * whatever was stored there. With klen == 0 the value lands on the root.
 *
 * key:   bytes of the key; need not be NUL-terminated.
 * klen:  number of bytes of `key` to use.
 * value: pointer to store; the trie does not copy or own it.
 */
void trieput_s(const char *key, size_t klen, struct var *value) {
    struct trienode *node = &root;

    for (size_t i = 0; i < klen; i++) {
        /*
         * Work on an unsigned copy of the byte: right-shifting a negative
         * plain `char` (bytes >= 0x80 where char is signed, e.g. UTF-8 keys)
         * is implementation-defined.
         */
        unsigned char byte = (unsigned char)key[i];

        for (size_t j = 0; j < (sizeof byte) * 8; j++) {
            int bit = (byte >> j) & 1;
            struct trienode **pchild = &node->children[bit];

            if (*pchild == NULL) {
                /* Project allocator; asked here for one node-sized element. */
                alloc_s((void **)pchild, 0, 1, sizeof **pchild);
                /*
                 * NOTE(review): alloc_s() failure behaviour is not visible
                 * here. If it can leave the slot NULL, stop instead of
                 * dereferencing NULL on the next step; the value is then
                 * not stored.
                 */
                if (*pchild == NULL) {
                    return;
                }
            }
            node = *pchild;
        }
    }
    node->value = value;
}
/*
 * Stores `value` under a NUL-terminated `key`.
 * Convenience wrapper: measures the key with strlen() and forwards to
 * trieput_s(). `key` must not be NULL.
 */
void trieput(const char *key, struct var *value) {
    size_t klen = strlen(key);
    trieput_s(key, klen, value);
}
/*
 * A key/value sample for the benchmark.
 * Field order matters: main() fills this with positional initializers.
 */
struct kv {
    const char *key;    /* NUL-terminated key string */
    struct var *value;  /* value inserted under that key */
};
/*
 * Combines the low and high 32-bit halves of a FILETIME into one 64-bit
 * integer. Despite the name, the result is in FILETIME ticks (100 ns each),
 * not nanoseconds; process_time() divides by 10,000,000 to get seconds.
 *
 * The whole expansion is parenthesized so the macro can be used safely
 * inside larger expressions such as `ft2ns(x) * 2` or `a - ft2ns(x)`.
 * Note that `ft` is evaluated twice.
 */
#define ft2ns(ft) \
    ((LONGLONG)(ft).dwLowDateTime + ((LONGLONG)(ft).dwHighDateTime << 32))
double process_time() {
FILETIME ct, et, kt, ut;
if (!GetProcessTimes(GetCurrentProcess(), &ct, &et, &kt, &ut)) {
return 0;
}
return (ft2ns(kt) + ft2ns(ut)) / 10000000.0;
}
/*
 * Benchmark driver: times the bitwise trie against the project's hash-based
 * object.
 *
 * Six sample values are declared with the project's *declare macros (defined
 * in var.h, not visible here) and paired with fixed 11-character keys. Each
 * container is then filled with all samples and cleared again, 10000 times,
 * and the CPU time spent is printed in seconds.
 */
int main() {
    setlocale(LC_ALL, ".UTF-8");

    /* Sample values; the macros presumably declare `struct var *` variables. */
    zdeclare(a);
    bdeclare(b, false);
    bdeclare(c, true);
    ndeclare(d, 3.14);
    sdeclare(e, "hello");
    sdeclare(f, "world");

    struct kv samples[] = {
        {"kjahdkjasdh", a},
        {"jxcvhjkhkjd", b},
        {"owuesriouwo", c},
        {"cvoibuoicvb", d},
        {"ryurtioiutr", e},
        {"dhfyuwerjds", f},
    };
    const int rounds = 10000;

    /* Trie: insert every sample, then clear, `rounds` times. */
    {
        double began = process_time();
        for (int pass = 0; pass < rounds; pass++) {
            for (int k = 0; k < countof(samples); k++) {
                trieput(samples[k].key, samples[k].value);
            }
            trieclear();
        }
        double elapsed = process_time() - began;
        printf("trie: %lf\n", elapsed);
    }

    /* Hash object: the same workload through oput()/oclear(). */
    {
        odeclare(obj);
        double began = process_time();
        for (int pass = 0; pass < rounds; pass++) {
            for (int k = 0; k < countof(samples); k++) {
                oput(obj, samples[k].key, samples[k].value);
            }
            oclear(obj);
        }
        double elapsed = process_time() - began;
        printf("hash: %lf\n", elapsed);
    }

    return 0;
}