redis源码阅读-数据结构篇-hyperloglog

5. HyperLogLog 实现 hyperloglog.c

数据结构定义
  • hllhdr

    struct hllhdr {
        char magic[4];      /* "HYLL" */
        uint8_t encoding;   /* HLL_DENSE or HLL_SPARSE. */
        uint8_t notused[3]; /* Reserved for future use, must be zero. */
        uint8_t card[8];    /* Cached cardinality, little endian. */
        uint8_t registers[]; /* Data bytes. */
    };
    
  • 一些宏定义

    /* The cached cardinality MSB is used to signal validity of the cached value. */
    #define HLL_INVALIDATE_CACHE(hdr) (hdr)->card[0] |= (1<<7)
    #define HLL_VALID_CACHE(hdr) (((hdr)->card[0] & (1<<7)) == 0)
    
    #define HLL_P 14 /* The greater is P, the smaller the error. */
    #define HLL_REGISTERS (1<<HLL_P) /* With P=14, 16384 registers. */
    #define HLL_P_MASK (HLL_REGISTERS-1) /* Mask to index register. */
    #define HLL_BITS 6 /* Enough to count up to 63 leading zeroes. */
    #define HLL_REGISTER_MAX ((1<<HLL_BITS)-1)
    #define HLL_HDR_SIZE sizeof(struct hllhdr)
    #define HLL_DENSE_SIZE (HLL_HDR_SIZE+((HLL_REGISTERS*HLL_BITS+7)/8))
    #define HLL_DENSE 0 /* Dense encoding. */
    #define HLL_SPARSE 1 /* Sparse encoding. */
    #define HLL_RAW 255 /* Only used internally, never exposed. */
    #define HLL_MAX_ENCODING 1
    
    static char *invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected\r\n";
    ...
    
Helper函数(可跳过,需要时阅读)
  • 哈希函数 O(1)

    uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
        const uint64_t m = 0xc6a4a7935bd1e995;
        const int r = 47;
        uint64_t h = seed ^ (len * m);
        const uint8_t *data = (const uint8_t *)key;
        const uint8_t *end = data + (len-(len&7));
    
        while(data != end) {
            uint64_t k;
    #if (BYTE_ORDER == LITTLE_ENDIAN)
            k = *((uint64_t*)data);
    #else
            k = (uint64_t) data[0];
            k |= (uint64_t) data[1] << 8;
            k |= (uint64_t) data[2] << 16;
            k |= (uint64_t) data[3] << 24;
            k |= (uint64_t) data[4] << 32;
            k |= (uint64_t) data[5] << 40;
            k |= (uint64_t) data[6] << 48;
            k |= (uint64_t) data[7] << 56;
    #endif
            k *= m;
            k ^= k >> r;
            k *= m;
            h ^= k;
            h *= m;
            data += 8;
        }
        switch(len & 7) {
        case 7: h ^= (uint64_t)data[6] << 48;
        case 6: h ^= (uint64_t)data[5] << 40;
        case 5: h ^= (uint64_t)data[4] << 32;
        case 4: h ^= (uint64_t)data[3] << 24;
        case 3: h ^= (uint64_t)data[2] << 16;
        case 2: h ^= (uint64_t)data[1] << 8;
        case 1: h ^= (uint64_t)data[0];
                h *= m;
        };
        h ^= h >> r;
        h *= m;
        h ^= h >> r;
        return h;
    }
    
元素哈希处理 O(1)
// 返回000...1的长度
// regp 保存哈希桶index
int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
    uint64_t hash, bit, index;
    int count;
	// 获取哈希值
    hash = MurmurHash64A(ele,elesize,0xadc83b19ULL);
    index = hash & HLL_P_MASK; 	// (1 << 14)个桶的index
    hash |= ((uint64_t)1<<63);	// 确保至少一个1
    bit = HLL_REGISTERS; 		// 第15位开始算起
    count = 1; 					// 第一个出现1的rank
    while((hash & bit) == 0) {	// 直到找到1
        count++;
        bit <<= 1;
    }
    *regp = (int) index;		// (1 << 14)个桶的index
    return count;				// 返回000...1的长度
}
添加基数 O(1)
int hllDenseAdd(uint8_t *registers, unsigned char *ele, size_t elesize) {
    uint8_t oldcount, count;
    long index;
    // 元素哈希处理
    count = hllPatLen(ele,elesize,&index);
    // 000...1的长度更长则添加
    HLL_DENSE_GET_REGISTER(oldcount,registers,index);
    if (count > oldcount) {
        HLL_DENSE_SET_REGISTER(registers,index,count);
        return 1;
    } else {
        return 0;
    }
}

关于HyperLogLog可以详见本人之前的博客解析

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值