HyperLogLog全部源码解析

简介

英文注释就不贴了太长了,感兴趣的可以去github自己去搜索。
https://github.com/redis/redis/blob/unstable/src/hyperloglog.c
首先你要对hyperloglog的应用场景有一定的了解,建议可以去买一本《redis深度历险》看看。再推荐看一看这篇知乎的文章,主要是理论为主。然后再来看源码解析。
https://www.zhihu.com/question/53416615/answer/718740659
直接上代码!!

源码篇

基础篇

稀疏
常量
# include "server.h"
# include <stdint.h>
# include <string.h>
# include <math.h>
/* Header placed in front of every HyperLogLog value; the encoded registers
 * follow it directly in the same buffer. */
struct hllhdr
{
    char magic[4];     // Magic bytes "HYLL" (written by createHLLObject, checked by isHLLObjectOrReply).
    uint8_t encoding;  // Representation of 'registers': HLL_DENSE or HLL_SPARSE (HLL_RAW only for temporaries).
    uint8_t notused[3]; // Not read or written by the code in this section. NOTE(review): presumably reserved and expected to be zero - confirm.
    uint8_t card[8];  // Cached cardinality, little endian; the top bit of card[7] flags the cache as stale.
    uint8_t registers[]; // Encoded register data (flexible array member).
};
#define HLL_INVALIDATE_CACHE(hdr) (hdr)->card[7] |= (1<<7) // Mark the cached cardinality as stale by setting the top bit of card[7].
#define HLL_VALID_CACHE(hdr) (((hdr)->card[7] & (1<<7)) == 0)  // True when the stale bit is clear, i.e. the cached value can be returned.

#define HLL_P 14 // Number of low hash bits used as the register index.
#define HLL_Q (64-HLL_P) // Remaining hash bits, scanned for the first set bit (50 when HLL_P is 14).
#define HLL_REGISTERS (1<<HLL_P)// Number of registers: 2^HLL_P (16384).
#define HLL_P_MASK (HLL_REGISTERS-1) // Mask that extracts the register index from a hash.
#define HLL_BITS 6 // Bits per register in the dense encoding; 6 bits hold counts up to 63, enough for HLL_Q+1.
#define HLL_REGISTER_MAX ((1<<HLL_BITS)-1) // Largest value a dense register can store (63); also used as the 6-bit mask.
#define HLL_HDR_SIZE sizeof(struct hllhdr) // Size in bytes of the header preceding the registers.
#define HLL_DENSE_SIZE (HLL_HDR_SIZE+((HLL_REGISTERS*HLL_BITS+7)/8)) // Total size of a dense HLL: header plus packed registers, rounded up to whole bytes.
#define HLL_DENSE 0 // Dense encoding: every register stored in HLL_BITS bits.
#define HLL_SPARSE 1 // Sparse encoding: run-length opcodes (ZERO / XZERO / VAL).
#define HLL_RAW 255 // One byte per register; pfcountCommand builds it on a stack buffer for multi-key counts.
#define HLL_MAX_ENCODING 1 // Highest encoding value isHLLObjectOrReply accepts for a stored object.
static char* invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected\r\n"; // Error reply used when an HLL payload fails validation.

下面代码是找出对应桶的位置。首先这里说一下,hyperloglog为了节省内存,一个桶只用6位存储。因为redis的数据都在内存中,内存空间有限,相比每个桶占用一个字节(8位),用6位存储节省了25%的空间。

密集找桶的位置
/**
 * Dense layout: registers are HLL_BITS (6) bits wide and packed back to back,
 * lowest bits first, so a register may straddle two bytes:
 *
 * +--------+--------+--------+------//
 * |11000000|22221111|33333322|55444444
 * +--------+--------+--------+------//
 *
 * Each digit names the register a bit belongs to; the leftmost bit of a byte
 * is its most significant bit.
 *
 * HLL_DENSE_GET_REGISTER stores the value of register 'regnum' of the packed
 * array 'p' into the lvalue 'target'.
 *
 * Every line of the body ends with a backslash so that it is part of the
 * macro, and the comments inside use the block form because a "//" comment
 * would swallow the continuation. 'p' and 'regnum' are parenthesized so that
 * expression arguments expand correctly; 'regnum' is evaluated twice.
 *
 * The macro always reads _p[_byte + 1], even when the register lies entirely
 * inside _p[_byte]. For the last register that is one byte past the packed
 * array, so the buffer needs one readable byte after the registers.
 **/
#define HLL_DENSE_GET_REGISTER(target,p,regnum) do { \
    uint8_t* _p = (uint8_t*)(p); \
    unsigned long _byte = (regnum) * HLL_BITS / 8; /* byte holding the low bits */ \
    unsigned long _fb = (regnum) * HLL_BITS & 7;   /* bit offset inside that byte */ \
    unsigned long _fb8 = 8 - _fb;                  /* shift placing the next byte above them */ \
    unsigned long b0 = _p[_byte]; \
    unsigned long b1 = _p[_byte + 1]; \
    target = ((b0 >> _fb) | (b1 << _fb8)) & HLL_REGISTER_MAX; \
} while (0)
密集存储设值。
/**
 * HLL_DENSE_SET_REGISTER stores 'val' into register 'regnum' of the packed
 * dense array 'p' without disturbing the neighbouring registers.
 *
 * +--------+--------+--------+------//
 * |11000000|22221111|33333322|55444444
 * +--------+--------+--------+------//
 *
 * With _fb the bit offset of the register inside its first byte and
 * _fb8 = 8 - _fb, the four statements at the end do the following:
 *
 *   1. clear the register's bits in _p[_byte]   (mask ~(63 << _fb))
 *   2. OR in the low part of the value          (_v << _fb)
 *   3. clear the register's bits in _p[_byte+1] (mask ~(63 >> _fb8))
 *   4. OR in the high part of the value         (_v >> _fb8)
 *
 * When _fb is 0 or 2 the register fits entirely in _p[_byte]: 63 >> _fb8 and
 * _v >> _fb8 are both 0, so steps 3 and 4 leave _p[_byte+1] unchanged.
 * When _fb is 4 or 6 the register straddles two bytes and steps 3 and 4 write
 * its upper 2 or 4 bits into the low bits of _p[_byte+1].
 *
 * Every line of the body ends with a backslash so that it belongs to the
 * macro. 'p', 'regnum' and 'val' are parenthesized so that expression
 * arguments expand correctly; 'regnum' is evaluated twice.
 *
 * Like the getter, the macro touches _p[_byte + 1] even for the last register,
 * so the buffer needs one accessible byte after the packed registers.
 */
#define HLL_DENSE_SET_REGISTER(p,regnum,val) do { \
    uint8_t* _p = (uint8_t*)(p); \
    unsigned long _byte = (regnum) * HLL_BITS / 8; \
    unsigned long _fb = (regnum) * HLL_BITS & 7; \
    unsigned long _fb8 = 8 - _fb; \
    unsigned long _v = (val); \
    _p[_byte] &= ~(HLL_REGISTER_MAX << _fb); \
    _p[_byte] |= _v << _fb; \
    _p[_byte + 1] &= ~(HLL_REGISTER_MAX >> _fb8); \
    _p[_byte + 1] |= _v >> _fb8; \
} while (0)
稀疏存储方式

ZERO : 一字节,表示连续多少个桶计数为0,前两位为标志00,后6位表示有多少个桶,最大为64。
XZERO : 两个字节,表示连续多少个桶计数为0,前两位为标志01,后14位表示有多少个桶,最大为16384。
VAL : 一字节,表示连续若干个桶的计数相同。最高一位为标志1;接下来五位表示桶内计数(存储的是计数减1),所以最大能表示的计数为32;最后两位表示连续多少个桶(存储的是个数减1),最多4个。
懒得码字了,知乎复制了一段稀疏存储方式,这个文章写的也很不错。
在这里插入图片描述

#define HLL_SPARSE_XZERO_BIT 0x40 // Tag of an XZERO opcode: 01xxxxxx followed by one more byte.
#define HLL_SPARSE_VAL_BIT 0x80 // Tag of a VAL opcode: 1vvvvvxx.
#define HLL_SPARSE_IS_ZERO(p) (((*(p)) & 0xc0) == 0) // True if *p is a ZERO opcode (top two bits 00).
#define HLL_SPARSE_IS_XZERO(p) (((*(p)) & 0xc0) == HLL_SPARSE_XZERO_BIT) // True if *p is an XZERO opcode (top two bits 01).
#define HLL_SPARSE_IS_VAL(p) ((*(p)) & HLL_SPARSE_VAL_BIT) // Non-zero if *p is a VAL opcode (top bit set).
#define HLL_SPARSE_ZERO_LEN(p) (((*(p)) & 0x3f)+1)// Run length of a ZERO opcode: low 6 bits plus 1 (1..64).
#define HLL_SPARSE_XZERO_LEN(p) (((((*(p)) & 0x3f) << 8) | (*((p)+1)))+1)// Run length of an XZERO opcode: 14 bits over two bytes, plus 1 (1..16384).
#define HLL_SPARSE_VAL_VALUE(p) ((((*(p)) >> 2) & 0x1f)+1) // Register value of a VAL opcode: 5 bits plus 1 (1..32).
#define HLL_SPARSE_VAL_LEN(p) (((*(p)) & 0x3)+1)// Run length of a VAL opcode: low 2 bits plus 1 (1..4).
#define HLL_SPARSE_VAL_MAX_VALUE 32 // Largest register value a VAL opcode can encode.
#define HLL_SPARSE_VAL_MAX_LEN 4 // Longest run a VAL opcode can encode.
#define HLL_SPARSE_ZERO_MAX_LEN 64 // Longest run a ZERO opcode can encode.
#define HLL_SPARSE_XZERO_MAX_LEN 16384 // Longest run an XZERO opcode can encode.
/* Write a VAL opcode at 'p': top bit set, then (val - 1) in 5 bits, then
 * (len - 1) in the low 2 bits. 'val' must be in 1..32 and 'len' in 1..4.
 * Every line of the body ends with a backslash so that it is part of the
 * macro definition. */
#define HLL_SPARSE_VAL_SET(p,val,len) do { \
    *(p) = (((val) - 1) << 2 | ((len) - 1)) | HLL_SPARSE_VAL_BIT; \
} while (0)

/* Write a ZERO opcode at 'p' covering 'len' registers (1..64): the byte
 * stores len - 1 and its top two bits stay 00. */
#define HLL_SPARSE_ZERO_SET(p,len) do { \
    (p)[0] = (len) - 1; \
} while (0)
/* Write a two-byte XZERO opcode at 'p' covering 'len' registers (1..16384):
 * len - 1 is split into its high 6 bits (first byte, tagged 01) and its low
 * 8 bits (second byte). */
#define HLL_SPARSE_XZERO_SET(p,len) do { \
    int _stored = (len) - 1; \
    (p)[0] = (_stored >> 8) | HLL_SPARSE_XZERO_BIT; \
    (p)[1] = (_stored & 0xff); \
} while (0)
#define HLL_ALPHA_INF 0.721347520444481703680 // Equals 1/(2*ln 2); hllCount scales m*m/z by it to obtain the cardinality estimate.

逻辑处理

Hash篇

这里主要是计算hash值。返回64位的。

   /* MurmurHash2, 64-bit variant: hash the 'len' bytes at 'key' with 'seed'.
 *
 * The input is consumed in 8-byte little-endian words, then the remaining
 * 0..7 tail bytes are mixed in, followed by a final avalanche step.
 *
 * On the little-endian path each word is loaded with memcpy() instead of
 * dereferencing a casted uint64_t pointer: 'key' carries no alignment
 * guarantee, so the cast is an unaligned, aliasing-violating access.
 * The computed hash is the same. */
uint64_t MurmurHash64A(const void* key, int len, unsigned int seed) {
    const uint64_t m = 0xc6a4a7935bd1e995;
    const int r = 47;
    uint64_t h = seed ^ (len * m);
    const uint8_t* data = (const uint8_t*)key;
    const uint8_t* end = data + (len - (len & 7)); /* end of the whole 8-byte words */

    while (data != end)
    {
        uint64_t k;

#if (BYTE_ORDER == LITTLE_ENDIAN)
        memcpy(&k, data, sizeof(uint64_t));
#else
        /* Assemble the word byte by byte so that the result matches the
         * little-endian interpretation on any host. */
        k = (uint64_t)data[0];
        k |= (uint64_t)data[1] << 8;
        k |= (uint64_t)data[2] << 16;
        k |= (uint64_t)data[3] << 24;
        k |= (uint64_t)data[4] << 32;
        k |= (uint64_t)data[5] << 40;
        k |= (uint64_t)data[6] << 48;
        k |= (uint64_t)data[7] << 56;
#endif

        k *= m;
        k ^= k >> r;
        k *= m;
        h ^= k;
        h *= m;
        data += 8;
    }

    /* Mix in the tail bytes; every case deliberately falls through. */
    switch (len & 7)
    {
        case 7: h ^= (uint64_t)data[6] << 48; /* fall-thru */
        case 6: h ^= (uint64_t)data[5] << 40; /* fall-thru */
        case 5: h ^= (uint64_t)data[4] << 32; /* fall-thru */
        case 4: h ^= (uint64_t)data[3] << 24; /* fall-thru */
        case 3: h ^= (uint64_t)data[2] << 16; /* fall-thru */
        case 2: h ^= (uint64_t)data[1] << 8; /* fall-thru */
        case 1:
            h ^= (uint64_t)data[0];
            h *= m; /* fall-thru */
    };

    /* Final avalanche. */
    h ^= h >> r;
    h *= m;
    h ^= h >> r;
    return h;
}
计算第一次出现1的位置

默认HLL_P是14,所以剩余的HLL_Q是50位;这里计算的就是这50位从最低位算起第一次出现1的位置(从1开始计数)。
这段代码比较简单,就不做过多说明,大概思想就是用二进制的1、10、100……依次去和hash做与运算,结果不等于0(也就是遇到了第一个1)就结束循环。

/* Hash the element and split the 64-bit hash in two parts: the low HLL_P bits
 * select a register (stored in *regp), the remaining HLL_Q bits are scanned
 * from the least significant end.
 *
 * Returns the 1-based position of the first set bit among those HLL_Q bits.
 * A sentinel bit is forced at position HLL_Q, so the scan always stops and
 * the result is at most HLL_Q + 1. */
int hllPatLen(unsigned char* ele, size_t elesize, long* regp)
{
    uint64_t digest = MurmurHash64A(ele, elesize, 0xadc83b19ULL);
    uint64_t reg = digest & HLL_P_MASK;

    /* Drop the index bits and plant the sentinel above the remaining ones. */
    uint64_t rest = (digest >> HLL_P) | ((uint64_t)1 << HLL_Q);

    int run = 1;
    for (uint64_t probe = 1; (rest & probe) == 0; probe <<= 1)
        run++;

    *regp = (int)reg;
    return run;
}
密集设置值

这里设置的是最大的,同样代码也很简单。不做多余注释

/* Raise dense register 'index' to 'count' if 'count' is larger than the value
 * currently stored there.
 *
 * Returns 1 when the register was changed, 0 when it was left untouched. */
int hllDenseSet(uint8_t* registers, long index, uint8_t count)
{
    uint8_t current;

    HLL_DENSE_GET_REGISTER(current, registers, index);
    if (count <= current) return 0; /* a register only ever grows */

    HLL_DENSE_SET_REGISTER(registers, index, count);
    return 1;
}

密集添加

也没什么好讲的,都是调方法。这里就把代码贴出来。

/* Add an element to a dense HyperLogLog: hash it to a (register, run length)
 * pair and update that register. Returns what hllDenseSet() returns: 1 if the
 * register changed, 0 otherwise. */
int hllDenseAdd(uint8_t* registers, unsigned char* ele, size_t elesize)
{
    long reg;
    uint8_t run = hllPatLen(ele, elesize, &reg);

    return hllDenseSet(registers, reg, run);
}
以密集表示形式计算寄存器直方图
/* Build the histogram of register values for a dense HyperLogLog:
 * reghisto[v] is incremented once for every register holding the value v.
 * The caller provides 'reghisto' with room for every 6-bit value (0..63);
 * existing counts are added to, not reset. */
void hllDenseRegHisto(uint8_t* registers, int* reghisto)
{
    if (HLL_REGISTERS != 16384 || HLL_BITS != 6)
    {
        /* Generic path: decode the registers one at a time. */
        for (int regnum = 0; regnum < HLL_REGISTERS; regnum++)
        {
            unsigned long value;
            HLL_DENSE_GET_REGISTER(value, registers, regnum);
            reghisto[value]++;
        }
        return;
    }

    /* Default geometry: 16 six-bit registers occupy exactly 12 bytes, so the
     * array is walked in 1024 groups of 12 bytes. Within a group every 3
     * bytes hold 4 registers. All 16 values of a group are decoded first and
     * counted afterwards. */
    const uint8_t* cursor = registers;
    for (int group = 0; group < 1024; group++, cursor += 12)
    {
        unsigned long vals[16];

        for (int quad = 0; quad < 4; quad++)
        {
            const uint8_t* t = cursor + 3 * quad;
            vals[4 * quad]     = t[0] & 63;
            vals[4 * quad + 1] = (t[0] >> 6 | t[1] << 2) & 63;
            vals[4 * quad + 2] = (t[1] >> 4 | t[2] << 4) & 63;
            vals[4 * quad + 3] = (t[2] >> 2) & 63;
        }
        for (int k = 0; k < 16; k++)
            reghisto[vals[k]]++;
    }
}
稀疏处理

这里是把稀疏转为密集。

/* Convert the HyperLogLog stored in 'o' from the sparse to the dense
 * representation, replacing o->ptr with a newly allocated buffer.
 *
 * Returns C_OK on success (including when the object is already dense) and
 * C_ERR when the sparse payload is corrupted, in which case 'o' is left
 * untouched.
 *
 * Every opcode, including ZERO and XZERO, is checked against HLL_REGISTERS
 * before 'idx' is advanced. Without that check a crafted payload made of many
 * zero runs could push 'idx' past INT_MAX; once it wraps negative the VAL
 * bounds test passes and registers are written outside the dense buffer.
 * Any run that overshoots makes the whole payload invalid. */
int hllSparseToDense(robj* o)
{
    sds sparse = o->ptr, dense;
    struct hllhdr *hdr, *oldhdr = (struct hllhdr*)sparse;
    int idx = 0, runlen, regval;
    int valid = 1;
    uint8_t* p = (uint8_t*)sparse, *end = p + sdslen(sparse);

    /* Nothing to do if the object is already dense. */
    hdr = (struct hllhdr*)sparse;
    if (hdr->encoding == HLL_DENSE) return C_OK;

    /* Allocate the dense buffer and copy the header over; this relies on
     * sdsnewlen(NULL, ...) returning zero-filled bytes so that registers not
     * written below read as 0. */
    dense = sdsnewlen(NULL, HLL_DENSE_SIZE);
    hdr = (struct hllhdr*)dense;
    *hdr = *oldhdr;
    hdr->encoding = HLL_DENSE;

    /* Walk the sparse opcodes and materialise the non-zero registers. */
    p += HLL_HDR_SIZE;
    while (p < end)
    {
        if (HLL_SPARSE_IS_ZERO(p))
        {
            runlen = HLL_SPARSE_ZERO_LEN(p);
            if ((runlen + idx) > HLL_REGISTERS) { valid = 0; break; }
            idx += runlen;
            p++;
        }
        else if (HLL_SPARSE_IS_XZERO(p))
        {
            runlen = HLL_SPARSE_XZERO_LEN(p);
            if ((runlen + idx) > HLL_REGISTERS) { valid = 0; break; }
            idx += runlen;
            p += 2;
        }
        else
        {
            runlen = HLL_SPARSE_VAL_LEN(p);
            regval = HLL_SPARSE_VAL_VALUE(p);
            if ((runlen + idx) > HLL_REGISTERS) { valid = 0; break; }
            while (runlen--)
            {
                HLL_DENSE_SET_REGISTER(hdr->registers, idx, regval);
                idx++;
            }
            p++;
        }
    }

    /* The opcodes must cover exactly HLL_REGISTERS registers. */
    if (!valid || idx != HLL_REGISTERS)
    {
        sdsfree(dense);
        return C_ERR;
    }

    /* Swap the old sparse buffer for the new dense one. */
    sdsfree(o->ptr);
    o->ptr = dense;
    return C_OK;
}
设置稀疏的值

首先先考虑几种可能情况,
情况一,值大于VAL所能存储的最大值(32),这个时候是要把稀疏转密集的。
情况二,稀疏还是稀疏:目标桶落在某个ZERO、XZERO或VAL区间的中间时,需要把这个操作码拆开,在中间插入一个新的VAL。最坏的情况是一个XZERO被拆成XZERO-VAL-XZERO,也就是之前的两个字节变成了五个字节。

/* Set register 'index' of the sparse HyperLogLog in 'o' to 'count' if
 * 'count' is larger than the current value.
 *
 * Returns 1 if the register was updated, 0 if the stored value was already
 * greater than or equal to 'count', and -1 if the sparse payload is invalid
 * (no opcode covers 'index', or the conversion to dense fails).
 *
 * Side effects: o->ptr may be replaced (buffer growth or promotion to the
 * dense representation), and the cached cardinality is invalidated on update. */
int hllSparseSet(robj* o, long index, uint8_t count)
{
    struct hllhdr *hdr;
uint8_t oldcount, *sparse, *end, *p, *prev, *next;
long first, span;
long is_zero = 0, is_xzero = 0, is_val = 0, runlen = 0;

/* A VAL opcode cannot represent a count above HLL_SPARSE_VAL_MAX_VALUE:
 * switch to the dense representation. */
if (count > HLL_SPARSE_VAL_MAX_VALUE) goto promote;

/* Reserve 3 spare bytes up front: the largest growth of a single update is
 * a 2-byte XZERO being replaced by a 5-byte XZERO-VAL-XZERO sequence. */
o->ptr = sdsMakeRoomFor(o->ptr, 3);

/* Step 1: locate the opcode that covers register 'index'.
 * 'first' is the index of the first register covered by the current opcode,
 * 'span' the number of registers it covers, 'prev' the previous opcode. */
sparse = p = ((uint8_t*)o->ptr) + HLL_HDR_SIZE;
end = p + sdslen(o->ptr) - HLL_HDR_SIZE;

first = 0;
prev = NULL; 
next = NULL; 
span = 0;
while (p < end)
{
    long oplen;
    oplen = 1;
    /* ZERO opcode: one byte. */
    if (HLL_SPARSE_IS_ZERO(p))
    {
        span = HLL_SPARSE_ZERO_LEN(p);
    }
    /* VAL opcode: one byte. */
    else if (HLL_SPARSE_IS_VAL(p))
    {
        span = HLL_SPARSE_VAL_LEN(p);
    }
    /* XZERO opcode: two bytes. */
    else
    { 
        span = HLL_SPARSE_XZERO_LEN(p);
        oplen = 2;
    }
    /* Break if this opcode covers the register as 'index'. */
    if (index <= first + span - 1) break;
    prev = p;
    p += oplen;
    first += span;
}
/* No opcode covers 'index': the payload is invalid. */
if (span == 0 || p >= end) return -1; 

/* 'next' is the opcode following the one found, or NULL if it is the last. */
next = HLL_SPARSE_IS_XZERO(p) ? p + 2 : p + 1;
if (next >= end) next = NULL;

/* Classify the opcode at 'p' and cache its run length in 'runlen'. */
if (HLL_SPARSE_IS_ZERO(p))
{
    is_zero = 1;
    runlen = HLL_SPARSE_ZERO_LEN(p);
}
else if (HLL_SPARSE_IS_XZERO(p))
{
    is_xzero = 1;
    runlen = HLL_SPARSE_XZERO_LEN(p);
}
else
{
    is_val = 1;
    runlen = HLL_SPARSE_VAL_LEN(p);
}

/* Step 2: update the register, handling the cheap in-place cases first. */
if (is_val)
{
    oldcount = HLL_SPARSE_VAL_VALUE(p);
    /* Case A: the stored value is already large enough, nothing to do. */
    if (oldcount >= count) return 0;

    /* Case B: a VAL covering a single register can be overwritten in place. */
    if (runlen == 1)
    {   
        HLL_SPARSE_VAL_SET(p, count, 1);
        /* The 'updated' code below merges adjacent VAL opcodes that now hold
         * the same value and invalidates the cached cardinality. */
        goto updated;
    }
}

/* Case C: a ZERO covering a single register becomes a VAL in place. */
if (is_zero && runlen == 1)
{   
    HLL_SPARSE_VAL_SET(p, count, 1);
    goto updated;
}

/* Case D, the general case: the opcode covers several registers and has to
 * be split. The replacement is built in 'seq', which holds at most 5 bytes
 * (XZERO + VAL + XZERO); 'n' points past the last byte written. */
uint8_t seq[5], *n = seq;
int last = first + span - 1; 
int len;
/* Splitting a ZERO or XZERO run: [zeros before] VAL [zeros after]. */
if (is_zero || is_xzero)
{	

    if (index != first)
    {
        len = index - first;
        /* Use XZERO when the leading run is too long for a ZERO opcode. */
        if (len > HLL_SPARSE_ZERO_MAX_LEN)
        {	
            HLL_SPARSE_XZERO_SET(n, len);
            n += 2;
        }
        else
        {
            HLL_SPARSE_ZERO_SET(n, len);
            n++;
        }
    }
    /* The register being set. */
    HLL_SPARSE_VAL_SET(n, count, 1);
    n++;
    /* Zeros after the register, if it is not the last one of the run. */
    if (index != last)
    {
        len = last - index;
        if (len > HLL_SPARSE_ZERO_MAX_LEN)
        {
            HLL_SPARSE_XZERO_SET(n, len);
            n += 2;
        }
        else
        {
            HLL_SPARSE_ZERO_SET(n, len);
            n++;
        }
    }
}
/* Splitting a VAL run: [old value before] VAL(count) [old value after]. */
else
{
    /* Handle splitting of VAL. */
    int curval = HLL_SPARSE_VAL_VALUE(p);
    /* Registers before 'index' keep the old value. */
    if (index != first)
    {
        len = index - first;
        HLL_SPARSE_VAL_SET(n, curval, len);
        n++;
    }
    /* The register being set. */
    HLL_SPARSE_VAL_SET(n, count, 1);
    n++;
    /* Registers after 'index' keep the old value. */
    if (index != last)
    {
        len = last - index;
        HLL_SPARSE_VAL_SET(n, curval, len);
        n++;
    }
}

/* Step 3: substitute the old opcode with the new sequence.
 * If the payload would grow beyond server.hll_sparse_max_bytes, promote to
 * dense instead. Otherwise shift the tail to make room and copy 'seq' in. */
int seqlen = n - seq;
int oldlen = is_xzero ? 2 : 1;
int deltalen = seqlen - oldlen;

if (deltalen > 0 &&
    sdslen(o->ptr) + deltalen > server.hll_sparse_max_bytes) goto promote;
if (deltalen && next) memmove(next + deltalen, next, end - next);
sdsIncrLen(o->ptr, deltalen);
memcpy(p, seq, seqlen);
end += deltalen;

updated:
/* Step 4: merge adjacent VAL opcodes that now carry the same value.
 * The scan starts at the opcode before the modified one (or at the start of
 * the sparse data when there is none) and looks at no more than 5 opcodes. */
p = prev ? prev : sparse;
int scanlen = 5; /* Scan up to 5 opcodes starting from prev. */
while (p < end && scanlen--)
{	
    /* Zero runs are never merged here: just step over them. */
    if (HLL_SPARSE_IS_XZERO(p))
    {
        p += 2;
        continue;
    }
    else if (HLL_SPARSE_IS_ZERO(p))
    {
        p++;
        continue;
    }
    /* 'p' is a VAL: try to merge it with a VAL immediately to its right. */
    if (p + 1 < end && HLL_SPARSE_IS_VAL(p + 1))
    {
        int v1 = HLL_SPARSE_VAL_VALUE(p);
        int v2 = HLL_SPARSE_VAL_VALUE(p + 1);
        /* Only runs holding the same value can be merged. */
        if (v1 == v2)
        {
            int len = HLL_SPARSE_VAL_LEN(p) + HLL_SPARSE_VAL_LEN(p + 1);
            /* The combined run must still fit in a single VAL opcode. */
            if (len <= HLL_SPARSE_VAL_MAX_LEN)
            {	
                /* Store the merged run in the right-hand opcode, then shift
                 * everything from p + 1 one byte to the left over 'p' and
                 * shrink the string by one byte. 'p' is not advanced, so the
                 * merged opcode is tried again against its new neighbour. */
                HLL_SPARSE_VAL_SET(p + 1, v1, len);
                memmove(p, p + 1, end - p);
                sdsIncrLen(o->ptr, -1);
                end--;
                continue;
            }
        }
    }
    p++;
}

/* Invalidate the cached cardinality: it is not recomputed here, only flagged
 * as stale (pfcountCommand recomputes it when it finds the flag set). */
hdr = o->ptr;
HLL_INVALIDATE_CACHE(hdr);
return 1;
/* Promotion: convert the object to the dense representation and set the
 * register there. */
promote: 
if (hllSparseToDense(o) == C_ERR) return -1; 
hdr = o->ptr;

/* Now that the object is dense, set the register with the dense setter. */
int dense_retval = hllDenseSet(hdr->registers, index, count);
/* Promotion is only reached when the register has to grow: either 'count'
 * exceeds what sparse can store, or the update passed the "already large
 * enough" check above. hllDenseSet is therefore expected to report an update;
 * anything else means an internal invariant is broken, hence the assertion
 * rather than an error return. */
serverAssert(dense_retval == 1); 
return dense_retval;
}
稀疏添加

调方法而已。

/* Add an element to a sparse HyperLogLog: hash it to a (register, run length)
 * pair and update that register through hllSparseSet(), whose return value
 * (1 updated, 0 unchanged, -1 invalid payload) is passed through. */
int hllSparseAdd(robj* o, unsigned char* ele, size_t elesize)
{
    long reg;
    uint8_t run = hllPatLen(ele, elesize, &reg);

    return hllSparseSet(o, reg, run);
}
稀疏表示的寄存器直方图

这代码比较简单,注释就先不写了

/* Compute the register histogram in the sparse representation.
 *
 * 'sparse' points to the first opcode (after the header) and 'sparselen' is
 * the number of opcode bytes. For every run, reghisto[value] is increased by
 * the run length (value 0 for ZERO and XZERO runs). Existing counts in
 * 'reghisto' are added to, not reset.
 *
 * If the opcodes do not cover exactly HLL_REGISTERS registers and 'invalid'
 * is not NULL, *invalid is set to 1; otherwise it is left untouched.
 *
 * Each run is checked against HLL_REGISTERS before it is accounted for, so a
 * crafted payload made of a huge number of runs cannot overflow the signed
 * counters 'idx' and reghisto[]. Scanning stops at the first overshooting
 * run and the payload is reported as invalid. */
void hllSparseRegHisto(uint8_t* sparse, int sparselen, int* invalid, int* reghisto)
{
    int idx = 0, runlen, regval;
    int valid = 1;
    uint8_t* end = sparse + sparselen, *p = sparse;

    while (p < end)
    {
        if (HLL_SPARSE_IS_ZERO(p))
        {
            runlen = HLL_SPARSE_ZERO_LEN(p);
            if ((runlen + idx) > HLL_REGISTERS) { valid = 0; break; }
            idx += runlen;
            reghisto[0] += runlen;
            p++;
        }
        else if (HLL_SPARSE_IS_XZERO(p))
        {
            runlen = HLL_SPARSE_XZERO_LEN(p);
            if ((runlen + idx) > HLL_REGISTERS) { valid = 0; break; }
            idx += runlen;
            reghisto[0] += runlen;
            p += 2;
        }
        else
        {
            runlen = HLL_SPARSE_VAL_LEN(p);
            regval = HLL_SPARSE_VAL_VALUE(p);
            if ((runlen + idx) > HLL_REGISTERS) { valid = 0; break; }
            idx += runlen;
            reghisto[regval] += runlen;
            p++;
        }
    }
    if ((!valid || idx != HLL_REGISTERS) && invalid) *invalid = 1;
}

核心算法精髓

分享知乎上张戎的解释
https://www.zhihu.com/question/53416615
下面是这个算法的源代码,很惭愧我没有仔细看,原因是我没学过数学,感兴趣的朋友可以去网上找找资料。

/* ========================= HyperLogLog Count ==============================
 * This is the core of the algorithm where the approximated count is computed.
 * The function uses the lower level hllDenseRegHisto() and hllSparseRegHisto()
 * functions as helpers to compute histogram of register values part of the
 * computation, which is representation-specific, while all the rest is common. */

/* Implements the register histogram calculation for the HLL_RAW layout, in
 * which every register is stored in its own byte. It is used by hllCount()
 * for the temporary buffer pfcountCommand builds for multi-key counts.
 *
 * Registers are examined 8 at a time: if the 8 bytes are all zero,
 * reghisto[0] is bumped by 8 in one go, otherwise each byte is counted
 * individually.
 *
 * The 8-byte group is loaded with memcpy() rather than through a casted
 * uint64_t pointer: 'registers' is a plain byte array with no alignment
 * guarantee, so the cast is a misaligned, aliasing-violating access. */
void hllRawRegHisto(uint8_t* registers, int* reghisto)
{
    const uint8_t* bytes = registers;
    int j;

    for (j = 0; j < HLL_REGISTERS / 8; j++, bytes += 8)
    {
        uint64_t word;

        memcpy(&word, bytes, sizeof(word));
        if (word == 0)
        {
            reghisto[0] += 8;
            continue;
        }
        reghisto[bytes[0]]++;
        reghisto[bytes[1]]++;
        reghisto[bytes[2]]++;
        reghisto[bytes[3]]++;
        reghisto[bytes[4]]++;
        reghisto[bytes[5]]++;
        reghisto[bytes[6]]++;
        reghisto[bytes[7]]++;
    }
}

/* Helper function sigma as defined in
 * "New cardinality estimation algorithms for HyperLogLog sketches"
 * Otmar Ertl, arXiv:1702.01284
 *
 * Evaluates x + x^2 + 2*x^4 + 4*x^8 + ... by repeated squaring, stopping as
 * soon as adding a further term no longer changes the sum in double
 * precision. Returns INFINITY for x == 1. */
double hllSigma(double x)
{
    if (x == 1.) return INFINITY;

    double weight = 1;
    double sum = x;
    for (;;)
    {
        double before = sum;

        x *= x;
        sum += x * weight;
        weight += weight;
        if (before == sum) break;
    }
    return sum;
}

/* Helper function tau as defined in
 * "New cardinality estimation algorithms for HyperLogLog sketches"
 * Otmar Ertl, arXiv:1702.01284 */
double hllTau(double x)
{
    if (x == 0. || x == 1.) return 0.;
    double zPrime;
    double y = 1.0;
    double z = 1 - x;
    do
    {
        x = sqrt(x);
        zPrime = z;
        y *= 0.5;
        z -= pow(1 - x, 2) * y;
    } while (zPrime != z);
    return z / 3;
}

/* Return the approximated cardinality of the set represented by the
 * HyperLogLog whose header is 'hdr'.
 *
 * A histogram of the register values is built with the helper matching
 * hdr->encoding (dense, sparse, or the internal one-byte-per-register
 * HLL_RAW layout used to speed up PFCOUNT on multiple keys), and the estimate
 * is derived from that histogram.
 *
 * If the sparse representation is not valid, the integer pointed to by
 * 'invalid' is set to non-zero, otherwise it is left untouched. An unknown
 * encoding triggers serverPanic(). */
uint64_t hllCount(struct hllhdr *hdr, int* invalid) {
    const double m = HLL_REGISTERS;
    /* Only HLL_Q + 2 slots are strictly needed, but sizing the histogram for
     * every 6-bit value means an unexpected register value still lands
     * inside the array. */
    int reghisto[64] = { 0 };

    /* Compute register histogram */
    switch (hdr->encoding)
    {
        case HLL_DENSE:
            hllDenseRegHisto(hdr->registers, reghisto);
            break;
        case HLL_SPARSE:
            hllSparseRegHisto(hdr->registers,
                              sdslen((sds)hdr) - HLL_HDR_SIZE, invalid, reghisto);
            break;
        case HLL_RAW:
            hllRawRegHisto(hdr->registers, reghisto);
            break;
        default:
            serverPanic("Unknown HyperLogLog encoding in hllCount()");
    }

    /* Estimate cardinality from the register histogram. See:
     * "New cardinality estimation algorithms for HyperLogLog sketches"
     * Otmar Ertl, arXiv:1702.01284 */
    double z = m * hllTau((m - reghisto[HLL_Q + 1]) / (double)m);
    for (int level = HLL_Q; level >= 1; --level)
    {
        z += reghisto[level];
        z *= 0.5;
    }
    z += m * hllSigma(reghisto[0] / (double)m);

    double estimate = llroundl(HLL_ALPHA_INF * m * m / z);
    return (uint64_t)estimate;
}

/* Add an element to the HyperLogLog in 'o', dispatching on its encoding to
 * hllDenseAdd() or hllSparseAdd(). Returns the callee's result, or -1 when
 * the encoding is neither dense nor sparse. */
int hllAdd(robj* o, unsigned char* ele, size_t elesize)
{
    struct hllhdr *hdr = o->ptr;

    if (hdr->encoding == HLL_DENSE)
        return hllDenseAdd(hdr->registers, ele, elesize);
    if (hdr->encoding == HLL_SPARSE)
        return hllSparseAdd(o, ele, elesize);
    return -1; /* Invalid representation. */
}

/* Merge by computing MAX(registers[i],hll[i]) the HyperLogLog 'hll'
 * with an array of uint8_t HLL_REGISTERS registers pointed by 'max'.
 *
 * The hll object must be already validated via isHLLObjectOrReply()
 * or in some other way.
 *
 * If the HyperLogLog is sparse and is found to be invalid, C_ERR
 * is returned, otherwise the function always succeeds.
 *
 * In the sparse case every run, including ZERO and XZERO, is checked against
 * HLL_REGISTERS before the register index is advanced. Without that check a
 * crafted payload made of many zero runs could overflow the signed index;
 * once it wraps negative the VAL bounds test passes and 'max' is written out
 * of bounds. Any overshooting run makes the payload invalid. */
int hllMerge(uint8_t* max, robj* hll)
{
    struct hllhdr *hdr = hll->ptr;
    int i;

    if (hdr->encoding == HLL_DENSE)
    {
        uint8_t val;

        for (i = 0; i < HLL_REGISTERS; i++)
        {
            HLL_DENSE_GET_REGISTER(val, hdr->registers, i);
            if (val > max[i]) max[i] = val;
        }
    }
    else
    {
        uint8_t* p = hll->ptr, *end = p + sdslen(hll->ptr);
        long runlen, regval;
        int valid = 1;

        p += HLL_HDR_SIZE;
        i = 0;
        while (p < end)
        {
            if (HLL_SPARSE_IS_ZERO(p))
            {
                runlen = HLL_SPARSE_ZERO_LEN(p);
                if ((runlen + i) > HLL_REGISTERS) { valid = 0; break; }
                i += runlen;
                p++;
            }
            else if (HLL_SPARSE_IS_XZERO(p))
            {
                runlen = HLL_SPARSE_XZERO_LEN(p);
                if ((runlen + i) > HLL_REGISTERS) { valid = 0; break; }
                i += runlen;
                p += 2;
            }
            else
            {
                runlen = HLL_SPARSE_VAL_LEN(p);
                regval = HLL_SPARSE_VAL_VALUE(p);
                if ((runlen + i) > HLL_REGISTERS) { valid = 0; break; }
                while (runlen--)
                {
                    if (regval > max[i]) max[i] = regval;
                    i++;
                }
                p++;
            }
        }
        /* The opcodes must cover exactly HLL_REGISTERS registers. */
        if (!valid || i != HLL_REGISTERS) return C_ERR;
    }
    return C_OK;
}

HyperLogLog指令原理

创建一个稀疏对象

为了节省内存。这段代码也比较简单,没什么过多的逻辑,不做多余说明

/* Create an empty HyperLogLog as a string object in the sparse
 * representation: the header followed by as many XZERO opcodes as are needed
 * to cover all HLL_REGISTERS registers. */
robj* createHLLObject(void)
{
    /* Header plus two bytes for every XZERO opcode required. */
    int sparselen = HLL_HDR_SIZE +
                    (((HLL_REGISTERS + (HLL_SPARSE_XZERO_MAX_LEN - 1)) /
                     HLL_SPARSE_XZERO_MAX_LEN) * 2);
    sds s = sdsnewlen(NULL, sparselen);
    uint8_t* cursor = (uint8_t*)s + HLL_HDR_SIZE;

    /* Emit XZERO runs until every register is covered. */
    for (int remaining = HLL_REGISTERS; remaining != 0; )
    {
        int chunk = remaining < HLL_SPARSE_XZERO_MAX_LEN
                        ? remaining
                        : HLL_SPARSE_XZERO_MAX_LEN;
        HLL_SPARSE_XZERO_SET(cursor, chunk);
        cursor += 2;
        remaining -= chunk;
    }
    serverAssert((cursor - (uint8_t*)s) == sparselen);

    /* Wrap the buffer in a string object and fill in the header. */
    robj* o = createObject(OBJ_STRING, s);
    struct hllhdr *hdr = o->ptr;
    memcpy(hdr->magic, "HYLL", 4);
    hdr->encoding = HLL_SPARSE;
    return o;
}
检查对象是否是HyperLogLog
/* Check that 'o' is a string object holding a plausible HyperLogLog.
 *
 * Returns C_OK if every check passes. Otherwise returns C_ERR after an error
 * has been sent to the client: by checkType() for a non-string object, or the
 * WRONGTYPE reply below for a string that does not look like an HLL. */
int isHLLObjectOrReply(client* c, robj* o)
{
    struct hllhdr *hdr;

    /* Type check; checkType() has already replied on failure. */
    if (checkType(c, o, OBJ_STRING))
        return C_ERR;

    /* The value must be sds-encoded and at least as long as the header. */
    if (!sdsEncodedObject(o) || stringObjectLen(o) < sizeof(*hdr))
        goto invalid;

    /* Magic bytes, a known encoding, and for dense objects the exact dense
     * size must all match. */
    hdr = o->ptr;
    if (hdr->magic[0] == 'H' && hdr->magic[1] == 'Y' &&
        hdr->magic[2] == 'L' && hdr->magic[3] == 'L' &&
        hdr->encoding <= HLL_MAX_ENCODING &&
        (hdr->encoding != HLL_DENSE ||
         stringObjectLen(o) == HLL_DENSE_SIZE))
    {
        return C_OK;
    }

invalid:
    addReplySds(c,
        sdsnew("-WRONGTYPE Key is not a valid "
               "HyperLogLog string value.\r\n"));
    return C_ERR;
}
PFADD代码原理
/* PFADD key ele1 ele2 ... eleN
 *
 * Adds every element to the HyperLogLog stored at the key, creating it if it
 * does not exist. Replies 1 if the key was created or at least one register
 * changed, 0 otherwise, or an error if the value is not a valid HLL. */
void pfaddCommand(client* c)
{
    robj* o = lookupKeyWrite(c->db, c->argv[1]);
    int updated = 0;

    if (o != NULL)
    {
        /* The key exists: it must hold an HLL; unshare it before writing. */
        if (isHLLObjectOrReply(c, o) != C_OK) return;
        o = dbUnshareStringValue(c->db, c->argv[1], o);
    }
    else
    {
        /* The key is missing: create an empty HLL, which counts as an update. */
        o = createHLLObject();
        dbAdd(c->db, c->argv[1], o);
        updated++;
    }

    /* argv[0] is the command name and argv[1] the key, so the elements to
     * add start at argv[2]. */
    for (int j = 2; j < c->argc; j++)
    {
        int retval = hllAdd(o, (unsigned char *)c->argv[j]->ptr,
                            sdslen(c->argv[j]->ptr));
        if (retval == -1)
        {
            addReplySds(c, sdsnew(invalid_hll_err));
            return;
        }
        if (retval == 1) updated++;
    }

    if (updated)
    {
        /* Read o->ptr only now: adding elements may have replaced the buffer. */
        struct hllhdr *hdr = o->ptr;

        signalModifiedKey(c, c->db, c->argv[1]);
        notifyKeyspaceEvent(NOTIFY_STRING, "pfadd", c->argv[1], c->db->id);
        server.dirty++;
        HLL_INVALIDATE_CACHE(hdr);
    }
    addReply(c, updated ? shared.cone : shared.czero);
}
PFCOUNT原理
/* PFCOUNT var -> approximated cardinality of set.
 *
 * With several keys, replies with the cardinality of the union of the given
 * HLLs, computed on a temporary HLL_RAW buffer. With a single key, replies
 * with the cached cardinality when it is valid, otherwise recomputes it and
 * stores it back in the header. */
void pfcountCommand(client* c)
{
    robj* o;
    struct hllhdr *hdr;
uint64_t card;

/* Case 1: multi-key keys, cardinality of the union.
 *
 * When multiple keys are specified, PFCOUNT actually computes
 * the cardinality of the merge of the N HLLs specified. */
if (c->argc > 2)
{
    /* Temporary HLL on the stack: a header followed by one byte per register. */
    uint8_t max[HLL_HDR_SIZE + HLL_REGISTERS], *registers;
    int j;

    /* Compute an HLL with M[i] = MAX(M[i]_j). */
    memset(max, 0, sizeof(max));
    hdr = (struct hllhdr*) max;
hdr->encoding = HLL_RAW; /* Special internal-only encoding. */
registers = max + HLL_HDR_SIZE;
for (j = 1; j < c->argc; j++)
{
    /* Check type and size. */
    robj* o = lookupKeyRead(c->db, c->argv[j]);
    if (o == NULL) continue; /* Assume empty HLL for non existing var.*/
    if (isHLLObjectOrReply(c, o) != C_OK) return;

    /* Merge with this HLL with our 'max' HLL by setting max[i]
     * to MAX(max[i],hll[i]).
     * With several keys the reply is the estimate for their union. */
    if (hllMerge(registers, o) == C_ERR)
    {
        addReplySds(c, sdsnew(invalid_hll_err));
        return;
    }
}

/* Compute cardinality of the resulting set.*/
addReplyLongLong(c, hllCount(hdr, NULL));
return;
    }

    /* Case 2: cardinality of the single HLL.
     *
     * The user specified a single key. Either return the cached value
     * or compute one and update the cache. */
    o = lookupKeyWrite(c->db, c->argv[1]);
if (o == NULL)
{
    /* No key? Cardinality is zero since no element was added, otherwise
     * we would have a key as PFADD creates it as a side effect. */
    addReply(c, shared.czero);
}
else
{
    if (isHLLObjectOrReply(c, o) != C_OK) return;
    o = dbUnshareStringValue(c->db, c->argv[1], o);

    /* Check if the cached cardinality is valid. */
    hdr = o->ptr;
    if (HLL_VALID_CACHE(hdr))
    {
        /* Just return the cached value, stored little endian in card[]. */
        card = (uint64_t)hdr->card[0];
        card |= (uint64_t)hdr->card[1] << 8;
        card |= (uint64_t)hdr->card[2] << 16;
        card |= (uint64_t)hdr->card[3] << 24;
        card |= (uint64_t)hdr->card[4] << 32;
        card |= (uint64_t)hdr->card[5] << 40;
        card |= (uint64_t)hdr->card[6] << 48;
        card |= (uint64_t)hdr->card[7] << 56;
    }
    else
    {
        int invalid = 0;
        /* Recompute it and update the cached value. */
        card = hllCount(hdr, &invalid);
        if (invalid)
        {
            addReplySds(c, sdsnew(invalid_hll_err));
            return;
        }
        /* Store the new value little endian; writing card[7] also replaces
         * the byte that carries the stale flag. */
        hdr->card[0] = card & 0xff;
        hdr->card[1] = (card >> 8) & 0xff;
        hdr->card[2] = (card >> 16) & 0xff;
        hdr->card[3] = (card >> 24) & 0xff;
        hdr->card[4] = (card >> 32) & 0xff;
        hdr->card[5] = (card >> 40) & 0xff;
        hdr->card[6] = (card >> 48) & 0xff;
        hdr->card[7] = (card >> 56) & 0xff;
        /* This is not considered a read-only command even if the
         * data structure is not modified, since the cached value
         * may be modified and given that the HLL is a Redis string
         * we need to propagate the change. */
        signalModifiedKey(c, c->db, c->argv[1]);
        server.dirty++;
    }
    addReplyLongLong(c, card);
}
}
PFMERGE命令
/* PFMERGE dest src1 src2 src3 ... srcN => OK
 *
 * Computes the register-wise maximum of all the given HLLs (the destination
 * included, since the loop starts at argv[1]) and writes it to the
 * destination key, creating the key if needed. Replies OK, or an error if
 * any of the values is not a valid HLL. */
void pfmergeCommand(client* c)
{
    uint8_t max[HLL_REGISTERS];
    struct hllhdr *hdr;
int j;
int use_dense = 0; /* Use dense representation as target? */

/* Compute an HLL with M[i] = MAX(M[i]_j).
 * We store the maximum into the max array of registers. We'll write
 * it to the target variable later. */
memset(max, 0, sizeof(max));
for (j = 1; j < c->argc; j++)
{
    /* Check type and size. */
    robj* o = lookupKeyRead(c->db, c->argv[j]);
    if (o == NULL) continue; /* Assume empty HLL for non existing var. */
    if (isHLLObjectOrReply(c, o) != C_OK) return;

    /* If at least one involved HLL is dense, use the dense representation
     * as target ASAP to save time and avoid the conversion step. */
    hdr = o->ptr;
    if (hdr->encoding == HLL_DENSE) use_dense = 1;

    /* Merge with this HLL with our 'max' HLL by setting max[i]
     * to MAX(max[i],hll[i]). */
    if (hllMerge(max, o) == C_ERR)
    {
        addReplySds(c, sdsnew(invalid_hll_err));
        return;
    }
}

/* Create / unshare the destination key's value if needed. */
robj* o = lookupKeyWrite(c->db, c->argv[1]);
if (o == NULL)
{
    /* The destination does not exist: create an empty (sparse) HLL for it;
     * the merged registers are written into it below. */
    o = createHLLObject();
    dbAdd(c->db, c->argv[1], o);
}
else
{
    /* If key exists we are sure it's of the right type/size
     * since we checked when merging the different HLLs, so we
     * don't check again. */
    o = dbUnshareStringValue(c->db, c->argv[1], o);
}

/* Convert the destination object to dense representation if at least
 * one of the inputs was dense. */
if (use_dense && hllSparseToDense(o) == C_ERR)
{
    addReplySds(c, sdsnew(invalid_hll_err));
    return;
}

/* Write the resulting HLL to the destination HLL registers and
 * invalidate the cached value. The header is re-read on every iteration
 * because hllSparseSet() may replace o->ptr or promote the object to dense. */
for (j = 0; j < HLL_REGISTERS; j++)
{
    if (max[j] == 0) continue;
    hdr = o->ptr;
    switch (hdr->encoding)
    {
        case HLL_DENSE: hllDenseSet(hdr->registers, j, max[j]); break;
        case HLL_SPARSE: hllSparseSet(o, j, max[j]); break;
    }
}
hdr = o->ptr; /* o->ptr may be different now, as a side effect of
                     last hllSparseSet() call. */
HLL_INVALIDATE_CACHE(hdr);

signalModifiedKey(c, c->db, c->argv[1]);
/* We generate a PFADD event for PFMERGE for semantical simplicity
 * since in theory this is a mass-add of elements. */
notifyKeyspaceEvent(NOTIFY_STRING, "pfadd", c->argv[1], c->db->id);
server.dirty++;
addReply(c, shared.ok);
}

HyperLogLog的内部测试

这块代码我也没有仔细阅读过,他主要是测试了几个方面
访问寄存器,测试近似误差,还有一些指令测试。感兴趣的同学可以去github看看。这里只把源代码贴上来,不做多余的说明。

/* ========================== Testing / Debugging  ========================== */

/* PFSELFTEST
 * Runs an internal self-test of the HLL implementation. Replies with
 * shared.ok when every check passes, or with a "TESTFAILED ..." error
 * describing the first check that failed.
 *
 * Test 1 writes random values into every dense register and reads them
 * back. Test 2 adds distinct elements to a raw dense buffer and to an HLL
 * object in parallel and, at each checkpoint (1, 10, 100, ...), verifies
 * the encoding, that both counts agree, and the estimation error. */
#define HLL_TEST_CYCLES 1000
void pfselftestCommand(client* c)
{
    unsigned int cycle, reg, count;
    sds dense_buf = sdsnewlen(NULL, HLL_DENSE_SIZE);
    struct hllhdr *dense = (struct hllhdr*) dense_buf;
    struct hllhdr *obj_hdr;
    robj *hll = NULL;
    uint8_t shadow[HLL_REGISTERS]; /* Plain byte copy of what we stored. */
    const double relerr = 1.04 / sqrt(HLL_REGISTERS);
    int64_t checkpoint = 1;
    uint64_t seed;
    uint64_t ele;

    /* Test 1: register access.
     * Every register is set to a random value that is also recorded in
     * a plain byte array; reading the registers back must yield exactly
     * the recorded values. */
    for (cycle = 0; cycle < HLL_TEST_CYCLES; cycle++)
    {
        /* Fill phase. */
        for (reg = 0; reg < HLL_REGISTERS; reg++)
        {
            unsigned int rnd = rand() & HLL_REGISTER_MAX;

            shadow[reg] = rnd;
            HLL_DENSE_SET_REGISTER(dense->registers, reg, rnd);
        }

        /* Verify phase. */
        for (reg = 0; reg < HLL_REGISTERS; reg++)
        {
            unsigned int got;

            HLL_DENSE_GET_REGISTER(got, dense->registers, reg);
            if (got == shadow[reg]) continue;

            addReplyErrorFormat(c,
                "TESTFAILED Register %d should be %d but is %d",
                reg, (int)shadow[reg], (int)got);
            goto cleanup;
        }
    }

    /* Test 2: approximation error.
     * Distinct elements are added to both the dense buffer and the HLL
     * object. At each checkpoint the estimate must stay within six times
     * the expected relative error of the real cardinality. */
    memset(dense->registers, 0, HLL_DENSE_SIZE - HLL_HDR_SIZE);
    hll = createHLLObject();
    seed = (uint64_t)rand() | (uint64_t)rand() << 32;

    for (count = 1; count <= 10000000; count++)
    {
        ele = count ^ seed;
        hllDenseAdd(dense->registers, (unsigned char *) &ele, sizeof(ele));
        hllAdd(hll, (unsigned char *) &ele, sizeof(ele));

        /* All the checks below only run at checkpoints. */
        if (count != checkpoint) continue;

        /* For small cardinalities the object must still be sparse. */
        if (count < server.hll_sparse_max_bytes / 2)
        {
            obj_hdr = hll->ptr;
            if (obj_hdr->encoding != HLL_SPARSE)
            {
                addReplyError(c, "TESTFAILED sparse encoding not used");
                goto cleanup;
            }
        }

        /* The dense buffer and the object must report the same count. */
        if (hllCount(dense, NULL) != hllCount(hll->ptr, NULL))
        {
            addReplyError(c, "TESTFAILED dense/sparse disagree");
            goto cleanup;
        }

        /* Compare the estimate against the real cardinality. */
        int64_t abserr = checkpoint - (int64_t)hllCount(dense, NULL);
        uint64_t maxerr = ceil(relerr * 6 * checkpoint);

        /* Cardinality 10 gets a fixed tolerance of 1 instead of the
         * computed bound. */
        if (count == 10) maxerr = 1;

        abserr = (abserr < 0) ? -abserr : abserr;
        if (abserr > (int64_t)maxerr)
        {
            addReplyErrorFormat(c,
                "TESTFAILED Too big error. card:%llu abserr:%llu",
                (unsigned long long) checkpoint,
                (unsigned long long) abserr);
            goto cleanup;
        }
        checkpoint *= 10;
    }

    /* Success! */
    addReply(c, shared.ok);

cleanup:
    sdsfree(dense_buf);
    if (hll != NULL) decrRefCount(hll);
}

/* PFDEBUG <subcommand> <key> ... args ...
 * Debugging helpers operating on the HLL stored at <key>. The key must
 * exist and pass isHLLObjectOrReply(); otherwise an error is replied.
 *
 * Subcommands (each requires exactly 3 arguments in total):
 *   GETREG   - reply with the value of every register; a sparse HLL is
 *              converted to dense first.
 *   DECODE   - reply with a textual dump of the sparse representation.
 *   ENCODING - reply with "dense" or "sparse".
 *   TODENSE  - convert a sparse HLL to dense; reply 1 if a conversion
 *              happened, 0 otherwise. */
void pfdebugCommand(client* c)
{
    char *subcmd = c->argv[1]->ptr;
    struct hllhdr *hdr;
    robj *hll;

    hll = lookupKeyWrite(c->db, c->argv[2]);
    if (hll == NULL)
    {
        addReplyError(c, "The specified key does not exist");
        return;
    }
    if (isHLLObjectOrReply(c, hll) != C_OK) return;
    hll = dbUnshareStringValue(c->db, c->argv[2], hll);
    hdr = hll->ptr;

    if (strcasecmp(subcmd, "getreg") == 0)
    {
        /* PFDEBUG GETREG <key> */
        int reg;

        if (c->argc != 3) goto arityerr;

        if (hdr->encoding == HLL_SPARSE)
        {
            if (hllSparseToDense(hll) == C_ERR)
            {
                addReplySds(c, sdsnew(invalid_hll_err));
                return;
            }
            server.dirty++; /* Force propagation on encoding change. */
        }

        /* Reload the header pointer after the possible conversion. */
        hdr = hll->ptr;
        addReplyArrayLen(c, HLL_REGISTERS);
        for (reg = 0; reg < HLL_REGISTERS; reg++)
        {
            uint8_t regval;

            HLL_DENSE_GET_REGISTER(regval, hdr->registers, reg);
            addReplyLongLong(c, regval);
        }
    }
    else if (strcasecmp(subcmd, "decode") == 0)
    {
        /* PFDEBUG DECODE <key> */
        if (c->argc != 3) goto arityerr;

        if (hdr->encoding != HLL_SPARSE)
        {
            addReplyError(c, "HLL encoding is not sparse");
            return;
        }

        uint8_t *cur = hll->ptr;
        uint8_t *end = cur + sdslen(hll->ptr);
        sds dump = sdsempty();

        /* Walk the opcodes that follow the header, appending one
         * space-terminated token per opcode. */
        cur += HLL_HDR_SIZE;
        while (cur < end)
        {
            int runlen, regval;

            if (HLL_SPARSE_IS_ZERO(cur))
            {
                runlen = HLL_SPARSE_ZERO_LEN(cur);
                cur += 1;
                dump = sdscatprintf(dump, "z:%d ", runlen);
            }
            else if (HLL_SPARSE_IS_XZERO(cur))
            {
                runlen = HLL_SPARSE_XZERO_LEN(cur);
                cur += 2;
                dump = sdscatprintf(dump, "Z:%d ", runlen);
            }
            else
            {
                runlen = HLL_SPARSE_VAL_LEN(cur);
                regval = HLL_SPARSE_VAL_VALUE(cur);
                cur += 1;
                dump = sdscatprintf(dump, "v:%d,%d ", regval, runlen);
            }
        }

        /* Strip surrounding spaces (the trailing one in particular). */
        dump = sdstrim(dump, " ");
        addReplyBulkCBuffer(c, dump, sdslen(dump));
        sdsfree(dump);
    }
    else if (strcasecmp(subcmd, "encoding") == 0)
    {
        /* PFDEBUG ENCODING <key> */
        char *names[2] = { "dense", "sparse" };

        if (c->argc != 3) goto arityerr;

        addReplyStatus(c, names[hdr->encoding]);
    }
    else if (strcasecmp(subcmd, "todense") == 0)
    {
        /* PFDEBUG TODENSE <key> */
        int converted = 0;

        if (c->argc != 3) goto arityerr;

        if (hdr->encoding == HLL_SPARSE)
        {
            if (hllSparseToDense(hll) == C_ERR)
            {
                addReplySds(c, sdsnew(invalid_hll_err));
                return;
            }
            converted = 1;
            server.dirty++; /* Force propagation on encoding change. */
        }

        if (converted)
            addReply(c, shared.cone);
        else
            addReply(c, shared.czero);
    }
    else
    {
        addReplyErrorFormat(c, "Unknown PFDEBUG subcommand '%s'", subcmd);
    }
    return;

arityerr:
    addReplyErrorFormat(c,
        "Wrong number of arguments for the '%s' subcommand", subcmd);
}

© 2020 GitHub, Inc.
Terms
Privacy
Security
Status
Help
Contact GitHub
Pricing
API
Training
Blog
About

结语

谢谢观看!如有疑问欢迎留言;上面的解析可能存在错误之处,欢迎留言指正。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值