varint为变长整数。长度为1到9个字节。最大可表示64位整数。
使用varint表示整数的原因:大多数情况下,整数数值比较小,如果使用64位整数保存的话,会浪费空间。
Varint格式如下(源于源码util.c):
A = 0xxxxxxx 7位数据,一个标志位(这里就是高位的0)
B = 1xxxxxxx 7位数据,一个标志位(这里就是高位的1)
C = xxxxxxxx 8位数据
7 bits - A
14 bits - BA
21 bits - BBA
28 bits - BBBA
35 bits - BBBBA
42 bits - BBBBBA
49 bits - BBBBBBA
56 bits - BBBBBBBA
64 bits - BBBBBBBBC
对于前8个字节,以A为结尾,通过判断字节是否为A来来确定是否到达varint尾部。
对于9个字节组成的varint来说,如果前8个字节都为B,则说明此varint为9个字节长。如果为8个B,则说明下一个字节就是varint的尾部。SQLite的作者设计果然精巧。
以下是源码分析:
/*
** 从p[0]开始的内存处读取64位变长整数。
** 返回varint的字节数。varint值保存在*v中。
*/
u8 sqlite3GetVarint(const unsigned char *p, u64 *v){
u32 a,b,s;
a = *p;
/* a: p0 (unmasked) */
//varint为一个字节时
if (!(a&0x80)) //如果a<=127=0x7F=1111111b
{
//值在0到127之间
*v = a; //直接取值 之后返回
return 1;
}
p++;
b = *p;
/* b: p1 (unmasked) */
//varint为两个字节时
if (!(b&0x80))//判断第二个字节是否为A。如果是,则此varint占用两个字节 格式为BA
{
a &= 0x7f; //a为第一个字节 取低7位
a = a<<7; //a左移7位 为B的7位腾出空间
a |= b; //A的低7位+B的低7位
*v = a;
return 2;
}
/* Verify that constants are precomputed correctly */
assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );
p++;
a = a<<14;
a |= *p; //此后a为BXA或BXB
/* a: p0<<14 | p2 (unmasked) */
if (!(a&0x80)) //判断第三个字节是否为A。如果是,则此varint占用三个字节,格式为BBA。
{
a &= SLOT_2_0; //SLOT_2_0=111111100000001111111 清空X位
b &= 0x7f;
b = b<<7;
a |= b; //填充X位 合成BBA
*v = a;
return 3;
}
//</略过分析>
/* CSE1 from below */
a &= SLOT_2_0;
p++;
b = b<<14;
b |= *p;
/* b: p1<<14 | p3 (unmasked) */
if (!(b&0x80))
{
b &= SLOT_2_0;
/* moved CSE1 up */
/* a &= (0x7f<<14)|(0x7f); */
a = a<<7;
a |= b;
*v = a;
return 4;
}
/* a: p0<<14 | p2 (masked) */
/* b: p1<<14 | p3 (unmasked) */
/* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
/* moved CSE1 up */
/* a &= (0x7f<<14)|(0x7f); */
b &= SLOT_2_0;
s = a;
/* s: p0<<14 | p2 (masked) */
p++;
a = a<<14;
a |= *p;
/* a: p0<<28 | p2<<14 | p4 (unmasked) */
if (!(a&0x80))
{
/* we can skip these cause they were (effectively) done above
** while calculating s */
/* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
/* b &= (0x7f<<14)|(0x7f); */
b = b<<7;
a |= b;
s = s>>18;
*v = ((u64)s)<<32 | a;
return 5;
}
/* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
s = s<<7;
s |= b;
/* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
p++;
b = b<<14;
b |= *p;
/* b: p1<<28 | p3<<14 | p5 (unmasked) */
if (!(b&0x80))
{
/* we can skip this cause it was (effectively) done above in calc'ing s */
/* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
a &= SLOT_2_0;
a = a<<7;
a |= b;
s = s>>18;
*v = ((u64)s)<<32 | a;
return 6;
}
p++;
a = a<<14;
a |= *p;
/* a: p2<<28 | p4<<14 | p6 (unmasked) */
if (!(a&0x80))
{
a &= SLOT_4_2_0;
b &= SLOT_2_0;
b = b<<7;
a |= b;
s = s>>11;
*v = ((u64)s)<<32 | a;
return 7;
}
/* CSE2 from below */
a &= SLOT_2_0;
p++;
b = b<<14;
b |= *p;
/* b: p3<<28 | p5<<14 | p7 (unmasked) */
if (!(b&0x80))
{
b &= SLOT_4_2_0;
/* moved CSE2 up */
/* a &= (0x7f<<14)|(0x7f); */
a = a<<7;
a |= b;
s = s>>4;
*v = ((u64)s)<<32 | a;
return 8;
}
//</略过分析>
//前8个字节都为B,所以此varint有9个字节,格式为BBBBBBBBC。
p++;
a = a<<15;
a |= *p;
/* a: p4<<29 | p6<<15 | p8 (unmasked) */
/* moved CSE2 up */
/* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
b &= SLOT_2_0;
b = b<<8;
a |= b;
s = s<<4;
b = p[-4];
b &= 0x7f;
b = b>>3;
s |= b;
*v = ((u64)s)<<32 | a;
return 9;
}