在上篇文章中我们讲了SQLite3中printf的基本框架,而其格式化字符串输出的核心是sqlite3VXPrintf()函数,本文就来详细分析sqlite3VXPrintf()函数的实现。由于支持的格式较多,所以只针对最常用的%d、%x、%f、%g做分析。
1.基本例子
下面举几个小例子,读者可以根据这几个例子来理解代码。
int a = 10;
int b = 100;
printf("b: %#*x\n",a,b);//#的作用是在16进制前添加0x,*被参数a替代,表示宽度为10
输出b:       0x64(宽度为10且默认右对齐,所以"0x64"前面补6个空格)
int a = 123456789;
printf("a: %,d\n",a);//','是SQLite的printf扩展的千位分隔符标志,标准C库的printf并不支持
输出a: 123,456,789
int a = 20;
int b = 5;
double c = 100.123456789;
printf("c: %*.*f\n",a,b,c);
printf("c: %*.*e\n",a,b,c);
printf("c: %*.*g\n",a,b,c);
输出(宽度为20,默认右对齐,左侧用空格补足):
c:            100.12346
c:          1.00123e+02
c:               100.12
(按照下文的实现,指数小于100时只输出两位,所以SQLite的结果是e+02;e+002是旧版MSVC的C库printf的输出形式。)
2.源码分析
由于这些实现比较细碎,所以分析以代码注释的形式给出,结合上面的例子还是不难理解的。
首先SQLite把所有格式定义在了一个结构体数组里:fmttype代表格式字符,如%d中的d、%f中的f等;base代表进制基数(8、10或16);解析格式串时会用格式字符在数组中查找对应的表项,得到一个type,然后在相应的switch分支里处理该格式。
/*
** Descriptor for a single conversion letter understood by the formatter.
** One of these exists per supported "%X" code; the fields are filled in
** positionally by the fmtinfo[] table, so their order must not change.
*/
struct et_info {
  char   fmttype;   /* Conversion letter that selects this entry */
  etByte base;      /* Radix used when converting integers */
  etByte flags;     /* Bitmask built from the FLAG_ constants */
  etByte type;      /* Conversion paradigm (one of the et* codes) */
  etByte charset;   /* Index into aDigits[] where the digit set starts */
  etByte prefix;    /* Index into aPrefix[] where the prefix text starts */
};
typedef struct et_info et_info;
/*
** Bit values that may appear in et_info.flags.
*/
#define FLAG_SIGNED 0x01 /* The value being converted is signed */
#define FLAG_STRING 0x04 /* Precision is unbounded for this conversion */

/*
** Digit characters, upper-case set first and lower-case set second.
** et_info.charset is an offset into this string: 0 selects the upper-case
** hexadecimal digits, 16 the lower-case ones.  Offsets 14 and 30 land on
** 'E' and 'e', which the floating-point code uses as the exponent letter.
*/
static const char aDigits[] =
    "0123456789ABCDEF"
    "0123456789abcdef";

/*
** Prefix strings, stored reversed because the integer converter emits
** characters from right to left.  et_info.prefix is an offset into this
** string: 1 -> "x0" (i.e. "0x"), 2 -> "0", 4 -> "X0" (i.e. "0X").
** The embedded NUL at offset 3 terminates the first two prefixes.
*/
static const char aPrefix[] = "-x0\0" "X0";
/*
** Table of every conversion letter the formatter recognizes.  Columns are,
** in order: letter, radix, flags (1 = FLAG_SIGNED, 4 = FLAG_STRING),
** conversion paradigm, offset into aDigits[], offset into aPrefix[].
** The formatter scans this table linearly for the letter that follows '%'.
*/
static const et_info fmtinfo[] = {
  {  'd', 10, 1, etDECIMAL,    0,  0 },  /* signed decimal */
  {  's',  0, 4, etSTRING,     0,  0 },
  {  'g',  0, 1, etGENERIC,    30, 0 },  /* exponent letter 'e' */
  {  'z',  0, 4, etDYNSTRING,  0,  0 },
  {  'q',  0, 4, etSQLESCAPE,  0,  0 },
  {  'Q',  0, 4, etSQLESCAPE2, 0,  0 },
  {  'w',  0, 4, etSQLESCAPE3, 0,  0 },
  {  'c',  0, 0, etCHARX,      0,  0 },
  {  'o',  8, 0, etRADIX,      0,  2 },  /* octal, "0" prefix with '#' */
  {  'u', 10, 0, etDECIMAL,    0,  0 },  /* unsigned decimal */
  {  'x', 16, 0, etRADIX,      16, 1 },  /* lower-case hex, "0x" with '#' */
  {  'X', 16, 0, etRADIX,      0,  4 },  /* upper-case hex, "0X" with '#' */
#ifndef SQLITE_OMIT_FLOATING_POINT
  {  'f',  0, 1, etFLOAT,      0,  0 },
  {  'e',  0, 1, etEXP,        30, 0 },  /* exponent letter 'e' */
  {  'E',  0, 1, etEXP,        14, 0 },  /* exponent letter 'E' */
  {  'G',  0, 1, etGENERIC,    14, 0 },  /* exponent letter 'E' */
#endif
  {  'i', 10, 1, etDECIMAL,    0,  0 },  /* signed decimal, same as 'd' */
  {  'n',  0, 0, etSIZE,       0,  0 },
  {  '%',  0, 0, etPERCENT,    0,  0 },
  {  'p', 16, 0, etPOINTER,    0,  1 },

  /* All the rest are undocumented and are for internal use only */
  {  'T',  0, 0, etTOKEN,      0,  0 },
  {  'S',  0, 0, etSRCLIST,    0,  0 },
  {  'r', 10, 1, etORDINAL,    0,  0 },
};
下面来看sqlite3VXPrintf的实现
/*
** Render a printf-style format string into the accumulator pAccum.
**
** Literal text in fmt is copied through unchanged.  Each "%" directive is
** parsed as: flags, field width, optional ".precision", optional "l"/"ll"
** length modifier, conversion letter.  The letter is looked up in
** fmtinfo[] and the matching case of the switch below produces the text,
** which is then padded with spaces to the field width and appended.
**
** Parameters:
**   pAccum - accumulator that receives the output (and the error state if
**            a temporary allocation fails)
**   fmt    - NUL-terminated format string
**   ap     - the variadic arguments consumed by the directives
**
** The listing is an annotated excerpt: the precision parser and the
** conversion cases other than integers and floating point are elided
** and marked with "……".
*/
void sqlite3VXPrintf(
  StrAccum *pAccum,          /* Accumulate results here */
  const char *fmt,           /* Format string */
  va_list ap                 /* arguments */
){
  /* Locals: the cursor into the conversion buffer (bufpt), one variable per
  ** flag character (such as '#'), and the width/precision/length values. */
  int c;                     /* Next character in the format string */
  char *bufpt;               /* Pointer to the conversion buffer */
  int precision;             /* Precision of the current field */
  int length;                /* Length of the field */
  int idx;                   /* A general purpose loop counter */
  int width;                 /* Width of the current field */
  etByte flag_leftjustify;   /* True if "-" flag is present */
  etByte flag_prefix;        /* '+' or ' ' or 0 for prefix */
  etByte flag_alternateform; /* True if "#" flag is present */
  etByte flag_altform2;      /* True if "!" flag is present */
  etByte flag_zeropad;       /* True if field width constant starts with zero */
  etByte flag_long;          /* 1 for the "l" flag, 2 for "ll", 0 by default */
  etByte done;               /* Loop termination flag */
  etByte cThousand;          /* Thousands separator for %d and %u */
  etByte xtype = etINVALID;  /* Conversion paradigm */
  u8 bArgList;               /* True for SQLITE_PRINTF_SQLFUNC */
  char prefix;               /* Prefix character.  "+" or "-" or " " or '\0'. */
  sqlite_uint64 longvalue;   /* Value for integer types */
  LONGDOUBLE_TYPE realvalue; /* Value for real types */
  const et_info *infop;      /* Pointer to the appropriate info structure */
  char *zOut;                /* Rendering buffer */
  int nOut;                  /* Size of the rendering buffer */
  char *zExtra = 0;          /* Malloced memory used by some conversion */
#ifndef SQLITE_OMIT_FLOATING_POINT
  int  exp, e2;              /* exponent of real numbers */
  int nsd;                   /* Number of significant digits returned */
  double rounder;            /* Used for rounding floating point values */
  etByte flag_dp;            /* True if decimal point should be shown */
  etByte flag_rtz;           /* True if trailing zeros should be removed */
#endif
  PrintfArguments *pArgList = 0; /* Arguments for SQLITE_PRINTF_SQLFUNC */
  char buf[etBUFSIZE];       /* Conversion buffer */

  bufpt = 0;
  /* bArgList is consulted by every argument fetch below, so it must be set
  ** before the loop; the article's listing had dropped this step.
  ** NOTE(review): restored from SQLite's printf.c for the three-argument
  ** form of this function - confirm against the SQLite version in use. */
  if( (pAccum->printfFlags & SQLITE_PRINTF_SQLFUNC)!=0 ){
    pArgList = va_arg(ap, PrintfArguments*);
    bArgList = 1;
  }else{
    bArgList = 0;
  }

  /* Walk the format string */
  for(; (c=(*fmt))!=0; ++fmt){
    /* Text up to the next '%' is not interpreted; append it verbatim */
    if( c!='%' ){
      bufpt = (char *)fmt;
      do{ fmt++; }while( *fmt && *fmt != '%' );
      sqlite3StrAccumAppend(pAccum, bufpt, (int)(fmt - bufpt));
      if( *fmt==0 ) break;
    }
    if( (c=(*++fmt))==0 ){
      /* The format string ends with a lone '%': emit it literally */
      sqlite3StrAccumAppend(pAccum, "%", 1);
      break;
    }
    /* Find out what flags are present; each flag character sets its
    ** corresponding variable */
    flag_leftjustify = flag_prefix = cThousand =
     flag_alternateform = flag_altform2 = flag_zeropad = 0;
    done = 0;
    do{
      switch( c ){
        case '-':   flag_leftjustify = 1;     break;
        case '+':   flag_prefix = '+';        break;
        case ' ':   flag_prefix = ' ';        break;
        case '#':   flag_alternateform = 1;   break;
        case '!':   flag_altform2 = 1;        break;
        case '0':   flag_zeropad = 1;         break;
        case ',':   cThousand = ',';          break;
        default:    done = 1;                 break;
      }
    }while( !done && (c=(*++fmt))!=0 );
    /* Get the field width */
    if( c=='*' ){
      /* '*' means the width is taken from the argument list */
      if( bArgList ){
        /* Arguments come from an SQL function call rather than va_list */
        width = (int)getIntArg(pArgList);
      }else{
        width = va_arg(ap,int);
      }
      if( width<0 ){
        /* A negative width acts like a '-' flag: left-justify */
        flag_leftjustify = 1;
        /* The most negative int has no positive counterpart, so negating it
        ** would overflow; that one value is mapped to 0 instead */
        width = width >= -2147483647 ? -width : 0;
      }
      c = *++fmt;
    }else{
      /* Otherwise the width is the run of decimal digits, if any */
      unsigned wx = 0;
      while( c>='0' && c<='9' ){
        wx = wx*10 + c - '0';
        c = *++fmt;
      }
      testcase( wx>0x7fffffff );
      width = wx & 0x7fffffff;
    }
    assert( width>=0 );
    /* Get the precision: a '.' introduces it */
    if( c=='.' ){
      /* Elided by the article: the precision is parsed here the same way
      ** as the width above */
……
    }else{
      precision = -1;
    }
    assert( precision>=(-1) );
    /* Get the conversion type modifier: "l" selects long and "ll" selects
    ** a 64-bit integer (for example %lld) */
    if( c=='l' ){
      flag_long = 1;
      c = *++fmt;
      if( c=='l' ){
        flag_long = 2;
        c = *++fmt;
      }
    }else{
      flag_long = 0;
    }
    /* Fetch the info entry for the field by scanning fmtinfo[] for the
    ** conversion letter; xtype stays etINVALID if the letter is unknown */
    infop = &fmtinfo[0];
    xtype = etINVALID;
    for(idx=0; idx<ArraySize(fmtinfo); idx++){
      if( c==fmtinfo[idx].fmttype ){
        infop = &fmtinfo[idx];
        xtype = infop->type;
        break;
      }
    }
    /* Perform the conversion selected by xtype */
    switch( xtype ){
      case etPOINTER:  /* %p */
        flag_long = sizeof(char*)==sizeof(i64) ? 2 :
                    sizeof(char*)==sizeof(long int) ? 1 : 0;
        /* Fall through into the next case */
      case etORDINAL:
      case etRADIX:    /* %x, %X, %o */
        cThousand = 0;
        /* Fall through into the next case */
      case etDECIMAL:  /* %d, %u, %i */
        if( infop->flags & FLAG_SIGNED ){
          /* Signed value: fetch it, then split into magnitude and sign */
          i64 v;
          if( bArgList ){
            v = getIntArg(pArgList);
          }else if( flag_long ){
            if( flag_long==2 ){
              v = va_arg(ap,i64) ;   /* 64-bit argument */
            }else{
              v = va_arg(ap,long int);
            }
          }else{
            v = va_arg(ap,int);      /* plain int is the default */
          }
          if( v<0 ){
            /*
            ** #define LARGEST_INT64  (0xffffffff|(((i64)0x7fffffff)<<32))
            ** #define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64)
            */
            if( v==SMALLEST_INT64 ){
              /* The smallest 64-bit integer cannot be negated in i64, so
              ** its magnitude is written directly as an unsigned value */
              longvalue = ((u64)1)<<63;
            }else{
              longvalue = -v;
            }
            /* Negative numbers get a '-' prefix */
            prefix = '-';
          }else{
            longvalue = v;
            prefix = flag_prefix;    /* ' ', '+' or 0 */
          }
        }else{
          /* Unsigned value */
          if( bArgList ){
            longvalue = (u64)getIntArg(pArgList);
          }else if( flag_long ){
            if( flag_long==2 ){
              longvalue = va_arg(ap,u64);
            }else{
              longvalue = va_arg(ap,unsigned long int);
            }
          }else{
            longvalue = va_arg(ap,unsigned int);
          }
          prefix = 0;
        }
        /* A zero value never gets the "0x"/"0" alternate-form prefix */
        if( longvalue==0 ) flag_alternateform = 0;
        if( flag_zeropad && precision<width-(prefix!=0) ){
          /* Zero padding is implemented by raising the precision so that
          ** the digits fill the whole field (minus the sign, if any) */
          precision = width-(prefix!=0);
        }
        /* Use the stack buffer when it is large enough.  The etBUFSIZE/3
        ** term presumably reserves room for thousands separators. */
        if( precision<etBUFSIZE-10-etBUFSIZE/3 ){
          /* NOTE(review): the article states that etBUFSIZE is 70; the
          ** definition is not shown here */
          nOut = etBUFSIZE;
          zOut = buf;
        }else{
          /* Not enough room on the stack: allocate a temporary buffer */
          u64 n = (u64)precision + 10 + precision/3;
          zOut = zExtra = sqlite3Malloc( n );
          if( zOut==0 ){
            setStrAccumError(pAccum, STRACCUM_NOMEM);
            return;
          }
          nOut = (int)n;
        }
        /* Digits are produced least-significant first (by taking the
        ** remainder), so the buffer is filled from its end backwards */
        bufpt = &zOut[nOut-1];
        if( xtype==etORDINAL ){
          /* Ordinal suffix: 1st, 2nd, 3rd, 4th, ... and 11th, 12th, 13th */
          static const char zOrd[] = "thstndrd";
          int x = (int)(longvalue % 10);
          if( x>=4 || (longvalue/10)%10==1 ){
            x = 0;
          }
          *(--bufpt) = zOrd[x*2+1];
          *(--bufpt) = zOrd[x*2];
        }
        {
          /*
          ** aDigits is "0123456789ABCDEF0123456789abcdef":
          **   %d and %X use charset 0  (upper-case digits)
          **   %x uses charset 16       (lower-case digits)
          */
          const char *cset = &aDigits[infop->charset];
          u8 base = infop->base;     /* radix */
          do{                                           /* Convert to ascii */
            *(--bufpt) = cset[longvalue%base];
            longvalue = longvalue/base;
          }while( longvalue>0 );
        }
        length = (int)(&zOut[nOut-1]-bufpt);
        while( precision>length ){
          /* Fewer digits than the precision: pad on the left with '0' */
          *(--bufpt) = '0';                             /* Zero pad */
          length++;
        }
        if( cThousand ){
          /* Insert a separator after every group of three digits, counting
          ** from the right.  ix is the size of the leading group (1..3);
          ** note it is not length%3, which would be 0 for lengths that are
          ** a multiple of three. */
          int nn = (length - 1)/3;  /* Number of "," to insert */
          int ix = (length - 1)%3 + 1;
          bufpt -= nn;
          for(idx=0; nn>0; idx++){
            bufpt[idx] = bufpt[idx+nn];
            ix--;
            if( ix==0 ){
              bufpt[++idx] = cThousand;
              nn--;
              ix = 3;
            }
          }
        }
        if( prefix ) *(--bufpt) = prefix;               /* Add sign */
        /*
        ** aPrefix is "-x0\000X0", stored reversed because the buffer is
        ** being filled backwards:
        **   %X uses prefix 4 ("X0" -> "0X")
        **   %x uses prefix 1 ("x0" -> "0x")
        **   %o uses prefix 2 ("0")
        */
        if( flag_alternateform && infop->prefix ){      /* Add "0" or "0x" */
          const char *pre;
          char x;
          pre = &aPrefix[infop->prefix];
          for(; (x=(*pre))!=0; pre++) *(--bufpt) = x;
        }
        length = (int)(&zOut[nOut-1]-bufpt);
        break;
      case etFLOAT:    /* %f */
      case etEXP:      /* %e */
      case etGENERIC:  /* %g */
        if( bArgList ){
          realvalue = getDoubleArg(pArgList);
        }else{
          realvalue = va_arg(ap,double);
        }
        /* The #ifdef/#else pair below matches the #endif at the end of this
        ** case; the article's listing kept only the #endif.
        ** NOTE(review): restored from SQLite's printf.c - confirm. */
#ifdef SQLITE_OMIT_FLOATING_POINT
        length = 0;
#else
        if( precision<0 ) precision = 6;         /* Set default precision */
        if( realvalue<0.0 ){
          realvalue = -realvalue;
          prefix = '-';
        }else{
          prefix = flag_prefix;
        }
        /* For %g the precision counts all significant digits.  Example from
        ** the article: 100.123456789 with precision 5.  The precision is
        ** reduced to 4 here; the decimal exponent is 2, so further down
        ** precision-exp = 2 digits follow the decimal point: "100.12". */
        if( xtype==etGENERIC && precision>0 ) precision--;
        testcase( precision>0xfff );
        /* rounder becomes 0.5 * 10^(-precision): half a unit in the last
        ** digit that will be printed */
        for(idx=precision&0xfff, rounder=0.5; idx>0; idx--, rounder*=0.1){}
        /* %f rounds the original value directly */
        if( xtype==etFLOAT ) realvalue += rounder;
        /* Normalize realvalue to within 10.0 > realvalue >= 1.0 */
        exp = 0;
        /* A NaN (not-a-number) value is rendered as the literal "NaN" */
        if( sqlite3IsNaN((double)realvalue) ){
          bufpt = "NaN";
          length = 3;
          break;
        }
        /* Bring realvalue into scientific notation, tracking the decimal
        ** exponent in exp */
        if( realvalue>0.0 ){
          LONGDOUBLE_TYPE scale = 1.0;
          while( realvalue>=1e100*scale && exp<=350 ){ scale *= 1e100;exp+=100;}
          while( realvalue>=1e10*scale && exp<=350 ){ scale *= 1e10; exp+=10; }
          while( realvalue>=10.0*scale && exp<=350 ){ scale *= 10.0; exp++; }
          realvalue /= scale;
          while( realvalue<1e-8 ){ realvalue *= 1e8; exp-=8; }
          while( realvalue<1.0 ){ realvalue *= 10.0; exp--; }
          if( exp>350 ){
            /* Too large to be a finite double: print as infinity */
            bufpt = buf;
            buf[0] = prefix;
            memcpy(buf+(prefix!=0),"Inf",4);
            length = 3+(prefix!=0);
            break;
          }
        }
        bufpt = buf;
        /*
        ** If the field type is etGENERIC, then convert to either etEXP
        ** or etFLOAT, as appropriate.
        */
        if( xtype!=etFLOAT ){
          /* %e and %g round the normalized (scientific-notation) value */
          realvalue += rounder;
          /* Rounding may carry into a new leading digit */
          if( realvalue>=10.0 ){ realvalue *= 0.1; exp++; }
        }
        if( xtype==etGENERIC ){
          /* %g strips trailing zeros unless '#' was given */
          flag_rtz = !flag_alternateform;
          if( exp<-4 || exp>precision ){
            /* Very small or very large magnitudes use exponent notation */
            xtype = etEXP;
          }else{
            /* Digits before the decimal point use up part of the total
            ** precision; the remainder goes after the point */
            precision = precision - exp;
            xtype = etFLOAT;
          }
        }else{
          /* For %f and %e trailing zeros are stripped only with '!' */
          flag_rtz = flag_altform2;
        }
        /* e2 is the decimal exponent of the first digit to print: exactly
        ** one digit precedes the point in exponent notation */
        if( xtype==etEXP ){
          e2 = 0;
        }else{
          e2 = exp;
        }
        if( MAX(e2,0)+(i64)precision+(i64)width > etBUFSIZE - 15 ){
          /* The stack buffer may be too small, so allocate one.  This is a
          ** deliberately generous upper bound on the rendered length. */
          bufpt = zExtra
              = sqlite3Malloc( MAX(e2,0)+(i64)precision+(i64)width+15 );
          if( bufpt==0 ){
            setStrAccumError(pAccum, STRACCUM_NOMEM);
            return;
          }
        }
        zOut = bufpt;
        /* Budget of significant digits to extract: 16 normally, 26 with
        ** '!'.  NOTE(review): per the article, once the budget is used up
        ** et_getdigit() yields '0' - confirm against its definition. */
        nsd = 16 + flag_altform2*10;
        /* Show the decimal point if digits follow it, or if '#'/'!' is set */
        flag_dp = (precision>0 ?1:0) | flag_alternateform | flag_altform2;
        /* The sign in front of the number */
        if( prefix ){
          *(bufpt++) = prefix;
        }
        /* Digits prior to the decimal point */
        if( e2<0 ){
          /* The integer part is zero, i.e. the number looks like 0.x */
          *(bufpt++) = '0';
        }else{
          /* Extract the integer-part digits one at a time */
          for(; e2>=0; e2--){
            /* Example:
            **     input:     *val = 3.14159
            **     output:    *val = 1.4159    function return = '3'
            */
            *(bufpt++) = et_getdigit(&realvalue,&nsd);
          }
        }
        /* The decimal point */
        if( flag_dp ){
          *(bufpt++) = '.';
        }
        /* "0" digits after the decimal point but before the first
        ** significant digit of the number */
        for(e2++; e2<0; precision--, e2++){
          /* e.g. 0.00005: emit the leading zeros, each of which consumes
          ** one position of the precision */
          assert( precision>0 );
          *(bufpt++) = '0';
        }
        /* Significant digits after the decimal point */
        while( (precision--)>0 ){
          *(bufpt++) = et_getdigit(&realvalue,&nsd);
        }
        /* Remove trailing zeros and the "." if no digits follow the "." */
        if( flag_rtz && flag_dp ){
          while( bufpt[-1]=='0' ) *(--bufpt) = 0;
          assert( bufpt>zOut );
          if( bufpt[-1]=='.' ){
            if( flag_altform2 ){
              *(bufpt++) = '0';
            }else{
              *(--bufpt) = 0;
            }
          }
        }
        /* Add the "eNNN" suffix */
        if( xtype==etEXP ){
          /* aDigits[infop->charset] is 'e' or 'E' for these conversions */
          *(bufpt++) = aDigits[infop->charset];
          if( exp<0 ){
            *(bufpt++) = '-'; exp = -exp;
          }else{
            *(bufpt++) = '+';
          }
          if( exp>=100 ){
            *(bufpt++) = (char)((exp/100)+'0');        /* 100's digit */
            exp %= 100;
          }
          *(bufpt++) = (char)(exp/10+'0');             /* 10's digit */
          *(bufpt++) = (char)(exp%10+'0');             /* 1's digit */
        }
        *bufpt = 0;
        /* The converted number is in buf[] and zero terminated. Output it.
        ** Note that the number is in the usual order, not reversed as with
        ** integer conversions. */
        length = (int)(bufpt-zOut);
        bufpt = zOut;
        /* Special case:  Add leading zeros if the flag_zeropad flag is
        ** set and we are not left justified */
        if( flag_zeropad && !flag_leftjustify && length < width){
          /* "%0*.*f" pads with zeros on the left when the text is narrower
          ** than the field; "%-0*.*f" is left-justified and is not padded */
          int i;
          int nPad = width - length;
          /* Shift the text right by nPad to open a gap at the front */
          for(i=width; i>=nPad; i--){
            bufpt[i] = bufpt[i-nPad];
          }
          i = prefix!=0;             /* zeros go after the sign, if any */
          while( nPad-- ) bufpt[i++] = '0';
          length = width;
        }
#endif /* !defined(SQLITE_OMIT_FLOATING_POINT) */
        break;
      /* Elided by the article: the remaining conversion types */
……
    } /* End switch over the format type */
    /*
    ** The text of the conversion is pointed to by "bufpt" and is
    ** "length" characters long.  The field width is "width".  Do
    ** the output.  If the text is narrower than the field, the gap is
    ** filled with spaces on the left or on the right (with '-').
    */
    width -= length;
    if( width>0 ){
      if( !flag_leftjustify ) sqlite3AppendChar(pAccum, width, ' ');
      sqlite3StrAccumAppend(pAccum, bufpt, length);
      if( flag_leftjustify ) sqlite3AppendChar(pAccum, width, ' ');
    }else{
      sqlite3StrAccumAppend(pAccum, bufpt, length);
    }
    /* Release the temporary buffer, if this directive allocated one */
    if( zExtra ){
      sqlite3DbFree(pAccum->db, zExtra);
      zExtra = 0;
    }
  } /* End for loop over the format string */
} /* End of function */