C语言printf函数实现解读

WKYFP

已于 2022-11-10 16:21:13 修改

阅读量3.7k

点赞数 3

分类专栏： C语言文章标签： c语言开发语言后端开源

于 2022-11-10 16:16:01 首次发布

本文链接：https://blog.csdn.net/WKYFP/article/details/127768447

版权

C语言专栏收录该内容

1 篇文章 1 订阅

订阅专栏

C语言printf函数实现解读

1 源码下载

GNU官网链接
在这里插入图片描述

在这里插入图片描述

按照这些步骤可以顺利的下载gnu的c语言源码，接下去我们看看printf函数。

2 printf函数源码

用vscode打开下载的源码，找到printf函数。
（ printf 函数路径为：glibc-2.36\stdio-common\printf.c )
在这里插入图片描述
源码如下：

/* Write formatted output to stdout: gathers the variadic arguments that
   follow FORMAT into a va_list and forwards them to __vfprintf_internal
   with mode flags 0.  Returns whatever __vfprintf_internal returns.  */
int
__printf (const char *format, ...)
{
  va_list arg;
  int done;

  /* ARG must be started before and ended after the single forwarding call.  */
  va_start (arg, format);
  done = __vfprintf_internal (stdout, format, arg, 0);
  va_end (arg);

  return done;
}

#undef _IO_printf
/* NOTE(review): presumably these make __printf available under the names
   printf and _IO_printf; ldbl_strong_alias is defined elsewhere -- confirm.  */
ldbl_strong_alias (__printf, printf);
ldbl_strong_alias (__printf, _IO_printf);

可以看见主要是四个东西：va_list va_start va_end __vfprintf_internal
前面三个先不看，后面重点介绍，先看看能不能看懂__vfprintf_internal 这个函数的实现。

在这里插入图片描述

在 glibc-2.36\stdio-common\vfprintf-internal.c 里可以看到这个函数实际上就是vfprintf函数，也在相同的.c文件中

在这里插入图片描述

源码如下：

/* Core of the printf family as quoted here: expands FORMAT with the
   arguments in AP, writes the result to stream S and returns `done'.
   `done' is set to -1 on the error paths below, and EOF is returned if
   the stream orientation check fails.  MODE_FLAGS is passed through to
   buffered_vfprintf and printf_positional and is tested for
   PRINTF_LDBL_IS_DBL when a floating-point conversion is formatted.

   Outline: unbuffered streams are redirected to buffered_vfprintf; the
   literal text before the first specifier is written with outstring;
   then each specifier is parsed by dispatching on successive format
   characters through the JUMP/LABEL macros (defined elsewhere).
   Positional `$' arguments, registered printf handlers and unknown
   specifiers leave this loop for printf_positional.  */
int
vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags)
{
  /* The character used as thousands separator.  */
  THOUSANDS_SEP_T thousands_sep = 0;

  /* The string describing the size of groups of digits.  */
  const char *grouping;

  /* Place to accumulate the result.  */
  int done;

  /* Current character in format string.  */
  const UCHAR_T *f;

  /* End of leading constant string.  */
  const UCHAR_T *lead_str_end;

  /* Points to next format specifier.  */
  const UCHAR_T *end_of_spec;

  /* Buffer intermediate results.  */
  CHAR_T work_buffer[WORK_BUFFER_SIZE];
  CHAR_T *workend;

  /* We have to save the original argument pointer.  */
  va_list ap_save;

  /* Count number of specifiers we already processed.  */
  int nspecs_done;

  /* For the %m format we may need the current `errno' value.  */
  int save_errno = errno;

  /* 1 if format is in read-only memory, -1 if it is in writable memory,
     0 if unknown.  */
  int readonly_format = 0;

  /* Orient the stream.  */
#ifdef ORIENT
  ORIENT;
#endif

  /* Sanity check of arguments.  */
  ARGCHECK (s, format);

#ifdef ORIENT
  /* Check for correct orientation.  */
  if (_IO_vtable_offset (s) == 0
      && _IO_fwide (s, sizeof (CHAR_T) == 1 ? -1 : 1)
      != (sizeof (CHAR_T) == 1 ? -1 : 1))
    /* The stream is already oriented otherwise.  */
    return EOF;
#endif

  if (UNBUFFERED_P (s))
    /* Use a helper function which will allocate a local temporary buffer
       for the stream and then call us again.  */
    return buffered_vfprintf (s, format, ap, mode_flags);

  /* Initialize local variables.  */
  done = 0;
  /* (const char *) -1 is a sentinel: the locale grouping data has not been
     looked up yet.  The flag_quote handler below tests for it and loads
     the data on first use.  */
  grouping = (const char *) -1;
#ifdef __va_copy
  /* This macro will be available soon in gcc's <stdarg.h>.  We need it
     since on some systems `va_list' is not an integral type.  */
  __va_copy (ap_save, ap);
#else
  ap_save = ap;
#endif
  nspecs_done = 0;

#ifdef COMPILE_WPRINTF
  /* Find the first format specifier.  */
  f = lead_str_end = __find_specwc ((const UCHAR_T *) format);
#else
  /* Find the first format specifier.  */
  f = lead_str_end = __find_specmb ((const UCHAR_T *) format);
#endif

  /* Lock stream.  */
  _IO_cleanup_region_start ((void (*) (void *)) &_IO_funlockfile, s);
  _IO_flockfile (s);

  /* Write the literal text before the first format.  */
  outstring ((const UCHAR_T *) format,
	     lead_str_end - (const UCHAR_T *) format);

  /* If we only have to print a simple string, return now.  */
  if (*f == L_('\0'))
    goto all_done;

  /* Use the slow path in case any printf handler is registered.  */
  if (__glibc_unlikely (__printf_function_table != NULL
			|| __printf_modifier_table != NULL
			|| __printf_va_arg_table != NULL))
    goto do_positional;

  /* Process whole format string.  */
  do
    {
      STEP0_3_TABLE;
      STEP4_TABLE;

      int is_negative;	/* Flag for negative number.  */
      union
      {
	unsigned long long int longlong;
	unsigned long int word;
      } number;
      int base;
      union printf_arg the_arg;
      CHAR_T *string;	/* Pointer to argument string.  */
      int alt = 0;	/* Alternate format.  */
      int space = 0;	/* Use space prefix if no sign is needed.  */
      int left = 0;	/* Left-justify output.  */
      int showsign = 0;	/* Always begin with plus or minus sign.  */
      int group = 0;	/* Print numbers according grouping rules.  */
      /* Argument is long double/long long int.  Only used if
	 double/long double or long int/long long int are distinct.  */
      int is_long_double __attribute__ ((unused)) = 0;
      int is_short = 0;	/* Argument is short int.  */
      int is_long = 0;	/* Argument is long int.  */
      int is_char = 0;	/* Argument is promoted (unsigned) char.  */
      int width = 0;	/* Width of output; 0 means none specified.  */
      int prec = -1;	/* Precision of output; -1 means none specified.  */
      /* This flag is set by the 'I' modifier and selects the use of the
	 `outdigits' as determined by the current locale.  */
      int use_outdigits = 0;
      UCHAR_T pad = L_(' ');/* Padding character.  */
      CHAR_T spec;

      workend = work_buffer + WORK_BUFFER_SIZE;

      /* Get current character in format string.  */
      JUMP (*++f, step0_jumps);

      /* ' ' flag.  */
    LABEL (flag_space):
      space = 1;
      JUMP (*++f, step0_jumps);

      /* '+' flag.  */
    LABEL (flag_plus):
      showsign = 1;
      JUMP (*++f, step0_jumps);

      /* The '-' flag.  */
    LABEL (flag_minus):
      left = 1;
      pad = L_(' ');
      JUMP (*++f, step0_jumps);

      /* The '#' flag.  */
    LABEL (flag_hash):
      alt = 1;
      JUMP (*++f, step0_jumps);

      /* The '0' flag.  */
    LABEL (flag_zero):
      if (!left)
	pad = L_('0');
      JUMP (*++f, step0_jumps);

      /* The '\'' flag.  */
    LABEL (flag_quote):
      group = 1;

      if (grouping == (const char *) -1)
	{
#ifdef COMPILE_WPRINTF
	  thousands_sep = _NL_CURRENT_WORD (LC_NUMERIC,
					    _NL_NUMERIC_THOUSANDS_SEP_WC);
#else
	  thousands_sep = _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP);
#endif

	  grouping = _NL_CURRENT (LC_NUMERIC, GROUPING);
	  if (*grouping == '\0' || *grouping == CHAR_MAX
#ifdef COMPILE_WPRINTF
	      || thousands_sep == L'\0'
#else
	      || *thousands_sep == '\0'
#endif
	      )
	    grouping = NULL;
	}
      JUMP (*++f, step0_jumps);

    LABEL (flag_i18n):
      use_outdigits = 1;
      JUMP (*++f, step0_jumps);

      /* Get width from argument.  */
    LABEL (width_asterics):
      {
	const UCHAR_T *tmp;	/* Temporary value.  */

	tmp = ++f;
	if (ISDIGIT (*tmp))
	  {
	    int pos = read_int (&tmp);

	    if (pos == -1)
	      {
		__set_errno (EOVERFLOW);
		done = -1;
		goto all_done;
	      }

	    if (pos && *tmp == L_('$'))
	      /* The width comes from a positional parameter.  */
	      goto do_positional;
	  }
	width = va_arg (ap, int);

	/* Negative width means left justified.  */
	if (width < 0)
	  {
	    width = -width;
	    pad = L_(' ');
	    left = 1;
	  }
      }
      JUMP (*f, step1_jumps);

      /* Given width in format string.  */
    LABEL (width):
      width = read_int (&f);

      if (__glibc_unlikely (width == -1))
	{
	  __set_errno (EOVERFLOW);
	  done = -1;
	  goto all_done;
	}

      if (*f == L_('$'))
	/* Oh, oh.  The argument comes from a positional parameter.  */
	goto do_positional;
      JUMP (*f, step1_jumps);

    LABEL (precision):
      ++f;
      if (*f == L_('*'))
	{
	  const UCHAR_T *tmp;	/* Temporary value.  */

	  tmp = ++f;
	  if (ISDIGIT (*tmp))
	    {
	      int pos = read_int (&tmp);

	      if (pos == -1)
		{
		  __set_errno (EOVERFLOW);
		  done = -1;
		  goto all_done;
		}

	      if (pos && *tmp == L_('$'))
		/* The precision comes from a positional parameter.  */
		goto do_positional;
	    }
	  prec = va_arg (ap, int);

	  /* If the precision is negative the precision is omitted.  */
	  if (prec < 0)
	    prec = -1;
	}
      else if (ISDIGIT (*f))
	{
	  prec = read_int (&f);

	  /* The precision was specified in this case as an extremely
	     large positive value.  */
	  if (prec == -1)
	    {
	      __set_errno (EOVERFLOW);
	      done = -1;
	      goto all_done;
	    }
	}
      else
	/* A `.' followed by neither `*' nor a digit selects precision 0.  */
	prec = 0;
      JUMP (*f, step2_jumps);

      /* Process 'h' modifier.  There might another 'h' following.  */
    LABEL (mod_half):
      is_short = 1;
      JUMP (*++f, step3a_jumps);

      /* Process 'hh' modifier.  */
    LABEL (mod_halfhalf):
      is_short = 0;
      is_char = 1;
      JUMP (*++f, step4_jumps);

      /* Process 'l' modifier.  There might another 'l' following.  */
    LABEL (mod_long):
      is_long = 1;
      JUMP (*++f, step3b_jumps);

      /* Process 'L', 'q', or 'll' modifier.  No other modifier is
	 allowed to follow.  */
    LABEL (mod_longlong):
      is_long_double = 1;
      is_long = 1;
      JUMP (*++f, step4_jumps);

    LABEL (mod_size_t):
      is_long_double = sizeof (size_t) > sizeof (unsigned long int);
      is_long = sizeof (size_t) > sizeof (unsigned int);
      JUMP (*++f, step4_jumps);

    LABEL (mod_ptrdiff_t):
      is_long_double = sizeof (ptrdiff_t) > sizeof (unsigned long int);
      is_long = sizeof (ptrdiff_t) > sizeof (unsigned int);
      JUMP (*++f, step4_jumps);

    LABEL (mod_intmax_t):
      is_long_double = sizeof (intmax_t) > sizeof (unsigned long int);
      is_long = sizeof (intmax_t) > sizeof (unsigned int);
      JUMP (*++f, step4_jumps);

      /* Process current format.  */
      while (1)
	{
/* Each process_arg_* hook expands to a plain va_arg read from AP for the
   duration of the include below.  NOTE(review): presumably
   vfprintf-process-arg.c uses these hooks to fetch the argument for each
   conversion -- that file is not shown here, confirm.  */
#define process_arg_int() va_arg (ap, int)
#define process_arg_long_int() va_arg (ap, long int)
#define process_arg_long_long_int() va_arg (ap, long long int)
#define process_arg_pointer() va_arg (ap, void *)
#define process_arg_string() va_arg (ap, const char *)
#define process_arg_unsigned_int() va_arg (ap, unsigned int)
#define process_arg_unsigned_long_int() va_arg (ap, unsigned long int)
#define process_arg_unsigned_long_long_int() va_arg (ap, unsigned long long int)
#define process_arg_wchar_t() va_arg (ap, wchar_t)
#define process_arg_wstring() va_arg (ap, const wchar_t *)
#include "vfprintf-process-arg.c"
#undef process_arg_int
#undef process_arg_long_int
#undef process_arg_long_long_int
#undef process_arg_pointer
#undef process_arg_string
#undef process_arg_unsigned_int
#undef process_arg_unsigned_long_int
#undef process_arg_unsigned_long_long_int
#undef process_arg_wchar_t
#undef process_arg_wstring

	LABEL (form_float):
	LABEL (form_floathex):
	  {
	    if (__glibc_unlikely ((mode_flags & PRINTF_LDBL_IS_DBL) != 0))
	      is_long_double = 0;

	    struct printf_info info =
	      {
		.prec = prec,
		.width = width,
		.spec = spec,
		.is_long_double = is_long_double,
		.is_short = is_short,
		.is_long = is_long,
		.alt = alt,
		.space = space,
		.left = left,
		.showsign = showsign,
		.group = group,
		.pad = pad,
		.extra = 0,
		.i18n = use_outdigits,
		.wide = sizeof (CHAR_T) != 1,
		.is_binary128 = 0
	      };

	    PARSE_FLOAT_VA_ARG_EXTENDED (info);
	    const void *ptr = &the_arg;

	    int function_done = __printf_fp_spec (s, &info, &ptr);
	    if (function_done < 0)
	      {
		done = -1;
		goto all_done;
	      }
	    done_add (function_done);
	  }
	  break;

	LABEL (form_unknown):
	  if (spec == L_('\0'))
	    {
	      /* The format string ended before the specifier is complete.  */
	      __set_errno (EINVAL);
	      done = -1;
	      goto all_done;
	    }

	  /* If we are in the fast loop force entering the complicated
	     one.  */
	  goto do_positional;
	}

      /* The format is correctly handled.  */
      ++nspecs_done;

      /* Look for next format specifier.  */
#ifdef COMPILE_WPRINTF
      f = __find_specwc ((end_of_spec = ++f));
#else
      f = __find_specmb ((end_of_spec = ++f));
#endif

      /* Write the following constant string.  */
      outstring (end_of_spec, f - end_of_spec);
    }
  while (*f != L_('\0'));

  /* Unlock stream and return.  */
  goto all_done;

  /* Hand off processing for positional parameters.  */
do_positional:
  done = printf_positional (s, format, readonly_format, ap, &ap_save,
			    done, nspecs_done, lead_str_end, work_buffer,
			    save_errno, grouping, thousands_sep, mode_flags);

 all_done:
  /* Unlock the stream.  */
  _IO_funlockfile (s);
  _IO_cleanup_region_end (0);

  return done;
}

现在的水平看懂确实有一定的难度，于是就不看了。

3 几个宏及解读

前面提到了有几个其他的东西，现在我们来看看是什么

va_list

在vs2017下的定义是这样的：（\Community\VC\Tools\MSVC\14.16.27023\include\vadefs.h）


/* Define va_list exactly once; _VA_LIST_DEFINED records that it exists. */
#ifndef _VA_LIST_DEFINED
    #define _VA_LIST_DEFINED
    #ifdef _M_CEE_PURE
        /* NOTE(review): _M_CEE_PURE presumably marks a pure managed (C++/CLI) build -- confirm. */
        typedef System::ArgIterator va_list;
    #else
        /* Every other build: va_list is a plain character pointer. */
        typedef char* va_list;
    #endif
#endif

可以看出大部分情况下他就是 char * 类型，再看看百度的解释
在这里插入图片描述

va_start

在这里插入图片描述
这是 vs2017下x64的定义，看不了他的实现，那么就总结一下其他资料的解释。

这个宏的作用是：得到一个指向第一个可变参数的指针，也就是“”包裹的字符串后面的第一个参数。

看看这个宏的声明：

void va_start(va_list ap, last_arg);

ap: 这个参数的类型是之前提过的va_list,也就是那个指向可变参数列表的指针。
last_arg: 是最后一个固定参数，在printf函数中就是那个“”包裹的字符串。
在介绍他的实现之前，我们需要先补充三个知识：

函数传参进栈的顺序
栈区在计算机内部的地址情况
_INTSIZEOF 宏

函数参数进栈顺序

先说第一点，函数参数进栈的顺序
从右往左，依次进栈

  func(int a, int b, int c, int d);

如果是这个函数的话，那么应该是d-> c -> b -> a 这是要了解的第一点。

  void func(char *fmt, ...);

fmt肯定是在这一系列参数中最低的地址部分。

栈区在计算机内部的地址情况

来看这张经典的图：

在这里插入图片描述

栈底是高地址，栈顶是低地址，也就是说先进栈的会是高地址。之前那个例子中，地址从高到低依次也是 d ， c， b， a。
在可变参数的函数中，大概就是这种情况：

高内存地址处
|——————————————————|
| 最后一个可变参数 |
|——————————————————|
| ……               |
|——————————————————|
| 第N个可变参数    | <- va_arg(arg_ptr,int)后arg_ptr所指的地方，即第N个可变参数的地址
|——————————————————|
| ……               |
|——————————————————|
| 第一个可变参数   | <- va_start(arg_ptr,start)后arg_ptr所指的地方，即第一个可变参数的地址
|——————————————————|
| 最后一个固定参数 | <- start的起始地址
|——————————————————|
| ……               |
|——————————————————|
低内存地址处

_INTSIZEOF 宏

宏的定义：

/* sizeof(n) rounded up to the next multiple of sizeof(int): add sizeof(int)-1,
   then clear the low bits with the mask.  The mask form is only correct
   when sizeof(int) is a power of two. */
#define _INTSIZEOF(n)  ((sizeof(n)+sizeof(int)-1)&~(sizeof(int) - 1) )

这个宏的作用是把sizeof(n) 向上取整作为 sizeof(int)的整数倍，用以在内存中对齐。
看过实现之后我觉得这个宏写得相当精彩。

对于两个正整数 x, n 总存在整数 q, r 使得

x = nq + r, 其中 0<= r <n //最小非负剩余

q, r 是唯一确定的。q = [x/n], r = x - n[x/n]. 这个是带余除法的一个简单形式。在 c 语言中， q, r 容易计算出来： q = x/n, r = x % n.

所谓把 x 按 n 对齐指的是：若 r=0, 取 qn, 若 r>0, 取 (q+1)n. 这也相当于把 x 表示为：

x = nq + r’, 其中 -n < r’ <=0 //最大非正剩余

nq 是我们所求。关键是如何用 c 语言计算它。由于我们能处理标准的带余除法，所以可以把这个式子转换成一个标准的带余除法，然后加以处理：

x+n = qn + (n+r’)，其中 0<n+r’<=n //最大正剩余

x+n-1 = qn + (n+r’-1), 其中 0<= n+r’-1 <n //最小非负剩余

所以 qn = [(x+n-1)/n]n. 用 c 语言计算就是：

((x+n-1)/n)*n

若 n 是 2 的方幂, 比如 2^m，则除为右移 m 位，乘为左移 m 位。所以把 x+n-1 的最低 m 个二进制位清 0就可以了。得到：

(x+n-1) & (~(n-1))

先试着理解上面的，如果上面那些步骤没看懂没关系。我们这样想，要把一个整数表达成另一个小的整数的整数倍，那么这个整数本身肯定只有两种情况：1，他可以被这个小整数整除，也就是正好在小整数划分区间的端点上。2，不可被整除，那就是落在某个区间上。我们要做的就是找到这个数的区间端点。
在这里插入图片描述
第一种情况： x正好是某个区间的端点，那么x+n-1落在他本身的区间，c语言中的/是向下取整的，除以n刚好就是某个端点值。
第二种情况，x在某个区间中，x+n必然在他下一个区间上。那么x+n-1，又有两种情况，一种是和x+n在同一个区间（x的下一个区间）上，第二种是在x下一个区间的左端点上，不可能和x在同一个区间。那么不管是哪一种情况x+n-1 进行c语言的向下整除操作后都会落在x的右端点上，就达到了向上取整的目的。

最后解释一下(x+n-1) & (~(n-1))的含义。要先执行 x+n-1 除以 n、再乘以 n 的操作，而我们有这个结论：若 n 是 2 的方幂，比如 2^m，则除为右移 m 位，乘为左移 m 位，先右移再左移的效果就是把 x+n-1 的最低 m 个二进制位清 0。而 n 若为 2^m，二进制下就是 1 后接 m 个 0，于是 n-1 就是 m 个 1，对 n-1 取反后最低 m 位全是 0、其余各位全是 1，再与 x+n-1 进行 & 操作，正好把最低 m 位清 0、其余位保持不变。

回到va_start宏：

/* Points ap just past the fixed parameter v: the address of v plus the
   int-aligned size of v. */
#define va_start(ap,v)( ap = (va_list)&v + _INTSIZEOF(v) )   // yields the address of the first variadic argument

结合前面那张栈的图，我们可以知道，最后一个固定参数的地址在第一个可变参数的地址下方，在给出固定参数的地址后，加上固定参数本身占用的内存后，得到了第一个可变参数的起始地址。

注意：宏va_start是对参数的地址进行操作的，要求参数地址必须是有效的。一些地址无效的类型不能当作固定参数类型。比如：寄存器类型，它的地址不是有效的内存地址值；数组和函数也不允许，他们的长度是个问题。因此，这些类型是不能作为va函数的参数的。

va_arg

先看宏的定义：

/* Advances ap by the int-aligned size of t, then subtracts the same amount
   again to read, as type t, the argument ap pointed at before the call. */
#define va_arg(ap,t) ( *(t *)((ap += _INTSIZEOF(t)) - _INTSIZEOF(t)) )

这个宏主要做了两件事情：
1、指针ap指向下一个参数的地址
2、强制类型转换后得到用户所指定的值

我们可以拆开来看这句话
将( *(t *)((ap += _INTSIZEOF(t)) - _INTSIZEOF(t)) ) /* 指针ap指向下一个参数的地址 */ 拆成：

1． ap += _INTSIZEOF(t)；            // 当前，ap已经指向下一个参数了

2． return *(t *)( ap - _INTSIZEOF(t))
/* ap减去当前参数的大小得到当前参数的地址，再强制类型转换后返回它的值 */

用这个宏就可以进行可变参数的遍历操作，达到智能输出的效果了。

va_end

   /* Resets ap to a null pointer. */
   #define va_end(ap) ( ap = (va_list)0 )

这个宏很简单，就是将指针置空，而这个空间也不是在堆上的，也不用free了。

x86平台定义为ap=(char*)0;使ap不再指向堆栈,而是跟NULL一样.有些直接定义为((void*)0),这样编译器不会为va_end产生代码,例如gcc在linux的x86平台就是这样定义的.

4 自己实现的可变参数函数

#include<stdio.h>
#include<stdarg.h>
/* Writes a NUL-terminated string to stdout one character at a time. */
static void put_string(char const *s) {
  for ( ; *s != '\0'; s++) {
    putchar(*s);
  }
}

/* Writes value in decimal to stdout; zero is written as a single '0'. */
static void put_unsigned(unsigned long long value) {
  char digits[20];  /* 2^64 - 1 has 20 decimal digits */
  int count = 0;

  /* Digits come out least-significant first, so buffer and replay them. */
  do {
    digits[count++] = (char)('0' + value % 10);
    value /= 10;
  } while (value != 0);

  while (count > 0) {
    putchar(digits[--count]);
  }
}

/* Writes a signed int in decimal to stdout. */
static void put_int(int value) {
  unsigned int magnitude = (unsigned int)value;

  if (value < 0) {
    putchar('-');
    /* Negate in unsigned arithmetic so INT_MIN does not overflow. */
    magnitude = 0u - magnitude;
  }
  put_unsigned(magnitude);
}

/*
 * Writes value with exactly six fractional digits. Digits beyond the sixth
 * are truncated, not rounded. NaN is written as "nan"; magnitudes of 2^64
 * or more (infinity included) are written as "inf", because their integer
 * part does not fit the 64-bit digit conversion used here.
 */
static void put_fixed6(double value) {
  unsigned long long whole;
  unsigned long micro;
  char frac[6];
  int pos;

  if (value != value) {  /* only NaN compares unequal to itself */
    put_string("nan");
    return;
  }
  if (value < 0) {
    putchar('-');
    value = -value;
  }
  if (value >= 18446744073709551616.0) {  /* 2^64 */
    put_string("inf");
    return;
  }

  whole = (unsigned long long)value;
  micro = (unsigned long)((value - (double)whole) * 1000000);
  if (micro > 999999) {
    /* The product can round up to exactly 1000000; keep six digits. */
    micro = 999999;
  }

  put_unsigned(whole);
  putchar('.');
  for (pos = 5; pos >= 0; pos--) {
    frac[pos] = (char)('0' + micro % 10);
    micro /= 10;
  }
  for (pos = 0; pos < 6; pos++) {
    putchar(frac[pos]);
  }
}

/*
 * Minimal printf-style formatter that writes to stdout through putchar().
 *
 * fmt  format string; a NULL fmt writes nothing.
 * ...  one argument per conversion, in order.
 *
 * Supported conversions:
 *   %d  int in decimal
 *   %s  NUL-terminated string; a NULL pointer is written as "(null)"
 *   %c  single character (passed as int)
 *   %f  double with six fractional digits, truncated (see put_fixed6)
 *   %%  a literal '%'
 * Any other conversion character is echoed together with its '%' and
 * consumes no argument. A lone '%' at the end of fmt is written as is.
 * Flags, width, precision and length modifiers are not supported.
 */
void myprintf(char const* fmt, ...) {
  char const *p;
  char const *text;
  va_list aq;

  if (fmt == NULL) {
    return;
  }

  va_start(aq, fmt);
  for (p = fmt; *p != '\0'; p++) {
    if (*p != '%') {
      putchar(*p);
      continue;
    }

    p++;  /* step onto the conversion character */
    switch (*p) {
      case 'd':
        put_int(va_arg(aq, int));
        break;
      case 's':
        text = va_arg(aq, char *);
        put_string(text != NULL ? text : "(null)");
        break;
      case 'c':
        putchar(va_arg(aq, int));
        break;
      case 'f':
        put_fixed6(va_arg(aq, double));
        break;
      case '%':
        putchar('%');
        break;
      case '\0':
        /* Format ended right after '%': stop here, never step past the
           terminator. */
        putchar('%');
        va_end(aq);
        return;
      default:
        putchar('%');
        putchar(*p);
        break;
    }
  }
  va_end(aq);
}
/* Demo driver: exercises the %d, %s, %c and %f conversions of myprintf in a
   single call and exits with status 0. */
int main(void) {
  myprintf("??%d, %s, >> %c, %d, %s , %f, %f",
           10, "kkk", '&', 999, "aaaaa", 1234.15648777, 0.0012);

  /* Smaller check for a fraction with leading zeros:
     myprintf("??%f", 0.0012); */

  return 0;
}



//   ??10, kkk, >> &, 999, aaaaa , 1234.156487, 0.001200