scanf源码分析

7 篇文章 1 订阅

本文分析的是glibc2.31中的scanf相关源码,目的不是研究scanf的算法,而是说明scanf在IO attack中的利用方法,属于CTF的范畴

scanf.c

其实就是对__vfscanf_internal的封装

/* Read formatted input from stdin.

   FORMAT is the conversion specification and the variadic arguments are
   the destinations for the converted values.  The return value is
   whatever __vfscanf_internal returns.  */
int
__scanf (const char *format, ...)
{
  va_list arg;
  int done;

  va_start (arg, format);
  /* Thin wrapper: all the work is delegated to __vfscanf_internal on
     stdin; arg walks the variadic arguments and mode_flags is 0.  */
  done = __vfscanf_internal (stdin, format, arg, 0);
  va_end (arg);

  return done;
}
ldbl_strong_alias (__scanf, scanf)

vfscanf-internal.c

  • 函数原型
int __vfscanf_internal(FILE *s, const char *format, va_list argptr, unsigned int mode_flags)
  • 原理:
    • 在vfscanf中有两个流:一个是指导读入格式的字符串流format,一个是被读入的流s
    • vfscanf先逐字符解析format,解析出一个要读入的格式后,就会调用inchar()从流s中逐字符读入
    • 例如format="%d",输入"12\n"
      • vfscanf解析出%d后知道要从流s中读入一个十进制整数,于是调用inchar()得到字符'1',继续调用inchar()得到'2',再次调用inchar()得到字符'\n',vfscanf就知道一个整数已经结束了,就把'\n'放回流s中,然后把读入的"12"转化为int 12
      • 其余的同理
  • 总结:vfscanf与流的接口如下,搞CTF不需要了解这个解析算法是怎么实现的,因此下面专注于流相关调用过程
// Push character c back onto stream s via _IO_sputbackc and decrement the
// read_in counter. Does nothing when c is EOF (the || short-circuits).
#define ungetc(c, s) ((void)((int)c == EOF || (--read_in, \
											   _IO_sputbackc(s, (unsigned char)c))))
// Same as ungetc but without the EOF check: always decrements read_in and
// pushes c back onto stream s.
#define ungetc_not_eof(c, s) ((void)(--read_in, \
									 _IO_sputbackc(s, (unsigned char)c)))
// Fetch the next character from stream s into c and yield it.
// - If c is already EOF: restore errno from inchar_errno and yield EOF
//   without touching the stream again.
// - Otherwise: c = _IO_getc_unlocked(s); on success ++read_in, on EOF save
//   the current errno into inchar_errno.
#define inchar() (c == EOF ? ((errno = inchar_errno), EOF)                  \
						   : ((c = _IO_getc_unlocked(s)),                   \
							  (void)(c != EOF                               \
										 ? ++read_in                        \
										 : (size_t)(inchar_errno = errno)), \
							  c))
  • inchar()函数通过_IO_getc_unlocked(s)从流中抽取字符

_IO_getc_unlocked(s)

/* Read one character from _fp; simply expands to __getc_unlocked_body.  */
#define _IO_getc_unlocked(_fp) __getc_unlocked_body (_fp)

__getc_unlocked_body()

  • 当流的read缓冲区不足时,就会调用__uflow(_fp)
/* Fast path: if the read pointer has not reached the read end, yield the
   byte under _IO_read_ptr (as unsigned char) and advance the pointer.
   Otherwise (read area exhausted) fall back to __uflow (_fp).  */
#define __getc_unlocked_body(_fp)					\
  (__glibc_unlikely ((_fp)->_IO_read_ptr >= (_fp)->_IO_read_end)	\
   ? __uflow (_fp) : *(unsigned char *) (_fp)->_IO_read_ptr++)

__uflow()

  • 最终会转向stdin的虚表中的下溢函数
    /* Called when the read area of fp is exhausted.  Returns the next
       character (as unsigned char, consuming it) or EOF.  After trying the
       get area and the backup area, it dispatches to _IO_UFLOW(fp).  */
    int __uflow(FILE *fp)// stream underflow: the read area is exhausted
{
  /* Bail out unless _IO_fwide(fp, -1) reports -1.
     NOTE(review): presumably this rejects wide-oriented streams -- confirm. */
  if (_IO_vtable_offset(fp) == 0 && _IO_fwide(fp, -1) != -1)
    return EOF;

  if (fp->_mode == 0)
    _IO_fwide(fp, -1);

  if (_IO_in_put_mode(fp))  // if the stream is in put mode, switch it to get (read) mode
    if (_IO_switch_to_get_mode(fp) == EOF)
      return EOF;

  if (fp->_IO_read_ptr < fp->_IO_read_end) // the read area still holds characters: consume one
    return *(unsigned char *)fp->_IO_read_ptr++;

  /* Currently reading from the backup area: switch back to the main get
     area and retry from there.  */
  if (_IO_in_backup(fp))
  {
    _IO_switch_to_main_get_area(fp);
    if (fp->_IO_read_ptr < fp->_IO_read_end)
      return *(unsigned char *)fp->_IO_read_ptr++;
  }

  if (_IO_have_markers(fp))
  {
    if (save_for_backup(fp, fp->_IO_read_end))
      return EOF;
  }
  else if (_IO_have_backup(fp))
    _IO_free_backup_area(fp);

  return _IO_UFLOW(fp); // finally dispatch to the uflow entry of fp's vtable
}
libc_hidden_def(__uflow)

_IO_file_jumps

  • stdin的默认虚表为_IO_file_jumps,uflow表项为_IO_default_uflow()
/* Jump table (vtable) for file streams.  The entries relevant to the scanf
   read path are: uflow -> _IO_default_uflow, underflow -> _IO_file_underflow
   and read -> _IO_file_read.  */
const struct _IO_jump_t _IO_file_jumps libio_vtable =
{
  JUMP_INIT_DUMMY,
  JUMP_INIT(finish, _IO_file_finish),
  JUMP_INIT(overflow, _IO_file_overflow),
  JUMP_INIT(underflow, _IO_file_underflow),   /* refills the read area */
  JUMP_INIT(uflow, _IO_default_uflow),        /* generic uflow, wraps underflow */
  JUMP_INIT(pbackfail, _IO_default_pbackfail),
  JUMP_INIT(xsputn, _IO_file_xsputn),
  JUMP_INIT(xsgetn, _IO_file_xsgetn),
  JUMP_INIT(seekoff, _IO_new_file_seekoff),
  JUMP_INIT(seekpos, _IO_default_seekpos),
  JUMP_INIT(setbuf, _IO_new_file_setbuf),
  JUMP_INIT(sync, _IO_new_file_sync),
  JUMP_INIT(doallocate, _IO_file_doallocate),
  JUMP_INIT(read, _IO_file_read),             /* target of _IO_SYSREAD */
  JUMP_INIT(write, _IO_new_file_write),
  JUMP_INIT(seek, _IO_file_seek),
  JUMP_INIT(close, _IO_file_close),
  JUMP_INIT(stat, _IO_file_stat),
  JUMP_INIT(showmanyc, _IO_default_showmanyc),
  JUMP_INIT(imbue, _IO_default_imbue)
};
libc_hidden_data_def (_IO_file_jumps)

_IO_default_uflow()

  • 其实也就是对虚表中的underflow函数的封装,因此进入_IO_file_underflow()
/* Default uflow: refill through the stream's underflow hook, then consume
   one character.  Returns EOF when the underflow hook reports EOF;
   otherwise returns the byte under the read pointer (as unsigned char)
   and advances the read pointer past it.  */
int _IO_default_uflow(FILE *fp)
{
  if (_IO_UNDERFLOW(fp) == EOF)
    return EOF;

  /* Take the character under the read pointer, then step past it.  */
  unsigned char *cur = (unsigned char *)fp->_IO_read_ptr;
  fp->_IO_read_ptr++;
  return *cur;
}
libc_hidden_def(_IO_default_uflow)

_IO_file_underflow()

  • 这个函数是_IO_new_file_underflow()的别名
  • 这个函数也就是本次旅程的终点了,最终通过系统调用read来读入
/* Refill the read area of fp from the underlying file.

   Returns the first available character WITHOUT consuming it (the read
   pointer is not advanced here), or EOF on end-of-file / error.  The data
   is read by _IO_SYSREAD into [_IO_buf_base, _IO_buf_end).  */
int _IO_new_file_underflow(FILE *fp) // called when a file stream such as stdin underflows
{
  ssize_t count;

  /* C99 requires EOF to be "sticky".  */
  if (fp->_flags & _IO_EOF_SEEN)
    return EOF;

  if (fp->_flags & _IO_NO_READS)  // stream is not readable: flag the error and fail with EBADF
  {
    fp->_flags |= _IO_ERR_SEEN;
    __set_errno(EBADF);
    return EOF;
  }

  if (fp->_IO_read_ptr < fp->_IO_read_end) // read area still has data: peek at it, no refill needed
    return *(unsigned char *)fp->_IO_read_ptr;

  if (fp->_IO_buf_base == NULL) // no buffer yet: allocate one
  {
    /* Maybe we already have a push back pointer.  */
    if (fp->_IO_save_base != NULL)
    {
      free(fp->_IO_save_base);
      fp->_flags &= ~_IO_IN_BACKUP;
    }
    _IO_doallocbuf(fp);
  }

  /* FIXME This can/should be moved to genops ?? */
  if (fp->_flags & (_IO_LINE_BUF | _IO_UNBUFFERED)) // line-buffered or unbuffered input: flush stdout first
  {
    /* We used to flush all line-buffered stream.  This really isn't
	 required by any standard.  My recollection is that
	 traditional Unix systems did this for stdout.  stderr better
	 not be line buffered.  So we do just that here
	 explicitly.  --drepper */
    _IO_acquire_lock(stdout);

    if ((stdout->_flags & (_IO_LINKED | _IO_NO_WRITES | _IO_LINE_BUF)) == (_IO_LINKED | _IO_LINE_BUF))// stdout is linked, line-buffered and writable: flush it
      _IO_OVERFLOW(stdout, EOF);

    _IO_release_lock(stdout);
  }

  _IO_switch_to_get_mode(fp); // NOTE(review): per the original author, put/get switching is not an issue for the three standard streams -- confirm

  /* This is very tricky. We have to adjust those
     pointers before we call _IO_SYSREAD () since
     we may longjump () out while waiting for
     input. Those pointers may be screwed up. H.J.*/

  // Reset both the read area and the write area to empty, anchored at _IO_buf_base
  fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base;
  fp->_IO_read_end = fp->_IO_buf_base;
  fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end = fp->_IO_buf_base;

  // Read up to (_IO_buf_end - _IO_buf_base) bytes into _IO_buf_base.
  // Whoever controls _IO_buf_base / _IO_buf_end controls where the input lands.
  count = _IO_SYSREAD(fp, fp->_IO_buf_base, // the read slot of the file vtable is _IO_file_read
                      fp->_IO_buf_end - fp->_IO_buf_base);

  if (count <= 0) // nothing was read: EOF (count == 0) or error (count < 0)
  {
    if (count == 0)
      fp->_flags |= _IO_EOF_SEEN;
    else
      fp->_flags |= _IO_ERR_SEEN, count = 0;
  }

  fp->_IO_read_end += count;    // expose the freshly read bytes as the read area
  if (count == 0)
  {
    /* If a stream is read to EOF, the calling application may switch active
	 handles.  As a result, our offset cache would no longer be valid, so
	 unset it.  */
    fp->_offset = _IO_pos_BAD;
    return EOF;
  }
  if (fp->_offset != _IO_pos_BAD)
    _IO_pos_adjust(fp->_offset, count);
  return *(unsigned char *)fp->_IO_read_ptr;
}
libc_hidden_ver(_IO_new_file_underflow, _IO_file_underflow)

总结

  • 要想通过scanf实现任意写,需要控制_IO_buf_base,并且read缓冲区为空,这样才会触发_IO_new_file_underflow()函数,进而调用read(fd, _IO_buf_base, len)
  • 如果scanf从流中读入失败,那么它会把读入的字符放回流中;当输入不合法时,循环调用scanf会反复读到同一个非法字符而陷入死循环,读不出数据。因此如果题目需要通过scanf实现任意写,那么通常会在scanf后面加上一个getchar()来帮助我们清空read缓冲区
  • 5
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值