[总结型]C++stl中常见数据结构逆向分析

最新推荐文章于 2024-10-18 12:27:51 发布

easylyou

最新推荐文章于 2024-10-18 12:27:51 发布

阅读量1.6k

点赞数 1

分类专栏：二进制安全文章标签： c++ stl

本文链接：https://blog.csdn.net/easy_level1/article/details/115771814

版权

二进制安全专栏收录该内容

7 篇文章

订阅专栏

文章目录

前言
std::vector
std::shared_ptr
std::string
- 内存布局

前言

更新时间2021.12.1
以一道题说明其重要性：election_coin 是 2019 年 DEF CON 资格赛（defcon2019q）中的一道题，标签为 reverse & pwn，当年有 8 支队伍做出来，最终分数为 250+，属于中等偏难的题目。虽然标签里有 pwn，但它妥妥的是一道 reverse 题。作者用 C++ 编写了一个网络应用，并且最后给出的二进制是 stripped 过的（去除了符号信息）。
题目资料: github
别人的wp: github

题目资料中有题目的源码，分析源码很容易看出其中的bug：一个任意地址读，一个任意地址写。所以最后的exp脚本非常简单，这题的难点还是在于如何逆向。下面截取题目的源码:

// Excerpt from the election_coin challenge source, as quoted by the article.
// NOTE(review): the two halves look like they come from different functions
// (the first ends in a bare `return;`, the second returns buffer.str()).
auto debug = headers.tryGet<ElectionDebugHeader>();
if (!debug) {
    // Pointer to the last 8 bytes of the auth token's buffer.
    auto addr = reinterpret_cast<const uint64_t*>(auth_token->token_.data() + auth_token->token_.size() - 8);
    std::stringstream buffer;
    
    // Labelled by the article as an arbitrary-address read (addr attacker-controlled).
    // NOTE(review): as quoted, this streams the 8-byte value stored at the token's tail;
    // confirm against the original challenge source.
    buffer << *reinterpret_cast<const uint64_t*>(addr);
    response.send(Http::Code::Multiple_Choices, buffer.str());
    return;
}

std::stringstream buffer;
buffer << "converted " << amount << " dogecoin (" << std::hex << target_value << ")";

// Arbitrary-address write: per the article, both target_addr and target_value are attacker-controlled.
*reinterpret_cast<uint64_t*>(target_addr) = target_value;
return buffer.str();

题目偏reverse，非常困难：在IDA反编译得到的伪C代码中，main函数极其复杂，这就要求选手(黑客)非常熟悉C++编译后的二进制，熟悉C++常见的库与模板函数。

Ubuntu 中 C++ 头文件位于 /usr/include/c++/<版本号>/ 目录下，容器的实现一般在其中的 bits/stl_xxx.h 文件里。
例如我本机的vector实现目录在/usr/include/c++/7.5.0/bits/stl_vector.h
以下是gcc的实现，全部是-O0编译。

std::vector

/usr/include/c++/7.5.0/bits/stl_vector.h

内存布局

// Simplified sketch of the std::vector object layout (not the literal libstdc++ declaration).
class vector {
protected:
	// The three data members are inherited from _Vector_base.
	// sizeof(std::vector<A>) == 24;
	pointer _M_start;// start of the used storage
	pointer _M_finish;// end of the used storage
	pointer _M_end_of_storage; // end of the available (allocated) storage
}

下图来自《stl源码剖析》
在这里插入图片描述

构造与析构

// Constructor chain (decompiler output with symbols): vector() -> _Vector_base() -> _Vector_impl().
// Default constructor: only forwards to the base-class constructor.
void __cdecl std::vector<A>::vector(std::vector<A> *const this)
{
  std::_Vector_base<A>::_Vector_base(this);
}
// Base constructor: constructs the embedded _M_impl member.
void __cdecl std::_Vector_base<A>::_Vector_base(std::_Vector_base<A> *const this)
{
  std::_Vector_base<A>::_Vector_impl::_Vector_impl(&this->_M_impl);
}
// _Vector_impl constructor: runs the allocator constructor on the same address,
// then sets the three pointers to zero (empty vector, no storage).
void __cdecl std::_Vector_base<A>::_Vector_impl::_Vector_impl(std::_Vector_base<A>::_Vector_impl *const this)
{
  std::allocator<A>::allocator((std::allocator<A> *const)this);
  this->_M_start = 0LL;
  this->_M_finish = 0LL;
  this->_M_end_of_storage = 0LL;
}

// Destructor chain (decompiler output with symbols).
// Destroys the elements in [_M_start, _M_finish), then lets the base destructor release the storage.
void __cdecl std::vector<A>::~vector(std::vector<A> *const this)
{
  std::allocator<A> *v1; // rax

  v1 = std::_Vector_base<A>::_M_get_Tp_allocator(this);
  std::_Destroy<A *,A>(this->_M_impl._M_start, this->_M_impl._M_finish, v1);// calls each element's destructor in turn
  std::_Vector_base<A>::~_Vector_base(this);// releases the memory occupied by all elements
}
// Base destructor: deallocates (_M_end_of_storage - _M_start) elements starting at _M_start,
// then destroys _M_impl.
void __cdecl std::_Vector_base<A>::~_Vector_base(std::_Vector_base<A> *const this)
{
  std::_Vector_base<A>::_M_deallocate(
    this,
    this->_M_impl._M_start,
    this->_M_impl._M_end_of_storage - this->_M_impl._M_start);
  std::_Vector_base<A>::_Vector_impl::~_Vector_impl(&this->_M_impl);
}

// Constructor chain as it appears in the stripped binary (no symbols).
// Lines up with vector() in the symbolized listing: forwards a1 (this) down the chain.
__int64 __fastcall sub_AC2(__int64 a1)
{
  return sub_B3E(a1);
}
// Lines up with _Vector_base(): forwards again.
_QWORD *__fastcall sub_B3E(__int64 a1)
{
  return sub_BDE((_QWORD *)a1);
}
// Lines up with _Vector_impl(): after calling sub_C94 (presumably the allocator constructor),
// it zeroes three consecutive qwords a1[0], a1[1], a1[2], i.e. _M_start, _M_finish, _M_end_of_storage.
_QWORD *__fastcall sub_BDE(_QWORD *a1)
{
  _QWORD *result; // rax

  sub_C94(a1);
  *a1 = 0LL;
  a1[1] = 0LL;
  result = a1;
  a1[2] = 0LL;
  return result;
}

// Destructor chain as it appears in the stripped binary.
// Lines up with ~vector(): sub_BA6 sits where _M_get_Tp_allocator is called, and
// sub_BB4(a1[0], a1[1]) sits where _Destroy(_M_start, _M_finish) is called.
__int64 __fastcall sub_ADE(_QWORD *a1)
{
  sub_BA6(a1);
  sub_BB4(*a1, a1[1]);
  return sub_B5A(a1);
}
// Lines up with ~_Vector_base(): sub_C38 receives (this, a1[0], a1[2] - a1[0]), i.e. the
// storage start and the storage extent, where _M_deallocate is called; sub_B22 follows
// where ~_Vector_impl is called.
__int64 __fastcall sub_B5A(_QWORD *a1)
{
  sub_C38((__int64)a1, *a1, a1[2] - *a1);
  return sub_B22((__int64)a1);
}

push_back

//source code
// Appends a copy of __x. When _M_finish has not reached _M_end_of_storage the element is
// constructed in place at _M_finish and _M_finish is advanced; otherwise the work is
// delegated to _M_realloc_insert at end().
void push_back(const value_type &__x)
{
    if (this->_M_impl._M_finish != this->_M_impl._M_end_of_storage)
    {
        _Alloc_traits::construct(this->_M_impl, this->_M_impl._M_finish, __x);
        ++this->_M_impl._M_finish;
    }
    else
        _M_realloc_insert(end(), __x);
}

// push_back as decompiled with symbols. The branch order is inverted relative to the
// source (the test is == instead of !=), so the reallocation path comes first.
void __cdecl std::vector<A>::push_back(std::vector<A> *const this, const std::vector<A>::value_type *__x)
{
  A *v2; // rcx

  if ( this->_M_impl._M_finish == this->_M_impl._M_end_of_storage )
  {
    v2 = std::vector<A>::end(this)._M_current;
    
    // This growth routine references a characteristic string, "vector::_M_realloc_insert",
    // which helps identify it in a stripped binary.
    std::vector<A>::_M_realloc_insert<A const&>(this, (std::vector<A>::iterator)v2, __x); 
  }
  else
  {
    std::allocator_traits<std::allocator<A>>::construct<A,A const&>(
      (std::allocator_traits<std::allocator<A> >::allocator_type *)this, this->_M_impl._M_finish, __x);
    ++this->_M_impl._M_finish;
  }
}

// push_back as it appears in the stripped binary. a1 = this, a2 = pointer to the value.
__int64 __fastcall sub_E6A(__int64 a1, __int64 a2)
{
  __int64 result; // rax
  __int64 v3; // rax

  // Offset 8 is _M_finish and offset 16 is _M_end_of_storage; equal means no spare capacity.
  if ( *(_QWORD *)(a1 + 8) == *(_QWORD *)(a1 + 16) )
  {
    v3 = sub_FEE(a1); // sits where end() is called in the symbolized listing
    result = sub_103A(a1, v3, a2); // sits where _M_realloc_insert is called
  }
  else
  {
    sub_FB4(a1, *(_QWORD *)(a1 + 8), a2); // sits where construct() is called: builds the element at _M_finish
    result = a1;
    *(_QWORD *)(a1 + 8) += 8LL; // += 8 because the element type in this vector is 8 bytes; other element types give a different step
  }
  return result;
}

pop_back

//source code
// Removes the last element: steps _M_finish back by one element, then destroys the
// object that now sits at _M_finish. No storage is released here.
void pop_back() _GLIBCXX_NOEXCEPT
{
      __glibcxx_requires_nonempty(); // presumably a debug-mode non-empty check; no trace of it in the -O0 listing
      --this->_M_impl._M_finish;
      _Alloc_traits::destroy(this->_M_impl, this->_M_impl._M_finish);// runs the destructor of the object at _M_finish
}

// pop_back as decompiled with symbols: the pre-decrement of _M_finish is folded into
// the argument of destroy().
void __cdecl std::vector<A>::pop_back(std::vector<A> *const this)
{
  std::allocator_traits<std::allocator<A>>::destroy<A>(
    (std::allocator_traits<std::allocator<A> >::allocator_type *)this, --this->_M_impl._M_finish);
}

// pop_back as it appears in the stripped binary. a1 = this; offset 8 is _M_finish.
__int64 __fastcall sub_104C(__int64 a1)
{
  *(_QWORD *)(a1 + 8) -= 8LL; // -= 8 depends on the element size (8 bytes here)
  return sub_1985(a1, *(_QWORD *)(a1 + 8)); // sits where destroy() is called, on the new _M_finish
}

emplace_back

//source code
// Constructs a new element in place at the end of the vector, forwarding __args to
// the element's constructor. With spare capacity the element is built at _M_finish
// and _M_finish is advanced; otherwise the work is delegated to _M_realloc_insert at
// end(). This is the same slow path push_back uses, and it matches the
// _M_realloc_insert<> call visible in the decompiled listing of emplace_back.
template <typename _Tp, typename _Alloc>
template <typename... _Args>
void vector<_Tp, _Alloc>::
    emplace_back(_Args &&...__args)
{
    if (this->_M_impl._M_finish != this->_M_impl._M_end_of_storage)
    {
        _Alloc_traits::construct(this->_M_impl, this->_M_impl._M_finish, std::forward<_Args>(__args)...);
        ++this->_M_impl._M_finish;
    }
    else
        // GCC 7 replaced the older _M_emplace_back_aux helper with _M_realloc_insert.
        _M_realloc_insert(end(), std::forward<_Args>(__args)...);
}

// emplace_back<>() (no constructor arguments) as decompiled with symbols. Same shape as
// push_back: reallocate through _M_realloc_insert<> when full, otherwise construct at
// _M_finish and advance it.
void __cdecl std::vector<A>::emplace_back<>(std::vector<A> *const this)
{
  A *v1; // rdx

  if ( this->_M_impl._M_finish == this->_M_impl._M_end_of_storage )
  {
    v1 = std::vector<A>::end(this)._M_current;
    std::vector<A>::_M_realloc_insert<>(this, (std::vector<A>::iterator)v1);
  }
  else
  {
    std::allocator_traits<std::allocator<A>>::construct<A>(
      (std::allocator_traits<std::allocator<A> >::allocator_type *)this, this->_M_impl._M_finish);
    ++this->_M_impl._M_finish;
  }
}

// emplace_back<>() as it appears in the stripped binary. a1 = this. Unlike push_back,
// there is no value argument.
__int64 __fastcall sub_F44(__int64 a1)
{
  __int64 result; // rax
  __int64 v2; // rax

  // _M_finish (offset 8) == _M_end_of_storage (offset 16): no spare capacity.
  if ( *(_QWORD *)(a1 + 8) == *(_QWORD *)(a1 + 16) )
  {
    v2 = sub_1148(a1); // sits where end() is called
    result = sub_1436(a1, v2); // sits where _M_realloc_insert<> is called
  }
  else
  {
    sub_1410(a1, *(_QWORD *)(a1 + 8)); // sits where construct<A>() is called, at _M_finish
    result = a1;
    *(_QWORD *)(a1 + 8) += 8LL; // advance _M_finish by one 8-byte element
  }
  return result;
}

size

// size() as decompiled with symbols: the element count is the pointer difference
// _M_finish - _M_start.
std::vector<A>::size_type __cdecl std::vector<A>::size(const std::vector<A> *const this)
{
  return this->_M_impl._M_finish - this->_M_impl._M_start;
}

// size() as it appears in the stripped binary: byte distance between a1[1] (_M_finish)
// and a1[0] (_M_start), shifted right by 3, i.e. divided by the 8-byte element size.
// The shift amount depends on the element type.
__int64 __fastcall sub_F1E(_QWORD *a1)
{
  return (__int64)(a1[1] - *a1) >> 3;
}

resize

//source code
// Changes the element count to __new_size: growing appends (__new_size - size()) elements
// through _M_default_append, shrinking erases from _M_start + __new_size to the end, and an
// equal size does nothing.
void resize(size_type __new_size)
{
      if (__new_size > size())
            _M_default_append(__new_size - size());
      else if (__new_size < size())
            _M_erase_at_end(this->_M_impl._M_start + __new_size);
}

// resize() as decompiled with symbols. The comparisons are reordered relative to the
// source: the "<=" test comes first, so the shrink path is the outer branch and the
// grow path is the else branch.
void __cdecl std::vector<A>::resize(std::vector<A> *const this, std::vector<A>::size_type __new_size)
{
  std::vector<A>::size_type v2; // rdx

  if ( __new_size <= std::vector<A>::size(this) )
  {
    if ( __new_size < std::vector<A>::size(this) )
      std::vector<A>::_M_erase_at_end(this, &this->_M_impl._M_start[__new_size]);
  }
  else
  {
    v2 = __new_size - std::vector<A>::size(this);
    std::vector<A>::_M_default_append(this, v2);
  }
}

// resize() as it appears in the stripped binary. a1 = this, a2 = new size.
// NOTE(review): the name sub_FB4 also appears in the push_back listing with a different
// signature; the two listings presumably come from different builds.
// The `char` return value is a decompiler artifact; the source function returns void.
char __fastcall sub_FB4(_QWORD *a1, unsigned __int64 a2)
{
  __int64 v2; // rax
  char result; // al

  if ( a2 <= sub_F1E(a1) ) // sub_F1E sits where size() is called
  {
    result = a2 < sub_F1E(a1);
    if ( result )
      result = sub_18FA(a1, *a1 + 8 * a2); // sits where _M_erase_at_end(_M_start + new_size) is called; 8 = element size
  }
  else
  {
    v2 = sub_F1E(a1);
    result = sub_169C(a1, a2 - v2); // sits where _M_default_append(new_size - size()) is called
  }
  return result;
}

erase

// _M_erase(position) for vector<long>, decompiled with full type information.
// The declarations of the locals (thisa, __positiona, __lhs, __rhs, M_current, v3) are not
// included in this listing.
std::vector<long int>::iterator __cdecl std::vector<long>::_M_erase(
        std::vector<long int> *const this,
        std::vector<long int>::iterator __position){
          thisa = this;
  __positiona._M_current = __position._M_current;
  __rhs._M_current = std::vector<long>::end(this)._M_current;
  __lhs._M_current = __gnu_cxx::__normal_iterator<long *,std::vector<long>>::operator+(&__positiona, 1LL)._M_current;

// If the erased element is not the last one, every element after it is moved one slot forward.
  if ( __gnu_cxx::operator!=<long *,std::vector<long>>(&__lhs, &__rhs) )
  {
    M_current = std::vector<long>::end(thisa)._M_current;
    v3._M_current = __gnu_cxx::__normal_iterator<long *,std::vector<long>>::operator+(&__positiona, 1LL)._M_current;
    std::move<__gnu_cxx::__normal_iterator<long *,std::vector<long>>,__gnu_cxx::__normal_iterator<long *,std::vector<long>>>(
      v3,
      (__gnu_cxx::__normal_iterator<long int*,std::vector<long int> >)M_current,
      __positiona);
  }
  // Step _M_finish back by one element and destroy the now-surplus last slot.
  std::allocator_traits<std::allocator<long>>::destroy<long>(
    (std::allocator_traits<std::allocator<long int> >::allocator_type *)thisa,
    --thisa->_M_impl._M_finish);
  return __positiona;
}

// The same _M_erase with function names kept but types lost (plain __int64 values).
// a1 = this, a2 = the position iterator (a raw pointer value).
__int64 __fastcall std::vector<long>::_M_erase(__int64 a1, __int64 a2)
{
  __int64 v2; // rbx
  __int64 v3; // rax
  __int64 v5; // [rsp+0h] [rbp-40h] BYREF
  __int64 v6; // [rsp+8h] [rbp-38h]
  __int64 v7; // [rsp+18h] [rbp-28h] BYREF
  __int64 v8[4]; // [rsp+20h] [rbp-20h] BYREF

  v6 = a1;
  v5 = a2;
  v8[1] = __readfsqword(0x28u); // presumably the compiler-inserted stack-canary load, not vector logic
  v8[0] = std::vector<long>::end(a1);
  v7 = __gnu_cxx::__normal_iterator<long *,std::vector<long>>::operator+(&v5, 1LL);
  // position + 1 != end(): shift the tail [position + 1, end) down onto position.
  if ( (unsigned __int8)__gnu_cxx::operator!=<long *,std::vector<long>>(&v7, v8) )
  {
    v2 = std::vector<long>::end(v6);
    v3 = __gnu_cxx::__normal_iterator<long *,std::vector<long>>::operator+(&v5, 1LL);
    std::move<__gnu_cxx::__normal_iterator<long *,std::vector<long>>,__gnu_cxx::__normal_iterator<long *,std::vector<long>>>(
      v3,
      v2,
      v5);
  }
  *(_QWORD *)(v6 + 8) -= 8LL; // --_M_finish (8-byte elements)
  std::allocator_traits<std::allocator<long>>::destroy<long>(v6, *(_QWORD *)(v6 + 8));
  return v5;
}

// _M_erase as it appears in the fully stripped binary. Statement for statement it matches
// the symbolized listing: sub_183E = end(), sub_1658 = iterator operator+, sub_188E =
// iterator operator!=, sub_18CE = std::move over the range, sub_191E = destroy().
__int64 __fastcall sub_16BE(__int64 a1, __int64 a2)
{
  __int64 v2; // rbx
  __int64 v3; // rax
  __int64 v5; // [rsp+0h] [rbp-40h] BYREF
  __int64 v6; // [rsp+8h] [rbp-38h]
  __int64 v7; // [rsp+18h] [rbp-28h] BYREF
  __int64 v8[4]; // [rsp+20h] [rbp-20h] BYREF

  v6 = a1;
  v5 = a2;
  v8[1] = __readfsqword(0x28u); // presumably the compiler-inserted stack-canary load
  v8[0] = sub_183E(a1);
  v7 = sub_1658(&v5, 1LL);
  // position + 1 != end(): shift the tail forward by one slot.
  if ( (unsigned __int8)sub_188E(&v7, v8) )
  {
    v2 = sub_183E(v6);
    v3 = sub_1658(&v5, 1LL);
    sub_18CE(v3, v2, v5);
  }
  *(_QWORD *)(v6 + 8) -= 8LL; // --_M_finish (8-byte elements)
  sub_191E(v6, *(_QWORD *)(v6 + 8));
  return v5;
}

std::shared_ptr

// Abridged: shared_ptr derives from __shared_ptr; the body is elided here ("...").
template<typename _Tp>
class shared_ptr : public __shared_ptr<_Tp> {
...
}

// Abridged layout of __shared_ptr: the raw pointer first, then the reference-count handle.
// The decompiled constructor stores the pointer at offset 0 and passes this + 8 as the counter.
template <typename _Tp, _Lock_policy _Lp>
class __shared_ptr {
private:
    _Tp *_M_ptr;                     // Contained pointer.
    __shared_count<_Lp> _M_refcount; // Reference counter.
}

// Abridged layout of __shared_count: a single pointer to the heap-allocated control block.
template<_Lock_policy _Lp = __default_lock_policy>
class __shared_count {
private:
	// _Sp_counted_base is the base class.
	// For shared_ptr(new A()), the object stored here is actually an instance of _Sp_counted_ptr:
	//class _Sp_counted_ptr final : public _Sp_counted_base<_Lp>
	_Sp_counted_base<_Lp>*  _M_pi;
}

// Abridged layout of the control-block base class.
template<_Lock_policy _Lp = __default_lock_policy>
class _Sp_counted_base : public _Mutex_base<_Lp> {
private:
	// This is a base class with virtual member functions,
	// so there is a hidden member (the vtable pointer) ahead of the counters.
      _Atomic_word  _M_use_count;     // #shared
      _Atomic_word  _M_weak_count;    // #weak + (#shared != 0)
}

// Control block used when a shared_ptr is built from a raw pointer: adds the managed
// pointer on top of the base-class members.
template<typename _Ptr, _Lock_policy _Lp>
class _Sp_counted_ptr final : public _Sp_counted_base<_Lp> {
private:
	_Ptr             _M_ptr;
}

内存布局

动态调试发现，如果是以make_shared<A>()的方式创建智能指针，则_M_pi指向的控制块中是没有_M_ptr成员的（此时控制块的类型应为_Sp_counted_ptr_inplace，被管理的对象直接存放在控制块内部，因此不需要额外的指针），未深究。

构造函数

// Constructor chain for shared_ptr<A>(A*), decompiled with symbols.
// Outermost constructor: only forwards to the __shared_ptr base.
void __cdecl std::shared_ptr<A>::shared_ptr<A,void>(std::shared_ptr<A> *const this, A *__p)
{
  std::__shared_ptr<A,(__gnu_cxx::_Lock_policy)2>::__shared_ptr<A,void>(this, __p);
}

// Stores the raw pointer, builds the reference counter from it, then runs the
// enable_shared_from_this hook.
void __cdecl std::__shared_ptr<A,(__gnu_cxx::_Lock_policy)2>::__shared_ptr<A,void>(std::__shared_ptr<A,(__gnu_cxx::_Lock_policy)2> *const this, A *__p)
{
  this->_M_ptr = __p;
  // The third argument is an empty tag object; the expression shown for it is a decompiler artifact.
  std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count<A *>(
    &this->_M_refcount,
    __p,
    (std::false_type)((_BYTE)this + 8));
  std::__shared_ptr<A,(__gnu_cxx::_Lock_policy)2>::_M_enable_shared_from_this_with<A,A>(this, __p);
}

// Tag-dispatch overload: drops the tag and forwards to the two-argument constructor.
void __cdecl std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count<A *>(std::__shared_count<(__gnu_cxx::_Lock_policy)2> *const this, A *__p, std::false_type a3)
{
  std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count<A *>(this, __p);
}

// Allocates the control block: 0x18 (24) bytes for an _Sp_counted_ptr, constructed with __p,
// then published through _M_pi.
void __cdecl std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count<A *>(std::__shared_count<(__gnu_cxx::_Lock_policy)2> *const this, A *__p)
{
  std::_Sp_counted_ptr<A*,(__gnu_cxx::_Lock_policy)2> *v2; // rbx

  this->_M_pi = 0LL;
  v2 = (std::_Sp_counted_ptr<A*,(__gnu_cxx::_Lock_policy)2> *)operator new(0x18uLL);
  std::_Sp_counted_ptr<A *,(__gnu_cxx::_Lock_policy)2>::_Sp_counted_ptr(v2, __p);
  this->_M_pi = v2;
}

// Control-block constructor: runs the base constructor, installs this class's vtable
// pointer, and records the managed pointer.
void __cdecl std::_Sp_counted_ptr<A *,(__gnu_cxx::_Lock_policy)2>::_Sp_counted_ptr(std::_Sp_counted_ptr<A*,(__gnu_cxx::_Lock_policy)2> *const this, A *__p)
{
  std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_Sp_counted_base(this);
  this->_vptr__Sp_counted_base = (int (**)(...))off_201C98;
  this->_M_ptr = __p;
}

// __shared_ptr(A*) constructor as it appears in the stripped binary. a1 = this, a2 = raw pointer.
unsigned __int64 __fastcall sub_1106(_QWORD *a1, __int64 a2)
{
  unsigned __int64 v3; // [rsp+20h] [rbp-8h]

  v3 = __readfsqword(0x28u); // presumably the compiler-inserted stack-canary load
  *a1 = a2; // _M_ptr = __p (offset 0)
  sub_11F6(a1 + 1, a2); // sits where the __shared_count constructor is called, on _M_refcount (offset 8)
  sub_121C(a1, a2); // sits where _M_enable_shared_from_this_with is called
  return __readfsqword(0x28u) ^ v3;
}

// __shared_count(A*) constructor as it appears in the stripped binary. a1 = this
// (whose only field is _M_pi), a2 = raw pointer. The operator new(0x18) call followed by a
// store into *a1 is the recognisable pattern.
_QWORD *__fastcall sub_125A(_QWORD *a1, __int64 a2)
{
  __int64 v2; // rbx
  _QWORD *result; // rax

  *a1 = 0LL; // _M_pi = nullptr
  v2 = operator new(0x18uLL); // 24-byte control block
  sub_1326(v2, a2); // sits where the _Sp_counted_ptr constructor is called
  result = a1;
  *a1 = v2; // _M_pi = control block
  return result;
}

析构函数

// Destructor chain for shared_ptr<A>, decompiled with symbols.
// Outermost destructor: only forwards to the base.
void __cdecl std::shared_ptr<A>::~shared_ptr(std::shared_ptr<A> *const this)
{
  std::__shared_ptr<A,(__gnu_cxx::_Lock_policy)2>::~__shared_ptr(this);
}

// Base destructor: destroys the reference-count member; _M_ptr itself is not touched here.
void __cdecl std::__shared_ptr<A,(__gnu_cxx::_Lock_policy)2>::~__shared_ptr(std::__shared_ptr<A,(__gnu_cxx::_Lock_policy)2> *const this)
{
  std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count(&this->_M_refcount);
}

// Releases one reference on the control block, if there is one.
void __cdecl std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count(std::__shared_count<(__gnu_cxx::_Lock_policy)2> *const this)
{
  if ( this->_M_pi )
    std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release(this->_M_pi);
}

// Drops one shared reference. Adds -1 to _M_use_count; when the call yields 1 (taken here to
// be the value before the decrement, i.e. this was the last shared owner), virtual slot 2 is
// invoked, then _M_weak_count is decremented the same way and, if that also yields 1, virtual
// slot 3 is invoked.
void __cdecl std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release(std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2> *const this)
{
  if ( __gnu_cxx::__exchange_and_add_dispatch(&this->_M_use_count, -1) == 1 )
  {
    // Virtual call through vtable slot 2. Because the call is virtual it lands in the
    // _Sp_counted_ptr override, which is where the managed object is released.
    // NOTE(review): the original note called this "the destructor"; in libstdc++ slot 2 is
    // presumably _M_dispose() and slot 3 _M_destroy() (frees the control block). Verify
    // against shared_ptr_base.h.
    (*((void (__fastcall **)(std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2> *const))this->_vptr__Sp_counted_base + 2))(this);
    if ( __gnu_cxx::__exchange_and_add_dispatch(&this->_M_weak_count, -1) == 1 )
      (*((void (__fastcall **)(std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2> *const))this->_vptr__Sp_counted_base + 3))(this);
  }
}

多线程下shared_ptr自身的同步问题

当时逆向的时候发现shared_ptr的实现中可能存在同步问题，觉得挺有意思的，就投入其中，然后这篇文章就鸽了。相关同步问题的讨论，注意这里说的同步问题不是shared_ptr所管理对象的同步问题，而是shared_ptr自身的同步问题：不当的使用可能会造成use-after-free或double free等严重的内存bug。贴一个我使用Fuzzing在知名开源项目中发现的真实案例。

// A correct way to share one shared_ptr instance between threads: every concurrent
// access to `global` goes through the atomic free functions.
// (The local variable was originally named `private`, which is a reserved keyword.)
//In thread 1
shared_ptr<myClass> local_copy = atomic_load(&global);
...
//In thread 2
atomic_store(&global, make_shared<myClass>());

std::string

/usr/include/c++/9/bits/basic_string.h

内存布局

  // std::string is basic_string<char>.
  typedef basic_string<char>    string;   
  
  // Abridged member layout of basic_string, as quoted by the article.
  template<typename _CharT, typename _Traits, typename _Alloc>
    class basic_string
    {
    protected:
        _Alloc_hider	_M_dataplus;        // pointer, 8 bytes
        size_type		_M_string_length;   // size_t, 8 bytes
				
        // Number of characters the in-object buffer can hold: 15 for char.
        enum { _S_local_capacity = 15 / sizeof(_CharT) };
        // The local buffer and the heap capacity share the same 16 bytes.
        union
        {
          _CharT           _M_local_buf[_S_local_capacity + 1];
          size_type        _M_allocated_capacity;
        }; // 16 bytes
    }

sizeof(std::string) == 32 // 2*8 + 16 bytes

关于_M_local_buf的相关优化
如果字符串的长度（strlen）小于等于15，那么std::string会把它存放在对象自身的_M_local_buf中，即_M_dataplus指向对象内部的这块缓冲区（若string对象是局部变量，这块空间就在栈上）。如果长度大于15，那么字符串会被放到堆上，即_M_dataplus指向的空间是malloc出来的，此时联合体中存放的是_M_allocated_capacity。

string s = "aaaaaaaaaaaaaaa"; // 15 'a' characters

pwndbg> p &s
$1 = (std::string *) 0x7fffffffdba0

pwndbg> x/8xg 0x7fffffffdba0
0x7fffffffdba0:	0x00007fffffffdbb0	0x000000000000000f
0x7fffffffdbb0:	0x6161616161616161	0x0061616161616161

string s = "aaaaaaaaaaaaaaaa"; // 16 'a' characters

pwndbg> p &s
$1 = (std::string *) 0x7fffffffdba0

pwndbg> x/8xg 0x7fffffffdba0
0x7fffffffdba0:	0x000055555556aeb0	0x0000000000000010
0x7fffffffdbb0:	0x0000000000000010	0x0000555555555330

其中0x7fffffffdbb8里面的0x0000555555555330是未定义的残留值：此时16字节的联合体只用低8字节（0x7fffffffdbb0处）存放_M_allocated_capacity（0x10），高8字节并未被std::string使用。