llvm实践1:虚表内存修改

这里是引用

LLVM虚表遍历


上周收到一个任务,需要通过编写llvm pass,修改C++虚表的内容,之前没看过llvm的源码,学习了一下

1.1 C++虚表在ll里的写法

先定义了两个类A,B,B继承于A,两个类都各有2个虚函数print 和print2

// Demo base class with two virtual member functions.
class A
{
public:
	// Prints the marker text "A::print()".
	virtual void print()
	{
		std::cout << "A::print()" << std::endl;
	}
	// Prints the marker text "A::print2()".
	virtual void print2()
	{
		std::cout << "A::print2()" << std::endl;
	}
};
// Demo derived class: overrides both virtual functions of A.
class B : public A
{
public:
	// `override` makes the compiler verify that this really replaces A::print.
	virtual void print() override
	{
		std::cout << "B::print()" << std::endl;
	}

	// `override` makes the compiler verify that this really replaces A::print2.
	virtual void print2() override
	{
		std::cout << "B::print2()" << std::endl;
	}
};

调用命令行:.\clang -emit-llvm .\testvtable.cpp -S -o testvtable.ll 生成llvm中间代码

可以看到ll文件里class B的虚表:虚表里有3项,第一项是class B的RTTI(CompleteObjectLocator),另外两项是两个虚函数:

@15 = private unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* bitcast (%rtti.CompleteObjectLocator* @"??_R4B@@6B@" to i8*), i8* bitcast (void (%class.B*)* @"?print@B@@UAEXXZ" to i8*), i8* bitcast (void (%class.B*)* @"?print2@B@@UAEXXZ" to i8*)] }, comdat($"??_7B@@6B@")

@15是IR里 的变量名,

comdat($"??_7B@@6B@") 表示这个全局变量属于名为 ??_7B@@6B@ 的COMDAT组;??_7B@@6B@ 同时也是这个虚表对外使用的符号名(见下面的alias定义)

void (%class.B*)* @"?print@B@@UAEXXZ" 是虚函数在IR里的函数名

i8* bitcast (xx to i8*) 表示把某个类型的常量指针通过bitcast转换成i8*类型(只改变指针类型,不改变指针的值),这样虚表里的每一项都可以统一按i8*存放和读取

@"??_7B@@6B@" = unnamed_addr alias i8*, getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @15, i32 0, i32 0, i32 1)

类实例化的地方 保存虚表到object的第一项里,用到了虚表在IR里的名字"??_7B@@6B@"

store i32 (...)** bitcast (i8** @"??_7B@@6B@" to i32 (...)**), i32 (...)*** %6, align 4

1.2 虚表在clang里的生成

构建一个虚表要用到两个builder:一个是ConstantStructBuilder(外层的结构体),一个是ConstantArrayBuilder(内层的数组)

// Emits the initializer for every vftable of RD: for each VPtrInfo returned by
// getVFPtrOffsets(RD), builds a struct constant from the vftable layout and
// sets it as the initializer of the corresponding global variable.
void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
                                            const CXXRecordDecl *RD) {
  MicrosoftVTableContext &VFTContext = CGM.getMicrosoftVTableContext();
  const VPtrInfoVector &VFPtrs = VFTContext.getVFPtrOffsets(RD);
  for (const std::unique_ptr<VPtrInfo> &Info : VFPtrs) {
    // Handle one vftable per iteration; globals that already have an
    // initializer are skipped.
    llvm::GlobalVariable *VTable = getAddrOfVTable(RD, Info->FullOffsetInMDC);
    if (VTable->hasInitializer())
      continue;

    // Fetch the layout of this vftable; the constant is built from it.
    const VTableLayout &VTLayout =
        VFTContext.getVFTableLayout(RD, Info->FullOffsetInMDC);

    // Fetch the RTTI constant (complete object locator), but only when the
    // layout actually contains an RTTI component.
    llvm::Constant *RTTI = nullptr;
    if (any_of(VTLayout.vtable_components(),
               [](const VTableComponent &VTC) { return VTC.isRTTIKind(); }))
      RTTI = getMSCompleteObjectLocator(RD, *Info);

    // Start building the vftable constant.
    ConstantInitBuilder Builder(CGM);
    auto Components = Builder.beginStruct();
    // Adds the vftable contents (the inner array(s)) to the struct builder.
    CGVT.createVTableInitializer(Components, VTLayout, RTTI);
    // Finishes the outer struct constant and sets it as VTable's initializer.
    Components.finishAndSetAsInitializer(VTable);
    emitVTableTypeMetadata(*Info, RD, VTable);
  }
}

createVTableInitializer:创建虚表全局变量的初始化内容(initializer);全局变量本身已经由上面的getAddrOfVTable获取

// Fills `builder` with one array per vtable described by `layout`. Every
// component of a vtable is appended to that vtable's array through
// addVTableComponent; `rtti` is forwarded for the RTTI component.
void CodeGenVTables::createVTableInitializer(ConstantStructBuilder &builder,
                                             const VTableLayout &layout,
                                             llvm::Constant *rtti) {
  unsigned nextVTableThunkIndex = 0;
  for (unsigned i = 0, e = layout.getNumVTables(); i != e; ++i) {
    // Open an array builder (element type Int8PtrTy) for this vtable.
    auto vtableElem = builder.beginArray(CGM.Int8PtrTy);
    size_t thisIndex = layout.getVTableOffset(i);
    size_t nextIndex = thisIndex + layout.getVTableSize(i);
    // Append every component in [thisIndex, nextIndex) to vtableElem.
    // Note: this inner `i` shadows the outer loop variable.
    for (unsigned i = thisIndex; i != nextIndex; ++i) {
      addVTableComponent(vtableElem, layout, i, rtti, nextVTableThunkIndex);
    }
    // Finish the array and add the resulting constant to the struct builder;
    // the enclosing struct constant is created later from those elements.
    vtableElem.finishAndAddTo(builder);
  }
}

addVTableComponent 处理虚表中的一项(component):根据这一项的种类(偏移量、RTTI、虚函数、析构函数等)生成对应的Constant,加入到vtableElem 这个builder里;对所有项的遍历是在上面的createVTableInitializer里完成的

// Appends the constant for vtable component `idx` of `layout` to `builder`.
// The kind of the component decides what is added: an offset, the RTTI
// pointer, a function/destructor pointer (possibly a thunk or a special
// pure/deleted handler), or a null pointer for an unused slot.
// `nextVTableThunkIndex` is advanced whenever a thunk entry is consumed.
void CodeGenVTables::addVTableComponent(
    ConstantArrayBuilder &builder, const VTableLayout &layout,
    unsigned idx, llvm::Constant *rtti, unsigned &nextVTableThunkIndex) 
{
    // Look up the component at position idx in the layout.
    auto &component = layout.vtable_components()[idx];
    
    switch (component.getKind()) {
      // NOTE(review): addOffsetConstant is not defined anywhere in this
      // excerpt -- presumably a local helper that was elided when quoting.
      case VTableComponent::CK_VCallOffset:
        return addOffsetConstant(component.getVCallOffset());

      case VTableComponent::CK_VBaseOffset:
        return addOffsetConstant(component.getVBaseOffset());

      case VTableComponent::CK_OffsetToTop:
        return addOffsetConstant(component.getOffsetToTop());

      // RTTI: add the rtti constant, bitcast to Int8PtrTy.
      case VTableComponent::CK_RTTI:
        return builder.add(llvm::ConstantExpr::getBitCast(rtti, CGM.Int8PtrTy));
       
      // Virtual functions and virtual destructors.
      case VTableComponent::CK_FunctionPointer:
      case VTableComponent::CK_CompleteDtorPointer:
 	  case VTableComponent::CK_DeletingDtorPointer: 
      {
          // First determine the GlobalDecl of the function for this slot.
          GlobalDecl GD;
          switch (component.getKind()) {
            default:
              llvm_unreachable("Unexpected vtable component kind");
            // Ordinary virtual function.
            case VTableComponent::CK_FunctionPointer:
              GD = component.getFunctionDecl();
              break;
            // Complete destructor (Dtor_Complete variant).
            case VTableComponent::CK_CompleteDtorPointer:
              GD = GlobalDecl(component.getDestructorDecl(), Dtor_Complete);
              break;
            // Deleting destructor (Dtor_Deleting variant).
            case VTableComponent::CK_DeletingDtorPointer:
              GD = GlobalDecl(component.getDestructorDecl(), Dtor_Deleting);
              break;
         }
            
            
           llvm::Constant *fnPtr;
           // Some upstream code is elided at this point; the article's author
           // skipped it.
           // ...

           // Pure virtual member functions.
           // The slot gets the special function named by
           // GetPureVirtualCallName(), created once and cached in PureVirtualFn.
           if (cast<CXXMethodDecl>(GD.getDecl())->isPure()) {
               if (!PureVirtualFn)
                   PureVirtualFn =
                   getSpecialVirtualFn(CGM.getCXXABI().GetPureVirtualCallName());
               fnPtr = PureVirtualFn;
           // Deleted virtual member functions.
           // The slot gets the special function named by
           // GetDeletedVirtualCallName(), cached in DeletedVirtualFn.
           } else if (cast<CXXMethodDecl>(GD.getDecl())->isDeleted()) {
               if (!DeletedVirtualFn)
                   DeletedVirtualFn =
                   getSpecialVirtualFn(CGM.getCXXABI().GetDeletedVirtualCallName());
               fnPtr = DeletedVirtualFn;
           // Thunks. The layout records a thunk for this index, so the slot
           // gets the result of maybeEmitThunk instead of the method itself.
           } else if (nextVTableThunkIndex < layout.vtable_thunks().size() &&
                      layout.vtable_thunks()[nextVTableThunkIndex].first == idx) {
               auto &thunkInfo = layout.vtable_thunks()[nextVTableThunkIndex].second;
               nextVTableThunkIndex++;
               fnPtr = maybeEmitThunk(GD, thunkInfo, /*ForVTable=*/true);
               // Otherwise we can use the method definition directly.
           } else {
               // Ordinary virtual functions end up here.
               llvm::Type *fnTy = CGM.getTypes().GetFunctionTypeForVTable(GD);
               fnPtr = CGM.GetAddrOfFunction(GD, fnTy, /*ForVTable=*/true);
           }
            // Bitcast fnPtr to Int8PtrTy (i8*).
            fnPtr = llvm::ConstantExpr::getBitCast(fnPtr, CGM.Int8PtrTy);
            // Append it to the array builder (vtableElem in the caller).
            builder.add(fnPtr);
            return;
        }
    // Unused slot: add a null pointer of type Int8PtrTy.
    case VTableComponent::CK_UnusedFunctionPointer:
    	return builder.addNullPointer(CGM.Int8PtrTy);
  }
}

  // Add a new value to this initializer.
  // The constant is appended to the buffer owned by the enclosing Builder.
  void add(llvm::Constant *value) 
  {
      Builder.Buffer.push_back(value);
  }

vtableElem.finishAndAddTo(builder):

// Finishes this builder via finishImpl() and adds the resulting constant to
// `parent`, the enclosing aggregate builder.
void finishAndAddTo(AggregateBuilderBase &parent) {
    parent.add(asImpl().finishImpl());
}

从builder里生成ConstantArray

  // Finishes this builder via finishImpl() and adds the resulting constant to
  // `parent`, the enclosing aggregate builder.
  void finishAndAddTo(AggregateBuilderBase &parent) {
    parent.add(asImpl().finishImpl());
  }
  // Forms the elements collected by this builder into a single constant by
  // delegating to finishArray with element type EltTy (the article notes the
  // result is a ConstantArray).
  llvm::Constant *finishImpl() {
    return AggregateBuilderBase::finishArray(EltTy);
  }

// Builds an array constant from the elements this builder added to the buffer
// (everything from index Begin onward), removes those elements from the
// buffer, and returns the constant. When eltTy is null, the type of the first
// element is used.
llvm::Constant *ConstantAggregateBuilderBase::finishArray(llvm::Type *eltTy)
{
  markFinished();
  // The buffer holds the constants added so far (for a vtable: the entries
  // appended by addVTableComponent).
  auto &buffer = getBuffer();
  // View this builder's elements as an ArrayRef, skipping the first Begin.
  auto elts = llvm::makeArrayRef(buffer).slice(Begin);
  if (!eltTy) eltTy = elts[0]->getType();
  // Array type: eltTy x number of elements.
  auto type = llvm::ArrayType::get(eltTy, elts.size());
 // Create the ConstantArray from elts; each element is one vtable entry.
  auto constant = llvm::ConstantArray::get(type, elts);
 // Remove this builder's elements from the buffer.
  buffer.erase(buffer.begin() + Begin, buffer.end());
  return constant;
}

// Returns the array constant of type Ty with elements V. If getImpl yields a
// constant it is returned directly; otherwise the constant comes from the
// context's ArrayConstants map via getOrCreate.
Constant *ConstantArray::get(ArrayType *Ty, ArrayRef<Constant*> V) {
  if (Constant *C = getImpl(Ty, V))
    return C;
  // The article's vtable case reaches this branch: the ConstantArray's
  // elements are the entries produced by addVTableComponent.
  return Ty->getContext().pImpl->ArrayConstants.getOrCreate(Ty, V);
}

 // Returns the constant for (Ty, V): the existing entry in Map if one is found,
 // otherwise a new one made by create(). The printf is a debug trace.
 ConstantClass *getOrCreate(TypeClass *Ty, ValType V) {
    printf("ConstantClass *getOrCreate\n");
    LookupKey Key(Ty, V);
    /// Hash once, and reuse it for the lookup and the insertion if needed.
    LookupKeyHashed Lookup(MapInfo::getHashValue(Key), Key);

    ConstantClass *Result = nullptr;

    // Not found in the map: create it; found: reuse the stored constant.
    auto I = Map.find_as(Lookup);
    if (I == Map.end())
      Result = create(Ty, V, Lookup);
    else
      Result = *I;
    assert(Result && "Unexpected nullptr");

    return Result;
  }

// Constructs a ConstantArray of type T from the elements in V. ConstantArray
// derives from ConstantAggregate, whose constructor is invoked with the
// ConstantArrayVal value kind and the same elements.
ConstantArray::ConstantArray(ArrayType *T, ArrayRef<Constant *> V)
    : ConstantAggregate(T, ConstantArrayVal, V)
{
  // Debug trace: number of elements in the array (for a vtable, the number of
  // entries). getNumElements() returns a 64-bit unsigned value, so it is
  // printed with %llu after an explicit cast; passing it to %d is a
  // format/argument mismatch.
  printf("ConstantArray::ConstantArray %llu\n",
         static_cast<unsigned long long>(T->getNumElements()));
  assert(V.size() == T->getNumElements() &&
         "Invalid initializer for constant array");
}

// Constructs a ConstantAggregate of type T holding the elements in V (for a
// vtable these are the vtable entries). ConstantAggregate derives from
// Constant, whose constructor is invoked with an operand list of V.size()
// operands.
ConstantAggregate::ConstantAggregate(CompositeType *T, ValueTy VT,
                                     ArrayRef<Constant *> V)
    : Constant(T, VT, OperandTraits<ConstantAggregate>::op_end(this) - V.size(),
               V.size()) 
{
  // Copy every element constant in V into this aggregate's operand list.
  // This is the key step: the operands are what a pass later walks.
  llvm::copy(V, op_begin());

  // Check that types match, unless this is an opaque struct.
  if (auto *ST = dyn_cast<StructType>(T))
    if (ST->isOpaque())
      return;
  for (unsigned I = 0, E = V.size(); I != E; ++I)
    assert(V[I]->getType() == T->getTypeAtIndex(I) &&
           "Initializer for composite element doesn't match!");
}


components.finishAndSetAsInitializer(VTable);

// Finishes this builder via finishImpl() and passes the resulting constant to
// setGlobalInitializer for `global`. The article notes that for a
// ConstantStructBuilder, finishImpl() is where the ConstantStruct is created.
void finishAndSetAsInitializer(llvm::GlobalVariable *global)
{
    return this->Builder.setGlobalInitializer(global, asImpl().finishImpl());
}

// Builds a struct constant from the elements this builder added to the buffer
// (everything from index Begin onward), removes those elements from the
// buffer, and returns the constant. With a non-null `ty` the struct has that
// type; otherwise an anonymous struct is created (an empty one when there are
// no elements).
llvm::Constant *
ConstantAggregateBuilderBase::finishStruct(llvm::StructType *ty) {
  markFinished();
  // In the article's vtable case this slice holds a single element: the
  // ConstantArray added earlier.
  auto &buffer = getBuffer();
  auto elts = llvm::makeArrayRef(buffer).slice(Begin);

  if (ty == nullptr && elts.empty())
    ty = llvm::StructType::get(Builder.CGM.getLLVMContext(), {}, Packed);

  llvm::Constant *constant;
  if (ty) {
    constant = llvm::ConstantStruct::get(ty, elts);
  } else {
    // The article's vtable case takes this branch (no explicit struct type).
    constant = llvm::ConstantStruct::getAnon(elts, Packed);
  }

  buffer.erase(buffer.begin() + Begin, buffer.end());
  return constant;
}

// Sets `initializer` as the initializer of GV, then resolves any recorded
// self-references against GV.
void ConstantInitBuilderBase::setGlobalInitializer(llvm::GlobalVariable *GV,
                                                   llvm::Constant *initializer)
{
   // In the vtable case the initializer is the constant returned by
   // finishStruct (a ConstantStruct).
  GV->setInitializer(initializer);
  if (!SelfReferences.empty())
    resolveSelfReferences(GV);
}

到这里虚表的GlobalVariable就创建出来了

几个类之间的关系:

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-cZJV0kjM-1582384188875)(C:\Users\Admin\AppData\Roaming\Typora\typora-user-images\image-20200222190554827.png)]

1.3 遍历虚表GlobalVariable

看懂了上面的构建代码,就很容易写出对应的虚表遍历代码。

每个虚表全局变量的Initializer都是一个ConstantStruct,里面有一个或者多个ConstantArray,每个ConstantArray里又有多个Constant,每一个Constant对应虚表中的一项(RTTI或者虚函数),所以嵌套遍历两层就可以获取虚表里每一项的Constant。

代码如下:

// Walk every unnamed global whose initializer has the vtable shape produced
// by the builders: a ConstantStruct whose fields are ConstantArrays. Each
// array element is one vtable entry and is printed to outs().
// All output goes through outs() so the lines appear in program order; the
// original mixed printf (stdio) with outs(), which are buffered separately.
for (GlobalVariable &gv : gValList) {
    // Only unnamed globals that carry an initializer are considered.
    if (gv.hasName() || !gv.hasInitializer())
        continue;

    // Skip initializers that are not a struct constant instead of walking
    // the operands of an unrelated constant.
    auto *cs = dyn_cast<ConstantStruct>(gv.getInitializer());
    if (cs == nullptr)
        continue;

    // Outer level: the fields of the struct (one array per vtable).
    for (unsigned i = 0, arrayNumber = cs->getNumOperands(); i < arrayNumber; ++i) {
        auto *item1 = dyn_cast<ConstantArray>(cs->getOperand(i));
        if (item1 == nullptr)
            continue;

        // Inner level: the entries stored in this vtable array.
        const unsigned entryNumber = item1->getNumOperands();
        outs() << "getNumOperands:" << entryNumber << "\n";
        for (unsigned j = 0; j < entryNumber; ++j) {
            // item2 is one vtable entry.
            llvm::Value *item2 = item1->getOperand(j);
            if (item2 != nullptr) {
                outs() << *item2 << "\r\n";
            }
        }
    }
}

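获取到虚表里每一项的Constant之后,创建一个新的Constant newIterm,再调用item1->setOperand(j,newIterm); 就可以修改虚表内存了。需要注意:LLVM里的Constant是按内容唯一化(uniqued)并被共享的,直接对Constant调用setOperand会绕过唯一化表,可能影响其他使用同一个Constant的地方;更稳妥的做法是用修改后的元素重新创建ConstantArray和ConstantStruct,再调用gv.setInitializer()设置回去,或者使用Constant::handleOperandChange。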

参考资料:
http://llvm.org/doxygen/modules.html

  • 2
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值