这里是引用
LLVM虚表遍历
上周收到一个任务,需要通过编写llvm pass,修改C++虚表的内容,之前没看过llvm的源码,学习了一下
1.1 C++虚表在ll里的写法
先定义了两个类A,B,B继承于A,两个类都各有2个虚函数print 和print2
// Base class of the vtable demo. It declares two virtual member functions,
// each of which writes its own qualified name to standard output.
class A {
public:
    // Writes "A::print()" and ends the line (std::endl also flushes).
    virtual void print() { std::cout << "A::print()" << std::endl; }

    // Writes "A::print2()" and ends the line (std::endl also flushes).
    virtual void print2() { std::cout << "A::print2()" << std::endl; }
};
class B : public A
{
public:
virtual void print()
{
cout << "B::print()" << endl;
}
virtual void print2()
{
cout << "B::print2()" << endl;
}
};
调用命令行:.\clang -emit-llvm .\testvtable.cpp -S -o testvtable.ll 生成llvm中间代码
可以看到ll文件里的classB的虚表,虚表里有3项,一个是classB的rtti,另外两个是两个虚函数,
@15 = private unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* bitcast (%rtti.CompleteObjectLocator* @"??_R4B@@6B@" to i8*), i8* bitcast (void (%class.B*)* @"?print@B@@UAEXXZ" to i8*), i8* bitcast (void (%class.B*)* @"?print2@B@@UAEXXZ" to i8*)] }, comdat($"??_7B@@6B@")
@15是IR里 的变量名,
comdat($"??_7B@@6B@") 是这个虚表在IR里的名字
void (%class.B*)* @"?print@B@@UAEXXZ" 是虚函数在IR里的函数名
i8* bitcast (xx to i8*) 表示把某个类型的值通过 bitcast 转换成 i8* 指针,之后就可以通过 i8* 指针、以 i8 为步长读取 global 的内存空间
@"??_7B@@6B@" = unnamed_addr alias i8*, getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @15, i32 0, i32 0, i32 1)
类实例化的地方 保存虚表到object的第一项里,用到了虚表在IR里的名字"??_7B@@6B@"
store i32 (...)** bitcast (i8** @"??_7B@@6B@" to i32 (...)**), i32 (...)*** %6, align 4
1.2 虚表在clang里的生成
构建一个虚表会用到两个 builder:一个是 ConstantStructBuilder,一个是 ConstantArrayBuilder
// Emits the initializer of every vtable of RD: one vtable per entry returned
// by getVFPtrOffsets(RD). Vtables that already have an initializer are skipped.
void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
const CXXRecordDecl *RD) {
MicrosoftVTableContext &VFTContext = CGM.getMicrosoftVTableContext();
const VPtrInfoVector &VFPtrs = VFTContext.getVFPtrOffsets(RD);
for (const std::unique_ptr<VPtrInfo> &Info : VFPtrs) {
// Handle one vtable per iteration.
llvm::GlobalVariable *VTable = getAddrOfVTable(RD, Info->FullOffsetInMDC);
if (VTable->hasInitializer())
continue;
// Fetch the vtable layout; the vtable Constant is built from this layout.
const VTableLayout &VTLayout =
VFTContext.getVFTableLayout(RD, Info->FullOffsetInMDC);
// Fetch the RTTI Constant for this class, but only when the layout
// actually contains an RTTI component; otherwise RTTI stays null.
llvm::Constant *RTTI = nullptr;
if (any_of(VTLayout.vtable_components(),
[](const VTableComponent &VTC) { return VTC.isRTTIKind(); }))
RTTI = getMSCompleteObjectLocator(RD, *Info);
// Start building the vtable Constant.
ConstantInitBuilder Builder(CGM);
auto Components = Builder.beginStruct();
// First level: this call is where the ConstantArray gets created.
CGVT.createVTableInitializer(Components, VTLayout, RTTI);
// Second level: the enclosing aggregate is finished here and installed
// as the initializer of VTable.
Components.finishAndSetAsInitializer(VTable);
emitVTableTypeMetadata(*Info, RD, VTable);
}
}
createVTableInitializer:创建 VTable 全局变量的 initializer(初始值)
// Fills `builder` with one array per vtable described by `layout`. Each array
// receives the components in [getVTableOffset(i), getVTableOffset(i) +
// getVTableSize(i)), and `rtti` is forwarded to addVTableComponent.
void CodeGenVTables::createVTableInitializer(ConstantStructBuilder &builder,
const VTableLayout &layout,
llvm::Constant *rtti) {
unsigned nextVTableThunkIndex = 0;
for (unsigned i = 0, e = layout.getNumVTables(); i != e; ++i) {
// Open an array builder whose element type is Int8PtrTy.
auto vtableElem = builder.beginArray(CGM.Int8PtrTy);
size_t thisIndex = layout.getVTableOffset(i);
size_t nextIndex = thisIndex + layout.getVTableSize(i);
// Append every component of this vtable to vtableElem (the array builder).
// Note: this inner `i` shadows the outer loop variable of the same name.
for (unsigned i = thisIndex; i != nextIndex; ++i) {
addVTableComponent(vtableElem, layout, i, rtti, nextVTableThunkIndex);
}
// Finish the array and add the resulting constant to the struct builder;
// the enclosing struct constant is created later from that buffer.
vtableElem.finishAndAddTo(builder);
}
}
addVTableComponent 遍历每个虚函数,为每个虚函数获取这个函数的Constant,加入到vtableElem 这个builder里
// Appends the constant for vtable component `idx` of `layout` to `builder`.
// `rtti` is used for the RTTI component; `nextVTableThunkIndex` is advanced
// each time a thunk entry is consumed.
// NOTE(review): this is an excerpt; part of the original body is elided and
// addOffsetConstant is not defined in the visible code.
void CodeGenVTables::addVTableComponent(
ConstantArrayBuilder &builder, const VTableLayout &layout,
unsigned idx, llvm::Constant *rtti, unsigned &nextVTableThunkIndex)
{
// Look up the component descriptor for this index in the layout.
auto &component = layout.vtable_components()[idx];
switch (component.getKind()) {
case VTableComponent::CK_VCallOffset:
return addOffsetConstant(component.getVCallOffset());
case VTableComponent::CK_VBaseOffset:
return addOffsetConstant(component.getVBaseOffset());
case VTableComponent::CK_OffsetToTop:
return addOffsetConstant(component.getOffsetToTop());
// RTTI entry: the rtti constant bitcast to Int8PtrTy.
case VTableComponent::CK_RTTI:
return builder.add(llvm::ConstantExpr::getBitCast(rtti, CGM.Int8PtrTy));
// Virtual functions and the two destructor variants.
case VTableComponent::CK_FunctionPointer:
case VTableComponent::CK_CompleteDtorPointer:
case VTableComponent::CK_DeletingDtorPointer:
{
// First determine the GlobalDecl of the function.
GlobalDecl GD;
switch (component.getKind()) {
default:
llvm_unreachable("Unexpected vtable component kind");
// Ordinary virtual function.
case VTableComponent::CK_FunctionPointer:
GD = component.getFunctionDecl();
break;
// Complete destructor (Dtor_Complete).
case VTableComponent::CK_CompleteDtorPointer:
GD = GlobalDecl(component.getDestructorDecl(), Dtor_Complete);
break;
// Deleting destructor (Dtor_Deleting).
case VTableComponent::CK_DeletingDtorPointer:
GD = GlobalDecl(component.getDestructorDecl(), Dtor_Deleting);
break;
}
llvm::Constant *fnPtr;
// Some code from the original function is elided from this excerpt.
// ...
// Pure virtual member functions.
// Use the (lazily created) pure-virtual-call handler as the slot value.
if (cast<CXXMethodDecl>(GD.getDecl())->isPure()) {
if (!PureVirtualFn)
PureVirtualFn =
getSpecialVirtualFn(CGM.getCXXABI().GetPureVirtualCallName());
fnPtr = PureVirtualFn;
// Deleted virtual member functions.
// Use the (lazily created) deleted-virtual-call handler as the slot value.
} else if (cast<CXXMethodDecl>(GD.getDecl())->isDeleted()) {
if (!DeletedVirtualFn)
DeletedVirtualFn =
getSpecialVirtualFn(CGM.getCXXABI().GetDeletedVirtualCallName());
fnPtr = DeletedVirtualFn;
// Thunks. Taken when the next unconsumed entry of layout.vtable_thunks()
// is keyed to this component index.
} else if (nextVTableThunkIndex < layout.vtable_thunks().size() &&
layout.vtable_thunks()[nextVTableThunkIndex].first == idx) {
auto &thunkInfo = layout.vtable_thunks()[nextVTableThunkIndex].second;
nextVTableThunkIndex++;
fnPtr = maybeEmitThunk(GD, thunkInfo, /*ForVTable=*/true);
// Otherwise we can use the method definition directly.
} else {
// Ordinary virtual functions get their function address here.
llvm::Type *fnTy = CGM.getTypes().GetFunctionTypeForVTable(GD);
fnPtr = CGM.GetAddrOfFunction(GD, fnTy, /*ForVTable=*/true);
}
// Bitcast fnPtr to Int8PtrTy (the i8* bitcast seen in the IR).
fnPtr = llvm::ConstantExpr::getBitCast(fnPtr, CGM.Int8PtrTy);
// Append it to the buffer of builder (vtableElem in the caller).
builder.add(fnPtr);
return;
}
case VTableComponent::CK_UnusedFunctionPointer:
return builder.addNullPointer(CGM.Int8PtrTy);
}
}
// Add a new value to this initializer.
// Appends `value` to the buffer owned by the underlying Builder; in the
// vtable walkthrough this is how each constant reaches the array builder.
void add(llvm::Constant *value)
{
Builder.Buffer.push_back(value);
}
vtableElem.finishAndAddTo(builder):
// Finishes this builder (finishImpl) and adds the resulting constant to
// `parent`. In the walkthrough this is called on the array builder, with the
// struct builder as `parent`.
void finishAndAddTo(AggregateBuilderBase &parent) {
parent.add(asImpl().finishImpl());
}
从builder里生成ConstantArray
// Finishes this builder via finishImpl() and adds the produced constant to
// `parent`.
void finishAndAddTo(AggregateBuilderBase &parent) {
parent.add(asImpl().finishImpl());
}
// Forms the values stored in the builder into a single Constant by
// delegating to finishArray(EltTy); the result is an array constant.
llvm::Constant *finishImpl() {
return AggregateBuilderBase::finishArray(EltTy);
}
// Builds an array constant from the buffer entries starting at `Begin`, then
// removes those entries from the buffer. If `eltTy` is null, the type of the
// first element is used as the element type.
llvm::Constant *ConstantAggregateBuilderBase::finishArray(llvm::Type *eltTy)
{
markFinished();
// Fetch the shared buffer; in the vtable walkthrough it holds the entries
// added by addVTableComponent (RTTI and function pointers).
auto &buffer = getBuffer();
// View the buffer as an ArrayRef and keep only this builder's part.
auto elts = llvm::makeArrayRef(buffer).slice(Begin);
if (!eltTy) eltTy = elts[0]->getType();
// Array type: element type eltTy, length elts.size().
auto type = llvm::ArrayType::get(eltTy, elts.size());
// Build the ConstantArray from elts; each element is one constant that was
// added to the builder (one vtable entry in the walkthrough).
auto constant = llvm::ConstantArray::get(type, elts);
// Drop this builder's entries from the buffer now that they are consumed.
buffer.erase(buffer.begin() + Begin, buffer.end());
return constant;
}
// Returns a constant for array type Ty with elements V: the result of
// getImpl(Ty, V) when it is non-null, otherwise an entry obtained from the
// context's ArrayConstants table.
Constant *ConstantArray::get(ArrayType *Ty, ArrayRef<Constant*> V) {
if (Constant *C = getImpl(Ty, V))
return C;
// The vtable walkthrough reaches this path: the ConstantArray is looked up
// or created here, and its elements are the constants added by
// addVTableComponent.
return Ty->getContext().pImpl->ArrayConstants.getOrCreate(Ty, V);
}
// Returns the constant stored in Map for (Ty, V), creating and using a new
// one via create() when no matching entry is found.
ConstantClass *getOrCreate(TypeClass *Ty, ValType V) {
// Debug trace added for the walkthrough.
printf("ConstantClass *getOrCreate\n");
LookupKey Key(Ty, V);
/// Hash once, and reuse it for the lookup and the insertion if needed.
LookupKeyHashed Lookup(MapInfo::getHashValue(Key), Key);
ConstantClass *Result = nullptr;
auto I = Map.find_as(Lookup);
// Not found: create a new constant; found: reuse the existing one.
if (I == Map.end())
Result = create(Ty, V, Lookup);
else
Result = *I;
assert(Result && "Unexpected nullptr");
return Result;
}
// Constructs a ConstantArray of type T from the element constants V.
// ConstantArray derives from ConstantAggregate, so the ConstantAggregate
// constructor runs first and receives the same elements.
ConstantArray::ConstantArray(ArrayType *T, ArrayRef<Constant *> V)
    : ConstantAggregate(T, ConstantArrayVal, V)
{
  // Debug trace added for the walkthrough: prints the number of elements of
  // the array type (the number of vtable entries in the vtable case).
  // The count is cast to unsigned long long and printed with %llu so the
  // format specifier matches the argument; the original "%d" did not match
  // the 64-bit unsigned count returned by ArrayType::getNumElements().
  printf("ConstantArray::ConstantArray %llu\n",
         static_cast<unsigned long long>(T->getNumElements()));
  assert(V.size() == T->getNumElements() &&
         "Invalid initializer for constant array");
}
// Constructs a ConstantAggregate of type T holding the element constants V
// as its operands (all vtable entries in the walkthrough).
// ConstantAggregate derives from Constant, so the Constant constructor is
// invoked first with room for V.size() operands.
ConstantAggregate::ConstantAggregate(CompositeType *T, ValueTy VT,
ArrayRef<Constant *> V)
: Constant(T, VT, OperandTraits<ConstantAggregate>::op_end(this) - V.size(),
V.size())
{
// Key step: copy every element constant in V into this aggregate's operand
// list, starting at op_begin().
llvm::copy(V, op_begin());
// Check that types match, unless this is an opaque struct.
if (auto *ST = dyn_cast<StructType>(T))
if (ST->isOpaque())
return;
for (unsigned I = 0, E = V.size(); I != E; ++I)
assert(V[I]->getType() == T->getTypeAtIndex(I) &&
"Initializer for composite element doesn't match!");
}
components.finishAndSetAsInitializer(VTable);
// Finishes this builder and installs the resulting constant as the
// initializer of `global`. For ConstantStructBuilder, finishImpl() is where
// the ConstantStruct is created.
void finishAndSetAsInitializer(llvm::GlobalVariable *global)
{
return this->Builder.setGlobalInitializer(global, asImpl().finishImpl());
}
// Builds a struct constant from the buffer entries starting at `Begin`, then
// removes those entries from the buffer. With a non-null `ty` the struct has
// that type; otherwise an anonymous struct is created from the elements.
llvm::Constant *
ConstantAggregateBuilderBase::finishStruct(llvm::StructType *ty) {
markFinished();
// In the vtable walkthrough the buffer holds a single entry at this point:
// the ConstantArray produced by finishArray.
auto &buffer = getBuffer();
auto elts = llvm::makeArrayRef(buffer).slice(Begin);
// No type given and no elements: use an empty struct type.
if (ty == nullptr && elts.empty())
ty = llvm::StructType::get(Builder.CGM.getLLVMContext(), {}, Packed);
llvm::Constant *constant;
if (ty) {
constant = llvm::ConstantStruct::get(ty, elts);
} else {
// The vtable walkthrough takes this branch (no explicit struct type).
constant = llvm::ConstantStruct::getAnon(elts, Packed);
}
// Drop this builder's entries from the buffer now that they are consumed.
buffer.erase(buffer.begin() + Begin, buffer.end());
return constant;
}
// Installs `initializer` as the initializer of GV and, if any self-references
// were recorded, resolves them against GV.
void ConstantInitBuilderBase::setGlobalInitializer(llvm::GlobalVariable *GV,
llvm::Constant *initializer)
{
// Store the Constant produced by finishStruct (a ConstantStruct in the
// vtable walkthrough) into the GlobalVariable.
GV->setInitializer(initializer);
if (!SelfReferences.empty())
resolveSelfReferences(GV);
}
到这里虚表的GlobalVariable就创建出来了
几个类之间的关系:
1.3 遍历虚表GlobalVariable
看懂了上面的构建代码,就很容易写出对应的虚表遍历代码。
每个 Initializer 都是一个 ConstantStruct,其中有一个或者多个 ConstantArray,每个 ConstantArray 里又有多个 Constant,每一个 Constant 对应虚表里的一项(RTTI 或虚函数),所以嵌套遍历两层就可以获取每个虚表项的 Constant。
代码如下:
// Walks every unnamed global in gValList that has an initializer shaped like
// a vtable (a ConstantStruct whose members are ConstantArrays) and prints
// every entry of every array.
//
// All output goes through outs(): mixing printf with outs() uses two
// separately buffered streams, so lines could appear out of order.
for (GlobalVariable &gv : gValList) {
  // Only unnamed globals with an initializer are considered; the vtable in
  // the sample IR is the unnamed global @15.
  if (gv.hasName() || !gv.hasInitializer())
    continue;

  // The vtable initializer is a ConstantStruct; skip any other kind of
  // initializer instead of walking it as if it were one.
  auto *cs = dyn_cast<llvm::ConstantStruct>(gv.getInitializer());
  if (cs == nullptr)
    continue;

  // Number of struct members; for the vtables discussed here this is 1.
  const unsigned arrayNumber = cs->getNumOperands();
  for (unsigned i = 0; i < arrayNumber; ++i) {
    // Each member is expected to be the ConstantArray that holds the vtable
    // entries; members of any other kind are skipped.
    llvm::Constant *member = cs->getAggregateElement(i);
    auto *item1 =
        member != nullptr ? dyn_cast<llvm::ConstantArray>(member) : nullptr;
    if (item1 == nullptr)
      continue;

    // Number of entries in this vtable array.
    const unsigned vtableNumber = item1->getNumOperands();
    outs() << "getNumOperands:" << vtableNumber << "\n";
    for (unsigned j = 0; j < vtableNumber; ++j) {
      // item2 is one vtable entry (RTTI pointer or function pointer).
      llvm::Value *item2 = item1->getOperand(j);
      if (item2 != nullptr)
        outs() << *item2 << "\r\n";
    }
  }
}
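获取到每个虚表项的 Constant 之后,创建一个新的 Constant newItem,再调用 item1->setOperand(j, newItem); 就可以修改虚表内容了。注意:LLVM 的 Constant 是唯一化(uniqued)的,原地修改操作数可能影响到共享同一常量的其他使用者;更稳妥的做法是用修改后的元素重新构造 ConstantArray / ConstantStruct,再通过 gv.setInitializer() 设置回去。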
参考资料:
http://llvm.org/doxygen/modules.html