LLVM后端__llc中值定义信息的查询方法示例

关于LiveIntervals pass中相关数据结构的含义,在寄存器分配前置分析(5.1) - LiveInterval这篇博客中已经做了清晰的讲解,此处不再赘述,本文主要讲解值定义信息VNInfo的使用方法和注意事项。

1. VNInfo含义

在LLVM的源码中,VNInfo定义在include/llvm/CodeGen/LiveInterval.h中,该结构主要表示machine级别的值定义信息。例如在下面的
MachineBasicBlock bb.1中,%4是一个vreg,对于96B处的use点,其定义来自80B处的COPY指令(对应的def SlotIndex为80r);后面通过%4的LiveInterval查询96B处使用的%4的定义信息时,得到的就是描述该定义的VNInfo对象。

值得注意的是,VNInfo还提供了isPHIDef接口,可以查询一个值是否是由PHI定义的(即使PHI指令已经被消除也可以查询),实现原理是
判断该值的def是不是Basic block boundary类型的SlotIndex:普通值的定义点位于Normal register def(或Early-clobber)slot,只有PHI值的定义点才是Basic block boundary类型。

64B	bb.1:
	; predecessors: %bb.0
	  successors: %bb.3(0x80000000); %bb.3(100.00%)

80B	  %4:gpr32all = COPY $wzr
96B	  %3:gpr32all = COPY %4:gpr32all
112B	  %12:gpr32all = COPY %3:gpr32all
128B	  B %bb.3
  /// VNInfo - Value Number Information.
  /// This class holds information about a machine level values, including
  /// definition and use points.
  ///
  class VNInfo {
  public:
    using Allocator = BumpPtrAllocator;

    /// The ID number of this value.
    unsigned id;

    /// The index of the defining instruction.
    SlotIndex def;

    /// VNInfo constructor.
    VNInfo(unsigned i, SlotIndex d) : id(i), def(d) {}

    /// VNInfo constructor, copies values from orig, except for the value number.
    VNInfo(unsigned i, const VNInfo &orig) : id(i), def(orig.def) {}

    /// Copy from the parameter into this VNInfo.
    void copyFrom(VNInfo &src) {
      def = src.def;
    }

    /// Returns true if this value is defined by a PHI instruction (or was,
    /// PHI instructions may have been eliminated).
    /// PHI-defs begin at a block boundary, all other defs begin at register or
    /// EC slots.
    bool isPHIDef() const { return def.isBlock(); }

    /// Returns true if this value is unused.
    bool isUnused() const { return !def.isValid(); }

    /// Mark this value as unused.
    void markUnused() { def = SlotIndex(); }
  };

2. 如何在llc后端pass中查询一个值在Use点对应的定义点

2.1 编写一个简单的示例

// learn_vni_info.cc
int VNIInfoLearn(int n) {
    // Accumulates 0 + 1 + ... + (n - 1); the result stays 0 when n <= 0.
    int sum = 0;
    int idx = 0;
    while (idx < n) {
        sum += idx;
        ++idx;
    }

    return sum;
}

使用命令clang -O1 -S -emit-llvm learn_vni_info.cc -o learn_vni_info.ll将learn_vni_info.cc编译为ll文件(这里我是用的NDK中自带的clang,
因为我编出来的代码一般在安卓设备上运行,ndk中已经配置好了交叉编译环境,对应LLVM-17)

gwz@DESKTOP-VNM3O2M:~/work/learn_llvm/vni_info$ cat learn_vni_info.ll
; ModuleID = 'learn_vni_info.cc'
source_filename = "learn_vni_info.cc"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
define dso_local noundef i32 @_Z12VNIInfoLearni(i32 noundef %0) local_unnamed_addr #0 {
  %2 = icmp sgt i32 %0, 0
  br i1 %2, label %3, label %13

3:                                                ; preds = %1
  %4 = add i32 %0, -1
  %5 = zext i32 %4 to i33
  %6 = add i32 %0, -2
  %7 = zext i32 %6 to i33
  %8 = mul i33 %5, %7
  %9 = lshr i33 %8, 1
  %10 = trunc i33 %9 to i32
  %11 = add i32 %10, %0
  %12 = add i32 %11, -1
  br label %13

13:                                               ; preds = %3, %1
  %14 = phi i32 [ 0, %1 ], [ %12, %3 ]
  ret i32 %14
}

attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

!llvm.module.flags = !{!0, !1, !2, !3}
!llvm.ident = !{!4}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{!"Android (11349228, +pgo, +bolt, +lto, -mlgo, based on r487747e) clang version 17.0.2 (https://android.googlesource.com/toolchain/llvm-project d9f89f4d16663d5012e5c09495f3b30ece3d2362)"}

然后使用命令~/work/llvm-project/build/bin/opt --passes='view-cfg' learn_vni_info.ll生成CFG图,然后使用
dotty /tmp/cfg._Z12VNIInfoLearni-9cd348.dot观察结构。(ndk中没有prebuilt的opt和llc,这里的opt我是自己编译的LLVM-19版本,
SSH界面使用的mobaxterm)
(此处原为上述命令生成的CFG示意图,图片在文本提取时丢失)
可以看到这里有个PHI值%14。Phi消除后,该Phi值在2个前驱(source)block中的来源值都会通过COPY写入同一个vreg,
这里的过程可以参考寄存器分配前置分析(1) — PHIElimination。

为了简单起见,直接对lib/CodeGen/RegisterCoalescer.cpp代码进行一点小改造。之所以改这个pass,是因为
LiveIntervals pass之后紧接着就是这个pass,而对值定义点进行分析需要LiveIntervals分析的结果,从下面的代码中
也可以看到register-coalescer所依赖的pass。

// Reference bound to RegisterCoalescer::ID, exposed as llvm::RegisterCoalescerID.
char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;

// Pass registration for "register-coalescer". The INITIALIZE_PASS_DEPENDENCY
// lines between BEGIN and END name the passes this one is declared to depend
// on: LiveIntervals, SlotIndexes, MachineLoopInfo and AAResultsWrapperPass.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only / is-analysis flags -- confirm against the macro definition.
INITIALIZE_PASS_BEGIN(RegisterCoalescer, "register-coalescer",
                      "Register Coalescer", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(RegisterCoalescer, "register-coalescer",
                    "Register Coalescer", false, false)

对该pass进行小改动,在执行该pass之前,check一下COPY指令SrcReg的def信息,修改代码后重新编译llc

--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -4209,6 +4209,42 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
   LIS = &getAnalysis<LiveIntervals>();
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   Loops = &getAnalysis<MachineLoopInfo>();
+  LLVM_DEBUG(
+    dbgs() << "learn VNInfo ----------------------------\n";
+    const SlotIndexes &Indexes = *LIS->getSlotIndexes();
+    MF->print(dbgs(), &Indexes);
+    for (MachineBasicBlock& MBB : *MF) {
+      for (MachineInstr& MI : MBB) {
+        // 以COPY指令为例,分析COPY的SrcReg定义点
+        if (MI.isCopy()) {
+          MachineOperand SrcOp = MI.getOperand(1);
+          Register SrcReg = SrcOp.getReg();
+          if (SrcReg.isVirtual()) {
+            // 获取SrcReg的LiveInterval信息
+            LiveInterval& LI = LIS->getInterval(SrcReg);
+            // 获取当前MI对应的SlotIndex,也就是在MIR中的编号
+            SlotIndex SIdx = Indexes.getInstructionIndex(MI);
+            // 通过Query接口,查询SrcReg在当前使用点SIdx的定义信息
+            LiveQueryResult LRQ = LI.Query(SIdx);
+            dbgs() << "Cur SlotIndex = " << SIdx << ", MI = " << MI;
+            // valueIn返回当前MI处Use值(live-in)的def信息,如果没有则返回nullptr
+            if (VNInfo* VNI = LRQ.valueIn()) {
+              MachineInstr* DefMI = Indexes.getInstructionFromIndex(VNI->def);
+              dbgs() << SrcOp << " def SlotIndex = " << VNI->def << "\n";
+              // 需要注意,Phi值是Block boundry的SlotIndex定义,无法与MI直接对应
+              if (!VNI->isPHIDef()) {
+                dbgs() << SrcOp << " def MI = " << *DefMI;
+              }
+            } else {
+              dbgs() << "Dont find live in value!";
+            }
+            dbgs() << "\n";
+          }
+        }
+      }
+    }
+  );
+
   if (EnableGlobalCopies == cl::BOU_UNSET)
     JoinGlobalCopies = STI.enableJoinGlobalCopies();
   else

2.2 分析结果

执行~/work/llvm-project/build/bin/llc -march=aarch64 -filetype=obj -debug-only=regalloc -stop-after=register-coalescer learn_vni_info.ll -o learn_vni_info.o > vni.log 2>&1命令将debug信息输出到vni.log中,下面我将关键的部分截取出来(为了减少干扰,在执行完register-coalescer pass后就停止)。

为了便于阅读,我直接将分析写在log中。

********** REGISTER COALESCER **********
********** Function: _Z12VNIInfoLearni
learn VNInfo ----------------------------
# Machine code for function _Z12VNIInfoLearni: NoPHIs, TracksLiveness, TiedOpsRewritten
Function Live Ins: $w0 in %2

0B	bb.0 (%ir-block.1):
	  successors: %bb.2(0x50000000), %bb.1(0x30000000); %bb.2(62.50%), %bb.1(37.50%)
	  liveins: $w0
16B	  %2:gpr32common = COPY $w0
32B	  %5:gpr32 = SUBSWri %2:gpr32common, 1, 0, implicit-def $nzcv
48B	  Bcc 10, %bb.2, implicit killed $nzcv

64B	bb.1:
	; predecessors: %bb.0
	  successors: %bb.3(0x80000000); %bb.3(100.00%)

80B	  %4:gpr32all = COPY $wzr
96B	  %3:gpr32all = COPY %4:gpr32all
112B	  %12:gpr32all = COPY %3:gpr32all
128B	  B %bb.3

144B	bb.2 (%ir-block.3):
	; predecessors: %bb.0
	  successors: %bb.3(0x80000000); %bb.3(100.00%)

160B	  %7:gpr32common = SUBWri %2:gpr32common, 2, 0
176B	  %8:gpr64 = UMADDLrrr %5:gpr32, %7:gpr32common, $xzr
192B	  %9:gpr64 = UBFMXri %8:gpr64, 1, 63
208B	  %10:gpr32 = COPY %9.sub_32:gpr64
224B	  %11:gpr32 = ADDWrr %5:gpr32, %10:gpr32
240B	  %0:gpr32all = COPY %11:gpr32
256B	  %12:gpr32all = COPY %0:gpr32all

272B	bb.3 (%ir-block.13):
	; predecessors: %bb.2, %bb.1

288B	  %1:gpr32all = COPY %12:gpr32all
304B	  $w0 = COPY %1:gpr32all
320B	  RET_ReallyLR implicit $w0

# End machine code for function _Z12VNIInfoLearni.

Cur SlotIndex = 96B, MI = %3:gpr32all = COPY %4:gpr32all
%4:gpr32all def SlotIndex = 80r
%4:gpr32all def MI = %4:gpr32all = COPY $wzr

// %3在112B处被使用,这里定义的%12是bb.3中原Phi值(Phi消除后对应vreg %12)的第一个Source值的定义点,是常量0值
Cur SlotIndex = 112B, MI = %12:gpr32all = COPY %3:gpr32all
%3:gpr32all def SlotIndex = 96r
%3:gpr32all def MI = %3:gpr32all = COPY %4:gpr32all

Cur SlotIndex = 208B, MI = %10:gpr32 = COPY %9.sub_32:gpr64
%9.sub_32:gpr64 def SlotIndex = 192r
%9.sub_32:gpr64 def MI = %9:gpr64 = UBFMXri %8:gpr64, 1, 63

Cur SlotIndex = 240B, MI = %0:gpr32all = COPY %11:gpr32
%11:gpr32 def SlotIndex = 224r
%11:gpr32 def MI = %11:gpr32 = ADDWrr %5:gpr32, %10:gpr32

// %0在256B处被使用,这里定义的%12是bb.3中原Phi值的第二个Source值的定义点
Cur SlotIndex = 256B, MI = %12:gpr32all = COPY %0:gpr32all
%0:gpr32all def SlotIndex = 240r
%0:gpr32all def MI = %0:gpr32all = COPY %11:gpr32

// %12在288B处被使用,这里定义的%1是bb.3中的Phi值,可以看到这里%12的定义点
// 对应的SlotIndex是272B(Block boundary类型,即bb.3的起始位置),而不是272r;此时isPHIDef()为true,因此下面没有输出def MI。
Cur SlotIndex = 288B, MI = %1:gpr32all = COPY %12:gpr32all
%12:gpr32all def SlotIndex = 272B

Cur SlotIndex = 304B, MI = $w0 = COPY %1:gpr32all
%1:gpr32all def SlotIndex = 288r
%1:gpr32all def MI = %1:gpr32all = COPY %12:gpr32all

对于查询结果LiveQueryResult还有不少有用的接口,使用方法都是类似的,读者可以通过上述的简单学习验证方法快速掌握,
这里就不再赘述了。

  • 13
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值