第七章
While LLVM does require all register values to be in SSA form,
it does not require (or permit) memory objects to be in SSA form.
/* G and H are globals read by test(). */
int G, H;
/* Returns G when Condition is true and H otherwise. X is a mutable local
   assigned on two different control-flow paths before it is read. */
int test(_Bool Condition) {
int X;
if (Condition)
X = G; /* definition of X on the true path */
else
X = H; /* definition of X on the false path */
return X; /* single read of X after the two paths join */
}
有PHI的IR
; Version of test() that uses a phi: each branch loads its own value and the
; join block selects the value that matches the edge control arrived on.
@G = weak global i32 0 ; type of @G is i32*
@H = weak global i32 0 ; type of @H is i32*
define i32 @test(i1 %Condition) {
entry:
br i1 %Condition, label %cond_true, label %cond_false ; branch on the condition
cond_true:
%X.0 = load i32* @G ; value of X on the true path
br label %cond_next
cond_false:
%X.1 = load i32* @H ; value of X on the false path
br label %cond_next
cond_next:
; %X.2 is %X.1 when coming from %cond_false and %X.0 when coming from %cond_true.
%X.2 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
ret i32 %X.2
}
@G defines space for an i32 in the global data area, but its name actually refers to the address for that space.
无PHI的IR
; Version of test() without a phi: X gets a stack slot (alloca), each branch
; stores into it, and the join block loads the result back.
@G = weak global i32 0 ; type of @G is i32*
@H = weak global i32 0 ; type of @H is i32*
define i32 @test(i1 %Condition) {
entry:
%X = alloca i32 ; type of %X is i32*.
br i1 %Condition, label %cond_true, label %cond_false
cond_true:
%X.0 = load i32* @G ; read G
store i32 %X.0, i32* %X ; Update X
br label %cond_next
cond_false:
%X.1 = load i32* @H ; read H
store i32 %X.1, i32* %X ; Update X
br label %cond_next
cond_next:
%X.2 = load i32* %X ; Read X
ret i32 %X.2
}
This gives us a way to handle arbitrary mutable variables without the need to create Phi nodes at all:
- Each mutable variable becomes a stack allocation.
- Each read of the variable becomes a load from the stack.
- Each update of the variable becomes a store to the stack.
- Taking the address of a variable just uses the stack address
directly.
mem2reg
The LLVM optimizer has a highly-tuned optimization pass named “mem2reg” that handles this case, promoting allocas like this into SSA registers and inserting Phi nodes as appropriate.
Note that mem2reg only works on variables in certain circumstances:
- mem2reg is alloca-driven: it looks for allocas and if it can handle
them, it promotes them. It does not apply to global variables or
heap allocations.
- mem2reg only looks for alloca instructions in the entry block of the
function. Being in the entry block guarantees that the alloca is
only executed once, which makes analysis simpler.
- mem2reg only promotes allocas whose uses are direct loads and
stores. If the address of the stack object is passed to a function,
or if any funny pointer arithmetic is involved, the alloca will not
be promoted.
- mem2reg only works on allocas of first class values (such as
pointers, scalars and vectors), and only if the array size of the
allocation is 1 (or missing in the .ll file).
- mem2reg is not capable of promoting structs or arrays to registers.
Note that the “sroa” pass is more powerful and can promote structs,
“unions”, and arrays in many cases.
用mem2reg的三个理由:
- Proven and well tested: clang uses this technique for local mutable
variables. As such, the most common clients of LLVM are using this
to handle a bulk of their variables. You can be sure that bugs are
found fast and fixed early.
- Extremely Fast: mem2reg has a number of special cases that make it
fast in common cases as well as fully general. For example, it has
fast-paths for variables that are only used in a single block,
variables that only have one assignment point, good heuristics to
avoid insertion of unneeded phi nodes, etc.
- Needed for debug info generation: Debug information in LLVM relies
on having the address of the variable exposed so that debug info can
be attached to it. This technique dovetails very naturally with this
style of debug info.
在Kaleidoscope中加入可变量
- symbol table的改变
// Symbol table: maps each variable name to the alloca instruction for that
// variable (previously the map held plain values; it now holds allocas).
static std::map<std::string, AllocaInst*> NamedValues;
- 在entry block加入一个alloca指令
/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
/// the function. This is used for mutable variables etc.
///
/// \param TheFunction function whose entry block receives the alloca.
/// \param VarName     name given to the new alloca instruction.
/// \returns the newly created alloca, whose allocated type is double.
static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
                                          const std::string &VarName) {
  // Temporary builder positioned at the first instruction (.begin()) of the
  // entry block, so the alloca is emitted ahead of the existing instructions
  // there.
  IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
                   TheFunction->getEntryBlock().begin());
  // The array-size argument is a pointer; pass nullptr (not the literal 0) to
  // request a single element. The name is handed over as the std::string
  // itself instead of going through c_str().
  return TmpB.CreateAlloca(Type::getDoubleTy(TheContext), nullptr, VarName);
}
- 变量的code generation
/// Generate code for a reference to a variable.
///
/// Looks Name up in NamedValues and emits a load from the variable's stack
/// slot.
/// \returns the loaded value, or the result of LogErrorV when the name is not
///          bound to anything.
Value *VariableExprAST::codegen() {
  // Look this variable up in the function. Use find() rather than operator[]:
  // operator[] would insert a null entry into the symbol table for every
  // unknown name that is looked up.
  auto It = NamedValues.find(Name);
  if (It == NamedValues.end() || !It->second)
    return LogErrorV("Unknown variable name");
  // Variables live on the stack, so code generating a reference
  // to them actually needs to produce a load from the stack slot.
  Value *V = It->second;
  return Builder.CreateLoad(V, Name.c_str());
}
- For loop里的PHI改成Alloca
// Part of ForExprAST::codegen()
Function *TheFunction = Builder.GetInsertBlock()->getParent();
// Create an alloca for the variable in the entry block.
AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
// Emit the start code first, without 'variable' in scope.
Value *StartVal = Start->codegen();
if (!StartVal)
return nullptr;
// Store the value into the alloca.
Builder.CreateStore(StartVal, Alloca);
...
// Compute the end condition.
Value *EndCond = End->codegen();
if (!EndCond)
return nullptr;
// Reload, increment, and restore the alloca. This handles the case where
// the body of the loop mutates the variable.
Value *CurVar = Builder.CreateLoad(Alloca);
Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
Builder.CreateStore(NextVar, Alloca);
Data flow: StartVal => store to alloca => load from alloca (CurVar) => add StepVal (NextVar) => store back to alloca
- Function的arguments改写成Alloca
Function *FunctionAST::codegen() {
...
Builder.SetInsertPoint(BB);
// Record the function arguments in the NamedValues map.
NamedValues.clear();
for (auto &Arg : TheFunction->args()) {
// Create an alloca for this variable.
AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
// Store the initial value into the alloca.
Builder.CreateStore(&Arg, Alloca);
// Add arguments to variable symbol table.
NamedValues[Arg.getName()] = Alloca;
}
if (Value *RetVal = Body->codegen()) {
...
- 最后加入mem2reg优化
// Promote allocas to registers.
TheFPM->add(createPromoteMemoryToRegisterPass());
- mem2reg前后IR比较
# Define ':' for sequencing: a low-precedence (1) binary operator that ignores
# the value of its left operand and just returns the RHS.
def binary : 1 (x y) y;
# Recursive fib, we could do this before.
def fib(x)
if (x < 3) then
1
else
fib(x-1)+fib(x-2);
# Iterative fib: a, b and c are mutable variables that are reassigned inside
# the loop body; the ':' operator chains the assignments and the final b is
# the result.
def fibi(x)
var a = 1, b = 1, c in
(for i = 3, i < x in
c = a + b :
a = b :
b = c) :
b;
# Call it.
fibi(10);
; fib before mem2reg: the argument %x is spilled to an alloca and reloaded at
; every use.
define double @fib(double %x) {
entry:
%x1 = alloca double ; alloca in entry block
store double %x, double* %x1 ; store %x to alloca
%x2 = load double, double* %x1 ; load from alloca to %x2
%cmptmp = fcmp ult double %x2, 3.000000e+00 ; check if %x2 < 3.0 (ult: unordered or less than)
%booltmp = uitofp i1 %cmptmp to double ; i1 => double
%ifcond = fcmp one double %booltmp, 0.000000e+00 ; check if %booltmp != 0.0
br i1 %ifcond, label %then, label %else ; if %x < 3.0 goto then else goto else
then: ; preds = %entry
br label %ifcont
else: ; preds = %entry
%x3 = load double, double* %x1 ; load from alloca to %x3
%subtmp = fsub double %x3, 1.000000e+00 ; %x3 - 1
%calltmp = call double @fib(double %subtmp) ; fib(%x3 - 1)
%x4 = load double, double* %x1 ; load from alloca to %x4
%subtmp5 = fsub double %x4, 2.000000e+00 ; %x4 - 2
%calltmp6 = call double @fib(double %subtmp5) ; fib(%x4 - 2)
%addtmp = fadd double %calltmp, %calltmp6 ; fib(%x3 - 1) + fib(%x4 - 2)
br label %ifcont
ifcont: ; preds = %else, %then
%iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
; then => 1, else => fib(x - 1) + fib(x - 2)
ret double %iftmp
}
mem2reg优化后IR
; fib after mem2reg: there is no alloca, store or load left; every use of the
; argument refers to %x directly.
define double @fib(double %x) {
entry:
%cmptmp = fcmp ult double %x, 3.000000e+00 ; %x < 3.0 (ult: unordered or less than)
%booltmp = uitofp i1 %cmptmp to double ; i1 => double
%ifcond = fcmp one double %booltmp, 0.000000e+00 ; %booltmp != 0.0
br i1 %ifcond, label %then, label %else
then:
br label %ifcont
else:
%subtmp = fsub double %x, 1.000000e+00 ; %x - 1
%calltmp = call double @fib(double %subtmp) ; fib(%x - 1)
%subtmp5 = fsub double %x, 2.000000e+00 ; %x - 2
%calltmp6 = call double @fib(double %subtmp5) ; fib(%x - 2)
%addtmp = fadd double %calltmp, %calltmp6 ; fib(%x - 1) + fib(%x - 2)
br label %ifcont
ifcont: ; preds = %else, %then
%iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
ret double %iftmp
}