0, 基本情况介绍
本文的代码目前只适合在 LLVM 3.4.0 及相近版本上编译;在较新的 LLVM 版本(例如 llvm-18)中,部分函数和数据结构(如 getGlobalContext()、tool_output_file、头文件 llvm/Bitcode/ReaderWriter.h 等)已被修改或移除,因此无法直接编译。
1,原始c语言文件
sum.c
/* Return the sum of the two integer arguments. */
int sum(int a, int b) {
    return a + b;
}
2,编译成为 LLVM-IR 汇编语言
clang sum.c -emit-llvm -S -c -o sum.ll
3,用 LLVM C++ API 手工构造与 sum.ll 等价的 IR,并写出 bitcode
3.1,源码
gen_llvm_ir.cpp
#include <llvm/ADT/SmallVector.h>
#include <llvm/IR/BasicBlock.h>
#include <llvm/IR/CallingConv.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/Bitcode/ReaderWriter.h>
#include <llvm/Support/FileSystem.h>
#include <llvm/Support/ToolOutputFile.h>
using namespace llvm;
// Build, by hand through the LLVM C++ API, an in-memory module that mirrors
// the unoptimized IR clang emits for sum.c. Nothing is parsed here: every
// type, argument and instruction is created explicitly.
//
// Returns a Module allocated with `new`; the caller is responsible for
// deleting it.
Module *makeLLVMModule()
{
    Module *mod = new Module("sum.ll", getGlobalContext());
    LLVMContext &ctx = mod->getContext();

    // i32 is used for both parameters, the return value and both stack slots,
    // so look it up once.
    IntegerType *int32Ty = IntegerType::get(ctx, 32);

    // Function prototype: i32 (i32, i32), not variadic.
    SmallVector<Type*, 2> FuncTyArgs;
    FuncTyArgs.push_back(int32Ty);
    FuncTyArgs.push_back(int32Ty);
    FunctionType *FuncTy = FunctionType::get(int32Ty, FuncTyArgs, false);

    // Create the externally visible function "sum" inside the module.
    Function *funcSum = Function::Create(FuncTy, GlobalValue::ExternalLinkage, "sum", mod);
    // Use the default C calling convention.
    funcSum->setCallingConv(CallingConv::C);

    // Name the two formal arguments. The iterator is dereferenced explicitly
    // (&*it) rather than relying on an implicit iterator-to-pointer conversion.
    Function::arg_iterator argIt = funcSum->arg_begin();
    Value *int32_a = &*argIt++;
    int32_a->setName("a");
    Value *int32_b = &*argIt++;
    int32_b->setName("b");

    // Single basic block "entry"; every instruction below is appended to it
    // in program order.
    BasicBlock *labelEntry = BasicBlock::Create(ctx, "entry", funcSum);

    // %a.addr = alloca i32, align 4
    AllocaInst *ptrA = new AllocaInst(int32Ty, "a.addr", labelEntry);
    ptrA->setAlignment(4);
    // %b.addr = alloca i32, align 4
    AllocaInst *ptrB = new AllocaInst(int32Ty, "b.addr", labelEntry);
    ptrB->setAlignment(4);

    // store i32 %a, i32* %a.addr, align 4
    StoreInst *st0 = new StoreInst(int32_a, ptrA, false, labelEntry);
    st0->setAlignment(4);
    // store i32 %b, i32* %b.addr, align 4
    StoreInst *st1 = new StoreInst(int32_b, ptrB, false, labelEntry);
    st1->setAlignment(4);

    // %0 = load i32* %a.addr, align 4
    LoadInst *ld0 = new LoadInst(ptrA, "", false, labelEntry);
    ld0->setAlignment(4);
    // %1 = load i32* %b.addr, align 4
    LoadInst *ld1 = new LoadInst(ptrB, "", false, labelEntry);
    ld1->setAlignment(4);

    // %add = add i32 %0, %1
    // Note: plain Create() does not set the nsw flag, so the result is
    // `add`, not the `add nsw` that clang emits for signed int addition.
    BinaryOperator *addRes = BinaryOperator::Create(Instruction::Add, ld0, ld1, "add", labelEntry);

    // ret i32 %add
    ReturnInst::Create(ctx, addRes, labelEntry);

    return mod;
}
int main(int argc, char **argv)
{
Module *Mod = makeLLVMModule();// 将 llvm ir 源文件转化成了内存中的 bitcode 表达, 写入了 Module 之中。
std::string ErrorInfo;
// 定义输出文件
std::unique_ptr<tool_output_file> Out(new tool_output_file("./sum.bc", ErrorInfo, sys::fs::F_None));
if (!ErrorInfo.empty()) {
errs() << ErrorInfo << "\n";
return -1;
}
// 将内存中的 bitcode 格式写入输出文件,即压缩的 bitcode
WriteBitcodeToFile(Mod, Out->os());
Out->keep();
return 0;
}
Makefile
# Build gen_llvm_ir against the LLVM installation described by llvm-config.
# Override LLVM_CONFIG to select a specific LLVM version; set VERBOSE=1 to
# see the full compiler and linker command lines.
LLVM_CONFIG ?= llvm-config
#CXX := clang++

# Recipes are silenced unless VERBOSE is defined.
ifndef VERBOSE
QUIET :=@
endif

# CURDIR is set by make itself and stays correct under `make -C dir`.
SRC_DIR ?= $(CURDIR)

LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags)
COMMON_FLAGS = -Wall -Wextra
CXXFLAGS += $(COMMON_FLAGS) $(shell $(LLVM_CONFIG) --cxxflags)
# LCXX is not referenced by any rule in this Makefile.
LCXX :=$(shell $(LLVM_CONFIG) --cxxflags)
CPPFLAGS += $(shell $(LLVM_CONFIG) --cppflags) -I$(SRC_DIR)

CLANGLIBS = \
	-Wl,--start-group \
	-lclang \
	-lclangFrontend \
	-lclangDriver \
	-lclangSerialization \
	-lclangParse \
	-lclangSema \
	-lclangAnalysis \
	-lclangEdit \
	-lclangAST \
	-lclangLex \
	-lclangBasic \
	-Wl,--end-group
LLVMLIBS = $(shell $(LLVM_CONFIG) --libs)

PROJECT = gen_llvm_ir
PROJECT_OBJECTS = gen_llvm_ir.o

.PHONY: default
default: $(PROJECT)

# Compile one source file. CPPFLAGS carries the LLVM include paths and -I$(SRC_DIR).
%.o : $(SRC_DIR)/%.cpp
	@echo Compiling $*.cpp
	$(QUIET)$(CXX) -c $(CPPFLAGS) $(CXXFLAGS) $<

# Link the executable; libraries come after the objects that use them.
$(PROJECT) : $(PROJECT_OBJECTS)
	@echo Linking $@
	$(QUIET)$(CXX) -o $@ $(LDFLAGS) $^ $(CLANGLIBS) $(LLVMLIBS) -lncurses

.PHONY: clean
clean:
	$(QUIET)rm -f $(PROJECT) $(PROJECT_OBJECTS)

# Print the effective build variables for debugging.
.PHONY: echo
echo:
	@echo "CXX is $(CXX)"
	@echo "LDFLAGS is $(LDFLAGS)"
	@echo "CXXFLAGS is $(CXXFLAGS)"
	@echo "CPPFLAGS is $(CPPFLAGS)"
	@echo "SRC_DIR is $(SRC_DIR)"
3.2,测试
注意:
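用 llvm-dis 反汇编 sum.bc 得到的 .ll 文件中,最后的加法指令是 %add = add i32 %0, %1,没有 nsw 关键字。原因是代码里用的是 BinaryOperator::Create(Instruction::Add, ...),它不会设置 nsw(no signed wrap)标志;如果想得到与 clang 输出一致的 add nsw,可以改用 BinaryOperator::CreateNSWAdd(...),或者对结果调用 setHasNoSignedWrap(true)。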