背景:
我现在工作中要写个fuzz引擎,语法分析部分用到了clang的接口,打算写一些博客来记录并介绍一下AST(抽象语法树)、LibTooling接口的使用等等。
介绍
在本教程中,您将学习如何使用RecursiveASTVisitor创建一个FrontendAction,以查找指定目录下文件中的VarDecl AST节点,并用rewriter接口修改变量的右值(初始化表达式),然后重新写入文件。
rewriter是什么?
rewriter是llvm libtooling的一个接口,功能是重新写入被解析的cpp文件(AST节点),可以用于cpp代码的重新编写,hook等技术
前置:
要编译安装llvm,具体步骤在这里
包含的头文件:
#include "clang/Rewrite/Core/Rewriter.h"
创建一个 FrontendAction
首先从MyAction开始,它继承自ASTFrontendAction 这个抽象基类
CreateASTConsumer创建消费者,里面初始化Rewriter
EndSourceFileAction最终要写入文件的接口
class MyAction : public ASTFrontendAction {
public:
std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI, StringRef) override {
Rewriter.setSourceMgr(CI.getSourceManager(), CI.getLangOpts());
return std::make_unique<MyConsumer>(&CI.getASTContext(), Rewriter);
}
//这个接口是修改后,最终写入的接口
void EndSourceFileAction() override {
Rewriter.overwriteChangedFiles();
cout << "overwriteChangedFiles" << endl;
}
clang::Rewriter Rewriter;
};
创建一个 ASTConsumer
ASTConsumer用于读取AST。它提供了许多接口(入口点),分别在某种类型的AST节点被解析时,或者在整个翻译单元被解析完之后被调用,我们可以按需重写(override)这些接口。
我们将重写ASTConsumer::HandleTranslationUnit,以便在获得文件所需的所有信息后读取AST
class MyConsumer : public ASTConsumer {
public:
explicit MyConsumer(ASTContext *Context, clang::Rewriter &Rewriter) : Context(Context), Rewriter(Rewriter) {}
void HandleTranslationUnit(ASTContext &Context) override {
MyVisitor visitor(Rewriter);
visitor.TraverseDecl(Context.getTranslationUnitDecl());
}
private:
ASTContext *Context;
clang::Rewriter &Rewriter;
};
使用 RecursiveASTVisitor
下一步是实现一个RecursiveASTVisitor来从AST中提取相关信息。
RecursiveASTVisitor为大多数 AST (llvm clang AST 介绍)节点提供bool VisitNodeType(NodeType *)形式的 hooks;例外的是TypeLoc节点,它们是按值传递的。我们只需要实现相关节点类型的方法。
让我们从编写一个RecursiveASTVisitor开始,它访问所有的VarDecl,即遍历所有变量
// Skeleton of the visitor: derives from RecursiveASTVisitor (CRTP) and keeps
// a reference to the Rewriter it is constructed with.
class MyVisitor : public RecursiveASTVisitor<MyVisitor> {
public:
explicit MyVisitor(clang::Rewriter &Rewriter) : Rewriter(Rewriter) {}
// Hook for VarDecl nodes; the body is elided here and shown in the
// complete listing.
bool VisitVarDecl(VarDecl *VD) {
.....
}
private:
// Rewriter passed in by the creator of this visitor.
clang::Rewriter &Rewriter;
};
合并代码
接下来我们把所有代码整合一下
#include <clang/AST/AST.h>
#include <clang/AST/RecursiveASTVisitor.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Tooling/CommonOptionsParser.h>
#include <clang/ASTMatchers/ASTMatchFinder.h>
#include <clang/Tooling/Tooling.h>
#include "clang/Rewrite/Core/Rewriter.h"
#include <iostream>
using namespace clang;
using namespace clang::tooling;
using namespace clang::ast_matchers;
using namespace llvm;
using namespace std;
// Overview text handed to CommonOptionsParser::create in main().
const char *toolOverview{"demo"};
// Command-line option category handed to CommonOptionsParser::create in main().
static cl::OptionCategory optCat{"demo Options"};
/// Returns true when `directoryPath` occurs in `filePath` as a run of complete
/// path components.
///
/// A match must start at the beginning of `filePath` or right after a '/',
/// and must end at the end of `filePath` or right before a '/'. This keeps
/// "/home/test" from matching "/home/testing/a.cpp". Every occurrence is
/// examined, not only the first one. Trailing '/' characters on
/// `directoryPath` are ignored.
///
/// An empty `directoryPath` matches every file. The root directory "/"
/// matches every absolute path.
bool isFileInDirectory(const std::string& filePath, const std::string& directoryPath) {
    if (directoryPath.empty()) {
        return true;
    }

    // Normalize "dir/" to "dir" so the end-of-component check below works.
    std::string dir = directoryPath;
    while (dir.size() > 1 && dir.back() == '/') {
        dir.pop_back();
    }
    if (dir == "/") {
        return !filePath.empty() && filePath.front() == '/';
    }

    for (std::size_t pos = filePath.find(dir); pos != std::string::npos;
         pos = filePath.find(dir, pos + 1)) {
        const std::size_t end = pos + dir.size();
        const bool startsComponent = pos == 0 || filePath[pos - 1] == '/';
        const bool endsComponent = end == filePath.size() || filePath[end] == '/';
        if (startsComponent && endsComponent) {
            return true;
        }
    }
    return false;
}
/// Returns true when `str` ends with `suffix`. An empty suffix always matches.
bool ends_with(const std::string& str, const std::string& suffix) {
    const std::size_t strLen = str.size();
    const std::size_t sufLen = suffix.size();
    // Compare only the tail of `str` that has the same length as the suffix.
    return sufLen <= strLen && str.compare(strLen - sufLen, sufLen, suffix) == 0;
}
/// AST visitor that wraps the initializer of variables declared in the target
/// source directory in a `UTF(...)` macro call, recording the edit in the
/// Rewriter it was constructed with.
class MyVisitor : public RecursiveASTVisitor<MyVisitor> {
public:
    explicit MyVisitor(clang::Rewriter &Rewriter) : Rewriter(Rewriter) {}

    /// Hook for every VarDecl. Always returns true so the traversal continues.
    bool VisitVarDecl(VarDecl *VD) {
        // IMPORTANT: only rewrite the .c/.cpp files under the target directory.
        // Without this filter, declarations coming from included (system)
        // headers would be rewritten as well.
        ASTContext &context = VD->getASTContext();
        SourceManager &SM = context.getSourceManager();
        FileID FID = SM.getFileID(VD->getBeginLoc());
        const FileEntry *FE = SM.getFileEntryForID(FID);
        if (!FE) {
            return true;
        }
        std::string FileName = FE->getName().str();
        if (!ends_with(FileName, ".c") && !ends_with(FileName, ".cpp")) {
            return true;
        }
        // Directory that contains the sources being parsed.
        std::string TargetDirectory = "/home/cpp_parser/llvm-project-llvmorg-15.0.4/clang/tools/test";
        if (!isFileInDirectory(FileName, TargetDirectory)) {
            return true;
        }

        // Only variables with an initializer are of interest.
        if (!VD->hasInit()) {
            return true;
        }
        // Print the AST node for debugging.
        VD->dump();

        // Look through implicit nodes to the expression as it is spelled in the
        // source. For `string s = "abc";` the implicit constructor call's range
        // starts at the variable name `s`, which would otherwise be wrapped too.
        Expr *Init = VD->getInit()->IgnoreUnlessSpelledInSource();
        SourceRange InitRange = Init->getSourceRange();
        // Skip initializers whose range is unusable or still begins at the
        // declared name (e.g. `Foo f(1, 2);`): wrapping that range would
        // swallow the declarator.
        if (InitRange.isInvalid() || InitRange.getBegin() == VD->getLocation()) {
            return true;
        }
        StringRef InitText = Lexer::getSourceText(
            CharSourceRange::getTokenRange(InitRange), SM, context.getLangOpts());
        if (InitText.empty()) {
            // The source text could not be retrieved; leave the code untouched.
            return true;
        }

        // Wrap the initializer text in the macro call.
        std::string macroName = "UTF";
        std::string NewInit = macroName + "(" + InitText.str() + ")";
        cout << "NewInit:" << NewInit << endl;

        // Replace the WHOLE initializer range. Passing only the begin location
        // would replace just the first token and leave the rest of the original
        // expression behind.
        Rewriter.ReplaceText(InitRange, NewInit);
        return true;
    }

private:
    clang::Rewriter &Rewriter;
};
class MyConsumer : public ASTConsumer {
public:
explicit MyConsumer(ASTContext *Context, clang::Rewriter &Rewriter) : Context(Context), Rewriter(Rewriter) {}
void HandleTranslationUnit(ASTContext &Context) override {
MyVisitor visitor(Rewriter);
visitor.TraverseDecl(Context.getTranslationUnitDecl());
}
private:
ASTContext *Context;
clang::Rewriter &Rewriter;
};
class MyAction : public ASTFrontendAction {
public:
std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI, StringRef) override {
Rewriter.setSourceMgr(CI.getSourceManager(), CI.getLangOpts());
return std::make_unique<MyConsumer>(&CI.getASTContext(), Rewriter);
}
//这个接口是一定要写的
void EndSourceFileAction() override {
Rewriter.overwriteChangedFiles();
cout << "overwriteChangedFiles" << endl;
}
clang::Rewriter Rewriter;
};
/// Parses the command line, builds a ClangTool for the given source files and
/// runs MyAction on each of them. Returns 1 when the options cannot be parsed,
/// otherwise the result of ClangTool::run.
int main(int argc, const char **argv) {
    auto OptionsParserExpected = CommonOptionsParser::create(argc, argv, optCat, cl::OneOrMore, toolOverview);
    if (!OptionsParserExpected) {
        // A failed llvm::Expected must have its error consumed before it is
        // destroyed; llvm::toString() consumes it and yields the message.
        llvm::errs() << "Failed to parse options: "
                     << llvm::toString(OptionsParserExpected.takeError()) << "\n";
        return 1;
    }
    CommonOptionsParser &OptionsParser = *OptionsParserExpected;
    ClangTool Tool(OptionsParser.getCompilations(), OptionsParser.getSourcePathList());
    return Tool.run(newFrontendActionFactory<MyAction>().get());
}
cmake文件信息
cmake_minimum_required(VERSION 3.4.3)

# The compiler has to be chosen before project() enables the C++ language;
# setting it afterwards is not reliably honoured.
set(CMAKE_CXX_COMPILER "clang++")
project(demo)

# Debug build without optimisation; -w silences all compiler warnings.
# (-w is a compiler option, not a preprocessor definition.)
add_compile_options(-g -O0 -w)
# NOTE(review): Clang_DEFINITIONS is not set anywhere in this file - confirm it is provided externally.
add_definitions(${Clang_DEFINITIONS})

set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} asmparser support mc)
set(CMAKE_CXX_STANDARD 17)

# Import LLVMConfig.cmake
find_package(LLVM REQUIRED HINTS "${LLVM_CMAKE_PATH}")
list(APPEND CMAKE_MODULE_PATH ${LLVM_DIR})
find_package(Clang REQUIRED)

message(STATUS "LLVM include dirs ${Clang_INCLUDE_DIRS}")
# LLVM_INCLUDE_DIRS / CLANG_INCLUDE_DIRS are the variables exported by the
# LLVM and Clang package configs; the original entries are kept as well.
include_directories(
    ${Clang_INCLUDE_DIRS}
    ${LLVM_MAIN_INCLUDE_DIR}
    ${LLVM_INCLUDE_DIRS}
    ${CLANG_INCLUDE_DIRS}
)
link_directories(${Clang_LIBRARY_DIRS})

add_executable(demo
    demo.cpp
)

# clangRewrite (clang::Rewriter) and clangLex (clang::Lexer) are used directly
# by demo.cpp, so they are linked explicitly instead of relying on transitive
# dependencies of clangTooling.
target_link_libraries(demo
    PUBLIC
    clangAST
    clangASTMatchers
    clangBasic
    clangFrontend
    clangLex
    clangRewrite
    clangSerialization
    clangTooling
)
解析cpp文件代码
#include <string>
#include <iostream>
using namespace std;
// Sample input for the demo tool: declares one local variable with an initializer.
int main(int argc, const char *argv[]){
string s = "abc";
return 0;
}
编译:
mkdir build
cd build
cmake ..
make
运行
./demo /xxx/test.cpp
控制台打印信息
root@db7f363f8dd1:/home/cpp_parser/llvm-project-llvmorg-15.0.4/clang/tools/syn-analyze/demo2/build# ./demo /home/cpp_parser/llvm-project-llvmorg-15.0.4/clang/tools/syn-analyze/test/tmp_case5/e.cpp
Error while trying to load a compilation database:
Could not auto-detect compilation database for file "/home/cpp_parser/llvm-project-llvmorg-15.0.4/clang/tools/syn-analyze/test/tmp_case5/e.cpp"
No compilation database found in /home/cpp_parser/llvm-project-llvmorg-15.0.4/clang/tools/syn-analyze/test/tmp_case5 or any parent directory
fixed-compilation-database: Error while opening fixed database: No such file or directory
json-compilation-database: Error while opening JSON database: No such file or directory
Running without flags.
/home/cpp_parser/llvm-project-llvmorg-15.0.4/clang/tools/syn-analyze/test/tmp_case5/e.cpp
/home/cpp_parser/llvm-project-llvmorg-15.0.4/clang/tools/syn-analyze/test/tmp_case5/e.cpp
/home/cpp_parser/llvm-project-llvmorg-15.0.4/clang/tools/syn-analyze/test/tmp_case5/e.cpp
VarDecl 0x55f55e2b6090 </home/cpp_parser/llvm-project-llvmorg-15.0.4/clang/tools/syn-analyze/test/tmp_case5/e.cpp:6:2, col:13> col:9 s 'std::string':'std::basic_string<char>' cinit destroyed
`-ExprWithCleanups 0x55f55e2b62c0 <col:9, col:13> 'std::string':'std::basic_string<char>'
`-CXXConstructExpr 0x55f55e2b6290 <col:9, col:13> 'std::string':'std::basic_string<char>' 'void (std::basic_string<char> &&) noexcept' elidable
`-MaterializeTemporaryExpr 0x55f55e2b6278 <col:13> 'std::string':'std::basic_string<char>' xvalue
`-CXXBindTemporaryExpr 0x55f55e2b6258 <col:13> 'std::string':'std::basic_string<char>' (CXXTemporary 0x55f55e2b6258)
`-ImplicitCastExpr 0x55f55e2b6238 <col:13> 'std::string':'std::basic_string<char>' <ConstructorConversion>
`-CXXConstructExpr 0x55f55e2b6200 <col:13> 'std::string':'std::basic_string<char>' 'void (const char *, const std::allocator<char> &)'
|-ImplicitCastExpr 0x55f55e2b6118 <col:13> 'const char *' <ArrayToPointerDecay>
| `-StringLiteral 0x55f55e2b60f8 <col:13> 'const char[4]' lvalue "abc"
`-CXXDefaultArgExpr 0x55f55e2b61e0 <<invalid sloc>> 'const std::allocator<char>':'const std::allocator<char>' lvalue
NewInit:UTF(s = "abc")
overwriteChangedFiles
待续:
接下来我会介绍ASTVisitor使用,遍历其他ast节点,全局变量,函数,类,结构体,模板函数等等,
欢迎关注专栏: