LLVM CFG/DFG控制流图和数据流图可视化

最新推荐文章于 2024-06-03 09:35:13 发布

Assiduousss

最新推荐文章于 2024-06-03 09:35:13 发布

阅读量5k

点赞数 4

文章标签： c++ 开发语言 linux

本文链接：https://blog.csdn.net/weixin_43854617/article/details/127757060

版权

1.引言

由于最近在学习数据流分析的相关知识，记录一下利用LLVM生成CFG和DFG的学习过程，参考文献和网址放在文章末尾。

2.实验环境

操作系统：Ubuntu 20.04.3 LTS 64bit；

硬件设备：Intel® Celeron(R) CPU N3450 @ 1.10GHz × 4

AMD® Hainan / Mesa Intel® HD Graphics 500 (APL 2)

LLVM 10.0.0-4

Clang version 10.0.0-4ubuntu1

3.实验记录

3.1 实验步骤

1.首先利用LLVM的编译前端Clang将高级语言程序（C/C++）转换成LLVM IR中间表示结果；

2.利用事先写好的分析Pass（分析Pass写好后需要进行重新编译LLVM，然后在LLVM的lib库中生成运行时的库.so文件），具体的CFG生成Pass的伪代码如下所示，主要流程是遍历整个函数，找到基本块然后对每个基本块进行遍历，找到目标操作符，锁定目标操作符的行号上下级信息完成控制流信息的获取；

DFG的分析Pass类似CFG分析Pass，基本步骤一样，但是根据我在网上看到一些调用命令，发现CFG的生成好像可以直接利用LLVM的opt工具获得dot文件，但是DFG的似乎没有，因为本人刚接触这个工具，所以不是很熟悉，知道的朋友可以在评论区补充一下，关于CFG/DFG的分析Pass放在文章的附录部分，有需要的可以自行查看。

3.利用该Pass分析程序的控制流信息和程序调用信息，并通过LLVM的opt工具调用分析Pass生成的.so文件生成.dot文件；

4.由于LLVM没有可视化工具，需要借助第三方可视化平台graphviz，利用其dot文件转化工具将.dot转化为.png或.pdf文件，安装命令如下：

sudo apt-get install -y graphviz-doc libgraphviz-dev graphviz

3.2 实验结果

本文实验程序如下所示：

//file test.c
#include <stdio.h>

// Returns the sum of its two integer arguments.
// The body is intentionally left as a single expression so that the LLVM IR
// generated from this file stays the same as the one the graphs are built from.
int add(int c,int e){
    return c+e;
}
// Entry point of the sample program: initialises a, copies it into b and
// returns add(a, b) as the process exit status.
int main(){
    int a = 10;
    // b is initialised from a, so the value stored in a is loaded and stored again.
    int b = a;
    return add(a,b);
}

首先通过Clang生成LLVM IR文件：

clang -S -emit-llvm test.c -o test.ll

生成函数调用图（call graph）：

opt -dot-callgraph test.ll

根据生成的dot文件名称，使用以下命令来生成图片：

dot callgraph.dot -Tpng -o testgraph.png #生成调用图片

得到的函数调用图如下：

接下来获取函数内部控制流图（CFG）的命令如下：

opt -dot-cfg test.ll #生成dot文件

运行后，会在当前的文件夹目录下得到一个.dot文件，不同版本的LLVM得到的.dot文件可能不一样，LLVM10.0.0得到的文件名为.main.dot，然后输入以下命令生成图片：

dot .main.dot -Tpng -o cc.png

得到以下IR指令的控制流图：

接下来获取程序的DFG图，同样是在程序转换成中间代码LLVM IR的基础上进行操作，首先将自己写好的分析Pass进行编译，生成.so文件放在build文件夹中。使用如下命令：

opt -load /home/lwq/Desktop/LLVM-CFG-DFG-pass-master/CDFG/DFGPass/build/DFG/libLLVMDFG.so -DFGPass < test.ll > /dev/null

其中 -load 选项后面为编译生成的 .so 文件所在位置，运行后会得到各个函数对应的 .dot 文件以及汇总的 all.dot 文件，选择 all.dot 文件，输入以下命令：

dot -Tpng all.dot -o ccc.png

则得到下面的DFG图

4.总结

这些简单的CFG/DFG图生成只是博主简单调用了一些相关工具，在此基础上可以进行代码优化和改进，或者写出一些效率更好的Pass，并且由于LLVM安装过程中的编译问题，似乎只有Debug版本的LLVM才可以使用可视化的命令，本来还想利用LLC工具中的llc -view-combine1-dags test.ll输出程序的DAG图，但是本人是直接使用sudo apt install llvm命令下载的LLVM工具，据说是release版本，所以我调用LLC工具的时候发现并没有上述的DAG指令，因此感兴趣的朋友可以自己试试生成DAG图，成功的话可以在评论区告诉一下我。

最后，DFGPass 分析Pass的程序如下：

#include"graph.h"

using namespace llvm;
namespace {

	struct DFGPass : public ModulePass {
	public:
		static char ID;
		map<string, Graph*> DFGs;
		map<string, Graph*> CFGs;

		DFGPass() : ModulePass(ID) {}

		bool runOnModule(Module &M) override {
			for (Module::iterator iter_F = M.begin(), FEnd = M.end(); iter_F != FEnd; ++iter_F) {
				Function *F = &*iter_F;
				Graph* control_flow_G = new Graph(F);
				Graph* data_flow_G = new Graph(F);
				// F->viewCFG();
				DFGs.insert(pair<string, Graph*>(F->getName().str(), data_flow_G));
				CFGs.insert(pair<string, Graph*>(F->getName().str(), control_flow_G));

				control_flow_G->head.push_back(pair<Value*, Value*>(&*(F->begin())->begin(), &*(F->begin())->begin()));
				for (Function::iterator BB = F->begin(), BEnd = F->end(); BB != BEnd; ++BB) {
					BasicBlock *curBB = &*BB;
					for (BasicBlock::iterator II = curBB->begin(), IEnd = curBB->end(); II != IEnd; ++II) {
						Instruction* curII = &*II;
						switch (curII->getOpcode())
						{
							// for the case of load operation, we should save the value of it
							case llvm::Instruction::Load:
							{
								LoadInst* linst = dyn_cast<LoadInst>(curII);
								Value* loadValPtr = linst->getPointerOperand();
								insert(data_flow_G, pair<Value*, Value*>(loadValPtr, curII));
								break;
							}
							// for the case of store operation, both of the pointer and value should be recoded
							case llvm::Instruction::Store: {
								StoreInst* sinst = dyn_cast<StoreInst>(curII);
								Value* storeValPtr = sinst->getPointerOperand();
								Value* storeVal = sinst->getValueOperand();
								insert(data_flow_G, pair<Value*, Value*>(storeVal, curII));
								insert(data_flow_G, pair<Value*, Value*>(curII, storeValPtr));
								data_flow_G->head.push_back(pair<Value*, Value*>(storeValPtr, storeVal));
								break;
							}

							case llvm::Instruction::Call: {
								CallInst* cinst = dyn_cast<CallInst>(curII);
								string f_name = cinst->getCalledFunction()->getName();
								for(auto iter = DFGs[f_name]->F->arg_begin(), iter_end = DFGs[f_name]->F->arg_end(); iter != iter_end; iter++){
									data_flow_G->link.push_back(pair<Value*, Value*>(cinst, iter));
									errs()<<*cinst<<cinst<<"->"<<*iter<<iter<<"\n";
									// insert(data_flow_G, pair<Value*, Value*>(cinst, iter));
								}
								if(!DFGs[f_name]->F->doesNotReturn()){
									Value* ret_i = &*(--(--DFGs[f_name]->F->end())->end());
									data_flow_G->link.push_back(pair<Value*, Value*>(ret_i, cinst));
									// insert(data_flow_G, pair<Value*, Value*>(ret_i, cinst));
								}
							}
							// for other operation, we get all the operand point to the current instruction
							default: {
								for (Instruction::op_iterator op = curII->op_begin(), opEnd = curII->op_end(); op != opEnd; ++op)
								{
									Instruction* tempIns;
									if (dyn_cast<Instruction>(*op))
									{
										insert(data_flow_G, pair<Value*, Value*>(op->get(), curII));
									}
								}
								break;
							}
						}
						BasicBlock::iterator next = II;
						++next;
						if (next != IEnd) {
							insert(control_flow_G, pair<Value*, Value*>(curII, &*next));
						}
					}

					Instruction* terminator = curBB->getTerminator();
					for (BasicBlock* sucBB : successors(curBB)) {
						Instruction* first = &*(sucBB->begin());
						insert(control_flow_G, pair<Value*, Value*>(terminator, first));
					}
				}
				writeFileByGraph(F);
			}

			// NOTWITHCFHG indicate the fianl graph represents no CFG information
			writeFileByGraphGloble(NOTWITHCFG);
			errs()<<"end\n";
			return false;
		}

		void DFS_plot(Edge* v, Graph* G, raw_fd_ostream& file)
		{
			Edge* p = v;
			while (p)
			{
				if (mark.find(pair<int, int>(p->v_from, p->v_to)) == mark.end()) 
				{
					mark.insert(pair<int, int>(p->v_from, p->v_to));
					file << "\tNode" << G->v[p->v_from]->va << " -> Node" << G->v[p->v_to]->va << "\n";
					DFS_plot(G->v[p->v_to]->first_out, G, file);
				}
				p = p->out_edge;
			}
		}

		void writeFileByGraph(Function *F){
			std::error_code error;
			enum sys::fs::OpenFlags F_None;
			StringRef fileName(F->getName().str() + ".dot");
			raw_fd_ostream file(fileName, error, F_None);
			Graph* data_flow_G =  DFGs[F->getName().str()];
			Graph* control_flow_G = CFGs[F->getName().str()];

			file << "digraph \"DFG for'" + F->getName() + "\' function\" {\n";
			for (auto node_iter = DFGs[F->getName()]->v.begin(), node_end = DFGs[F->getName()]->v.end(); node_iter != node_end; ++node_iter) 
			{
				Value* p = (*node_iter)->va;
				if(isa<Instruction>(*p))
				{
					file << "\tNode" << p << "[shape=record, label=\"" << *p << "\"];\n";
				}
				else
				{
					file << "\tNode" << p << "[shape=ellipse, label=\"" << *p << "\\l" << p << "\"];\n";
				}
			}
			// plot the instruction flow edge
			mark.clear();
			for(auto iter = control_flow_G->head.begin(), iter_end = control_flow_G->head.end(); iter != iter_end; iter++){
				DFS_plot(control_flow_G->v[find(control_flow_G->v, iter->second)]->first_out, control_flow_G, file);
			}

			// plot the data flow edge
			file << "edge [color=red]" << "\n";
			mark.clear();
			for(auto iter = data_flow_G->head.begin(), iter_end = data_flow_G->head.end(); iter != iter_end; iter++){
				DFS_plot(data_flow_G->v[find(data_flow_G->v, iter->second)]->first_out, data_flow_G, file);
			}
			file << "}\n";
			file.close();
		}

		void writeFileByGraphGloble(Mode m){
			std::error_code error;
			enum sys::fs::OpenFlags F_None;
			StringRef fileName("all.dot");
			raw_fd_ostream file(fileName, error, F_None);

			file << "digraph \"DFG for all\" {\n";
			for(auto F_iter = DFGs.begin(), F_iter_end = DFGs.end(); F_iter != F_iter_end; F_iter++){
				Graph* data_flow_G =  DFGs[F_iter->first];
				Graph* control_flow_G = CFGs[F_iter->first];
				auto nodes = F_iter->second->v;
				for (auto node_iter = nodes.begin(), node_end =  nodes.end(); node_iter != node_end; ++node_iter) 
				{
					Value* p = (*node_iter)->va;
					if(isa<Instruction>(*p))
					{
						file << "\tNode" << p << "[shape=record, label=\"" << *p << "\"];\n";
					}
					else
					{
						file << "\tNode" << p << "[shape=ellipse, label=\"" << *p << "\\l" << p << "\"];\n";
					}
				}
				// plot the instruction flow edge
				if(m != NOTWITHCFG){
					file << "edge [color=black]" << "\n";
					mark.clear();
					for(auto iter = control_flow_G->head.begin(), iter_end = control_flow_G->head.end(); iter != iter_end; iter++){
						DFS_plot(control_flow_G->v[find(control_flow_G->v, iter->second)]->first_out, control_flow_G, file);
					}
				}

				// plot the data flow edge
				vector<string> color_set = {"red", "blue", "cyan", "orange", "yellow"};
				mark.clear();
				int count = 0;
				for(auto iter = data_flow_G->head.begin(), iter_end = data_flow_G->head.end(); iter != iter_end; iter++){
					file << "edge [color=" << color_set[count++] << "]" << "\n";
					DFS_plot(data_flow_G->v[find(data_flow_G->v, iter->second)]->first_out, data_flow_G, file);
				}

				for(auto iter = data_flow_G->link.begin(), iter_end = data_flow_G->link.end(); iter != iter_end; iter++){
					file << "edge [color=grey]" << "\n";
					file << "\tNode" << iter->first << " -> Node" << iter->second << "\n";
					errs() << *iter->first << *iter->second << "\n";
				}
			}
			file << "}\n";
			file.close();
		}

	};
}

// Out-of-class definition of the static pass identifier declared in DFGPass
// and handed to the ModulePass constructor.
char DFGPass::ID = 0;
// Registers the pass under the command-line name "DFGPass" (used as
// `opt -DFGPass`) with the description "DFG Pass Analyse".
// NOTE(review): the two trailing `false` arguments are presumably
// RegisterPass's CFGOnly and is_analysis flags — confirm against the LLVM
// version in use.
static RegisterPass<DFGPass> X("DFGPass", "DFG Pass Analyse",
	false, false
);