1.Pycparser介绍
项目链接:https://github.com/eliben/pycparser
Pycparser是一个C语言解析器,支持完整的C99标准,使用纯Python编写。
它可以非常方便地对C语言源码进行解析和处理,如生成AST、提取源码中的函数调用关系等。
Pycparser非常容易上手,需重点阅读examples目录和c_ast.py文件。
2.源码解读
-
重要文件介绍
_c_ast.cfg和c_ast.py提供了C99语法节点的描述和实现,如_c_ast.cfg对If语句的描述:
If: [cond*, iftrue*, iffalse*]
表示If节点由三个子节点构成,分别是cond(条件)、iftrue、iffalse,类似BNF范式的描述。
-
c_ast.py中对If节点的定义:
class If(Node):
    __slots__ = ('cond', 'iftrue', 'iffalse', 'coord', '__weakref__')
    def __init__(self, cond, iftrue, iffalse, coord=None):
        self.cond = cond
        self.iftrue = iftrue
        self.iffalse = iffalse
        self.coord = coord
    def children(self):
        nodelist = []
        if self.cond is not None: nodelist.append(("cond", self.cond))
        if self.iftrue is not None: nodelist.append(("iftrue", self.iftrue))
        if self.iffalse is not None: nodelist.append(("iffalse", self.iffalse))
        return tuple(nodelist)
    def __iter__(self):
        if self.cond is not None:
            yield self.cond
        if self.iftrue is not None:
            yield self.iftrue
        if self.iffalse is not None:
            yield self.iffalse
    attr_names = ()
可以发现,__init__方法除了三个子节点外,还多了一个coord参数,它用来表示节点在源码中的位置信息,比如代码行号等。观察children方法,它用nodelist管理子节点,每一个子节点用tuple表示,如条件语句用("cond", self.cond)表示,其中self.cond才是真实的条件节点,"cond"是该节点的TAG。
-
提取源码中所有If节点的条件节点
参考examples文件,可实现在C语言源码中提取所有If节点的条件节点:
def find_If(node,if_list):
    if node is None:
        return
    if isinstance(node,c_ast.If):
        if_list.append(node.cond)
    # iterator its children
    for item in node.children():
        # deep search
        # item is a tuple , item[0] is type, item[1] is Node
        t_node = item[1]
        if isinstance(t_node, c_ast.If):
            if_list.append(t_node.cond)
            find_If(t_node.iftrue,if_list)
            find_If(t_node.iffalse,if_list)
        else:
            find_If(t_node,if_list)

filename = "notes.c"
ifcondList = []
ast = parse_file(filename, use_cpp=True)
find_If(ast,ifcondList)
提取If节点后,就可以做很多事情了,如输出所有条件语句的代码,实现如下:
from pycparser import c_generator

# Print the C source text of every collected If condition.
generator = c_generator.CGenerator()
# find_If appends each condition node itself (node.cond), not a
# (tag, node) tuple, so the items are passed to the generator directly;
# indexing them with [1] would fail.
for cond_node in ifcondList:
    cond_code = generator.visit(cond_node)  # render the condition back to C code
    print(cond_code)
对条件节点的处理,还可继续分析,如提取条件中的常量、操作符等
3.实现cflow工具中的函数调用关系功能
Pycparser应用了访问者(Visitor)设计模式来解析目标节点,具体使用可参考项目examples目录下的func_calls.py和func_defs.py文件。
from __future__ import print_function
import sys
import re
import json
sys.path.extend(['.', '..'])
from pycparser import c_parser, c_ast, parse_file, c_generator
def extract_funcDef(node, defList):
    """Collect the function-definition nodes found beneath ``node``.

    Walks the tree depth-first through ``node.children()``. A child that
    is a ``c_ast.FuncDef`` is appended to ``defList`` and not searched any
    further; every other child is searched recursively.

    Args:
        node: AST node to search, or ``None`` (ignored).
        defList: List mutated in place; receives the ``FuncDef`` nodes in
            traversal order.
    """
    if node is None:
        return
    for entry in node.children():
        # Each entry is a (tag, child) pair; only the child node matters.
        child = entry[1]
        if not isinstance(child, c_ast.FuncDef):
            extract_funcDef(child, defList)
            continue
        defList.append(child)
def extract_funcCall(node, funcList):
    """Collect every function-call node found beneath ``node``.

    The search is depth-first over ``children()``. The root ``node`` itself
    is never appended, only its descendants.

    Args:
        node: A ``c_ast.Node`` to search, or an indexable ``(node, extra)``
            pair whose first element is the node (or ``None``). ``None``
            is accepted and ignored.
        funcList: List mutated in place; receives each ``c_ast.FuncCall``
            in traversal order (an outer call before the calls nested
            inside it).
    """
    # Backward compatibility: callers may pass a (node, extra) pair.
    if node is not None and not isinstance(node, c_ast.Node):
        node = node[0]
    if node is None:
        return
    for entry in node.children():
        # Each entry is a (tag, child) pair.
        child = entry[1]
        if isinstance(child, c_ast.FuncCall):
            funcList.append(child)
        # Keep descending even below a call node, so calls nested in the
        # callee expression or the argument list, e.g. ``foo(bar(x))``,
        # are recorded as well instead of being silently dropped.
        extract_funcCall(child, funcList)
class FuncDefVisitor(c_ast.NodeVisitor):
    """Visitor that gathers the calls made inside one named function.

    Both constructor arguments are stored unchanged on the instance:
    ``funcname`` is compared against each visited definition's
    ``decl.name``, and ``funcList`` is the list handed to
    ``extract_funcCall`` to receive the call nodes.
    """

    def __init__(self, funcname, funcList):
        self.funcList = funcList
        self.funcname = funcname

    def visit_FuncDef(self, node):
        """Harvest call nodes from ``node`` when it defines ``funcname``."""
        if node.decl.name != self.funcname:
            return
        extract_funcCall(node, self.funcList)
def show_deflist(defList):
    """Walk the collected definitions; printing is currently disabled.

    Each entry's ``decl.name`` is still read, so an entry lacking that
    attribute raises ``AttributeError``.

    Args:
        defList: Iterable of objects exposing ``decl.name``.
    """
    for func_def in defList:
        # Debug output (name and decl.coord) is switched off; only the
        # attribute lookup remains.
        _ = func_def.decl.name
def show_func_defs(ast, funcname, the_dict, invoke_dict):
    """Record the call relations of one function in both lookup tables.

    Args:
        ast: Root AST node passed to ``FuncDefVisitor.visit``.
        funcname: Name of the function definition whose calls are collected.
        the_dict: Mapping of callee name -> list of caller names. Updated
            in place; ``funcname`` is appended once per call found, so
            repeated calls produce repeated entries.
        invoke_dict: Mapping of caller name -> list of callee names.
            ``invoke_dict[funcname]`` is overwritten with the callees in
            traversal order.
    """
    funcList = []
    FuncDefVisitor(funcname, funcList).visit(ast)

    # NOTE(review): assumes every callee is a plain identifier so that
    # ``func.name.name`` is a string; calls made through other expressions
    # (e.g. function pointers) are not handled here -- confirm.
    callees = [func.name.name for func in funcList]
    invoke_dict[funcname] = callees

    for callee in callees:
        # setdefault creates the caller list on first sight of a callee.
        # It replaces a broad ``except Exception`` that treated any failure
        # as "key missing" and could mask unrelated errors.
        the_dict.setdefault(callee, []).append(funcname)
if __name__ == '__main__':
    # Script entry point: parse one C file, then print the reverse
    # (callee -> callers) and forward (caller -> callees) call tables.
    filename = "./codes/notes.c"
    ast = parse_file(filename, use_cpp=True)

    # Gather every function definition in the translation unit.
    defList = []
    extract_funcDef(ast, defList)
    show_deflist(defList)

    # Fill both tables, one defined function at a time.
    the_dict = {}
    invoke_dict = {}
    nameList = [item.decl.name for item in defList]
    for name in nameList:
        show_func_defs(ast, name, the_dict, invoke_dict)

    # Dump each table under its banner as "key:value" lines.
    for title, table in (('====Ref_dict====', the_dict),
                         ('====Invoke_dict====', invoke_dict)):
        print(title)
        for k, v in table.items():
            print('{}:{}'.format(k, v))
得到的输出结果:
下一篇有空再介绍如何用Invoke_dict生成调用图,需要用到graphviz,此部分已经实现,可看效果图。