[python] 解析源码中的structs，并建立它们之间的引用关系

最新推荐文章于 2024-06-06 13:20:02 发布

巴山独钓

最新推荐文章于 2024-06-06 13:20:02 发布

阅读量2k

点赞数

分类专栏： python 文章标签： python struct dictionary regex header graphviz

本文链接：https://blog.csdn.net/tankles/article/details/6976429

版权

python 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

阅读开源软件的过程中，如果各数据结构（struct）之间的引用关系比较简单，理解起来很容易；但如果引用关系非常复杂，就会给我们的理解过程带来很大的挑战。

本文将引入一个python脚本，主要用来提取源码中的struct结构定义，建立相应的引用关系，然后通过一些图形化工具（这里采用了graphviz的dot工具），将引用关系图形化，这样对理解源码具有一定的帮助。

首先，看看具体的效果，特此写了一个简单的struct定义和引用实例。

实例源码：

/* Case 1: a forward typedef on its own line gives struct hello1_s the alias hello1_t. */
typedef struct hello1_s hello1_t ;
/* The definition itself carries no alias name after the closing brace. */
typedef struct hello1_s
{
  int a ;
  unsigned short b;          // comments
  char c ;

};

/* Case 2: tag and alias declared together; member h1 refers to hello1_s through its alias hello1_t. */
typedef struct hello2_s
{
  char c ;
  hello1_t h1 ;           /* comments */

}hello2_t ;

/* Case 3: tag only, no alias name; member h2 refers to hello2_s using the 'struct' keyword. */
typedef struct hello3_s
{
  char c ;
  struct hello2_s h2 ;
} ;

/* Case 4: anonymous struct known only by its alias hello4_t; member h2 uses the alias hello2_t. */
typedef struct
{
  char c ;
  hello2_t h2 ;

} hello4_t ;

/* Case 5: tag plus alias; members h1 and h4 use the aliases hello1_t and hello4_t. */
typedef struct hello5_s
{
  char c ;
  hello1_t h1 ;
  hello4_t h4 ;

} hello5_t ;

...

通过python脚本抽取structs定义及其相互引用信息，保存为*.dot脚本，然后通过graphviz的dot工具将产生的*.dot脚本转换为图形形式。

具体的操作步骤如下：

python graphviz_struct.py <dir>

dot -Tpng <dir>/dots/bigraph.dot > <dir>/dots/bigraph.png

产生的效果图如下：

这样struct之间的引用关系将一目了然，对于我们理解源码具有非常好的帮助作用。

python脚本为：

#!/usr/bin/env python 

import re
import os
import sys
import struct_to_dot
#import basic_defined_type                                                                                                          

# Regex for a whole struct definition, with or without a leading 'typedef':
#
#   typedef struct hello_s
#   {
#      ...
#   } hello_t ;
#
# group(1): struct tag (may be empty)
# group(2): struct body
# group(3): typedef alias (may be empty)
#
# '.*?' is a minimal match, so the body stops at the first '}'; members that
# themselves contain '{...}' (nested struct/union) are not handled.
# '\b' keeps identifiers that merely end in 'struct' from matching, and '\s*'
# after the keyword accepts an anonymous 'struct' directly followed by a
# newline ('typedef struct\n{').
struct_regex = r'\bstruct\b\s*(\w*)[ \t\n]*\{(.*?)\}[ \t\n]*(\w*)[ \t]*;'
struct_pattern = re.compile(struct_regex, re.DOTALL)

# Regex for one member declaration inside a struct body:
#   'int a;'             --> ('int', 'a')
#   'unsigned int a ;'   --> ('unsigned int', 'a')
# A '*' is captured as part of the variable name ('foo_t *p;' --> ('foo_t', '*p')).
sentence_regex = r'[ \t]*(.*)[ \t]+([\*\w]+)[ \t]*;'
sentence_pattern = re.compile(sentence_regex)

# Regex that extracts 'xxxx' from a member type written as 'struct xxxx'.
sentence_regex2 = r'[ \t]*struct[ \t]+(\w+)[ \t]*'
sentence_pattern2 = re.compile(sentence_regex2)

# Regex for a stand-alone typedef of a struct, such as:
#   typedef struct hello_s hello_t ;
# group(1): struct tag, group(2): alias.
typedef_struct_regex = r'typedef[ \t]+struct[ \t]+(\w+)[ \t]+(\w+)[ \t]*;'
typedef_struct_pattern = re.compile(typedef_struct_regex)

# File names that are scanned for struct definitions.  The trailing '$' makes
# the extension match the END of the name, so 'a.html' or 'a.c.orig' are skipped.
TARGET_FILE_EXT_regex = r'.*(\.h|\.hpp|\.c|\.cpp|\.cx|\.cxx|\.cc)$'

def walk_dir(dir, file_list, topdown=True):
    """Append every source/header file found under *dir* to *file_list*.

    dir       -- root directory to walk recursively.
    file_list -- list that receives the joined paths (modified in place).
    topdown   -- passed through to os.walk().

    Only regular directory entries reported as files by os.walk() are
    collected.  Directories are never appended, even when their name happens
    to match TARGET_FILE_EXT_regex, because every collected path is later
    opened and read as a file.
    """
    for root, _subdirs, files in os.walk(dir, topdown):
        for name in files:
            if re.match(TARGET_FILE_EXT_regex, name):
                file_list.append(os.path.join(root, name))

def build_names_dictionary(src_filename, names_Dictionary, alias_Dictionary):
    """Record the struct names and typedef aliases defined in one source file.

    src_filename     -- path of the file to scan.
    names_Dictionary -- receives one key per struct definition found
                        (value 1); modified in place.
    alias_Dictionary -- receives alias -> struct name mappings; modified
                        in place.
    """
    # 'with' closes the file even if reading it fails.
    with open(src_filename, 'r') as fd_read:
        source_text = fd_read.read()

    # Pass 1: stand-alone typedefs, 'typedef struct hello_s hello_t;'.
    for typedef_match in typedef_struct_pattern.finditer(source_text):
        struct_name, struct_alias_name = typedef_match.groups()
        alias_Dictionary[struct_alias_name] = struct_name

    # Pass 2: full definitions, 'struct hello_s { ... } hello_t;'.
    for struct_match in struct_pattern.finditer(source_text):
        struct_name = struct_match.group(1).strip()
        struct_alias_name = struct_match.group(3).strip()

        # 'typedef struct { ... } name;' -- the alias is the only name.
        if not struct_name:
            struct_name, struct_alias_name = struct_alias_name, ''

        # 'struct { ... };' has neither tag nor alias: nothing to register.
        if not struct_name:
            continue

        names_Dictionary[struct_name] = 1
        if struct_alias_name:
            alias_Dictionary[struct_alias_name] = struct_name

def _resolve_struct_alias(type_name, alias_Dictionary):
    """Follow alias -> name mappings until a non-alias name or a cycle is reached."""
    seen = set()
    while type_name and type_name in alias_Dictionary and type_name not in seen:
        seen.add(type_name)
        type_name = alias_Dictionary[type_name]
    return type_name


def struct_to_dot_subgraph(src_filename, names_Dictionary, alias_Dictionary, fd_write):
    """Write one dot subgraph per struct definition found in *src_filename*.

    src_filename     -- path of the source file to scan.
    names_Dictionary -- known struct names; a member whose (alias-resolved)
                        type is a key here produces a link.
    alias_Dictionary -- alias -> struct name mappings used to resolve
                        member types.
    fd_write         -- open, writable file object for the .dot output.

    For each struct the subgraph header, one field per member declaration
    and the tail are written first, followed by the links from its members
    to the structs they refer to.
    """
    # 'with' closes the file even if reading it fails.
    with open(src_filename, 'r') as fd_read:
        source_text = fd_read.read()

    for struct_match in struct_pattern.finditer(source_text):
        # 'typedef struct { ... } name;' has no tag: fall back to the alias.
        struct_name = struct_match.group(1).strip() or struct_match.group(3).strip()
        struct_body = struct_match.group(2)

        struct_to_dot.dot_add_subgraph_header(fd_write, struct_name)

        links = []
        # Field indices start at 1; f0 is the struct title in the record.
        for index, sentence_match in enumerate(sentence_pattern.finditer(struct_body), 1):
            sentence_type = sentence_match.group(1).strip()
            sentence_var = sentence_match.group(2).strip()

            # Record the member as written in the source.
            struct_to_dot.dot_add_subgraph_body(fd_write, index, sentence_type, sentence_var)

            # 'struct xxxx' -> 'xxxx'
            struct_type_match = sentence_pattern2.match(sentence_type)
            if struct_type_match:
                sentence_type = struct_type_match.group(1)

            # Map an alias back to the original struct name.
            sentence_type = _resolve_struct_alias(sentence_type, alias_Dictionary)

            # Only members whose type is a known struct get a link.
            if sentence_type and sentence_type in names_Dictionary:
                links.append(struct_to_dot.dot_add_subgraph_link(struct_name, index, sentence_type))

        struct_to_dot.dot_add_subgraph_tail(fd_write)

        # Links are emitted after the subgraph has been closed.
        struct_to_dot.dot_append_subgraph_link(fd_write, ''.join(links))

def whole_dot_process():
    """Build '<src_path>/dots/bigraph.dot' for the source tree given in sys.argv[1].

    Creates the 'dots' directory if needed, collects struct names and
    aliases from every matching file under the tree, then writes one
    subgraph per struct into the dot file.  Prints a usage line and exits
    with status 1 when the argument count is wrong.
    """
    if len(sys.argv) <= 1 or len(sys.argv) > 3:
        print('Usage: %s <src_path>' % sys.argv[0])
        sys.exit(1)

    src_path = sys.argv[1]
    # endswith() is safe for an empty argument, unlike indexing [-1].
    if src_path.endswith('/'):
        src_path = src_path[:-1]

    DOT_ROOT = src_path + '/dots/'
    if not os.path.isdir(DOT_ROOT):
        os.mkdir(DOT_ROOT)

    # struct names Dictionary
    names_Dictionary = {}
    # alias_Dictionary: mapping from hello_t to hello_s in 'typedef struct hello_s hello_t;'
    alias_Dictionary = {}

    files_list = []
    walk_dir(src_path, files_list, True)

    # First pass over all files: names and aliases must be complete before
    # any links are generated, since a struct may be defined in another file.
    for filename in files_list:
        build_names_dictionary(filename, names_Dictionary, alias_Dictionary)

    target_name = 'bigraph'
    dst_filename = DOT_ROOT + target_name + '.dot'

    # 'with' flushes and closes the dot file, also when a source file fails.
    with open(dst_filename, 'w') as fd_write:
        struct_to_dot.dot_digraph_header(fd_write, target_name)
        for filename in files_list:
            struct_to_dot_subgraph(filename, names_Dictionary, alias_Dictionary, fd_write)
        struct_to_dot.dot_digraph_tail(fd_write)

    print('create dot file: %s' % dst_filename)

# Script entry: announce the start, run the whole pipeline, announce the end.
print('process beginning ...')
whole_dot_process()
print('process finished.')

</pre>

	         import struct_to_dot 语句导入的struct_to_dot.py脚本如下：


	   

#!/usr/bin/env python                                                                                               
# graph example: 
#f = open('sssssss.dot', 'w') 
#dot_digraph_header(f, 'hello')
#dot_add_subgraph_header(f, 'hello') 
#dot_add_subgraph_body(f, 1, 'struct A', 'a') 
#dot_add_subgraph_body(f, 2, 'int', 's')
#dot_add_subgraph_tail(f)
#dot_digraph_tail(f)           
#f.close()                                                                                                          
# dot text templates.  Inside the quoted record label a backslash must be the
# LAST character on its line: dot only treats backslash-newline as a line
# continuation, so no blanks may follow the backslash.

# Opening of the whole graph; '%s' is the graph name suffix.
digraph_header = r'''
digraph graph_%s {
  node [shape=record fontsize=12 fontname=Courier style=filled];
  edge[color=blue];
  rankdir=LR;
'''

# Closing brace of the whole graph.
digraph_tail = r'''
}
'''

# Opening of one struct's subgraph and of its record node; the four '%s'
# are all filled with the struct name.  The label string is left open.
subgraph_header = r'''
  subgraph struct_%s {
    node [shape=record fontsize=12 fontname=Courier style=filled];
    color = lightgray;
    style = filled;
    label = "%s" ;
    edge[color="#2e3436"];
    node_%s[shape=record label="\
      <f0>*** STRUCT %s ***'''

# One record field: port index, member type, member name.
subgraph_body = r'''|\
      <f%d> %s %s'''

# Closes the label string, the node and the subgraph.
subgraph_tail = r'''\
    "];
  }
'''

# Edge from field f<index> of one struct node to the title field of another.
subgraph_link = r'''
    node_%s:f%d -> node_%s:f0[color=brown]
'''
def dot_digraph_header(f, struct_name):
    """Write the digraph_header template, filled in with *struct_name*, to *f*."""
    f.write(digraph_header % (struct_name))

def dot_digraph_tail(f):
    """Write the digraph_tail template (the graph's closing brace) to *f*."""
    f.write(digraph_tail)

def dot_add_subgraph_header(f, struct_name):
    """Write the subgraph_header template to *f*, using *struct_name* for all four placeholders."""
    f.write(subgraph_header % (struct_name, struct_name, struct_name, struct_name))

def dot_add_subgraph_body(f, index, type_, variable):
    """Write one subgraph_body record field (port *index*, *type_*, *variable*) to *f*."""
    f.write(subgraph_body % (index, type_, variable))

def dot_add_subgraph_link(struct_name, i, link_name):
    """Return (without writing) the subgraph_link text from field *i* of *struct_name* to *link_name*."""
    return subgraph_link % (struct_name, i, link_name)

def dot_add_subgraph_tail(f):
    """Write the subgraph_tail template to *f*."""
    f.write(subgraph_tail)

def dot_append_subgraph_link(f, string):
    """Write *string* (link text built by dot_add_subgraph_link) to *f* unchanged."""
    f.write(string)

#f = open('sssssss.dot', 'w')     
#dot_digraph_header(f, 'hello')   
#                 
#dot_add_subgraph_header(f, 'hello')   
#dot_add_subgraph_body(f, 1, 'struct A', 'a')   
#dot_add_subgraph_body(f, 2, 'int', 's')      
#                 
#dot_add_subgraph_tail(f)   
#dot_digraph_tail(f)  
#                 
#f.close()                                                                                                                          



     熟悉脚本前建议先熟悉一下graphviz工具的使用及dot脚本；上述python脚本的主要作用就是自动提取struct及其引用关系，并生成dot脚本。
        由于刚刚接触python及re，并没有过多考虑效率问题，但是经过memcached和nginx等开源软件的测试，处理速度还是非常快的。另外，当struct内部嵌套定义了其它包含"{..}"的结构（如：struct、union）时，re的抽取结果存在错误。