【分析】生成dalvik解释器源文件的脚本：gen-mterp.py

最新推荐文章于 2024-04-02 06:32:09 发布

原创最新推荐文章于 2024-04-02 06:32:09 发布 · 2.8k 阅读

2 ·

CC 4.0 BY-SA版权

文章标签：

#dalvik #gen-mterp.py

Android研究专栏收录该内容

11 篇文章

订阅专栏

本文深入探讨了gen-mterp.py脚本的功能，该脚本用于根据特定架构配置文件生成Dalvik解释器的C和汇编源码。文章详细分析了脚本的主要组件，包括主要代码段、配置文件处理、时序图以及关键函数的作用，特别是如何解析命令行参数、提取opcode列表和处理配置文件以生成所需的源码。

源码参考：进入

路径：dalvik/vm/mterp/gen-mterp.py

----------------------------------------------------------------------------------------------------

概述

gen-mterp.py根据特定体系结构配置文件，生成dalvik解释器的C和汇编源码。

portable模式

portable模式的配置文件分析可以参考config-portable文件。

时序图

代码分析

首先进入main代码，解析命令行参数，然后提取opcode列表：

#
# ===========================================================================
# "main" code
#

# @param[in] target_arch  Mode to generate, e.g. "portable".  The value is
#            used to build the config file name, so "portable" makes the
#            script open the file "config-portable" (a relative path).
# @param[in] output_dir   Directory that receives the generated files.
#
# Check args.
#
if len(sys.argv) != 3:
    print "Usage: %s target-arch output-dir" % sys.argv[0]
    sys.exit(2)

target_arch = sys.argv[1]
output_dir = sys.argv[2]

#
# Extract opcode list.
#
opcodes = getOpcodeList()
......

getOpcodeList()函数：

#
# Extract an ordered list of instructions from the VM sources.  We use the
# "goto table" definition macro, which has exactly kNumPackedOpcodes
# entries.
#
# Reads the file named by the global interp_defs_file and collects, in file
# order, every line of the form "H(OP_xxx)," as the string "OP_xxx".
#
# Returns the list of opcode names.  Raises SyntaxError when the number of
# names found differs from kNumPackedOpcodes.
#
def getOpcodeList():
    # Matches goto-table entries such as "        H(OP_NOP),    \".
    opcode_re = re.compile(r"^\s*H\(OP_(\w+)\),.*", re.DOTALL)
    opcodes = []

    # "with" closes the file even if reading it raises part-way through.
    with open(interp_defs_file) as opcode_fp:
        for line in opcode_fp:
            match = opcode_re.match(line)
            if match:
                # Keep only the opcode name, dropping the "H(" ... ")," wrapper.
                opcodes.append("OP_" + match.group(1))

    if len(opcodes) != kNumPackedOpcodes:
        # NOTE: the message names Interp.h, but the list is read from
        # interp_defs_file; the text is left unchanged on purpose.
        print("ERROR: found %d opcodes in Interp.h (expected %d)"
              % (len(opcodes), kNumPackedOpcodes))
        raise SyntaxError("bad opcode count")
    return opcodes

interp_defs_file的值在文件的开始处定义：interp_defs_file = "../../libdex/DexOpcodes.h"。getOpcodeList()打开 DexOpcodes.h 文件并逐行匹配，凡是以若干空白字符后接"H(OP_"开头的行都会被匹配，例如下面宏定义中的这些行：

/*
 * Macro used to generate a computed goto table for use in implementing
 * an interpreter in C.
 */
#define DEFINE_GOTO_TABLE(_name) \
    static const void* _name[kNumPackedOpcodes] = {                      \
        /* BEGIN(libdex-goto-table); GENERATED AUTOMATICALLY BY opcode-gen */ \
        H(OP_NOP),                                                            \
        H(OP_MOVE),                                                           \
        H(OP_MOVE_FROM16),                                                    \
        H(OP_MOVE_16),                                                        \
        ......

匹配结束后，opcodes中的元素是这种格式的：OP_NOP、OP_MOVE……也就是说去掉了"H"和括号。

继续看main代码：

#
# Open config file.
#
try:
    config_fp = open("config-%s" % target_arch)
except (IOError, OSError):
    # Only file-system failures are reported here; anything else (for
    # example Ctrl-C) is no longer misreported as a missing config file.
    print("Unable to open config file 'config-%s'" % target_arch)
    sys.exit(1)

#
# Open and prepare output files: one C++ file and one assembly file.
#
try:
    c_fp = open("%s/InterpC-%s.cpp" % (output_dir, target_arch), "w")
    asm_fp = open("%s/InterpAsm-%s.S" % (output_dir, target_arch), "w")
except (IOError, OSError):
    print("Unable to open output files")
    print("Make sure directory '%s' exists and existing files are writable"
          % output_dir)
    # Ideally we'd remove the files to avoid confusing "make", but if they
    # failed to open we probably won't be able to remove them either.
    sys.exit(1)

print("Generating %s, %s" % (c_fp.name, asm_fp.name))

file_header = """/*
 * This file was generated automatically by gen-mterp.py for '%s'.
 *
 * --> DO NOT EDIT <--
 */

""" % (target_arch)

# Write the same banner at the top of both output files.
c_fp.write(file_header)
asm_fp.write(file_header)
......

上面的代码先打开配置文件，再打开两个输出文件：一个是 C 文件，一个是汇编文件；然后将 file_header 变量保存的内容分别写入这两个输出文件。
继续，处理配置文件：

#
# Process the config file.
#
# Each non-blank, non-comment line is split on single spaces and dispatched
# on its first token.  A DataParseError aborts processing, closes both
# output files and makes the script exit with a failure status.
#
failed = False
try:
    # Read the config file one line at a time.
    for line in config_fp:
        line = line.strip()         # remove CRLF, leading spaces
        tokens = line.split(' ')    # tokenize
        #print "%d: %s" % (len(tokens), tokens)
        if len(tokens[0]) == 0:
            #print "  blank"
            pass
        elif tokens[0][0] == '#':
            #print "  comment"
            pass
        else:
            if tokens[0] == "handler-size":
                setHandlerSize(tokens)
            elif tokens[0] == "import":
                importFile(tokens)  # copy the imported file into the C or asm output file
            elif tokens[0] == "asm-stub":
                setAsmStub(tokens)
            elif tokens[0] == "asm-alt-stub":
                setAsmAltStub(tokens)
            elif tokens[0] == "op-start":
                opStart(tokens)
            elif tokens[0] == "op-end":
                opEnd(tokens)
            elif tokens[0] == "alt":
                altEntry(tokens)
            elif tokens[0] == "op":
                opEntry(tokens)
            elif tokens[0] == "handler-style":
                setHandlerStyle(tokens)
            elif tokens[0] == "alt-ops":
                genaltop(tokens)
            elif tokens[0] == "split-ops":
                splitops = True
            else:
                raise DataParseError, "unrecognized command '%s'" % tokens[0]
            # Checked after every command: if style is still None here, the
            # command just handled was not preceded by handler-style.
            if style == None:
                print "tokens[0] = %s" % tokens[0]
                raise DataParseError, "handler-style must be first command"
except DataParseError, err:
    print "Failed: " + str(err)
    # TODO: remove output files so "make" doesn't get confused
    failed = True
    c_fp.close()
    asm_fp.close()
    # Clear the handles so the final cleanup below does not close them again.
    c_fp = asm_fp = None

config_fp.close()

#
# Done!
#
if c_fp:
    c_fp.close()
if asm_fp:
    asm_fp.close()

# failed is a bool, so the exit status is 1 on failure and 0 on success.
sys.exit(failed)

config-portable配置文件中有这么几个关键字： handler-style、 import、 op-start、 op-end，所以解析配置文件的函数我们只需要关注这些：

setHandlerStyle - handler-style
importFile - import
opStart - op-start
opEnd - op-end

setHandlerStyle函数设置全局变量 style，这个变量代表解释器的风格。通过配置文件得知，portable模式的解释器风格是 all-c，它构建出来的源码只有cpp文件。

importFile函数：

#
# Parse arch config file --
# Copy a file in to the C or asm output file.
#
# tokens is the split config line: the "import" keyword followed by exactly
# one source path.  A path ending in ".cpp" is appended to c_fp, a path
# ending in ".S" is appended to asm_fp, and anything else raises
# DataParseError.
#
def importFile(tokens):
    if len(tokens) != 2:
        raise DataParseError("import requires one argument")
    source = tokens[1]  # source path; relative paths in the config files examined

    # Pick the destination from the file extension.
    # Note (portable mode): config-portable never imports a ".S" (assembly)
    # file, so only the ".cpp" branch is taken there.
    if source.endswith(".cpp"):
        out_fp = c_fp
    elif source.endswith(".S"):
        out_fp = asm_fp
    else:
        raise DataParseError("don't know how to import %s (expecting .cpp/.S)"
                % source)

    # getGlobalSubDict() returns a map with two keys, handler_size_bits and
    # handler_size_bytes, whose values are the globals of the same names.
    # Note (portable mode): both keep their default value of -1000.
    #
    # appendSourceFile() substitutes keywords in the imported file and writes
    # the result to the output file.
    # Note (portable mode): the content is effectively copied through as is.
    appendSourceFile(source, getGlobalSubDict(), out_fp, None)

opStart函数：

#
# Parse arch config file --
# Start of opcode list.
#
# tokens is the split config line: the "op-start" keyword followed by the
# directory that holds the opcode sources.  The directory is stored in the
# global default_op_dir and the global in_op_start is set to 1.
# Note (portable mode): the opcode directory is "c".
#
def opStart(tokens):
    global in_op_start, default_op_dir

    if len(tokens) != 2:
        raise DataParseError("opStart takes a directory name argument")
    # in_op_start is non-zero once an op-start has already been processed.
    if in_op_start != 0:
        raise DataParseError("opStart can only be specified once")

    in_op_start = 1
    default_op_dir = tokens[1]  # directory containing the opcode files

opEnd函数：

#
# Parse arch config file --
# End of opcode list; emit instruction blocks.
#
# tokens is the split config line and must hold only the "op-end" keyword.
# Sets the global in_op_start to 2, emits every opcode, and then, unless
# split-ops was requested, emits the alternate opcodes and jump tables when
# they are enabled.
#
def opEnd(tokens):
    global in_op_start

    if len(tokens) != 1:
        raise DataParseError("opEnd takes no arguments")
    if in_op_start != 1:
        raise DataParseError("opEnd must follow opStart, and only appear once")
    in_op_start = 2

    # Read each opcode file, process it and write it to the output.
    loadAndEmitOpcodes()

    # Note (portable mode): splitops is False and generate_alt_table is
    # false, so nothing below this point runs.
    if not (splitops == False and generate_alt_table):
        return
    loadAndEmitAltOpcodes()

    if style != "jump-table":
        return
    emitJmpTable("dvmAsmInstructionStart", label_prefix)
    emitJmpTable("dvmAsmAltInstructionStart", alt_label_prefix)

loadAndEmitOpcodes函数：

#
# Load and emit opcodes for all kNumPackedOpcodes instructions.
#
# (Abridged excerpt: the "......" lines stand for code omitted here.)
#
def loadAndEmitOpcodes():

    ......

    # Walk every opcode, load its source file and emit it to the output.
    # Note (portable mode): only loadAndEmitC is called, so everything goes
    # to the C file.
    for i in xrange(kNumPackedOpcodes):
        op = opcodes[i]

        # A per-opcode location overrides the default directory.
        # Note (portable mode): opcode_locations should be empty.
        if opcode_locations.has_key(op):
            location = opcode_locations[op]
        else:
            location = default_op_dir

        if location == "c": # the only branch of interest in portable mode
            # Load the opcode file and emit it to the C file.
            loadAndEmitC(location, i)
            # Note (portable mode): asm_stub_text should have length 0.
            if len(asm_stub_text) == 0:
                need_dummy_start = True
        else:
            loadAndEmitAsm(location, i, sister_list)

    ......

    if style == "computed-goto":
        ......

loadAndEmitC函数：

#
# Load a C fragment and emit it, then output an assembly stub.
#
# @param[in] location  Directory that holds the opcode source files.
# @param[in] opindex   Index of the opcode in the global opcodes list.
#
def loadAndEmitC(location, opindex):
    # The file name is derived from the opcode name: <location>/<op>.cpp.
    opname = opcodes[opindex]
    source = "%s/%s.cpp" % (location, opname)
    if verbose:
        print(" emit %s --> C++" % source)

    # Start from the global substitutions and add the per-opcode entries:
    # the opcode's name and its index.
    subst = getGlobalSubDict()
    subst["opcode"] = opname
    subst["opnum"] = opindex

    # Read the source file, substitute keywords and append it to c_fp.
    appendSourceFile(source, subst, c_fp, None)

    # A stub goes to the asm file only when stub text has been configured.
    # Note (portable mode): asm_stub_text should have length 0.
    if len(asm_stub_text) != 0:
        emitAsmStub(asm_fp, subst)

appendSourceFile函数：

# Note (portable mode): this just writes the C files named by "import" in
# the config file into the output file.
#
# Append the file specified by "source" to the open "outfp".  Each line will
# be template-replaced using the substitution dictionary "dict".
#
# If the first line of the file starts with "%" it is taken as a directive.
# A "%include" line contains a filename and, optionally, a Python-style
# dictionary declaration with substitution strings.  (This is implemented
# with recursion.)
#
# If "sister_list" is provided, and we find a line that contains only "&",
# all subsequent lines from the file will be appended to sister_list instead
# of copied to the output.
#
# This may modify "dict".
#
# (Abridged excerpt: the "......" lines stand for code omitted here.)
#
def appendSourceFile(source, dict, outfp, sister_list):
    outfp.write("/* File: %s */\n" % source)    # banner recording which file this section was copied from
    infp = open(source, "r")    # read the content from the source file
    in_sister = False
    for line in infp:
        ......

        # perform keyword substitution if a dictionary was provided
        if dict != None:
            templ = Template(line)
            try:
                subline = templ.substitute(dict)
            except KeyError, err:
                # A placeholder with no entry in dict is reported as a parse error.
                raise DataParseError("keyword substitution failed in %s: %s"
                        % (source, str(err)))
            except:
                print "ERROR: substitution failed: " + line
                raise
        else:
            subline = line

        ......

    # End the copied section with a newline, then close the source file.
    outfp.write("\n")
    infp.close()