前两天有事情耽搁了,今天开始分析dexdump代码。代码位于android-4.0.1_r1/dalvik/dexdump中,里面只有一个CPP文件。通过文件包含可以看出,主要的代码都在libdex目录中实现。这个库也在dalvik目录中,与dexdump目录同级。

以下是包含的头文件。

#include "libdex/DexFile.h"
#include "libdex/CmdUtils.h"
#include "libdex/DexCatch.h"
#include "libdex/DexClass.h"
#include "libdex/DexDebugInfo.h"
#include "libdex/DexOpcodes.h"
#include "libdex/DexProto.h"
#include "libdex/InstrUtils.h"
#include "libdex/SysUtil.h"

我准备从main开始,依次对每个被调用的函数进行分析。

/*
 * Parse args.
 *
 * I'm not using getopt_long() because we may not have it in libc.
 * (Annotator's note: this suggests getopt_long() may be missing from the
 * libc this tool is built against.)
 *
 * Returns 2 after printing usage when the command line is invalid;
 * otherwise returns 0 if every file was processed successfully and 1 if
 * process() reported a failure for any of them.
 */
int main(int argc, char* const argv[])
{
    bool badUsage = false;
    int opt;

    /* Start from zeroed options; verbose output is on by default. */
    memset(&gOptions, 0, sizeof(gOptions));
    gOptions.verbose = true;

    /* getopt() returns a negative value once all options are consumed. */
    while ((opt = getopt(argc, argv, "cdfhil:mt:")) >= 0) {
        switch (opt) {
        case 'c':       /* verify the checksum and exit */
            gOptions.checksumOnly = true;
            break;
        case 'd':       /* disassemble */
            gOptions.disassemble = true;
            break;
        case 'f':       /* show file headers */
            gOptions.showFileHeaders = true;
            break;
        case 'h':       /* show section headers */
            gOptions.showSectionHeaders = true;
            break;
        case 'i':       /* ignore a bad checksum */
            gOptions.ignoreBadChecksum = true;
            break;
        case 'l':       /* output layout: "plain" or "xml" */
            if (strcmp(optarg, "xml") == 0) {
                /* XML output also turns verbose off and limits to exports. */
                gOptions.outputFormat = OUTPUT_XML;
                gOptions.verbose = false;
                gOptions.exportsOnly = true;
            } else if (strcmp(optarg, "plain") == 0) {
                gOptions.outputFormat = OUTPUT_PLAIN;
            } else {
                badUsage = true;
            }
            break;
        case 'm':       /* dump register maps */
            gOptions.dumpRegisterMaps = true;
            break;
        case 't':       /* temp file name, handed to dexOpenAndMap() */
            gOptions.tempFileName = optarg;
            break;
        default:        /* unknown option */
            badUsage = true;
            break;
        }
    }

    /* At least one file argument must follow the options. */
    if (optind == argc) {
        fprintf(stderr, "%s: no file specified\n", gProgName);
        badUsage = true;
    }

    /* -c and -i are mutually exclusive. */
    if (gOptions.checksumOnly && gOptions.ignoreBadChecksum) {
        fprintf(stderr, "Can't specify both -c and -i\n");
        badUsage = true;
    }

    if (badUsage) {
        usage();
        return 2;
    }

    /* Every remaining argument is a file; accumulate any failure. */
    int failures = 0;
    while (optind < argc) {
        const char* fileName = argv[optind++];
        failures |= process(fileName);
    }

    return (failures != 0) ? 1 : 0;
}

main函数也就做了一个命令行分析而已。最后使用process函数对每个文件进行处理。

/*
 * Process one file.
 *
 * Maps the file, parses it as DEX and then either reports that the
 * checksum was verified (-c) or dumps it via processDexFile().
 *
 * Returns 0 on success, -1 if the file could not be opened/mapped or if
 * DEX parsing failed.
 */
int process(const char* fileName)
{
    MemMapping map;

    if (gOptions.verbose)
        printf("Processing '%s'...\n", fileName);

    /* Map the file; the mapping is described by "map" on success. */
    if (dexOpenAndMap(fileName, gOptions.tempFileName, &map, false) != 0)
        return -1;

    /* Always ask for checksum verification; -i lets parsing continue on error. */
    int parseFlags = kDexParseVerifyChecksum;
    if (gOptions.ignoreBadChecksum)
        parseFlags |= kDexParseContinueOnError;

    int status = -1;
    DexFile* pDexFile = dexFileParse((u1*)map.addr, map.length, parseFlags);
    if (pDexFile == NULL) {
        fprintf(stderr, "ERROR: DEX parse failed\n");
    } else {
        if (gOptions.checksumOnly) {
            /* Parsing succeeded with verification requested; nothing else to do. */
            printf("Checksum verified\n");
        } else {
            /* Emit the dump selected by the command-line options. */
            processDexFile(fileName, pDexFile);
        }
        status = 0;
    }

    /* Release the mapping first, then the parsed DexFile (if any). */
    sysReleaseShmem(&map);
    if (pDexFile != NULL)
        dexFileFree(pDexFile);

    return status;
}

从以上代码来看,主要有两个函数和一个结构需要重点分析:一个函数是dexOpenAndMap,另一个更加重要的是dexFileParse;结构则是struct DexFile。这两个函数和这个结构都位于libdex中。首先来看一下DexFile结构长得是什么样子。

/*
 * Describes a DEX file in memory: pointers to the header and tables of
 * the base DEX, optional auxiliary data, and the address where the DEX
 * data starts.  An instance is returned by dexFileParse() and released
 * with dexFileFree().
 */
struct DexFile {
    /* directly-mapped "opt" header */
    const DexOptHeader* pOptHeader;
    /* pointers to directly-mapped structs and arrays in base DEX */
    // These cover the DEX file as a whole: the header plus its tables.
    const DexHeader*    pHeader;
    const DexStringId*  pStringIds;
    const DexTypeId*    pTypeIds;
    const DexFieldId*   pFieldIds;
    const DexMethodId*  pMethodIds;
    const DexProtoId*   pProtoIds;
    const DexClassDef*  pClassDefs;
    const DexLink*      pLinkData;
    /*
     * These are mapped out of the "auxillary" section, and may not be
     * included in the file.
     */
    // Additional (auxiliary) section data.
    const DexClassLookup* pClassLookup;
    const void*         pRegisterMapPool;       // RegisterMapClassPool
    /* points to start of DEX file data */
    // Address at which the DEX file data begins.
    const u1*           baseAddr;
    /* track memory overhead for auxillary structures */
    // Memory attributed to the auxiliary structures (units not shown here).
    int                 overhead;
    /* additional app-specific data structures associated with the DEX */
    //void*               auxData;
};


以上就是DexFile结构,可以看出它开头保存了文件头以及若干个数据目录的指针,并且还有一些其他附属数据的指针。dexOpenAndMap负责把一个DEX文件映射到内存中(映射结果保存在MemMapping结构里),之后再由dexFileParse解析这块内存并返回DexFile结构。

dexOpenAndMap位于libdex\CmdUtils.cpp文件中,这个函数的流程如下:

1.判断文件后缀名是否是dex,如果不是则尝试按Zip包解压,把其中的classes.dex解压到临时文件;如果文件不是Zip包,则仍然按DEX文件继续处理;其他解压失败的情况则退出。

2.打开dex文件

3.使用位于libdex\SysUtil.cpp文件中的sysMapFileInShmemWritableReadOnly函数进行映射

4.使用libdex\SysUtil.cpp中的sysChangeMapAccess先将映射修改为可读可写

5.使用libdex\DexSwapVerify.cpp中的dexSwapAndVerifyIfNecessary函数进行验证

6.再次调用sysChangeMapAccess,将映射改回只读

7.退出

函数代码如下:


/*
 * Open a DEX file (or a Zip archive containing "classes.dex") and map it.
 *
 * fileName      file to open; names shorter than 5 characters are rejected
 * tempFileName  where to extract classes.dex when fileName is an archive;
 *               if NULL a name based on the process id is generated
 * pMap          receives the mapping (start/length) on success
 * quiet         suppresses some, but not all, of the diagnostics below
 *
 * Returns kUTFRSuccess on success, kUTFRBadArgs for a too-short name, the
 * dexUnzipToFile() result when extraction fails for a reason other than
 * "not a Zip", and kUTFRGenericFailure when open/map/verify fails.
 */
UnzipToFileResult dexOpenAndMap(const char* fileName, const char* tempFileName,
    MemMapping* pMap, bool quiet)
{
    UnzipToFileResult result = kUTFRGenericFailure;
    size_t len = strlen(fileName);      /* length of the file name */
    char tempNameBuf[32];
    bool removeTemp = false;
    int fd = -1;

    if (len < 5) {
        /* Only report the problem when not running quietly. */
        if (!quiet) {
            fprintf(stderr,
                "ERROR: filename must end in .dex, .zip, .jar, or .apk\n");
        }
        result = kUTFRBadArgs;
        goto bail;
    }

    /* Anything not ending in "dex" is first tried as a Zip archive. */
    if (strcasecmp(fileName + len -3, "dex") != 0) {
        if (tempFileName == NULL) {
            /*
             * Try .zip/.jar/.apk, all of which are Zip archives with
             * "classes.dex" inside.  We need to extract the compressed
             * data to a temp file, the location of which varies.
             *
             * On the device we must use /sdcard because most other
             * directories aren't writable (either because of permissions
             * or because the volume is mounted read-only).  On desktop
             * it's nice to use the designated temp directory.
             */
            /* Bounded formatting so the name can never overrun the buffer. */
            if (access("/tmp", W_OK) == 0) {
                snprintf(tempNameBuf, sizeof(tempNameBuf),
                    "/tmp/dex-temp-%d", getpid());
            } else if (access("/sdcard", W_OK) == 0) {
                snprintf(tempNameBuf, sizeof(tempNameBuf),
                    "/sdcard/dex-temp-%d", getpid());
            } else {
                fprintf(stderr,
                    "NOTE: /tmp and /sdcard unavailable for temp files\n");
                snprintf(tempNameBuf, sizeof(tempNameBuf),
                    "dex-temp-%d", getpid());
            }
            /* Extract to the generated temp file. */
            tempFileName = tempNameBuf;
        }

        /* Extract classes.dex from the archive into the temp file. */
        result = dexUnzipToFile(fileName, tempFileName, quiet);
        if (result == kUTFRSuccess) {
            //printf("+++ Good unzip to '%s'\n", tempFileName);
            fileName = tempFileName;    /* continue with the extracted file */
            removeTemp = true;
        } else if (result == kUTFRNotZip) {
            /* Not an archive: fall through and treat the input as DEX. */
            if (!quiet) {
                fprintf(stderr, "Not Zip, retrying as DEX\n");
            }
        } else {
            /* Any other extraction failure is fatal. */
            if (!quiet && result == kUTFRNoClassesDex) {
                fprintf(stderr, "Zip has no classes.dex\n");
            }
            goto bail;
        }
    }

    result = kUTFRGenericFailure;

    /*
     * Pop open the (presumed) DEX file.
     */
    fd = open(fileName, O_RDONLY | O_BINARY);
    if (fd < 0) {
        if (!quiet) {
            fprintf(stderr, "ERROR: unable to open '%s': %s\n",
                fileName, strerror(errno));
        }
        goto bail;
    }

    /* Map the file; the mapping is described by *pMap. */
    if (sysMapFileInShmemWritableReadOnly(fd, pMap) != 0) {
        fprintf(stderr, "ERROR: Unable to map '%s'\n", fileName);
        goto bail;
    }

    /*
     * This call will fail if the file exists on a filesystem that
     * doesn't support mprotect(). If that's the case, then the file
     * will have already been mapped private-writable by the previous
     * call, so we don't need to do anything special if this call
     * returns non-zero.
     */
    /* Make the mapping read-write for the verification step. */
    sysChangeMapAccess(pMap->addr, pMap->length, true, pMap);

    /*
     * A non-zero return is reported as a structural verification failure.
     *
     * NOTE(review): on this failure path the mapping created above stays
     * in *pMap and is not released here; confirm that callers cope.
     */
    if (dexSwapAndVerifyIfNecessary((u1*) pMap->addr, pMap->length)) {
        fprintf(stderr, "ERROR: Failed structural verification of '%s'\n",
            fileName);
        goto bail;
    }

    /*
     * Similar to above, this call will fail if the file wasn't ever
     * read-only to begin with. This is innocuous, though it is
     * undesirable from a memory hygiene perspective.
     */
    /* Restore the original (non-writable) access. */
    sysChangeMapAccess(pMap->addr, pMap->length, false, pMap);

    /*
     * Success!  Close the file and return with the start/length in pMap.
     */
    result = kUTFRSuccess;

bail:
    if (fd >= 0)
        close(fd);
    if (removeTemp) {
        /* this will fail if the OS doesn't allow removal of a mapped file */
        if (unlink(tempFileName) != 0) {
            fprintf(stderr, "WARNING: unable to remove temp '%s'\n",
                tempFileName);
        }
    }
    return result;
}

这里可以看出,主要的验证是在libdex\DexSwapVerify.cpp的dexSwapAndVerifyIfNecessary函数中进行的。第三篇再分析这个验证的过程。