clamav --reload 加载病毒库源码分析

基本流程可以参考另一篇文章《clamav中clamdscan --version 不生效》。

我们直接从解析command开始。parse_command函数返回COMMAND_RELOAD类型。然后进入execute_or_dispatch_command函数处理。

recvloop=>parse_dispatch_cmd=>execute_or_dispatch_command


/* Handle one parsed client command: either execute it right away on the
 * calling thread or dispatch it for asynchronous handling.
 *
 * NOTE: this is an excerpt. Only the IDSESSION filter and the SHUTDOWN and
 * RELOAD cases are shown; the remaining cases are elided ("......").
 *
 * conn     - client connection; its socket (sd), terminator (term), engine and
 *            group fields are read here
 * cmd      - command type produced by the command parser
 * argument - command argument (not used by the cases shown here)
 *
 * returns:
 *  <0 for error
 *     -1 out of memory
 *     -2 other
 *   0 for async dispatched
 *   1 for command completed (connection can be closed)
 */
int execute_or_dispatch_command(client_conn_t *conn, enum commands cmd, const char *argument)
{
    int desc                       = conn->sd;
    char term                      = conn->term;
    const struct cl_engine *engine = conn->engine;
    /* execute commands that can be executed quickly on the recvloop thread,
     * these must:
     *  - not involve any operation that can block for a long time, such as disk
     *  I/O
     *  - send of atomic message is allowed.
     * Dispatch other commands */
    if (conn->group) {
        /* A non-NULL group means the connection is inside an IDSESSION:
         * only the whitelisted commands below may continue. */
        switch (cmd) {
            case COMMAND_FILDES:
            case COMMAND_SCAN:
            case COMMAND_END:
            case COMMAND_INSTREAM:
            case COMMAND_INSTREAMSCAN:
            case COMMAND_VERSION:
            case COMMAND_PING:
            case COMMAND_STATS:
            case COMMAND_COMMANDS:
                /* These commands are accepted inside IDSESSION */
                break;
            default:
                /* these commands are not recognized inside an IDSESSION:
                 * report the error, drop the session group and finish */
                conn_reply_error(conn, "Command invalid inside IDSESSION.");
                logg(LOGG_DEBUG_NV, "SESSION: command is not valid inside IDSESSION: %d\n", cmd);
                conn->group = NULL;
                return 1;
        }
    }

    switch (cmd) {
        case COMMAND_SHUTDOWN:
            /* Raise the exit flag under its mutex; nothing else is done here. */
            pthread_mutex_lock(&exit_mutex);
            progexit = 1;
            pthread_mutex_unlock(&exit_mutex);
            return 1;
        case COMMAND_RELOAD:
            /* Only raise the reload flag here; the database is not loaded
             * in this function. */
            pthread_mutex_lock(&reload_mutex);
            reload = 1;// set the reload flag
            pthread_mutex_unlock(&reload_mutex);
            /* Acknowledge the request to the client right away. */
            mdprintf(desc, "RELOADING%c", term);
            /* we set reload flag, and we'll reload before closing the
             * connection */
            return 1;
......
    }
}

然后返回到recvloop函数中,由它检测到reload标志后开始加载病毒库。由于代码太长,这里只贴出相关部分。

recvloop:

        /* DB reload
         *
         * Fragment of recvloop. Two flags drive it, each guarded by its own
         * mutex: `reload` (a reload was requested) and `reload_stage`
         * (IDLE -> RELOADING -> NEW_DB_AVAILABLE -> IDLE). */
        pthread_mutex_lock(&reload_mutex);
        if (reload) {
            pthread_mutex_unlock(&reload_mutex);
            /* Reload was requested */
            pthread_mutex_lock(&reload_stage_mutex);
            if (reload_stage == RELOAD_STAGE__IDLE) {
                /* Reloading not already taking place */
                reload_stage = RELOAD_STAGE__RELOADING;
                /* Release the stage mutex while reload_db() runs. */
                pthread_mutex_unlock(&reload_stage_mutex);
                if (CL_SUCCESS != reload_db(&engine, dboptions, opts, thr_pool)) {
                    /* The reload could not be set up: clear the request and
                     * return to the idle stage. */
                    logg(LOGG_WARNING, "Database reload setup failed, keeping the previous instance\n");
                    pthread_mutex_lock(&reload_mutex);
                    reload = 0;
                    pthread_mutex_unlock(&reload_mutex);
                    pthread_mutex_lock(&reload_stage_mutex);
                    reload_stage = RELOAD_STAGE__IDLE;
                    pthread_mutex_unlock(&reload_stage_mutex);
                }
                /* Re-acquire the stage mutex so that both paths reach the
                 * check below with reload_stage_mutex held. */
                pthread_mutex_lock(&reload_stage_mutex);
            }
            if (reload_stage == RELOAD_STAGE__NEW_DB_AVAILABLE) {
                /* New database available */
                if (g_newengine) {
                    /* Reload succeeded */
                    logg(LOGG_INFO, "Activating the newly loaded database...\n");
                    thrmgr_setactiveengine(g_newengine);
                    if (optget(opts, "ConcurrentDatabaseReload")->enabled) {
                        /* If concurrent database reload, we now need to free the old engine. */
                        cl_engine_free(engine);
                    }
                    /* Adopt the new engine and clear the hand-over pointer. */
                    engine      = g_newengine;
                    g_newengine = NULL;
                } else {
                    /* Stage says "available" but no engine was handed over. */
                    logg(LOGG_WARNING, "Database reload failed, keeping the previous instance\n");
                }
                /* In both cases the reload cycle is over: go back to idle,
                 * clear the request flag and record the time. */
                reload_stage = RELOAD_STAGE__IDLE;
                pthread_mutex_unlock(&reload_stage_mutex);
                pthread_mutex_lock(&reload_mutex);
                reload = 0;
                pthread_mutex_unlock(&reload_mutex);
                time(&reloaded_time);
            } else {
                /* No new database yet: just release the stage mutex. */
                pthread_mutex_unlock(&reload_stage_mutex);
            }
        } else {
            /* No reload requested. */
            pthread_mutex_unlock(&reload_mutex);
        }

recvloop=>reload_db


/**
 * @brief Reload the database.
 *
 * Prepares a reload context (a copy of the current engine settings, the
 * database directory and the database options), refreshes the global `dbstat`
 * snapshot of the database directory, and spawns reload_th() to do the actual
 * loading.
 *
 * When "ConcurrentDatabaseReload" is disabled, the current engine is released
 * and all scan threads are waited for before the reload thread starts, and the
 * reload thread is joined before returning. When it is enabled, the reload
 * thread is created detached and this function returns without waiting.
 *
 * Ownership: once pthread_create() succeeds, the reload context belongs to
 * reload_th(), which frees it. This function frees the context only when the
 * thread was never started.
 *
 * @param[in,out] engine    The current scan engine, used to copy the settings.
 *                          Set to NULL when the engine is released (non-concurrent mode).
 * @param dboptions         The current database options, used to copy the options.
 * @param opts              The command line options, used to get the database directory.
 * @param thr_pool          The scan thread pool, waited on in non-concurrent mode.
 * @return cl_error_t       CL_SUCCESS if the reload thread was successfully started. This does not mean that the database has reloaded successfully.
 */
static cl_error_t reload_db(struct cl_engine **engine, unsigned int dboptions, const struct optstruct *opts, threadpool_t *thr_pool)
{
    cl_error_t status = CL_EMALFDB;
    cl_error_t retval;
    struct reload_th_t *rldata = NULL;
    pthread_t th;
    pthread_attr_t th_attr;
    int concurrent_reload;
    int th_ret;

    if (NULL == opts || NULL == engine) {
        logg(LOGG_ERROR, "reload_db: Invalid arguments, unable to load signature databases.\n");
        status = CL_EARG;
        goto done;
    }

    /* Zero-initialized so the cleanup code can tell which members were set. */
    rldata = calloc(1, sizeof(*rldata));
    if (!rldata) {
        logg(LOGG_ERROR, "Failed to allocate reload context\n");
        status = CL_EMEM;
        goto done;
    }

    rldata->dboptions = dboptions;

    if (*engine) {
        /* Back up the settings of the current engine; reload_th() applies
         * them to the engine it creates. */
        rldata->settings = cl_engine_settings_copy(*engine);
        if (!rldata->settings) {
            logg(LOGG_ERROR, "Can't make a copy of the current engine settings\n");
            goto done;
        }
    }

    /* Database directory as given by the configuration. */
    rldata->dbdir = strdup(optget(opts, "DatabaseDirectory")->strarg);
    if (!rldata->dbdir) {
        logg(LOGG_ERROR, "Can't duplicate the database directory path\n");
        goto done;
    }

    /* Rebuild the global directory snapshot from scratch. */
    if (dbstat.entries) {
        cl_statfree(&dbstat);
    }
    memset(&dbstat, 0, sizeof(struct cl_stat));

    retval = cl_statinidir(rldata->dbdir, &dbstat);
    if (CL_SUCCESS != retval) {
        logg(LOGG_ERROR, "cl_statinidir() failed: %s\n", cl_strerror(retval));
        goto done;
    }

    /* Read once: the same switch selects the behaviour of the three steps below. */
    concurrent_reload = optget(opts, "ConcurrentDatabaseReload")->enabled;

    if (*engine && !concurrent_reload) {
        /*
         * If concurrent reload disabled, we'll NULL out the current engine and deref it.
         * It will only actually be free'd once the last scan finishes.
         */
        thrmgr_setactiveengine(NULL);
        cl_engine_free(*engine);
        *engine = NULL;

        /* Wait for all scans to finish */
        thrmgr_wait_for_threads(thr_pool);
    }

    if (pthread_attr_init(&th_attr)) {
        logg(LOGG_ERROR, "Failed to init reload thread attributes\n");
        goto done;
    }

    if (concurrent_reload) {
        /* For concurrent reloads: set detached, so we don't leak thread resources */
        pthread_attr_setdetachstate(&th_attr, PTHREAD_CREATE_DETACHED);
    }

    /* Spawn the thread that loads and compiles the new engine. */
    th_ret = pthread_create(&th, &th_attr, reload_th, rldata);
    if (pthread_attr_destroy(&th_attr))
        logg(LOGG_WARNING, "Failed to release reload thread attributes\n");
    if (th_ret) {
        logg(LOGG_ERROR, "Failed to spawn reload thread\n");
        goto done;
    }

    /*
     * The thread is running and frees the context itself. Forget our pointer
     * so that the error paths below cannot free it a second time.
     */
    rldata = NULL;

    if (!concurrent_reload) {
        /* For non-concurrent reloads: join the thread */
        int join_ret = pthread_join(th, NULL);
        switch (join_ret) {
            case 0:
                logg(LOGG_INFO, "Database reload completed.\n");
                break;

            case EDEADLK:
                logg(LOGG_ERROR, "A deadlock was detected when waiting for the database reload thread.\n");
                goto done;

            case ESRCH:
                logg(LOGG_ERROR, "Failed to find database reload thread.\n");
                goto done;

            case EINVAL:
                logg(LOGG_ERROR, "The database reload thread is not a joinable thread.\n");
                goto done;

            default:
                logg(LOGG_ERROR, "An unknown error occured when waiting for the database reload thread: %d\n", join_ret);
                goto done;
        }
    }

    status = CL_SUCCESS;

done:

    if (CL_SUCCESS != status && NULL != rldata) {
        /*
         * The reload thread was never started, so we're responsible for
         * cleaning up the rldata structure.
         */
        if (NULL != rldata->settings) {
            cl_engine_settings_free(rldata->settings);
        }
        free(rldata->dbdir);
        free(rldata);
    }

    return status;
}

recvloop=>reload_db=>reload_th


/**
 * @brief Thread entry point to load the signature databases & compile a new scanning engine.
 *
 * Once loaded, an event will be set to indicate that the new engine is ready.
 *
 * @param arg   A reload_th_t structure defining the db directory, db settings, engine settings.
 * @return void*
 */
static void *reload_th(void *arg)
{
    cl_error_t status = CL_EMALFDB;

    struct reload_th_t *rldata = arg;
    struct cl_engine *engine   = NULL;
    unsigned int sigs          = 0;
    int retval;

    if (NULL == rldata || NULL == rldata->dbdir || NULL == rldata->settings) {
        logg(LOGG_ERROR, "reload_th: Invalid arguments, unable to load signature databases.\n");
        status = CL_EARG;
        goto done;
    }

    logg(LOGG_INFO, "Reading databases from %s\n", rldata->dbdir);

    if (NULL == (engine = cl_engine_new())) {//创建引擎对象
        logg(LOGG_ERROR, "reload_th: Can't initialize antivirus engine\n");
        goto done;
    }

    retval = cl_engine_settings_apply(engine, rldata->settings);//复制旧引擎的配置过来
    if (CL_SUCCESS != retval) {
        logg(LOGG_ERROR, "reload_th: Failed to apply previous engine settings: %s\n", cl_strerror(retval));
        status = CL_EMEM;
        goto done;
    }

    retval = cl_load(rldata->dbdir, engine, &sigs, rldata->dboptions);//加载病毒库
    if (CL_SUCCESS != retval) {
        logg(LOGG_ERROR, "reload_th: Database load failed: %s\n", cl_strerror(retval));
        goto done;
    }

    retval = cl_engine_compile(engine);//编译病毒库
    if (CL_SUCCESS != retval) {
        logg(LOGG_ERROR, "reload_th: Database initialization error: can't compile engine: %s\n", cl_strerror(retval));
        goto done;
    }

    logg(LOGG_INFO, "Database correctly reloaded (%u signatures)\n", sigs);
    status = CL_SUCCESS;

done:

    if (NULL != rldata) {
        if (NULL != rldata->settings) {
            cl_engine_settings_free(rldata->settings);
        }
        if (NULL != rldata->dbdir) {
            free(rldata->dbdir);
        }
        free(rldata);
    }

    if (CL_SUCCESS != status) {
        if (NULL != engine) {
            cl_engine_free(engine);
            engine = NULL;
        }
    }

    pthread_mutex_lock(&reload_stage_mutex);
    reload_stage = RELOAD_STAGE__NEW_DB_AVAILABLE; /* New DB available */
    g_newengine  = engine;
    pthread_mutex_unlock(&reload_stage_mutex);

#ifdef _WIN32
    SetEvent(event_wake_recv);
#else
    if (syncpipe_wake_recv_w != -1)
        if (write(syncpipe_wake_recv_w, "", 1) != 1)
            logg(LOGG_DEBUG_NV, "Failed to write to syncpipe\n");
#endif

    return NULL;
}

recvloop=>reload_db=>reload_th=>cl_load


/**
 * @brief Load signature databases from a single file or from a directory.
 *
 * Refuses to work on a NULL or already compiled engine, stats the path,
 * initializes the phishing, bytecode and cache subsystems where required,
 * merges dboptions into the engine, and then loads either the one file or
 * every database in the directory.
 *
 * @param path       Database file or database directory.
 * @param engine     Engine to load into; must not be compiled yet.
 * @param signo      Running signature counter. It is dereferenced when a
 *                   progress callback is installed on the engine.
 * @param dboptions  CL_DB_* option bits.
 * @return cl_error_t  CL_ENULLARG, CL_EARG, CL_ESTAT, CL_EMEM or CL_EOPEN for
 *                     the checks made here, otherwise the result of the
 *                     subsystem initialization or of the loader.
 */
cl_error_t cl_load(const char *path, struct cl_engine *engine, unsigned int *signo, unsigned int dboptions)
{
    STATBUF path_stat;
    int rc;

    if (NULL == engine) {
        cli_errmsg("cl_load: engine == NULL\n");
        return CL_ENULLARG;
    }

    if (0 != (engine->dboptions & CL_DB_COMPILED)) {
        cli_errmsg("cl_load(): can't load new databases when engine is already compiled\n");
        return CL_EARG;
    }

    /* Stat the path; this also reveals missing files and permission problems. */
    if (-1 == CLAMSTAT(path, &path_stat)) {
        switch (errno) {
#if defined(EACCES)
            case EACCES:
                cli_errmsg("cl_load(): Access denied for path: %s\n", path);
                break;
#endif
#if defined(ENOENT)
            case ENOENT:
                cli_errmsg("cl_load(): No such file or directory: %s\n", path);
                break;
#endif
#if defined(ELOOP)
            case ELOOP:
                cli_errmsg("cl_load(): Too many symbolic links encountered in path: %s\n", path);
                break;
#endif
#if defined(EOVERFLOW)
            case EOVERFLOW:
                cli_errmsg("cl_load(): File size is too large to be recognized. Path: %s\n", path);
                break;
#endif
#if defined(EIO)
            case EIO:
                cli_errmsg("cl_load(): An I/O error occurred while reading from path: %s\n", path);
                break;
#endif
            default:
                cli_errmsg("cl_load: Can't get status of: %s\n", path);
                break;
        }
        return CL_ESTAT;
    }

    /* Phishing engine: only when requested, not yet set up, and enabled in dconf. */
    if ((dboptions & CL_DB_PHISHING_URLS) && !engine->phishcheck && (engine->dconf->phishing & PHISHING_CONF_ENGINE)) {
        rc = phishing_init(engine);
        if (CL_SUCCESS != rc) {
            return rc;
        }
    }

    /* Bytecode engine: initialize once, when requested. */
    if (!(dboptions & CL_DB_BYTECODE) || engine->bcs.inited) {
        cli_dbgmsg("Bytecode engine disabled\n");
    } else {
        rc = cli_bytecode_init(&engine->bcs);
        if (CL_SUCCESS != rc) {
            return rc;
        }
    }

    /* Scan cache: create it if the engine has none yet. */
    if (!engine->cache) {
        if (cli_cache_init(engine)) {
            return CL_EMEM;
        }
    }

    engine->dboptions |= dboptions;

    /* A regular file is loaded directly; a directory is walked and every
     * database in it is loaded in priority order. */
    if ((path_stat.st_mode & S_IFMT) == S_IFREG) {
        /* Count # of sigs in the database now */
        engine->num_total_signatures += count_signatures(path, engine, dboptions);

        rc = cli_load(path, engine, signo, dboptions, NULL);
    } else if ((path_stat.st_mode & S_IFMT) == S_IFDIR) {
        /* Count # of signatures inside cli_loaddbdir(), before loading */
        rc = cli_loaddbdir(path, engine, signo, dboptions | CL_DB_DIRECTORY);
    } else {
        cli_errmsg("cl_load(%s): Not supported database file type\n", path);
        return CL_EOPEN;
    }

    if (engine->cb_sigload_progress) {
        /* Let the progress callback function know we're done! */
        (void)engine->cb_sigload_progress(*signo, *signo, engine->cb_sigload_progress_ctx);
    }

#ifdef YARA_PROTO
    if (yara_total) {
        cli_yaramsg("$$$$$$$$$$$$ YARA $$$$$$$$$$$$\n");
        cli_yaramsg("\tTotal Rules: %u\n", yara_total);
        cli_yaramsg("\tRules Loaded: %u\n", yara_loaded);
        cli_yaramsg("\tComplex Conditions: %u\n", yara_complex);
        cli_yaramsg("\tMalformed/Unsupported Rules: %u\n", yara_malform);
        cli_yaramsg("\tEmpty Rules: %u\n", yara_empty);
        cli_yaramsg("$$$$$$$$$$$$ YARA $$$$$$$$$$$$\n");
    }
#endif
    return rc;
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir


/**
 * @brief Load every supported database file found in a directory.
 *
 * Works in two passes. The first pass reads the directory, builds the full
 * path of each file accepted by CLI_DBEXT(), adds its signature count to
 * engine->num_total_signatures, assigns it a load priority and inserts it into
 * a linked list. The second pass walks that list and loads each file with
 * cli_load(). When both daily.cld and daily.cvd are present, only one of them
 * is loaded (see the note before the second loop).
 *
 * @param dirname  Directory to read.
 * @param engine   Engine to load into.
 * @param signo    Running signature counter, passed through to cli_load().
 * @param options  CL_DB_* option bits.
 * @return cl_error_t  CL_EOPEN if the directory cannot be opened or nothing
 *                     was loaded, CL_EMEM on allocation failure, CL_EMALFDB if
 *                     a daily header cannot be parsed, otherwise the result of
 *                     the last cli_load() call.
 */
static cl_error_t cli_loaddbdir(const char *dirname, struct cl_engine *engine, unsigned int *signo, unsigned int options)
{
    cl_error_t ret = CL_EOPEN;

    DIR *dd = NULL;
    struct dirent *dent;
    char *dbfile      = NULL;
    int ends_with_sep = 0;
    size_t dirname_len;
    struct cl_cvd *daily_cld = NULL;
    struct cl_cvd *daily_cvd = NULL;
    struct db_ll_entry *head = NULL;
    struct db_ll_entry *iter;
    struct db_ll_entry *next;

    cli_dbgmsg("Loading databases from %s\n", dirname);

    if ((dd = opendir(dirname)) == NULL) {// open the directory
        cli_errmsg("cli_loaddbdir: Can't open directory %s\n", dirname);
        ret = CL_EOPEN;
        goto done;
    }

    /* Remember whether dirname already ends with the path separator, so the
     * separator is not doubled when the file paths are built below. */
    dirname_len = strlen(dirname);
    if (dirname_len >= strlen(PATHSEP)) {
        if (strcmp(dirname + dirname_len - strlen(PATHSEP), PATHSEP) == 0) {
            cli_dbgmsg("cli_loaddbdir: dirname ends with separator\n");
            ends_with_sep = 1;
        }
    }

    while ((dent = readdir(dd))) {// first pass: iterate over the directory entries
        struct db_ll_entry *entry;
        unsigned int load_priority;

        /* Skip entries without an inode, "." and "..", and anything that
         * CLI_DBEXT() does not accept as a database file. */
        if (!dent->d_ino) {
            continue;
        }
        if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) {
            continue;
        }
        if (!CLI_DBEXT(dent->d_name)) {
            continue;
        }

        /* Build "<dirname><sep><name>"; +2 covers the separator and the NUL. */
        dbfile = (char *)cli_malloc(strlen(dent->d_name) + dirname_len + 2);
        if (!dbfile) {
            cli_errmsg("cli_loaddbdir: dbfile == NULL\n");
            ret = CL_EMEM;
            goto done;
        }
        if (ends_with_sep)
            sprintf(dbfile, "%s%s", dirname, dent->d_name);
        else
            sprintf(dbfile, "%s" PATHSEP "%s", dirname, dent->d_name);

/* Load priorities, lowest value first. */
#define DB_LOAD_PRIORITY_IGN 1
#define DB_LOAD_PRIORITY_DAILY_CLD 2
#define DB_LOAD_PRIORITY_DAILY_CVD 3
#define DB_LOAD_PRIORITY_LOCAL_GDB 4
#define DB_LOAD_PRIORITY_DAILY_CFG 5
#define DB_LOAD_PRIORITY_CRB 6
#define DB_LOAD_PRIORITY_NORMAL 7

        if (cli_strbcasestr(dent->d_name, ".ign") || cli_strbcasestr(dent->d_name, ".ign2")) {
            /* load .ign and .ign2 files first */
            load_priority = DB_LOAD_PRIORITY_IGN;

            engine->num_total_signatures += count_line_based_signatures(dbfile);

        } else if (!strcmp(dent->d_name, "daily.cld")) {
            /* The daily db must be loaded before main, this way, the
               daily ign & ign2 signatures prevent ign'ored signatures
               in all databases from being loaded. */
            load_priority = DB_LOAD_PRIORITY_DAILY_CLD;

            if (0 == access(dbfile, R_OK)) {
                /* Keep the header: its version is compared in the second pass. */
                daily_cld = cl_cvdhead(dbfile);
                if (!daily_cld) {
                    cli_errmsg("cli_loaddbdir: error parsing header of %s\n", dbfile);
                    ret = CL_EMALFDB;
                    goto done;
                }

                /* Successfully opened the daily CLD file and read the header info. */
                engine->num_total_signatures += daily_cld->sigs;
            } else {
                /* Not readable: silently leave this file out. */
                free(dbfile);
                dbfile = NULL;
                continue;
            }

        } else if (!strcmp(dent->d_name, "daily.cvd")) {
            load_priority = DB_LOAD_PRIORITY_DAILY_CVD;

            if (0 == access(dbfile, R_OK)) {
                /* Keep the header: its version is compared in the second pass. */
                daily_cvd = cl_cvdhead(dbfile);
                if (!daily_cvd) {
                    cli_errmsg("cli_loaddbdir: error parsing header of %s\n", dbfile);
                    ret = CL_EMALFDB;
                    goto done;
                }
                /* Successfully opened the daily CVD file and read the header info. */
                engine->num_total_signatures += daily_cvd->sigs;
            } else {
                /* Not readable: silently leave this file out. */
                free(dbfile);
                dbfile = NULL;
                continue;
            }

        } else if (!strcmp(dent->d_name, "local.gdb")) {
            load_priority = DB_LOAD_PRIORITY_LOCAL_GDB;

            engine->num_total_signatures += count_line_based_signatures(dbfile);

        } else if (!strcmp(dent->d_name, "daily.cfg")) {
            load_priority = DB_LOAD_PRIORITY_DAILY_CFG;

            engine->num_total_signatures += count_line_based_signatures(dbfile);

        } else if ((options & CL_DB_OFFICIAL_ONLY) &&
                   !strstr(dirname, "clamav-") &&            // Official databases that are temp-files (in the process of updating).
                   !cli_strbcasestr(dent->d_name, ".cld") && // Official databases that have been updated using incremental updates.
                   !cli_strbcasestr(dent->d_name, ".cvd")) { // Official databases.
            // TODO Should this be higher up in the list? Should we
            // ignore .ign/.ign2 files and the local.gdb file when this
            // flag is set?
            cli_dbgmsg("Skipping unofficial database %s\n", dent->d_name);
            free(dbfile);
            dbfile = NULL;
            continue;

        } else if (cli_strbcasestr(dent->d_name, ".crb")) {
            /* .cat files cannot be loaded successfully unless there are .crb
             * rules that trust the certs used to sign the catalog files.
             * Therefore, we need to ensure the .crb rules are loaded prior */
            load_priority = DB_LOAD_PRIORITY_CRB;

            engine->num_total_signatures += count_line_based_signatures(dbfile);

        } else {
            load_priority = DB_LOAD_PRIORITY_NORMAL;

            engine->num_total_signatures += count_signatures(dbfile, engine, options);// add this database's signature count to the running total
        }

        entry = malloc(sizeof(*entry));
        if (NULL == entry) {
            cli_errmsg("cli_loaddbdir: failed to allocate memory for database load list entry\n");
            ret = CL_EMEM;
            goto done;
        }

        entry->path          = dbfile;// the list entry takes ownership of the path string
        dbfile               = NULL;
        entry->load_priority = load_priority;
        cli_insertdbtoll(&head, entry);// insert the entry into the list rooted at head
    }

    /* The list entries are stored in priority order, so now just loop through
     * and load everything.
     * NOTE: If there's a daily.cld and a daily.cvd, we'll only load whichever
     * has the highest version number.  If they have the same version number
     * we load daily.cld, since that will load faster (it won't attempt to
     * verify the digital signature of the db).
     *
     * TODO It'd be ideal if we treated all cld/cvd pairs like we do the daily
     * ones, and only loaded the one with the highest version. */
    for (iter = head; iter != NULL; iter = iter->next) {// second pass: load each listed database

        if (DB_LOAD_PRIORITY_DAILY_CLD == iter->load_priority) {
            /* iter is the daily.cld. If we also have the cvd and the cvd is newer, skip the cld. */
            if ((NULL != daily_cvd) && (daily_cld->version < daily_cvd->version)) {
                continue;
            }

        } else if (DB_LOAD_PRIORITY_DAILY_CVD == iter->load_priority) {
            /* iter is the daily.cvd. If we also have the cld and the cld is same or newer, skip the cvd. */
            if ((NULL != daily_cld) && (daily_cld->version >= daily_cvd->version)) {
                continue;
            }
        }

        /* Stop at the first database that fails to load. */
        ret = cli_load(iter->path, engine, signo, options, NULL);
        if (ret) {
            cli_errmsg("cli_loaddbdir: error loading database %s\n", iter->path);
            goto done;
        }
    }

done:
    /* Release the load list together with the paths it owns. */
    for (iter = head; iter != NULL; iter = next) {
        next = iter->next;
        free(iter->path);
        free(iter);
    }

    /* Non-NULL only when an error occurred before the path was handed to a list entry. */
    if (NULL != dbfile) {
        free(dbfile);
    }

    if (NULL != dd) {
        closedir(dd);
    }

    if (NULL != daily_cld) {
        cl_cvdfree(daily_cld);
    }

    if (NULL != daily_cvd) {
        cl_cvdfree(daily_cvd);
    }

    /* ret keeps its initial CL_EOPEN value when no database was loaded at all. */
    if (ret == CL_EOPEN)
        cli_errmsg("cli_loaddbdir: No supported database files found in %s\n", dirname);

    return ret;
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>count_signatures


/**
 * @brief Count the number of signatures in a database file.
 *
 * Non-database files will be ignored, and count as 0 signatures.
 * Database validation is not done, just signature counting.
 *
 * CVD/CLD/CUD database archives are not counted the hard way, we just trust
 * signature count in the header. Yara rules and bytecode sigs count as 1 each.
 *
 * @param filepath  Filepath of the database file to count.
 * @return size_t   The number of signatures.
 */
static size_t count_signatures(const char *filepath, struct cl_engine *engine, unsigned int options)
{
    size_t num_signatures            = 0;
    struct cl_cvd *db_archive_header = NULL;

    if (cli_strbcasestr(filepath, ".cld") ||
        cli_strbcasestr(filepath, ".cvd") ||
        cli_strbcasestr(filepath, ".cud")) {
        /* use the CVD head to get the sig count. */
        if (0 == access(filepath, R_OK)) {
            db_archive_header = cl_cvdhead(filepath);//读取文件头信息,里面包含了sig个数
            if (!db_archive_header) {
                cli_errmsg("cli_loaddbdir: error parsing header of %s\n", filepath);
                goto done;
            }

            num_signatures += db_archive_header->sigs;//累加sig 个数
        }

    } else if ((CL_BYTECODE_TRUST_ALL == engine->bytecode_security) &&
               cli_strbcasestr(filepath, ".cbc")) {
        /* Counts as 1 signature if loading plain .cbc files. */
        num_signatures += 1;

    } else if ((options & CL_DB_YARA_ONLY) &&
               (cli_strbcasestr(filepath, ".yar") || cli_strbcasestr(filepath, ".yara"))) {
        /* Counts as 1 signature. */
        num_signatures += 1;

    } else if (cli_strbcasestr(filepath, ".db") ||
               cli_strbcasestr(filepath, ".crb") ||
               cli_strbcasestr(filepath, ".hdb") || cli_strbcasestr(filepath, ".hsb") ||
               cli_strbcasestr(filepath, ".hdu") || cli_strbcasestr(filepath, ".hsu") ||
               cli_strbcasestr(filepath, ".fp") || cli_strbcasestr(filepath, ".sfp") ||
               cli_strbcasestr(filepath, ".mdb") || cli_strbcasestr(filepath, ".msb") ||
               cli_strbcasestr(filepath, ".imp") ||
               cli_strbcasestr(filepath, ".mdu") || cli_strbcasestr(filepath, ".msu") ||
               cli_strbcasestr(filepath, ".ndb") || cli_strbcasestr(filepath, ".ndu") || cli_strbcasestr(filepath, ".sdb") ||
               cli_strbcasestr(filepath, ".ldb") || cli_strbcasestr(filepath, ".ldu") ||
               cli_strbcasestr(filepath, ".zmd") || cli_strbcasestr(filepath, ".rmd") ||
               cli_strbcasestr(filepath, ".cfg") ||
               cli_strbcasestr(filepath, ".wdb") ||
               cli_strbcasestr(filepath, ".pdb") || cli_strbcasestr(filepath, ".gdb") ||
               cli_strbcasestr(filepath, ".ftm") ||
               cli_strbcasestr(filepath, ".ign") || cli_strbcasestr(filepath, ".ign2") ||
               cli_strbcasestr(filepath, ".idb") ||
               cli_strbcasestr(filepath, ".cdb") ||
               cli_strbcasestr(filepath, ".cat") ||
               cli_strbcasestr(filepath, ".ioc") ||
               cli_strbcasestr(filepath, ".pwdb")) {
        /* Should be a line-based signaure file, count it the old fashioned way */
        num_signatures += count_line_based_signatures(filepath);
    }

done:
    if (NULL != db_archive_header) {
        cl_cvdfree(db_archive_header);
    }

    return num_signatures;
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load


/**
 * @brief Load one database file, choosing the loader by file extension.
 *
 * When no dbio is given, the file is opened here and closed before returning.
 * The name after the last path separator selects the loader. Some types are
 * only loaded when the matching option bit is set and are otherwise marked as
 * skipped, which is not an error. Files with an unknown extension are skipped
 * with a warning.
 *
 * @param filename  Path of the database file.
 * @param engine    Engine to load into.
 * @param signo     Running signature counter, passed to the loaders. It is
 *                  dereferenced when a progress callback is installed.
 * @param options   CL_DB_* option bits.
 * @param dbio      Optional input context. When NULL, the file is opened with
 *                  fopen(). When its chkonly flag is set, the input is only
 *                  read through and nothing is loaded.
 * @return cl_error_t  CL_SUCCESS when the file was loaded or skipped, CL_EOPEN
 *                     if it cannot be opened, otherwise the loader's error.
 */
cl_error_t cli_load(const char *filename, struct cl_engine *engine, unsigned int *signo, unsigned int options, struct cli_dbio *dbio)
{
    cl_error_t ret = CL_SUCCESS;

    FILE *fs        = NULL;
    uint8_t skipped = 0;
    const char *dbname;
    char buff[FILEBUFF];

    /* Check-only mode: consume the input and report success. */
    if (dbio && dbio->chkonly) {
        while (cli_dbgets(buff, FILEBUFF, NULL, dbio)) continue;
        return CL_SUCCESS;
    }

    if (!dbio && (fs = fopen(filename, "rb")) == NULL) {// open the file ourselves when no dbio was supplied
        /* When loading a directory, a file that has disappeared in the
         * meantime is tolerated instead of being reported as an error. */
        if (options & CL_DB_DIRECTORY) { /* bb#1624 */
            if (access(filename, R_OK)) {
                if (errno == ENOENT) {
                    cli_dbgmsg("Detected race condition, ignoring old file %s\n", filename);
                    return CL_SUCCESS;
                }
            }
        }
        cli_errmsg("cli_load(): Can't open file %s\n", filename);
        return CL_EOPEN;
    }

    /* dbname is the file name without its directory part. */
    if ((dbname = strrchr(filename, *PATHSEP)))
        dbname++;
    else
        dbname = filename;

    /* Dispatch on the extension. With CL_DB_YARA_ONLY (HAVE_YARA builds),
     * only Yara rule files are loaded and everything else is skipped. */
#ifdef HAVE_YARA
    if (options & CL_DB_YARA_ONLY) {
        if (cli_strbcasestr(dbname, ".yar") || cli_strbcasestr(dbname, ".yara"))
            ret = cli_loadyara(fs, engine, signo, options, dbio, filename);
        else
            skipped = 1;
    } else
#endif
        if (cli_strbcasestr(dbname, ".db")) {
        ret = cli_loaddb(fs, engine, signo, options, dbio, dbname);

    } else if (cli_strbcasestr(dbname, ".cvd")) {// in this walkthrough the .cvd branch is the one taken first
        ret = cli_cvdload(fs, engine, signo, options, 0, filename, 0);

    } else if (cli_strbcasestr(dbname, ".cld")) {
        ret = cli_cvdload(fs, engine, signo, options, 1, filename, 0);

    } else if (cli_strbcasestr(dbname, ".cud")) {
        ret = cli_cvdload(fs, engine, signo, options, 2, filename, 0);

    } else if (cli_strbcasestr(dbname, ".crb")) {
        ret = cli_loadcrt(fs, engine, dbio);

    } else if (cli_strbcasestr(dbname, ".hdb") || cli_strbcasestr(dbname, ".hsb")) {
        ret = cli_loadhash(fs, engine, signo, MD5_HDB, options, dbio, dbname);
    } else if (cli_strbcasestr(dbname, ".hdu") || cli_strbcasestr(dbname, ".hsu")) {
        /* Loaded only when CL_DB_PUA is set. */
        if (options & CL_DB_PUA)
            ret = cli_loadhash(fs, engine, signo, MD5_HDB, options | CL_DB_PUA_MODE, dbio, dbname);
        else
            skipped = 1;

    } else if (cli_strbcasestr(dbname, ".fp") || cli_strbcasestr(dbname, ".sfp")) {
        ret = cli_loadhash(fs, engine, signo, MD5_FP, options, dbio, dbname);
    } else if (cli_strbcasestr(dbname, ".mdb") || cli_strbcasestr(dbname, ".msb")) {
        ret = cli_loadhash(fs, engine, signo, MD5_MDB, options, dbio, dbname);
    } else if (cli_strbcasestr(dbname, ".imp")) {
        ret = cli_loadhash(fs, engine, signo, MD5_IMP, options, dbio, dbname);

    } else if (cli_strbcasestr(dbname, ".mdu") || cli_strbcasestr(dbname, ".msu")) {
        /* Loaded only when CL_DB_PUA is set. */
        if (options & CL_DB_PUA)
            ret = cli_loadhash(fs, engine, signo, MD5_MDB, options | CL_DB_PUA_MODE, dbio, dbname);
        else
            skipped = 1;

    } else if (cli_strbcasestr(dbname, ".ndb")) {
        ret = cli_loadndb(fs, engine, signo, 0, options, dbio, dbname);

    } else if (cli_strbcasestr(dbname, ".ndu")) {
        /* Loaded only when CL_DB_PUA is set. */
        if (!(options & CL_DB_PUA))
            skipped = 1;
        else
            ret = cli_loadndb(fs, engine, signo, 0, options | CL_DB_PUA_MODE, dbio, dbname);

    } else if (cli_strbcasestr(filename, ".ldb")) {
        ret = cli_loadldb(fs, engine, signo, options, dbio, dbname);

    } else if (cli_strbcasestr(filename, ".ldu")) {
        /* Loaded only when CL_DB_PUA is set. */
        if (options & CL_DB_PUA)
            ret = cli_loadldb(fs, engine, signo, options | CL_DB_PUA_MODE, dbio, dbname);
        else
            skipped = 1;
    } else if (cli_strbcasestr(filename, ".cbc")) {
        /* Loaded only when CL_DB_BYTECODE is set. */
        if (options & CL_DB_BYTECODE)
            ret = cli_loadcbc(fs, engine, signo, options, dbio, dbname);
        else
            skipped = 1;
    } else if (cli_strbcasestr(dbname, ".sdb")) {
        ret = cli_loadndb(fs, engine, signo, 1, options, dbio, dbname);

    } else if (cli_strbcasestr(dbname, ".zmd")) {
        ret = cli_loadmd(fs, engine, signo, 1, options, dbio, dbname);

    } else if (cli_strbcasestr(dbname, ".rmd")) {
        ret = cli_loadmd(fs, engine, signo, 2, options, dbio, dbname);

    } else if (cli_strbcasestr(dbname, ".cfg")) {
        ret = cli_dconf_load(fs, engine, options, dbio);

    } else if (cli_strbcasestr(dbname, ".info")) {
        ret = cli_loadinfo(fs, engine, options, dbio);

    } else if (cli_strbcasestr(dbname, ".wdb")) {
        /* Loaded only when CL_DB_PHISHING_URLS is set. */
        if (options & CL_DB_PHISHING_URLS) {
            ret = cli_loadwdb(fs, engine, options, dbio);
        } else
            skipped = 1;
    } else if (cli_strbcasestr(dbname, ".pdb") || cli_strbcasestr(dbname, ".gdb")) {
        /* Loaded only when CL_DB_PHISHING_URLS is set. */
        if (options & CL_DB_PHISHING_URLS) {
            ret = cli_loadpdb(fs, engine, signo, options, dbio);
        } else
            skipped = 1;
    } else if (cli_strbcasestr(dbname, ".ftm")) {
        ret = cli_loadftm(fs, engine, options, 0, dbio);

    } else if (cli_strbcasestr(dbname, ".ign") || cli_strbcasestr(dbname, ".ign2")) {
        ret = cli_loadign(fs, engine, options, dbio);

    } else if (cli_strbcasestr(dbname, ".idb")) {
        ret = cli_loadidb(fs, engine, signo, options, dbio);

    } else if (cli_strbcasestr(dbname, ".cdb")) {
        ret = cli_loadcdb(fs, engine, signo, options, dbio);
    } else if (cli_strbcasestr(dbname, ".cat")) {
        ret = cli_loadmscat(fs, dbname, engine, options, dbio);
    } else if (cli_strbcasestr(dbname, ".ioc")) {
        ret = cli_loadopenioc(fs, dbname, engine, options);
#ifdef HAVE_YARA
    } else if (cli_strbcasestr(dbname, ".yar") || cli_strbcasestr(dbname, ".yara")) {
        /* Loaded unless CL_DB_YARA_EXCLUDE is set. */
        if (!(options & CL_DB_YARA_EXCLUDE))
            ret = cli_loadyara(fs, engine, signo, options, dbio, filename);
        else
            skipped = 1;
#endif
    } else if (cli_strbcasestr(dbname, ".pwdb")) {
        ret = cli_loadpwdb(fs, engine, options, 0, dbio);
    } else {
        cli_warnmsg("cli_load: unknown extension - skipping %s\n", filename);
        skipped = 1;
    }

    /* Report the outcome: error, skipped, or loaded. */
    if (ret) {
        cli_errmsg("Can't load %s: %s\n", filename, cl_strerror(ret));
    } else {
        if (skipped)
            cli_dbgmsg("%s skipped\n", filename);
        else
            cli_dbgmsg("%s loaded\n", filename);
    }

    /* fs is non-NULL only when the file was opened in this function. */
    if (fs)
        fclose(fs);

    if (engine->cb_sigload_progress) {
        /* Let the progress callback function know how we're doing */
        (void)engine->cb_sigload_progress(engine->num_total_signatures, *signo, engine->cb_sigload_progress_ctx);
    }

    return ret;
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload


int cli_cvdload(FILE *fs, struct cl_engine *engine, unsigned int *signo, unsigned int options, unsigned int dbtype, const char *filename, unsigned int chkonly)
{
    struct cl_cvd cvd, dupcvd;
    FILE *dupfs;
    int ret;
    time_t s_time;
    int cfd;
    struct cli_dbio dbio;
    struct cli_dbinfo *dbinfo = NULL;
    char *dupname;

    dbio.hashctx = NULL;

    cli_dbgmsg("in cli_cvdload()\n");

    /* verify */
    if ((ret = cli_cvdverify(fs, &cvd, dbtype)))
        return ret;

    if (dbtype <= 1) {
        /* check for duplicate db */
        dupname = cli_strdup(filename);
        if (!dupname)
            return CL_EMEM;
        dupname[strlen(dupname) - 2] = (dbtype == 1 ? 'v' : 'l');
        if (!access(dupname, R_OK) && (dupfs = fopen(dupname, "rb"))) {
            if ((ret = cli_cvdverify(dupfs, &dupcvd, !dbtype))) {
                fclose(dupfs);
                free(dupname);
                return ret;
            }
            fclose(dupfs);
            if (dupcvd.version > cvd.version) {
                cli_warnmsg("Detected duplicate databases %s and %s. The %s database is older and will not be loaded, you should manually remove it from the database directory.\n", filename, dupname, filename);
                free(dupname);
                return CL_SUCCESS;
            } else if (dupcvd.version == cvd.version && !dbtype) {
                cli_warnmsg("Detected duplicate databases %s and %s, please manually remove one of them\n", filename, dupname);
                free(dupname);
                return CL_SUCCESS;
            }
        }
        free(dupname);
    }

    if (strstr(filename, "daily.")) {
        time(&s_time);
        if (cvd.stime > s_time) {
            if (cvd.stime - (unsigned int)s_time > 3600) {
                cli_warnmsg("******************************************************\n");
                cli_warnmsg("***      Virus database timestamp in the future!   ***\n");
                cli_warnmsg("***  Please check the timezone and clock settings  ***\n");
                cli_warnmsg("******************************************************\n");
            }
        } else if ((unsigned int)s_time - cvd.stime > 604800) {
            cli_warnmsg("**************************************************\n");
            cli_warnmsg("***  The virus database is older than 7 days!  ***\n");
            cli_warnmsg("***   Please update it as soon as possible.    ***\n");
            cli_warnmsg("**************************************************\n");
        }
        engine->dbversion[0] = cvd.version;
        engine->dbversion[1] = cvd.stime;
    }

    if (cvd.fl > cl_retflevel()) {
        cli_warnmsg("*******************************************************************\n");
        cli_warnmsg("***  This version of the ClamAV engine is outdated.             ***\n");
        cli_warnmsg("***   Read https://docs.clamav.net/manual/Installing.html       ***\n");
        cli_warnmsg("*******************************************************************\n");
    }

    cfd          = fileno(fs);
    dbio.chkonly = 0;
    if (dbtype == 2)
        ret = cli_tgzload(cfd, engine, signo, options | CL_DB_UNSIGNED, &dbio, NULL);
    else
        ret = cli_tgzload(cfd, engine, signo, options | CL_DB_OFFICIAL, &dbio, NULL);//加载病毒库
    if (ret != CL_SUCCESS)
        return ret;

    dbinfo = engine->dbinfo;
    if (!dbinfo || !dbinfo->cvd || (dbinfo->cvd->version != cvd.version) || (dbinfo->cvd->sigs != cvd.sigs) || (dbinfo->cvd->fl != cvd.fl) || (dbinfo->cvd->stime != cvd.stime)) {
        cli_errmsg("cli_cvdload: Corrupted CVD header\n");
        return CL_EMALFDB;
    }
    dbinfo = engine->dbinfo ? engine->dbinfo->next : NULL;
    if (!dbinfo) {
        cli_errmsg("cli_cvdload: dbinfo error\n");
        return CL_EMALFDB;
    }

    dbio.chkonly = chkonly;
    if (dbtype == 2)
        options |= CL_DB_UNSIGNED;
    else
        options |= CL_DB_SIGNED | CL_DB_OFFICIAL;

    ret = cli_tgzload(cfd, engine, signo, options, &dbio, dbinfo);

    while (engine->dbinfo) {
        dbinfo         = engine->dbinfo;
        engine->dbinfo = dbinfo->next;
        MPOOL_FREE(engine->mempool, dbinfo->name);
        MPOOL_FREE(engine->mempool, dbinfo->hash);
        if (dbinfo->cvd)
            cl_cvdfree(dbinfo->cvd);
        MPOOL_FREE(engine->mempool, dbinfo);
    }

    return ret;
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload=>cli_tgzload

/*
 * Walk the tar stream embedded in a database container and load its members.
 *
 * fd      - descriptor of the container; the payload starts at offset 512.
 * engine  - engine receiving the signatures.
 * signo   - running signature counter, forwarded to cli_load().
 * options - CL_DB_* flags, forwarded to cli_load().
 * dbio    - I/O state shared with the per-format loaders; its stream, buffer
 *           and hash context are set up here.
 * dbinfo  - NULL: only the ".info" member is loaded and the function returns
 *           right after it.  Non-NULL: every member matching CLI_DBEXT() is
 *           loaded and, when bytes were read, checked against the size and
 *           32-byte digest recorded in the dbinfo list.
 *
 * Returns CL_SUCCESS, or CL_ESEEK / CL_EFORMAT / CL_EDUP / CL_EOPEN /
 * CL_EMALFDB on failure.
 */
static int cli_tgzload(int fd, struct cl_engine *engine, unsigned int *signo, unsigned int options, struct cli_dbio *dbio, struct cli_dbinfo *dbinfo)
{
    char osize[13], name[101];
    char block[TAR_BLOCKSIZE];
    int nread, fdd, ret;
    unsigned int type, size, pad, compr = 1;
    off_t off;
    struct cli_dbinfo *db;
    char hash[32];

    cli_dbgmsg("in cli_tgzload()\n");

    if (lseek(fd, 512, SEEK_SET) < 0) {
        return CL_ESEEK;
    }

    if (cli_readn(fd, block, 7) != 7)
        return CL_EFORMAT; /* truncated file? */

    /* If the payload starts with the literal "COPYING" it is read as a plain
     * (uncompressed) stream; otherwise it is read through zlib. */
    if (!strncmp(block, "COPYING", 7))
        compr = 0;

    /* Rewind to the start of the payload after the probe above. */
    if (lseek(fd, 512, SEEK_SET) < 0) {
        return CL_ESEEK;
    }

    /* Work on a duplicate so closing our stream leaves the caller's fd open. */
    if ((fdd = dup(fd)) == -1) {
        cli_errmsg("cli_tgzload: Can't duplicate descriptor %d\n", fd);
        return CL_EDUP;
    }

    if (compr) {
        /* Compressed payload: read through a gz stream. */
        if ((dbio->gzs = gzdopen(fdd, "rb")) == NULL) {
            cli_errmsg("cli_tgzload: Can't gzdopen() descriptor %d, errno = %d\n", fdd, errno);
            close(fdd);
            return CL_EOPEN;
        }
        dbio->fs = NULL;
    } else {
        /* Uncompressed payload: read through a stdio stream. */
        if ((dbio->fs = fdopen(fdd, "rb")) == NULL) {
            cli_errmsg("cli_tgzload: Can't fdopen() descriptor %d, errno = %d\n", fdd, errno);
            close(fdd);
            return CL_EOPEN;
        }
        dbio->gzs = NULL;
    }

    dbio->bufsize = CLI_DEFAULT_DBIO_BUFSIZE;
    dbio->buf     = cli_malloc(dbio->bufsize);
    if (!dbio->buf) {
        cli_errmsg("cli_tgzload: Can't allocate memory for dbio->buf\n");
        cli_tgzload_cleanup(compr, dbio, fdd);
        return CL_EMALFDB;
    }
    dbio->bufpt  = NULL;
    dbio->usebuf = 1;
    dbio->readpt = dbio->buf;

    while (1) {

        /* Read the next tar header block. */
        if (compr)
            nread = gzread(dbio->gzs, block, TAR_BLOCKSIZE);
        else
            nread = fread(block, 1, TAR_BLOCKSIZE, dbio->fs);

        if (!nread)
            break;

        if (nread != TAR_BLOCKSIZE) {
            cli_errmsg("cli_tgzload: Incomplete block read\n");
            cli_tgzload_cleanup(compr, dbio, fdd);
            return CL_EMALFDB;
        }

        if (block[0] == '\0') /* We're done: an empty name ends the archive */
            break;

        /* Member name: first 100 bytes of the header. */
        strncpy(name, block, 100);
        name[100] = '\0';

        if (strchr(name, '/')) {
            cli_errmsg("cli_tgzload: Slash separators are not allowed in CVD\n");
            cli_tgzload_cleanup(compr, dbio, fdd);
            return CL_EMALFDB;
        }

        type = block[156];

        switch (type) {
            case '0':
            case '\0':
                break;
            case '5':
                cli_errmsg("cli_tgzload: Directories are not supported in CVD\n");
                cli_tgzload_cleanup(compr, dbio, fdd);
                return CL_EMALFDB;
            default:
                cli_errmsg("cli_tgzload: Unknown type flag '%c'\n", type);
                cli_tgzload_cleanup(compr, dbio, fdd);
                return CL_EMALFDB;
        }

        /* Member size: 12 octal characters at offset 124. */
        strncpy(osize, block + 124, 12);
        osize[12] = '\0';

        /* sscanf() returns EOF (not 0) when the field is empty or blank, so
         * require exactly one successful conversion; otherwise `size` would
         * be used uninitialized. */
        if ((sscanf(osize, "%o", &size)) != 1) {
            cli_errmsg("cli_tgzload: Invalid size in header\n");
            cli_tgzload_cleanup(compr, dbio, fdd);
            return CL_EMALFDB;
        }
        dbio->size     = size;
        dbio->readsize = dbio->size < dbio->bufsize ? dbio->size : dbio->bufsize - 1;
        dbio->bufpt    = NULL;
        dbio->readpt   = dbio->buf;
        if (!(dbio->hashctx)) {
            /* Hash context whose digest is compared with db->hash below. */
            dbio->hashctx = cl_hash_init("sha256");
            if (!(dbio->hashctx)) {
                cli_tgzload_cleanup(compr, dbio, fdd);
                return CL_EMALFDB;
            }
        }
        dbio->bread = 0;

        /* cli_dbgmsg("cli_tgzload: Loading %s, size: %u\n", name, size); */
        /* Remember where the member data starts so we can tell afterwards
         * whether the loader consumed it. */
        if (compr)
            off = (off_t)gzseek(dbio->gzs, 0, SEEK_CUR);
        else
            off = ftell(dbio->fs);

        if ((!dbinfo && cli_strbcasestr(name, ".info")) || (dbinfo && CLI_DBEXT(name))) {
            /* Hand the member to the per-extension loader. */
            ret = cli_load(name, engine, signo, options, dbio);
            if (ret) {
                cli_errmsg("cli_tgzload: Can't load %s\n", name);
                cli_tgzload_cleanup(compr, dbio, fdd);
                return CL_EMALFDB;
            }
            if (!dbinfo) {
                /* Without dbinfo only the .info member is wanted. */
                cli_tgzload_cleanup(compr, dbio, fdd);
                return CL_SUCCESS;
            } else {
                /* Find this member's record in the dbinfo list. */
                db = dbinfo;
                while (db && strcmp(db->name, name))
                    db = db->next;
                if (!db) {
                    cli_errmsg("cli_tgzload: File %s not found in .info\n", name);
                    cli_tgzload_cleanup(compr, dbio, fdd);
                    return CL_EMALFDB;
                }
                if (dbio->bread) {
                    if (db->size != dbio->bread) {
                        cli_errmsg("cli_tgzload: File %s not correctly loaded\n", name);
                        cli_tgzload_cleanup(compr, dbio, fdd);
                        return CL_EMALFDB;
                    }
                    /* Finish this member's digest and start a fresh context
                     * for the next member. */
                    cl_finish_hash(dbio->hashctx, hash);
                    dbio->hashctx = cl_hash_init("sha256");
                    if (!(dbio->hashctx)) {
                        cli_tgzload_cleanup(compr, dbio, fdd);
                        return CL_EMALFDB;
                    }
                    /* Compare against the digest recorded for this member. */
                    if (memcmp(db->hash, hash, 32)) {
                        cli_errmsg("cli_tgzload: Invalid checksum for file %s\n", name);
                        cli_tgzload_cleanup(compr, dbio, fdd);
                        return CL_EMALFDB;
                    }
                }
            }
        }

        /* Skip to the next header: the whole member if nothing was read,
         * otherwise only the padding up to the block boundary. */
        pad = size % TAR_BLOCKSIZE ? (TAR_BLOCKSIZE - (size % TAR_BLOCKSIZE)) : 0;
        if (compr) {
            if (off == gzseek(dbio->gzs, 0, SEEK_CUR))
                gzseek(dbio->gzs, size + pad, SEEK_CUR);
            else if (pad)
                gzseek(dbio->gzs, pad, SEEK_CUR);
        } else {
            if (off == ftell(dbio->fs))
                fseek(dbio->fs, size + pad, SEEK_CUR);
            else if (pad)
                fseek(dbio->fs, pad, SEEK_CUR);
        }
    }

    cli_tgzload_cleanup(compr, dbio, fdd);
    return CL_SUCCESS;
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload=>cli_tgzload=>cli_load

/* Walk-through using the simplest case, an .hdb hash database. After
 * decompression a raw line of such a file looks like:
 * 44d88612fea8a8f36de82e1278abb02f:68:Eicar-Test-Signature
 *
 * cli_load() picks the loader for one database file from its extension.
 * When dbio is NULL the file named by `filename` is opened here; otherwise
 * the data comes from dbio. Returns the loader's status (CL_SUCCESS when the
 * file is loaded or deliberately ignored).
 * This listing is an excerpt: the remaining extension branches are elided
 * at the "......" marker.
 */
cl_error_t cli_load(const char *filename, struct cl_engine *engine, unsigned int *signo, unsigned int options, struct cli_dbio *dbio)
{
    cl_error_t ret = CL_SUCCESS;

    FILE *fs        = NULL;
    uint8_t skipped = 0;
    const char *dbname;
    char buff[FILEBUFF];

    /* Check-only mode: drain the member line by line without parsing it. */
    if (dbio && dbio->chkonly) {
        while (cli_dbgets(buff, FILEBUFF, NULL, dbio)) continue;
        return CL_SUCCESS;
    }

    if (!dbio && (fs = fopen(filename, "rb")) == NULL) {
        if (options & CL_DB_DIRECTORY) { /* bb#1624 */
            /* A file that no longer exists while loading a directory is
             * treated as a benign race, not as an error. */
            if (access(filename, R_OK)) {
                if (errno == ENOENT) {
                    cli_dbgmsg("Detected race condition, ignoring old file %s\n", filename);
                    return CL_SUCCESS;
                }
            }
        }
        cli_errmsg("cli_load(): Can't open file %s\n", filename);
        return CL_EOPEN;
    }

    /* dbname is the file name without any leading directory part. */
    if ((dbname = strrchr(filename, *PATHSEP)))
        dbname++;
    else
        dbname = filename;

#ifdef HAVE_YARA
    if (options & CL_DB_YARA_ONLY) {
        if (cli_strbcasestr(dbname, ".yar") || cli_strbcasestr(dbname, ".yara"))
            ret = cli_loadyara(fs, engine, signo, options, dbio, filename);
        else
            skipped = 1;
    } else
#endif
        if (cli_strbcasestr(dbname, ".db")) {
        ret = cli_loaddb(fs, engine, signo, options, dbio, dbname);

    } else if (cli_strbcasestr(dbname, ".cvd")) {
        ret = cli_cvdload(fs, engine, signo, options, 0, filename, 0);

    } else if (cli_strbcasestr(dbname, ".cld")) {
        ret = cli_cvdload(fs, engine, signo, options, 1, filename, 0);

    } else if (cli_strbcasestr(dbname, ".cud")) {
        ret = cli_cvdload(fs, engine, signo, options, 2, filename, 0);

    } else if (cli_strbcasestr(dbname, ".crb")) {
        ret = cli_loadcrt(fs, engine, dbio);

    } else if (cli_strbcasestr(dbname, ".hdb") || cli_strbcasestr(dbname, ".hsb")) {
        ret = cli_loadhash(fs, engine, signo, MD5_HDB, options, dbio, dbname);// the .hdb example is read by this loader
    }
......

    if (fs)
        fclose(fs);

    if (engine->cb_sigload_progress) {
        /* Let the progress callback function know how we're doing */
        (void)engine->cb_sigload_progress(engine->num_total_signatures, *signo, engine->cb_sigload_progress_ctx);
    }

    return ret;
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload=>cli_tgzload=>cli_load=>cli_loadhash

/*
 * Load a colon-separated hash signature database.
 *
 * fs / dbio - input source, read line by line through cli_dbgets().
 * engine    - engine whose hash matcher (hm_hdb, hm_mdb, hm_imp or hm_fp,
 *             selected by `mode`) receives the signatures; the matcher is
 *             allocated on first use.
 * signo     - running signature counter; incremented by the number of
 *             signatures added. May be NULL.
 * mode      - MD5_HDB, MD5_MDB, MD5_IMP, or anything else for the hm_fp set.
 *             For MD5_MDB the line starts with the size followed by the
 *             hash; for every other mode the hash comes first.
 * options   - CL_DB_* flags (PUA filtering, official flag).
 * dbname    - database name; the part after the first '.' is passed to the
 *             cb_sigload callback.
 *
 * Returns CL_SUCCESS, CL_EMEM on allocation failure, CL_EMALFDB for an empty
 * or malformed database, or the error reported by hm_addhash_str().
 */
static int cli_loadhash(FILE *fs, struct cl_engine *engine, unsigned int *signo, unsigned int mode, unsigned int options, struct cli_dbio *dbio, const char *dbname)
{
    const char *tokens[MD5_TOKENS + 1];
    char buffer[FILEBUFF], *buffer_cpy = NULL;
    const char *pt, *virname;
    int ret                 = CL_SUCCESS;
    unsigned int size_field = 1, md5_field = 0, line = 0, sigs = 0, tokens_count;
    unsigned int req_fl = 0;
    struct cli_matcher *db;
    unsigned long size;

    if (mode == MD5_MDB) {
        size_field = 0;
        md5_field  = 1;
        db         = engine->hm_mdb;
    } else if (mode == MD5_HDB) /* .hdb example: md5_field stays 0, size_field stays 1 */
        db = engine->hm_hdb;
    else if (mode == MD5_IMP)
        db = engine->hm_imp;
    else
        db = engine->hm_fp;

    if (!db) {
        /* First signature set of this kind: create the matcher. */
        if (!(db = MPOOL_CALLOC(engine->mempool, 1, sizeof(*db))))
            return CL_EMEM;
#ifdef USE_MPOOL
        db->mempool = engine->mempool;
#endif
        if (mode == MD5_HDB)
            engine->hm_hdb = db;
        else if (mode == MD5_MDB)
            engine->hm_mdb = db;
        else if (mode == MD5_IMP)
            engine->hm_imp = db;
        else
            engine->hm_fp = db;
    }

    /* cli_chkign() is given the untouched line, so keep a copy of it before
     * tokenizing (only needed when an ignore list exists). */
    if (engine->ignored)
        if (!(buffer_cpy = cli_malloc(FILEBUFF))) {
            cli_errmsg("cli_loadhash: Can't allocate memory for buffer_cpy\n");
            return CL_EMEM;
        }

    while (cli_dbgets(buffer, FILEBUFF, fs, dbio)) { /* one signature per line */
        line++;
        if (buffer[0] == '#') /* comment line */
            continue;
        cli_chomp(buffer);
        if (engine->ignored)
            strcpy(buffer_cpy, buffer);

        tokens_count = cli_strtokenize(buffer, ':', MD5_TOKENS + 1, tokens);
        if (tokens_count < 3) { /* hash, size and name are mandatory */
            ret = CL_EMALFDB;
            break;
        }
        if (tokens_count > MD5_TOKENS - 2) {
            /* Optional minimum / maximum functionality level fields. */
            req_fl = atoi(tokens[MD5_TOKENS - 2]);

            if (tokens_count > MD5_TOKENS) {
                ret = CL_EMALFDB;
                break;
            }

            if (cl_retflevel() < req_fl)
                continue;
            if (tokens_count == MD5_TOKENS) {
                int max_fl = atoi(tokens[MD5_TOKENS - 1]);
                if (cl_retflevel() > (unsigned int)max_fl)
                    continue;
            }
        }

        if (strcmp(tokens[size_field], "*")) {
            /* Explicit size ("68" in the example): parse it as decimal. */
            size = strtoul(tokens[size_field], (char **)&pt, 10);
            if (*pt || !size || size >= 0xffffffff) {
                cli_errmsg("cli_loadhash: Invalid value for the size field\n");
                ret = CL_EMALFDB;
                break;
            }
        } else {
            size = 0;
            // The wildcard feature was added in FLEVEL 73, so for backwards
            // compatibility with older clients, ensure that a minimum FLEVEL
            // is specified.  This check doesn't apply to .imp rules, though,
            // since this rule category wasn't introduced until FLEVEL 90, and
            // has always supported wildcard usage in rules.
            if (mode != MD5_IMP && ((tokens_count < MD5_TOKENS - 1) || (req_fl < 73))) {
                cli_errmsg("cli_loadhash: Minimum FLEVEL field must be at least 73 for wildcard size hash signatures."
                           " For reference, running FLEVEL is %d\n",
                           cl_retflevel());
                ret = CL_EMALFDB;
                break;
            }
        }

        pt = tokens[2]; /* virname ("Eicar-Test-Signature" in the example) */
        if (engine->pua_cats && (options & CL_DB_PUA_MODE) && (options & (CL_DB_PUA_INCLUDE | CL_DB_PUA_EXCLUDE)))
            if (cli_chkpua(pt, engine->pua_cats, options))
                continue;

        if (engine->ignored && cli_chkign(engine->ignored, pt, buffer_cpy))
            continue;

        if (engine->cb_sigload) {
            /* The callback gets the part of dbname after the first '.'
             * (or the whole name when there is none). */
            const char *dot = strchr(dbname, '.');
            if (!dot)
                dot = dbname;
            else
                dot++;
            if (engine->cb_sigload(dot, pt, ~options & CL_DB_OFFICIAL, engine->cb_sigload_ctx)) {
                cli_dbgmsg("cli_loadhash: skipping %s (%s) due to callback\n", pt, dot);
                continue;
            }
        }

        virname = CLI_MPOOL_VIRNAME(engine->mempool, pt, options & CL_DB_OFFICIAL);
        if (!virname) {
            ret = CL_EMALFDB;
            break;
        }

        /* Convert the hex digest to raw bytes (half the length) and append
         * it to the hash set selected by the signature's size field (68 in
         * the example). The sets are sorted later by hm_flush() at compile
         * time; per the article, matching then locates entries by binary
         * search. */
        if (CL_SUCCESS != (ret = hm_addhash_str(db, tokens[md5_field], size, virname))) {
            cli_errmsg("cli_loadhash: Malformed hash string at line %u\n", line);
            MPOOL_FREE(engine->mempool, (void *)virname);
            break;
        }

        sigs++; /* one more signature parsed; feeds the progress report below */

        if (engine->cb_sigload_progress) {
            /* signo may be NULL (see the guarded update at the end), so do
             * not dereference it unconditionally here. */
            unsigned int loaded = (signo ? *signo : 0) + sigs;

            if (loaded % 10000 == 0) {
                /* Let the progress callback function know how we're doing */
                (void)engine->cb_sigload_progress(engine->num_total_signatures, loaded, engine->cb_sigload_progress_ctx);
            }
        }
    }
    if (engine->ignored)
        free(buffer_cpy);

    if (!line) {
        cli_errmsg("cli_loadhash: Empty database file\n");
        return CL_EMALFDB;
    }

    if (ret) {
        cli_errmsg("cli_loadhash: Problem parsing database at line %u\n", line);
        return ret;
    }

    if (signo)
        *signo += sigs;

    return CL_SUCCESS;
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload=>cli_tgzload=>cli_load=>cli_loadhash=>hm_addhash_str

int hm_addhash_str(struct cli_matcher *root, const char *strhash, uint32_t size, const char *virusname)
{
    enum CLI_HASH_TYPE type;
    char binhash[CLI_HASHLEN_MAX];
    int hlen;

    if (!root || !strhash) {
        cli_errmsg("hm_addhash_str: NULL root or hash\n");
        return CL_ENULLARG;
    }

    /* size 0 here is now a wildcard size match */
    if (size == (uint32_t)-1) {
        cli_errmsg("hm_addhash_str: null or invalid size (%u)\n", size);
        return CL_EARG;
    }

    hlen = strlen(strhash);
    switch (hlen) {
        case 32:
            type = CLI_HASH_MD5;//此例为这个type
            break;
        case 40:
            type = CLI_HASH_SHA1;
            break;
        case 64:
            type = CLI_HASH_SHA256;
            break;
        default:
            cli_errmsg("hm_addhash_str: invalid hash %s -- FIXME!\n", strhash);
            return CL_EARG;
    }
    if (cli_hex2str_to(strhash, (char *)binhash, hlen)) {//hex转换成str
        cli_errmsg("hm_addhash_str: invalid hash %s\n", strhash);
        return CL_EARG;
    }

    return hm_addhash_bin(root, binhash, type, size, virusname);//存入哈希数组
}

recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload=>cli_tgzload=>cli_load=>cli_loadhash=>hm_addhash_str=>hm_addhash_bin

int hm_addhash_bin(struct cli_matcher *root, const void *binhash, enum CLI_HASH_TYPE type, uint32_t size, const char *virusname)
{
    const unsigned int hlen = hashlen[type];
    const struct cli_htu32_element *item;
    struct cli_sz_hash *szh;
    struct cli_htu32 *ht;
    int i;

    if (size) {
        /* size non-zero, find sz_hash element in size-driven hashtable hash长度哈希表 */
        ht = &root->hm.sizehashes[type];
        if (!root->hm.sizehashes[type].capacity) {
            i = cli_htu32_init(ht, 64, root->mempool);
            if (i) return i;
        }

        item = cli_htu32_find(ht, size);
        if (!item) {
            struct cli_htu32_element htitem;
            szh = MPOOL_CALLOC(root->mempool, 1, sizeof(*szh));
            if (!szh) {
                cli_errmsg("hm_addhash_bin: failed to allocate size hash\n");
                return CL_EMEM;
            }

            htitem.key         = size;//长度作为key
            htitem.data.as_ptr = szh;
            i                  = cli_htu32_insert(ht, &htitem, root->mempool);
            if (i) {
                cli_errmsg("hm_addhash_bin: failed to add item to hashtab");
                MPOOL_FREE(root->mempool, szh);
                return i;
            }
        } else
            szh = (struct cli_sz_hash *)item->data.as_ptr;
    } else {
        /* size 0 = wildcard */
        szh = &root->hwild.hashes[type];
    }
    szh->items++;//增加key对应value的元素个数,好重新分配内存

    szh->hash_array = MPOOL_REALLOC2(root->mempool, szh->hash_array, hlen * szh->items);
    if (!szh->hash_array) {
        cli_errmsg("hm_addhash_bin: failed to grow hash array to %u entries\n", szh->items);
        szh->items = 0;
        MPOOL_FREE(root->mempool, szh->virusnames);
        szh->virusnames = NULL;
        return CL_EMEM;
    }

    szh->virusnames = MPOOL_REALLOC2(root->mempool, szh->virusnames, sizeof(*szh->virusnames) * szh->items);//分配内存存入对应的病毒名称
    if (!szh->virusnames) {
        cli_errmsg("hm_addhash_bin: failed to grow virusname array to %u entries\n", szh->items);
        szh->items = 0;
        MPOOL_FREE(root->mempool, szh->hash_array);
        szh->hash_array = NULL;
        return CL_EMEM;
    }

    memcpy(&szh->hash_array[(szh->items - 1) * hlen], binhash, hlen);//存入病毒的哈希
    szh->virusnames[(szh->items - 1)] = virusname;//存入病毒名称

    return 0;
}

接着就是循环调用读取文件中的sig然后解析,添加到这个哈希数组中。

我们加载完病毒库后,需要对其进行编译。

recvloop=>reload_db=>reload_th=>cl_engine_compile


/*
 * Excerpt of cl_engine_compile(): only the step relevant to the hash
 * databases is shown; the elided parts are marked with dots. On the path
 * shown, the function flags the engine as compiled (CL_DB_COMPILED) and
 * returns CL_SUCCESS.
 */
cl_error_t cl_engine_compile(struct cl_engine *engine)
{
......
    if (engine->hm_hdb)
        hm_flush(engine->hm_hdb);// "compile" the hash sets (hm_flush sorts them)
    TASK_COMPLETE();
.....
    engine->dboptions |= CL_DB_COMPILED;
    return CL_SUCCESS;
}

recvloop=>reload_db=>reload_th=>cl_engine_compile=>hm_flush

/* flush both size-specific and agnostic hash sets */
void hm_flush(struct cli_matcher *root)
{
    enum CLI_HASH_TYPE type;
    unsigned int keylen;
    struct cli_sz_hash *szh;

    if (!root)
        return;

    for (type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
        struct cli_htu32 *ht                 = &root->hm.sizehashes[type];
        const struct cli_htu32_element *item = NULL;
        szh                                  = NULL;

        if (!root->hm.sizehashes[type].capacity)
            continue;

        while ((item = cli_htu32_next(ht, item))) {
            szh    = (struct cli_sz_hash *)item->data.as_ptr;
            keylen = hashlen[type];

            if (szh->items > 1)
                hm_sort(szh, 0, szh->items, keylen);//排序
        }
    }

    for (type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
        szh    = &root->hwild.hashes[type];
        keylen = hashlen[type];

        if (szh->items > 1)
            hm_sort(szh, 0, szh->items, keylen);
    }
}

recvloop=>reload_db=>reload_th=>cl_engine_compile=>hm_flush=>hm_sort

/* Exchange entries a and b of a hash set: the keylen digest bytes and the
 * matching virus-name pointer move together. */
static void hm_sort_swap(struct cli_sz_hash *szh, size_t a, size_t b, unsigned int keylen)
{
    uint8_t scratch[CLI_HASHLEN_MAX];
    const char *name;

    memcpy(scratch, &szh->hash_array[keylen * a], keylen);
    name = szh->virusnames[a];
    memcpy(&szh->hash_array[keylen * a], &szh->hash_array[keylen * b], keylen);
    szh->virusnames[a] = szh->virusnames[b];
    memcpy(&szh->hash_array[keylen * b], scratch, keylen);
    szh->virusnames[b] = name;
}

/*
 * Recursively sort entries [l, r) of a hash set in place.
 * All digests of one set have the same length (keylen), so the array is
 * treated as fixed-width records; entry l serves as the pivot and entries
 * are compared with hm_cmp().
 */
static void hm_sort(struct cli_sz_hash *szh, size_t l, size_t r, unsigned int keylen)
{
    uint8_t pivot[CLI_HASHLEN_MAX];
    size_t lo, hi;

    /* Zero or one entry: nothing to do. */
    if (l + 1 >= r)
        return;

    lo = l + 1;
    hi = r;
    memcpy(pivot, &szh->hash_array[keylen * l], keylen);

    /* Partition: entries greater than the pivot are moved to the tail. */
    while (lo < hi) {
        if (hm_cmp(&szh->hash_array[keylen * lo], pivot, keylen) <= 0) {
            lo++;
            continue;
        }
        hi--;
        if (lo == hi)
            break;
        hm_sort_swap(szh, lo, hi, keylen);
    }

    /* Put the pivot between the two partitions. */
    lo--;
    if (lo != l)
        hm_sort_swap(szh, lo, l, keylen);

    hm_sort(szh, l, lo, keylen);
    hm_sort(szh, hi, r, keylen);
}

到此,整个病毒库的加载就算是结束了。

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值