基本流程可以参考《clamav中clamdscan --version 不生效》一文。
我们直接从解析command开始。parse_command函数返回COMMAND_RELOAD类型。然后进入execute_or_dispatch_command函数处理。
recvloop=>parse_dispatch_cmd=>execute_or_dispatch_command
/* Handle one parsed client command for the connection `conn`.
*
* Only the SHUTDOWN and RELOAD cases are shown in this excerpt; the remaining
* cases are elided at the "......" marker below.
*
* returns:
* <0 for error
* -1 out of memory
* -2 other
* 0 for async dispatched
* 1 for command completed (connection can be closed)
*/
int execute_or_dispatch_command(client_conn_t *conn, enum commands cmd, const char *argument)
{
int desc = conn->sd;
char term = conn->term;
const struct cl_engine *engine = conn->engine;
/* execute commands that can be executed quickly on the recvloop thread,
* these must:
* - not involve any operation that can block for a long time, such as disk
* I/O
* - send of atomic message is allowed.
* Dispatch other commands */
/* A non-NULL conn->group is treated as "inside an IDSESSION" (see the
* messages below): only the listed commands are allowed through. */
if (conn->group) {
switch (cmd) {
case COMMAND_FILDES:
case COMMAND_SCAN:
case COMMAND_END:
case COMMAND_INSTREAM:
case COMMAND_INSTREAMSCAN:
case COMMAND_VERSION:
case COMMAND_PING:
case COMMAND_STATS:
case COMMAND_COMMANDS:
/* These commands are accepted inside IDSESSION */
break;
default:
/* these commands are not recognized inside an IDSESSION */
conn_reply_error(conn, "Command invalid inside IDSESSION.");
logg(LOGG_DEBUG_NV, "SESSION: command is not valid inside IDSESSION: %d\n", cmd);
/* Drop the session group and report the command as completed. */
conn->group = NULL;
return 1;
}
}
switch (cmd) {
case COMMAND_SHUTDOWN:
/* Raise the global exit flag under its mutex. */
pthread_mutex_lock(&exit_mutex);
progexit = 1;
pthread_mutex_unlock(&exit_mutex);
return 1;
case COMMAND_RELOAD:
/* Nothing is loaded here: only the flag is raised and the client is
* answered with "RELOADING". */
pthread_mutex_lock(&reload_mutex);
reload = 1;// set the reload flag
pthread_mutex_unlock(&reload_mutex);
mdprintf(desc, "RELOADING%c", term);
/* we set reload flag, and we'll reload before closing the
* connection */
return 1;
......
}
}
然后返回到recvloop函数中,加载病毒库。由于代码太长,只贴出部分代码。
recvloop:
/* DB reload */
/* Act on the `reload` flag. The flag and the reload stage are protected by
* two different mutexes (reload_mutex and reload_stage_mutex). */
pthread_mutex_lock(&reload_mutex);
if (reload) {
pthread_mutex_unlock(&reload_mutex);
/* Reload was requested */
pthread_mutex_lock(&reload_stage_mutex);
if (reload_stage == RELOAD_STAGE__IDLE) {
/* Reloading not already taking place */
reload_stage = RELOAD_STAGE__RELOADING;
/* Release the stage lock while reload_db() runs. */
pthread_mutex_unlock(&reload_stage_mutex);
if (CL_SUCCESS != reload_db(&engine, dboptions, opts, thr_pool)) {
logg(LOGG_WARNING, "Database reload setup failed, keeping the previous instance\n");
/* Setup failed: clear the request and return the stage to idle. */
pthread_mutex_lock(&reload_mutex);
reload = 0;
pthread_mutex_unlock(&reload_mutex);
pthread_mutex_lock(&reload_stage_mutex);
reload_stage = RELOAD_STAGE__IDLE;
pthread_mutex_unlock(&reload_stage_mutex);
}
/* Re-take the stage lock so that the check below runs with
* reload_stage_mutex held whichever path was taken above. */
pthread_mutex_lock(&reload_stage_mutex);
}
if (reload_stage == RELOAD_STAGE__NEW_DB_AVAILABLE) {
/* New database available */
if (g_newengine) {
/* Reload succeeded */
logg(LOGG_INFO, "Activating the newly loaded database...\n");
thrmgr_setactiveengine(g_newengine);
if (optget(opts, "ConcurrentDatabaseReload")->enabled) {
/* If concurrent database reload, we now need to free the old engine. */
cl_engine_free(engine);
}
/* Adopt the new engine and clear the hand-off pointer. */
engine = g_newengine;
g_newengine = NULL;
} else {
logg(LOGG_WARNING, "Database reload failed, keeping the previous instance\n");
}
/* Success or failure, this reload cycle is finished. */
reload_stage = RELOAD_STAGE__IDLE;
pthread_mutex_unlock(&reload_stage_mutex);
pthread_mutex_lock(&reload_mutex);
reload = 0;
pthread_mutex_unlock(&reload_mutex);
/* Record when the reload cycle finished. */
time(&reloaded_time);
} else {
pthread_mutex_unlock(&reload_stage_mutex);
}
} else {
pthread_mutex_unlock(&reload_mutex);
}
recvloop=>reload_db
/**
 * @brief Start a reload of the signature databases.
 *
 * Copies the current engine settings and the configured database directory
 * into a reload context, refreshes the global `dbstat` snapshot for that
 * directory and spawns reload_th() to build the new engine.
 *
 * When ConcurrentDatabaseReload is disabled, the current engine is released
 * and *engine is set to NULL before loading starts, and this function blocks
 * until the reload thread has been joined. When it is enabled, the thread is
 * created detached and this function returns right after spawning it.
 *
 * Ownership of the reload context passes to reload_th() as soon as
 * pthread_create() succeeds; it is released here only if the thread was
 * never started.
 *
 * @param[in,out] engine    The current scan engine, used to copy the settings.
 * @param dboptions         The current database options, used to copy the options.
 * @param opts              The command line options, used to get the database directory.
 * @param thr_pool          Thread pool that is waited on (thrmgr_wait_for_threads) when
 *                          concurrent reload is disabled.
 * @return cl_error_t       CL_SUCCESS if the reload thread was successfully started. This does
 *                          not mean that the database has reloaded successfully.
 */
static cl_error_t reload_db(struct cl_engine **engine, unsigned int dboptions, const struct optstruct *opts, threadpool_t *thr_pool)
{
    cl_error_t status = CL_EMALFDB;
    cl_error_t retval;
    struct reload_th_t *rldata = NULL;
    pthread_t th;
    pthread_attr_t th_attr;

    if (NULL == opts || NULL == engine) {
        logg(LOGG_ERROR, "reload_db: Invalid arguments, unable to load signature databases.\n");
        status = CL_EARG;
        goto done;
    }

    /* Zero-initialised so the cleanup code can tell which members were set. */
    rldata = calloc(1, sizeof(*rldata));
    if (!rldata) {
        logg(LOGG_ERROR, "Failed to allocate reload context\n");
        status = CL_EMEM;
        goto done;
    }

    rldata->dboptions = dboptions;

    if (*engine) {
        /* Back up the current engine settings; reload_th() applies them to
         * the engine it creates. */
        rldata->settings = cl_engine_settings_copy(*engine);
        if (!rldata->settings) {
            logg(LOGG_ERROR, "Can't make a copy of the current engine settings\n");
            goto done;
        }
    }

    /* Database directory as given by the configuration. */
    rldata->dbdir = strdup(optget(opts, "DatabaseDirectory")->strarg);
    if (!rldata->dbdir) {
        logg(LOGG_ERROR, "Can't duplicate the database directory path\n");
        goto done;
    }

    /* Rebuild the directory snapshot kept in the global dbstat. */
    if (dbstat.entries) {
        cl_statfree(&dbstat);
    }
    memset(&dbstat, 0, sizeof(struct cl_stat));
    retval = cl_statinidir(rldata->dbdir, &dbstat);
    if (CL_SUCCESS != retval) {
        logg(LOGG_ERROR, "cl_statinidir() failed: %s\n", cl_strerror(retval));
        goto done;
    }

    if (*engine) {
        if (!optget(opts, "ConcurrentDatabaseReload")->enabled) {
            /*
             * If concurrent reload disabled, we'll NULL out the current engine and deref it.
             * It will only actually be free'd once the last scan finishes.
             */
            thrmgr_setactiveengine(NULL);
            cl_engine_free(*engine);
            *engine = NULL;

            /* Wait for all scans to finish */
            thrmgr_wait_for_threads(thr_pool);
        }
    }

    if (pthread_attr_init(&th_attr)) {
        logg(LOGG_ERROR, "Failed to init reload thread attributes\n");
        goto done;
    }

    if (optget(opts, "ConcurrentDatabaseReload")->enabled) {
        /* For concurrent reloads: set detached, so we don't leak thread resources */
        pthread_attr_setdetachstate(&th_attr, PTHREAD_CREATE_DETACHED);
    }

    /* Spawn the thread that actually loads and compiles the databases. */
    retval = pthread_create(&th, &th_attr, reload_th, rldata);
    if (pthread_attr_destroy(&th_attr))
        logg(LOGG_WARNING, "Failed to release reload thread attributes\n");
    if (retval) {
        logg(LOGG_ERROR, "Failed to spawn reload thread\n");
        goto done;
    }

    /*
     * The thread is running and releases the context itself. Forget our
     * pointer so that a later failure (e.g. in pthread_join) cannot free it
     * a second time.
     */
    rldata = NULL;

    if (!optget(opts, "ConcurrentDatabaseReload")->enabled) {
        /* For non-concurrent reloads: join the thread */
        int join_ret = pthread_join(th, NULL);
        switch (join_ret) {
            case 0:
                logg(LOGG_INFO, "Database reload completed.\n");
                break;
            case EDEADLK:
                logg(LOGG_ERROR, "A deadlock was detected when waiting for the database reload thread.\n");
                goto done;
            case ESRCH:
                logg(LOGG_ERROR, "Failed to find database reload thread.\n");
                goto done;
            case EINVAL:
                logg(LOGG_ERROR, "The database reload thread is not a joinable thread.\n");
                goto done;
            default:
                logg(LOGG_ERROR, "An unknown error occured when waiting for the database reload thread: %d\n", join_ret);
                goto done;
        }
    }

    status = CL_SUCCESS;

done:
    /*
     * rldata is still non-NULL only when the reload thread was never
     * started, in which case we are responsible for cleaning it up.
     */
    if (NULL != rldata) {
        if (NULL != rldata->settings) {
            cl_engine_settings_free(rldata->settings);
        }
        free(rldata->dbdir);
        free(rldata);
    }

    return status;
}
recvloop=>reload_db=>reload_th
/**
* @brief Thread entry point to load the signature databases & compile a new scanning engine.
*
* Once loaded, an event will be set to indicate that the new engine is ready.
*
* @param arg A reload_th_t structure defining the db directory, db settings, engine settings.
* @return void*
*/
static void *reload_th(void *arg)
{
cl_error_t status = CL_EMALFDB;
struct reload_th_t *rldata = arg;
struct cl_engine *engine = NULL;
unsigned int sigs = 0;
int retval;
if (NULL == rldata || NULL == rldata->dbdir || NULL == rldata->settings) {
logg(LOGG_ERROR, "reload_th: Invalid arguments, unable to load signature databases.\n");
status = CL_EARG;
goto done;
}
logg(LOGG_INFO, "Reading databases from %s\n", rldata->dbdir);
if (NULL == (engine = cl_engine_new())) {//创建引擎对象
logg(LOGG_ERROR, "reload_th: Can't initialize antivirus engine\n");
goto done;
}
retval = cl_engine_settings_apply(engine, rldata->settings);//复制旧引擎的配置过来
if (CL_SUCCESS != retval) {
logg(LOGG_ERROR, "reload_th: Failed to apply previous engine settings: %s\n", cl_strerror(retval));
status = CL_EMEM;
goto done;
}
retval = cl_load(rldata->dbdir, engine, &sigs, rldata->dboptions);//加载病毒库
if (CL_SUCCESS != retval) {
logg(LOGG_ERROR, "reload_th: Database load failed: %s\n", cl_strerror(retval));
goto done;
}
retval = cl_engine_compile(engine);//编译病毒库
if (CL_SUCCESS != retval) {
logg(LOGG_ERROR, "reload_th: Database initialization error: can't compile engine: %s\n", cl_strerror(retval));
goto done;
}
logg(LOGG_INFO, "Database correctly reloaded (%u signatures)\n", sigs);
status = CL_SUCCESS;
done:
if (NULL != rldata) {
if (NULL != rldata->settings) {
cl_engine_settings_free(rldata->settings);
}
if (NULL != rldata->dbdir) {
free(rldata->dbdir);
}
free(rldata);
}
if (CL_SUCCESS != status) {
if (NULL != engine) {
cl_engine_free(engine);
engine = NULL;
}
}
pthread_mutex_lock(&reload_stage_mutex);
reload_stage = RELOAD_STAGE__NEW_DB_AVAILABLE; /* New DB available */
g_newengine = engine;
pthread_mutex_unlock(&reload_stage_mutex);
#ifdef _WIN32
SetEvent(event_wake_recv);
#else
if (syncpipe_wake_recv_w != -1)
if (write(syncpipe_wake_recv_w, "", 1) != 1)
logg(LOGG_DEBUG_NV, "Failed to write to syncpipe\n");
#endif
return NULL;
}
recvloop=>reload_db=>reload_th=>cl_load
/*
* Load signature databases from `path` into a not-yet-compiled engine.
*
* `path` may be a single database file or a directory; a directory is handed
* to cli_loaddbdir() with CL_DB_DIRECTORY added to the options.
*
* path      - database file or directory to load.
* engine    - engine to load into; must not be NULL or already compiled.
* signo     - signature counter passed through to the loaders.
* dboptions - CL_DB_* flags; they are OR-ed into engine->dboptions.
*
* Returns CL_ENULLARG if engine is NULL, CL_EARG if the engine is already
* compiled, CL_ESTAT if the path cannot be stat'ed, CL_EMEM if the cache
* cannot be initialised, CL_EOPEN for an unsupported file type, otherwise
* the result of the phishing/bytecode initialisation or of the loader.
*/
cl_error_t cl_load(const char *path, struct cl_engine *engine, unsigned int *signo, unsigned int dboptions)
{
STATBUF sb;
int ret;
if (!engine) {
cli_errmsg("cl_load: engine == NULL\n");
return CL_ENULLARG;
}
if (engine->dboptions & CL_DB_COMPILED) {
cli_errmsg("cl_load(): can't load new databases when engine is already compiled\n");
return CL_EARG;
}
if (CLAMSTAT(path, &sb) == -1) {// stat the path; the switch below only selects the error message
switch (errno) {
#if defined(EACCES)
case EACCES:
cli_errmsg("cl_load(): Access denied for path: %s\n", path);
break;
#endif
#if defined(ENOENT)
case ENOENT:
cli_errmsg("cl_load(): No such file or directory: %s\n", path);
break;
#endif
#if defined(ELOOP)
case ELOOP:
cli_errmsg("cl_load(): Too many symbolic links encountered in path: %s\n", path);
break;
#endif
#if defined(EOVERFLOW)
case EOVERFLOW:
cli_errmsg("cl_load(): File size is too large to be recognized. Path: %s\n", path);
break;
#endif
#if defined(EIO)
case EIO:
cli_errmsg("cl_load(): An I/O error occurred while reading from path: %s\n", path);
break;
#endif
default:
cli_errmsg("cl_load: Can't get status of: %s\n", path);
break;
}
return CL_ESTAT;
}
/* Initialise the phishing module only when it is requested, not yet set
* up, and enabled in the engine's dconf. */
if ((dboptions & CL_DB_PHISHING_URLS) && !engine->phishcheck && (engine->dconf->phishing & PHISHING_CONF_ENGINE))
if (CL_SUCCESS != (ret = phishing_init(engine)))
return ret;
/* Note: the else branch is also taken when bytecode is requested but
* already initialised, so the debug message is printed in that case too. */
if ((dboptions & CL_DB_BYTECODE) && !engine->bcs.inited) {
if (CL_SUCCESS != (ret = cli_bytecode_init(&engine->bcs)))
return ret;
} else {
cli_dbgmsg("Bytecode engine disabled\n");
}
if (!engine->cache && cli_cache_init(engine))
return CL_EMEM;
engine->dboptions |= dboptions;
switch (sb.st_mode & S_IFMT) {// file or directory? a directory has its database files loaded in priority order; this walkthrough follows the directory case
case S_IFREG:
/* Count # of sigs in the database now */
engine->num_total_signatures += count_signatures(path, engine, dboptions);
ret = cli_load(path, engine, signo, dboptions, NULL);
break;
case S_IFDIR:
/* Count # of signatures inside cli_loaddbdir(), before loading */
ret = cli_loaddbdir(path, engine, signo, dboptions | CL_DB_DIRECTORY);
break;
default:
cli_errmsg("cl_load(%s): Not supported database file type\n", path);
return CL_EOPEN;
}
/* NOTE(review): *signo is dereferenced unconditionally here - confirm that
* callers never pass NULL while a progress callback is installed. */
if (engine->cb_sigload_progress) {
/* Let the progress callback function know we're done! */
(void)engine->cb_sigload_progress(*signo, *signo, engine->cb_sigload_progress_ctx);
}
#ifdef YARA_PROTO
if (yara_total) {
cli_yaramsg("$$$$$$$$$$$$ YARA $$$$$$$$$$$$\n");
cli_yaramsg("\tTotal Rules: %u\n", yara_total);
cli_yaramsg("\tRules Loaded: %u\n", yara_loaded);
cli_yaramsg("\tComplex Conditions: %u\n", yara_complex);
cli_yaramsg("\tMalformed/Unsupported Rules: %u\n", yara_malform);
cli_yaramsg("\tEmpty Rules: %u\n", yara_empty);
cli_yaramsg("$$$$$$$$$$$$ YARA $$$$$$$$$$$$\n");
}
#endif
return ret;
}
recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir
/*
* Load every supported database file found directly in `dirname`.
*
* Works in two passes: first the directory is scanned, each accepted file is
* given a load priority, its signatures are added to
* engine->num_total_signatures and its path is queued with cli_insertdbtoll();
* then the queued files are loaded one by one with cli_load().
*
* dirname - directory to scan.
* engine  - engine receiving the signatures.
* signo   - signature counter passed through to cli_load().
* options - CL_DB_* flags.
*
* Returns CL_SUCCESS when everything queued was loaded, CL_EOPEN when the
* directory cannot be opened or nothing was loaded from it, CL_EMEM on
* allocation failure, CL_EMALFDB when a daily.cld/daily.cvd header cannot be
* parsed, or the error returned by cli_load().
*/
static cl_error_t cli_loaddbdir(const char *dirname, struct cl_engine *engine, unsigned int *signo, unsigned int options)
{
cl_error_t ret = CL_EOPEN;
DIR *dd = NULL;
struct dirent *dent;
char *dbfile = NULL;
int ends_with_sep = 0;
size_t dirname_len;
struct cl_cvd *daily_cld = NULL;
struct cl_cvd *daily_cvd = NULL;
struct db_ll_entry *head = NULL;
struct db_ll_entry *iter;
struct db_ll_entry *next;
cli_dbgmsg("Loading databases from %s\n", dirname);
if ((dd = opendir(dirname)) == NULL) {// open the directory
cli_errmsg("cli_loaddbdir: Can't open directory %s\n", dirname);
ret = CL_EOPEN;
goto done;
}
/* Remember whether dirname already ends with the path separator so that
* it is not added twice when the file paths are built. */
dirname_len = strlen(dirname);
if (dirname_len >= strlen(PATHSEP)) {
if (strcmp(dirname + dirname_len - strlen(PATHSEP), PATHSEP) == 0) {
cli_dbgmsg("cli_loaddbdir: dirname ends with separator\n");
ends_with_sep = 1;
}
}
while ((dent = readdir(dd))) {// iterate over the directory entries
struct db_ll_entry *entry;
unsigned int load_priority;
if (!dent->d_ino) {
continue;
}
if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) {
continue;
}
/* Only names accepted by CLI_DBEXT are considered. */
if (!CLI_DBEXT(dent->d_name)) {
continue;
}
/* +2: room for one separator and the terminating NUL. */
dbfile = (char *)cli_malloc(strlen(dent->d_name) + dirname_len + 2);
if (!dbfile) {
cli_errmsg("cli_loaddbdir: dbfile == NULL\n");
ret = CL_EMEM;
goto done;
}
if (ends_with_sep)
sprintf(dbfile, "%s%s", dirname, dent->d_name);
else
sprintf(dbfile, "%s" PATHSEP "%s", dirname, dent->d_name);
/* Load priorities, smallest first (see the ordering note before the load
* loop below). */
#define DB_LOAD_PRIORITY_IGN 1
#define DB_LOAD_PRIORITY_DAILY_CLD 2
#define DB_LOAD_PRIORITY_DAILY_CVD 3
#define DB_LOAD_PRIORITY_LOCAL_GDB 4
#define DB_LOAD_PRIORITY_DAILY_CFG 5
#define DB_LOAD_PRIORITY_CRB 6
#define DB_LOAD_PRIORITY_NORMAL 7
if (cli_strbcasestr(dent->d_name, ".ign") || cli_strbcasestr(dent->d_name, ".ign2")) {
/* load .ign and .ign2 files first */
load_priority = DB_LOAD_PRIORITY_IGN;
engine->num_total_signatures += count_line_based_signatures(dbfile);
} else if (!strcmp(dent->d_name, "daily.cld")) {
/* The daily db must be loaded before main, this way, the
daily ign & ign2 signatures prevent ign'ored signatures
in all databases from being loaded. */
load_priority = DB_LOAD_PRIORITY_DAILY_CLD;
if (0 == access(dbfile, R_OK)) {
daily_cld = cl_cvdhead(dbfile);
if (!daily_cld) {
cli_errmsg("cli_loaddbdir: error parsing header of %s\n", dbfile);
ret = CL_EMALFDB;
goto done;
}
/* Successfully opened the daily CLD file and read the header info. */
engine->num_total_signatures += daily_cld->sigs;
} else {
/* Unreadable: skip the file without queueing it. */
free(dbfile);
dbfile = NULL;
continue;
}
} else if (!strcmp(dent->d_name, "daily.cvd")) {
load_priority = DB_LOAD_PRIORITY_DAILY_CVD;
if (0 == access(dbfile, R_OK)) {
daily_cvd = cl_cvdhead(dbfile);
if (!daily_cvd) {
cli_errmsg("cli_loaddbdir: error parsing header of %s\n", dbfile);
ret = CL_EMALFDB;
goto done;
}
/* Successfully opened the daily CVD file and ready the header info. */
engine->num_total_signatures += daily_cvd->sigs;
} else {
/* Unreadable: skip the file without queueing it. */
free(dbfile);
dbfile = NULL;
continue;
}
} else if (!strcmp(dent->d_name, "local.gdb")) {
load_priority = DB_LOAD_PRIORITY_LOCAL_GDB;
engine->num_total_signatures += count_line_based_signatures(dbfile);
} else if (!strcmp(dent->d_name, "daily.cfg")) {
load_priority = DB_LOAD_PRIORITY_DAILY_CFG;
engine->num_total_signatures += count_line_based_signatures(dbfile);
} else if ((options & CL_DB_OFFICIAL_ONLY) &&
!strstr(dirname, "clamav-") && // Official databases that are temp-files (in the process of updating).
!cli_strbcasestr(dent->d_name, ".cld") && // Official databases that have been updated using incremental updates.
!cli_strbcasestr(dent->d_name, ".cvd")) { // Official databases.
// TODO Should this be higher up in the list? Should we
// ignore .ign/.ign2 files and the local.gdb file when this
// flag is set?
cli_dbgmsg("Skipping unofficial database %s\n", dent->d_name);
free(dbfile);
dbfile = NULL;
continue;
} else if (cli_strbcasestr(dent->d_name, ".crb")) {
/* .cat files cannot be loaded successfully unless there are .crb
* rules that trust the certs used to sign the catalog files.
* Therefore, we need to ensure the .crb rules are loaded prior */
load_priority = DB_LOAD_PRIORITY_CRB;
engine->num_total_signatures += count_line_based_signatures(dbfile);
} else {
load_priority = DB_LOAD_PRIORITY_NORMAL;
engine->num_total_signatures += count_signatures(dbfile, engine, options);// add this file's signature count to the running total
}
entry = malloc(sizeof(*entry));
if (NULL == entry) {
cli_errmsg("cli_loaddbdir: failed to allocate memory for database load list entry\n");
ret = CL_EMEM;
goto done;
}
entry->path = dbfile;// the list entry now owns the path string
dbfile = NULL;
entry->load_priority = load_priority;
cli_insertdbtoll(&head, entry);// insert into the list headed by `head`
}
/* The list entries are stored in priority order, so now just loop through
* and load everything.
* NOTE: If there's a daily.cld and a daily.cvd, we'll only load whichever
* has the highest version number. If they have the same version number
* we load daily.cld, since that will load faster (it won't attempt to
* verify the digital signature of the db).
*
* TODO It'd be ideal if we treated all cld/cvd pairs like we do the daily
* ones, and only loaded the one with the highest version. */
for (iter = head; iter != NULL; iter = iter->next) {// walk the list and load each database
if (DB_LOAD_PRIORITY_DAILY_CLD == iter->load_priority) {
/* iter is the daily.cld. If we also have the cvd and the cvd is newer, skip the cld. */
if ((NULL != daily_cvd) && (daily_cld->version < daily_cvd->version)) {
continue;
}
} else if (DB_LOAD_PRIORITY_DAILY_CVD == iter->load_priority) {
/* iter is the daily.cvd. If we also have the cld and the cld is same or newer, skip the cvd. */
if ((NULL != daily_cld) && (daily_cld->version >= daily_cvd->version)) {
continue;
}
}
ret = cli_load(iter->path, engine, signo, options, NULL);
if (ret) {
cli_errmsg("cli_loaddbdir: error loading database %s\n", iter->path);
goto done;
}
}
done:
/* Release the load list, any path not yet handed to the list, the
* directory handle and the parsed daily headers. */
for (iter = head; iter != NULL; iter = next) {
next = iter->next;
free(iter->path);
free(iter);
}
if (NULL != dbfile) {
free(dbfile);
}
if (NULL != dd) {
closedir(dd);
}
if (NULL != daily_cld) {
cl_cvdfree(daily_cld);
}
if (NULL != daily_cvd) {
cl_cvdfree(daily_cvd);
}
/* ret is still CL_EOPEN when nothing was loaded; note that this message is
* also printed after an opendir() failure or a CL_EOPEN from cli_load(). */
if (ret == CL_EOPEN)
cli_errmsg("cli_loaddbdir: No supported database files found in %s\n", dirname);
return ret;
}
recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>count_signatures
/**
* @brief Count the number of signatures in a database file.
*
* Non-database files will be ignored, and count as 0 signatures.
* Database validation is not done, just signature counting.
*
* CVD/CLD/CUD database archives are not counted the hard way, we just trust
* signature count in the header. Yara rules and bytecode sigs count as 1 each.
*
* @param filepath Filepath of the database file to count.
* @return size_t The number of signatures.
*/
static size_t count_signatures(const char *filepath, struct cl_engine *engine, unsigned int options)
{
size_t num_signatures = 0;
struct cl_cvd *db_archive_header = NULL;
if (cli_strbcasestr(filepath, ".cld") ||
cli_strbcasestr(filepath, ".cvd") ||
cli_strbcasestr(filepath, ".cud")) {
/* use the CVD head to get the sig count. */
if (0 == access(filepath, R_OK)) {
db_archive_header = cl_cvdhead(filepath);//读取文件头信息,里面包含了sig个数
if (!db_archive_header) {
cli_errmsg("cli_loaddbdir: error parsing header of %s\n", filepath);
goto done;
}
num_signatures += db_archive_header->sigs;//累加sig 个数
}
} else if ((CL_BYTECODE_TRUST_ALL == engine->bytecode_security) &&
cli_strbcasestr(filepath, ".cbc")) {
/* Counts as 1 signature if loading plain .cbc files. */
num_signatures += 1;
} else if ((options & CL_DB_YARA_ONLY) &&
(cli_strbcasestr(filepath, ".yar") || cli_strbcasestr(filepath, ".yara"))) {
/* Counts as 1 signature. */
num_signatures += 1;
} else if (cli_strbcasestr(filepath, ".db") ||
cli_strbcasestr(filepath, ".crb") ||
cli_strbcasestr(filepath, ".hdb") || cli_strbcasestr(filepath, ".hsb") ||
cli_strbcasestr(filepath, ".hdu") || cli_strbcasestr(filepath, ".hsu") ||
cli_strbcasestr(filepath, ".fp") || cli_strbcasestr(filepath, ".sfp") ||
cli_strbcasestr(filepath, ".mdb") || cli_strbcasestr(filepath, ".msb") ||
cli_strbcasestr(filepath, ".imp") ||
cli_strbcasestr(filepath, ".mdu") || cli_strbcasestr(filepath, ".msu") ||
cli_strbcasestr(filepath, ".ndb") || cli_strbcasestr(filepath, ".ndu") || cli_strbcasestr(filepath, ".sdb") ||
cli_strbcasestr(filepath, ".ldb") || cli_strbcasestr(filepath, ".ldu") ||
cli_strbcasestr(filepath, ".zmd") || cli_strbcasestr(filepath, ".rmd") ||
cli_strbcasestr(filepath, ".cfg") ||
cli_strbcasestr(filepath, ".wdb") ||
cli_strbcasestr(filepath, ".pdb") || cli_strbcasestr(filepath, ".gdb") ||
cli_strbcasestr(filepath, ".ftm") ||
cli_strbcasestr(filepath, ".ign") || cli_strbcasestr(filepath, ".ign2") ||
cli_strbcasestr(filepath, ".idb") ||
cli_strbcasestr(filepath, ".cdb") ||
cli_strbcasestr(filepath, ".cat") ||
cli_strbcasestr(filepath, ".ioc") ||
cli_strbcasestr(filepath, ".pwdb")) {
/* Should be a line-based signaure file, count it the old fashioned way */
num_signatures += count_line_based_signatures(filepath);
}
done:
if (NULL != db_archive_header) {
cl_cvdfree(db_archive_header);
}
return num_signatures;
}
recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load
/*
* Load a single database and dispatch it to the loader matching its file
* extension.
*
* filename - path (or bare name) of the database; the part after the last
*            path separator selects the loader.
* engine   - engine receiving the signatures.
* signo    - signature counter passed to the loaders.
* options  - CL_DB_* flags; several formats are skipped unless their flag
*            (CL_DB_PUA, CL_DB_BYTECODE, CL_DB_PHISHING_URLS, ...) is set.
* dbio     - when non-NULL, no file is opened here and the loaders are given
*            this I/O context instead; when dbio->chkonly is set the data is
*            only read through and nothing is loaded.
*
* Returns CL_SUCCESS (also for skipped files), CL_EOPEN if the file cannot be
* opened, or the error returned by the selected loader.
*/
cl_error_t cli_load(const char *filename, struct cl_engine *engine, unsigned int *signo, unsigned int options, struct cli_dbio *dbio)
{
cl_error_t ret = CL_SUCCESS;
FILE *fs = NULL;
uint8_t skipped = 0;
const char *dbname;
char buff[FILEBUFF];
/* Check-only mode: drain the stream and report success. */
if (dbio && dbio->chkonly) {
while (cli_dbgets(buff, FILEBUFF, NULL, dbio)) continue;
return CL_SUCCESS;
}
if (!dbio && (fs = fopen(filename, "rb")) == NULL) {// open the file
/* When loading a directory, a file that no longer exists is treated as
* a benign race instead of an error. */
if (options & CL_DB_DIRECTORY) { /* bb#1624 */
if (access(filename, R_OK)) {
if (errno == ENOENT) {
cli_dbgmsg("Detected race condition, ignoring old file %s\n", filename);
return CL_SUCCESS;
}
}
}
cli_errmsg("cli_load(): Can't open file %s\n", filename);
return CL_EOPEN;
}
/* dbname = file name without the directory part. */
if ((dbname = strrchr(filename, *PATHSEP)))
dbname++;
else
dbname = filename;
#ifdef HAVE_YARA
/* In YARA-only mode everything except .yar/.yara files is skipped. */
if (options & CL_DB_YARA_ONLY) {
if (cli_strbcasestr(dbname, ".yar") || cli_strbcasestr(dbname, ".yara"))
ret = cli_loadyara(fs, engine, signo, options, dbio, filename);
else
skipped = 1;
} else
#endif
if (cli_strbcasestr(dbname, ".db")) {
ret = cli_loaddb(fs, engine, signo, options, dbio, dbname);
} else if (cli_strbcasestr(dbname, ".cvd")) {// this walkthrough takes this branch first
ret = cli_cvdload(fs, engine, signo, options, 0, filename, 0);
} else if (cli_strbcasestr(dbname, ".cld")) {
ret = cli_cvdload(fs, engine, signo, options, 1, filename, 0);
} else if (cli_strbcasestr(dbname, ".cud")) {
ret = cli_cvdload(fs, engine, signo, options, 2, filename, 0);
} else if (cli_strbcasestr(dbname, ".crb")) {
ret = cli_loadcrt(fs, engine, dbio);
} else if (cli_strbcasestr(dbname, ".hdb") || cli_strbcasestr(dbname, ".hsb")) {
ret = cli_loadhash(fs, engine, signo, MD5_HDB, options, dbio, dbname);
} else if (cli_strbcasestr(dbname, ".hdu") || cli_strbcasestr(dbname, ".hsu")) {
if (options & CL_DB_PUA)
ret = cli_loadhash(fs, engine, signo, MD5_HDB, options | CL_DB_PUA_MODE, dbio, dbname);
else
skipped = 1;
} else if (cli_strbcasestr(dbname, ".fp") || cli_strbcasestr(dbname, ".sfp")) {
ret = cli_loadhash(fs, engine, signo, MD5_FP, options, dbio, dbname);
} else if (cli_strbcasestr(dbname, ".mdb") || cli_strbcasestr(dbname, ".msb")) {
ret = cli_loadhash(fs, engine, signo, MD5_MDB, options, dbio, dbname);
} else if (cli_strbcasestr(dbname, ".imp")) {
ret = cli_loadhash(fs, engine, signo, MD5_IMP, options, dbio, dbname);
} else if (cli_strbcasestr(dbname, ".mdu") || cli_strbcasestr(dbname, ".msu")) {
if (options & CL_DB_PUA)
ret = cli_loadhash(fs, engine, signo, MD5_MDB, options | CL_DB_PUA_MODE, dbio, dbname);
else
skipped = 1;
} else if (cli_strbcasestr(dbname, ".ndb")) {
ret = cli_loadndb(fs, engine, signo, 0, options, dbio, dbname);
} else if (cli_strbcasestr(dbname, ".ndu")) {
if (!(options & CL_DB_PUA))
skipped = 1;
else
ret = cli_loadndb(fs, engine, signo, 0, options | CL_DB_PUA_MODE, dbio, dbname);
} else if (cli_strbcasestr(filename, ".ldb")) {
ret = cli_loadldb(fs, engine, signo, options, dbio, dbname);
} else if (cli_strbcasestr(filename, ".ldu")) {
if (options & CL_DB_PUA)
ret = cli_loadldb(fs, engine, signo, options | CL_DB_PUA_MODE, dbio, dbname);
else
skipped = 1;
} else if (cli_strbcasestr(filename, ".cbc")) {
if (options & CL_DB_BYTECODE)
ret = cli_loadcbc(fs, engine, signo, options, dbio, dbname);
else
skipped = 1;
} else if (cli_strbcasestr(dbname, ".sdb")) {
ret = cli_loadndb(fs, engine, signo, 1, options, dbio, dbname);
} else if (cli_strbcasestr(dbname, ".zmd")) {
ret = cli_loadmd(fs, engine, signo, 1, options, dbio, dbname);
} else if (cli_strbcasestr(dbname, ".rmd")) {
ret = cli_loadmd(fs, engine, signo, 2, options, dbio, dbname);
} else if (cli_strbcasestr(dbname, ".cfg")) {
ret = cli_dconf_load(fs, engine, options, dbio);
} else if (cli_strbcasestr(dbname, ".info")) {
ret = cli_loadinfo(fs, engine, options, dbio);
} else if (cli_strbcasestr(dbname, ".wdb")) {
if (options & CL_DB_PHISHING_URLS) {
ret = cli_loadwdb(fs, engine, options, dbio);
} else
skipped = 1;
} else if (cli_strbcasestr(dbname, ".pdb") || cli_strbcasestr(dbname, ".gdb")) {
if (options & CL_DB_PHISHING_URLS) {
ret = cli_loadpdb(fs, engine, signo, options, dbio);
} else
skipped = 1;
} else if (cli_strbcasestr(dbname, ".ftm")) {
ret = cli_loadftm(fs, engine, options, 0, dbio);
} else if (cli_strbcasestr(dbname, ".ign") || cli_strbcasestr(dbname, ".ign2")) {
ret = cli_loadign(fs, engine, options, dbio);
} else if (cli_strbcasestr(dbname, ".idb")) {
ret = cli_loadidb(fs, engine, signo, options, dbio);
} else if (cli_strbcasestr(dbname, ".cdb")) {
ret = cli_loadcdb(fs, engine, signo, options, dbio);
} else if (cli_strbcasestr(dbname, ".cat")) {
ret = cli_loadmscat(fs, dbname, engine, options, dbio);
} else if (cli_strbcasestr(dbname, ".ioc")) {
ret = cli_loadopenioc(fs, dbname, engine, options);
#ifdef HAVE_YARA
} else if (cli_strbcasestr(dbname, ".yar") || cli_strbcasestr(dbname, ".yara")) {
if (!(options & CL_DB_YARA_EXCLUDE))
ret = cli_loadyara(fs, engine, signo, options, dbio, filename);
else
skipped = 1;
#endif
} else if (cli_strbcasestr(dbname, ".pwdb")) {
ret = cli_loadpwdb(fs, engine, options, 0, dbio);
} else {
cli_warnmsg("cli_load: unknown extension - skipping %s\n", filename);
skipped = 1;
}
if (ret) {
cli_errmsg("Can't load %s: %s\n", filename, cl_strerror(ret));
} else {
if (skipped)
cli_dbgmsg("%s skipped\n", filename);
else
cli_dbgmsg("%s loaded\n", filename);
}
/* fs is only set when this function opened the file itself. */
if (fs)
fclose(fs);
/* NOTE(review): *signo is dereferenced unconditionally here - confirm that
* callers never pass NULL while a progress callback is installed. */
if (engine->cb_sigload_progress) {
/* Let the progress callback function know how we're doing */
(void)engine->cb_sigload_progress(engine->num_total_signatures, *signo, engine->cb_sigload_progress_ctx);
}
return ret;
}
recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload
/*
* Verify and load a CVD/CLD/CUD database container.
*
* fs       - open stream on the container file.
* engine   - engine receiving the signatures.
* signo    - signature counter passed through to cli_tgzload().
* options  - CL_DB_* flags; CL_DB_OFFICIAL/CL_DB_SIGNED or CL_DB_UNSIGNED are
*            added here depending on dbtype.
* dbtype   - 0, 1 or 2; cli_load() passes 0 for .cvd, 1 for .cld and 2 for
*            .cud.
* filename - path of the container, used for the duplicate check and to
*            recognise the daily database.
* chkonly  - stored in dbio.chkonly for the second cli_tgzload() pass.
*
* Returns CL_SUCCESS (including when the file is skipped as an older or
* duplicate database), the cli_cvdverify()/cli_tgzload() error, CL_EMEM, or
* CL_EMALFDB when the loaded dbinfo does not match the verified header.
*/
int cli_cvdload(FILE *fs, struct cl_engine *engine, unsigned int *signo, unsigned int options, unsigned int dbtype, const char *filename, unsigned int chkonly)
{
struct cl_cvd cvd, dupcvd;
FILE *dupfs;
int ret;
time_t s_time;
int cfd;
struct cli_dbio dbio;
struct cli_dbinfo *dbinfo = NULL;
char *dupname;
dbio.hashctx = NULL;
cli_dbgmsg("in cli_cvdload()\n");
/* verify */
if ((ret = cli_cvdverify(fs, &cvd, dbtype)))
return ret;
if (dbtype <= 1) {
/* check for duplicate db */
dupname = cli_strdup(filename);
if (!dupname)
return CL_EMEM;
/* Rewrite the second-to-last character to get the sibling name:
* "xxx.cvd" <-> "xxx.cld". */
dupname[strlen(dupname) - 2] = (dbtype == 1 ? 'v' : 'l');
if (!access(dupname, R_OK) && (dupfs = fopen(dupname, "rb"))) {
if ((ret = cli_cvdverify(dupfs, &dupcvd, !dbtype))) {
fclose(dupfs);
free(dupname);
return ret;
}
fclose(dupfs);
/* The sibling is newer: skip this file. On equal versions only the
* dbtype 0 file is skipped. */
if (dupcvd.version > cvd.version) {
cli_warnmsg("Detected duplicate databases %s and %s. The %s database is older and will not be loaded, you should manually remove it from the database directory.\n", filename, dupname, filename);
free(dupname);
return CL_SUCCESS;
} else if (dupcvd.version == cvd.version && !dbtype) {
cli_warnmsg("Detected duplicate databases %s and %s, please manually remove one of them\n", filename, dupname);
free(dupname);
return CL_SUCCESS;
}
}
free(dupname);
}
/* For the daily database: warn when its timestamp is more than 3600 s in
* the future or more than 604800 s (7 days) in the past, and record its
* version and timestamp in the engine. */
if (strstr(filename, "daily.")) {
time(&s_time);
if (cvd.stime > s_time) {
if (cvd.stime - (unsigned int)s_time > 3600) {
cli_warnmsg("******************************************************\n");
cli_warnmsg("*** Virus database timestamp in the future! ***\n");
cli_warnmsg("*** Please check the timezone and clock settings ***\n");
cli_warnmsg("******************************************************\n");
}
} else if ((unsigned int)s_time - cvd.stime > 604800) {
cli_warnmsg("**************************************************\n");
cli_warnmsg("*** The virus database is older than 7 days! ***\n");
cli_warnmsg("*** Please update it as soon as possible. ***\n");
cli_warnmsg("**************************************************\n");
}
engine->dbversion[0] = cvd.version;
engine->dbversion[1] = cvd.stime;
}
/* The database asks for a newer functionality level than this engine has. */
if (cvd.fl > cl_retflevel()) {
cli_warnmsg("*******************************************************************\n");
cli_warnmsg("*** This version of the ClamAV engine is outdated. ***\n");
cli_warnmsg("*** Read https://docs.clamav.net/manual/Installing.html ***\n");
cli_warnmsg("*******************************************************************\n");
}
cfd = fileno(fs);
dbio.chkonly = 0;
/* First pass: called with a NULL dbinfo; the check below expects it to
* have populated engine->dbinfo. */
if (dbtype == 2)
ret = cli_tgzload(cfd, engine, signo, options | CL_DB_UNSIGNED, &dbio, NULL);
else
ret = cli_tgzload(cfd, engine, signo, options | CL_DB_OFFICIAL, &dbio, NULL);// load the database
if (ret != CL_SUCCESS)
return ret;
/* The header recorded in dbinfo must match the one verified above. */
/* NOTE(review): the two CL_EMALFDB returns below skip the dbinfo cleanup
* loop at the end of this function - confirm the list is released elsewhere. */
dbinfo = engine->dbinfo;
if (!dbinfo || !dbinfo->cvd || (dbinfo->cvd->version != cvd.version) || (dbinfo->cvd->sigs != cvd.sigs) || (dbinfo->cvd->fl != cvd.fl) || (dbinfo->cvd->stime != cvd.stime)) {
cli_errmsg("cli_cvdload: Corrupted CVD header\n");
return CL_EMALFDB;
}
dbinfo = engine->dbinfo ? engine->dbinfo->next : NULL;
if (!dbinfo) {
cli_errmsg("cli_cvdload: dbinfo error\n");
return CL_EMALFDB;
}
/* Second pass: same container, now with the dbinfo list (minus its first
* node) passed in. */
dbio.chkonly = chkonly;
if (dbtype == 2)
options |= CL_DB_UNSIGNED;
else
options |= CL_DB_SIGNED | CL_DB_OFFICIAL;
ret = cli_tgzload(cfd, engine, signo, options, &dbio, dbinfo);
/* Release the whole dbinfo list whatever the second pass returned. */
while (engine->dbinfo) {
dbinfo = engine->dbinfo;
engine->dbinfo = dbinfo->next;
MPOOL_FREE(engine->mempool, dbinfo->name);
MPOOL_FREE(engine->mempool, dbinfo->hash);
if (dbinfo->cvd)
cl_cvdfree(dbinfo->cvd);
MPOOL_FREE(engine->mempool, dbinfo);
}
return ret;
}
recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload=>cli_tgzload
static int cli_tgzload(int fd, struct cl_engine *engine, unsigned int *signo, unsigned int options, struct cli_dbio *dbio, struct cli_dbinfo *dbinfo)
{
char osize[13], name[101];
char block[TAR_BLOCKSIZE];
int nread, fdd, ret;
unsigned int type, size, pad, compr = 1;
off_t off;
struct cli_dbinfo *db;
char hash[32];
cli_dbgmsg("in cli_tgzload()\n");
if (lseek(fd, 512, SEEK_SET) < 0) {
return CL_ESEEK;
}
if (cli_readn(fd, block, 7) != 7)
return CL_EFORMAT; /* truncated file? */
if (!strncmp(block, "COPYING", 7))//检查是否为压缩文件
compr = 0;
if (lseek(fd, 512, SEEK_SET) < 0) {
return CL_ESEEK;
}
if ((fdd = dup(fd)) == -1) {
cli_errmsg("cli_tgzload: Can't duplicate descriptor %d\n", fd);
return CL_EDUP;
}
if (compr) {
if ((dbio->gzs = gzdopen(fdd, "rb")) == NULL) {//打开压缩文件
cli_errmsg("cli_tgzload: Can't gzdopen() descriptor %d, errno = %d\n", fdd, errno);
if (fdd > -1)
close(fdd);
return CL_EOPEN;
}
dbio->fs = NULL;
} else {
if ((dbio->fs = fdopen(fdd, "rb")) == NULL) {//打开普通文件
cli_errmsg("cli_tgzload: Can't fdopen() descriptor %d, errno = %d\n", fdd, errno);
if (fdd > -1)
close(fdd);
return CL_EOPEN;
}
dbio->gzs = NULL;
}
dbio->bufsize = CLI_DEFAULT_DBIO_BUFSIZE;
dbio->buf = cli_malloc(dbio->bufsize);
if (!dbio->buf) {
cli_errmsg("cli_tgzload: Can't allocate memory for dbio->buf\n");
cli_tgzload_cleanup(compr, dbio, fdd);
return CL_EMALFDB;
}
dbio->bufpt = NULL;
dbio->usebuf = 1;
dbio->readpt = dbio->buf;
while (1) {
if (compr)
nread = gzread(dbio->gzs, block, TAR_BLOCKSIZE);
else
nread = fread(block, 1, TAR_BLOCKSIZE, dbio->fs);
if (!nread)
break;
if (nread != TAR_BLOCKSIZE) {
cli_errmsg("cli_tgzload: Incomplete block read\n");
cli_tgzload_cleanup(compr, dbio, fdd);
return CL_EMALFDB;
}
if (block[0] == '\0') /* We're done *///读取结束
break;
strncpy(name, block, 100);
name[100] = '\0';//获取文件名
if (strchr(name, '/')) {
cli_errmsg("cli_tgzload: Slash separators are not allowed in CVD\n");
cli_tgzload_cleanup(compr, dbio, fdd);
return CL_EMALFDB;
}
type = block[156];
switch (type) {
case '0':
case '\0':
break;
case '5':
cli_errmsg("cli_tgzload: Directories are not supported in CVD\n");
cli_tgzload_cleanup(compr, dbio, fdd);
return CL_EMALFDB;
default:
cli_errmsg("cli_tgzload: Unknown type flag '%c'\n", type);
cli_tgzload_cleanup(compr, dbio, fdd);
return CL_EMALFDB;
}
strncpy(osize, block + 124, 12);
osize[12] = '\0';
if ((sscanf(osize, "%o", &size)) == 0) {//获取文件大小
cli_errmsg("cli_tgzload: Invalid size in header\n");
cli_tgzload_cleanup(compr, dbio, fdd);
return CL_EMALFDB;
}
dbio->size = size;
dbio->readsize = dbio->size < dbio->bufsize ? dbio->size : dbio->bufsize - 1;
dbio->bufpt = NULL;
dbio->readpt = dbio->buf;
if (!(dbio->hashctx)) {
dbio->hashctx = cl_hash_init("sha256");//初始化hash上下文,后面会用这个对文件内容进行计算hash验证
if (!(dbio->hashctx)) {
cli_tgzload_cleanup(compr, dbio, fdd);
return CL_EMALFDB;
}
}
dbio->bread = 0;
/* cli_dbgmsg("cli_tgzload: Loading %s, size: %u\n", name, size); */
if (compr)
off = (off_t)gzseek(dbio->gzs, 0, SEEK_CUR);
else
off = ftell(dbio->fs);
if ((!dbinfo && cli_strbcasestr(name, ".info")) || (dbinfo && CLI_DBEXT(name))) {
ret = cli_load(name, engine, signo, options, dbio);//加载病毒库
if (ret) {
cli_errmsg("cli_tgzload: Can't load %s\n", name);
cli_tgzload_cleanup(compr, dbio, fdd);
return CL_EMALFDB;
}
if (!dbinfo) {
cli_tgzload_cleanup(compr, dbio, fdd);
return CL_SUCCESS;
} else {
db = dbinfo;
while (db && strcmp(db->name, name))
db = db->next;
if (!db) {
cli_errmsg("cli_tgzload: File %s not found in .info\n", name);
cli_tgzload_cleanup(compr, dbio, fdd);
return CL_EMALFDB;
}
if (dbio->bread) {
if (db->size != dbio->bread) {
cli_errmsg("cli_tgzload: File %s not correctly loaded\n", name);
cli_tgzload_cleanup(compr, dbio, fdd);
return CL_EMALFDB;
}
cl_finish_hash(dbio->hashctx, hash);
dbio->hashctx = cl_hash_init("sha256");
if (!(dbio->hashctx)) {
cli_tgzload_cleanup(compr, dbio, fdd);
return CL_EMALFDB;
}
if (memcmp(db->hash, hash, 32)) {//验证hash
cli_errmsg("cli_tgzload: Invalid checksum for file %s\n", name);
cli_tgzload_cleanup(compr, dbio, fdd);
return CL_EMALFDB;
}
}
}
}
pad = size % TAR_BLOCKSIZE ? (TAR_BLOCKSIZE - (size % TAR_BLOCKSIZE)) : 0;
if (compr) {
if (off == gzseek(dbio->gzs, 0, SEEK_CUR))
gzseek(dbio->gzs, size + pad, SEEK_CUR);
else if (pad)
gzseek(dbio->gzs, pad, SEEK_CUR);
} else {
if (off == ftell(dbio->fs))
fseek(dbio->fs, size + pad, SEEK_CUR);
else if (pad)
fseek(dbio->fs, pad, SEEK_CUR);
}
}
cli_tgzload_cleanup(compr, dbio, fdd);
return CL_SUCCESS;
}
recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload=>cli_tgzload=>cli_load
/*我们以最简单的hdb病毒库为例进行讲解，解压后的原始内容格式如下
* 44d88612fea8a8f36de82e1278abb02f:68:Eicar-Test-Signature
*/
/*
 * Load one database file, choosing the loader from the file name's extension.
 *
 * filename  path (or archive member name) of the database
 * engine    engine that receives the signatures
 * signo     signature counter passed on to the individual loaders
 * options   CL_DB_* flags
 * dbio      when non-NULL, data is read through it instead of opening
 *           filename with fopen()
 *
 * Returns the status of the selected loader, CL_EOPEN if the file cannot be
 * opened, or CL_SUCCESS for the check-only and "file vanished" cases below.
 * This excerpt is abridged: the "......" line stands for code omitted by the
 * article.
 */
cl_error_t cli_load(const char *filename, struct cl_engine *engine, unsigned int *signo, unsigned int options, struct cli_dbio *dbio)
{
cl_error_t ret = CL_SUCCESS;
FILE *fs = NULL;
uint8_t skipped = 0;
const char *dbname;
char buff[FILEBUFF];
/* Check-only mode: just read through every line (nothing is parsed here)
 * and report success. */
if (dbio && dbio->chkonly) {
while (cli_dbgets(buff, FILEBUFF, NULL, dbio)) continue;
return CL_SUCCESS;
}
/* Without a dbio the file is opened directly. */
if (!dbio && (fs = fopen(filename, "rb")) == NULL) {
/* When loading a directory, a file that no longer exists is treated as a
 * harmless race rather than an error. */
if (options & CL_DB_DIRECTORY) { /* bb#1624 */
if (access(filename, R_OK)) {
if (errno == ENOENT) {
cli_dbgmsg("Detected race condition, ignoring old file %s\n", filename);
return CL_SUCCESS;
}
}
}
cli_errmsg("cli_load(): Can't open file %s\n", filename);
return CL_EOPEN;
}
/* dbname = part of filename after the last path separator. */
if ((dbname = strrchr(filename, *PATHSEP)))
dbname++;
else
dbname = filename;
#ifdef HAVE_YARA
/* YARA-only mode: load .yar/.yara files and mark everything else skipped. */
if (options & CL_DB_YARA_ONLY) {
if (cli_strbcasestr(dbname, ".yar") || cli_strbcasestr(dbname, ".yara"))
ret = cli_loadyara(fs, engine, signo, options, dbio, filename);
else
skipped = 1;
} else
#endif
/* Dispatch on the database extension. */
if (cli_strbcasestr(dbname, ".db")) {
ret = cli_loaddb(fs, engine, signo, options, dbio, dbname);
} else if (cli_strbcasestr(dbname, ".cvd")) {
ret = cli_cvdload(fs, engine, signo, options, 0, filename, 0);
} else if (cli_strbcasestr(dbname, ".cld")) {
ret = cli_cvdload(fs, engine, signo, options, 1, filename, 0);
} else if (cli_strbcasestr(dbname, ".cud")) {
ret = cli_cvdload(fs, engine, signo, options, 2, filename, 0);
} else if (cli_strbcasestr(dbname, ".crb")) {
ret = cli_loadcrt(fs, engine, dbio);
} else if (cli_strbcasestr(dbname, ".hdb") || cli_strbcasestr(dbname, ".hsb")) {
ret = cli_loadhash(fs, engine, signo, MD5_HDB, options, dbio, dbname);/* the hash-database example in this walkthrough takes this branch */
}
/* (code omitted in this excerpt) */
......
if (fs)
fclose(fs);
/* NOTE(review): *signo is dereferenced here without a NULL check — confirm
 * that every caller passes a valid pointer when the callback is set. */
if (engine->cb_sigload_progress) {
/* Let the progress callback function know how we're doing */
(void)engine->cb_sigload_progress(engine->num_total_signatures, *signo, engine->cb_sigload_progress_ctx);
}
return ret;
}
recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload=>cli_tgzload=>cli_load=>cli_loadhash
/*
 * Parse a colon-separated hash database and add every accepted entry to the
 * engine's hash matcher for the given mode.
 *
 * Each non-comment line needs at least three fields. For MD5_MDB the first
 * field is the size and the second the hash; for every other mode the hash is
 * first and the size second. The third field is the signature name. Optional
 * fields at index MD5_TOKENS-2 and MD5_TOKENS-1 are the minimum and maximum
 * functionality level. A size of "*" is a wildcard and is stored as size 0.
 *
 * fs / dbio  input source, handed to cli_dbgets()
 * engine     engine whose hm_mdb / hm_hdb / hm_imp / hm_fp matcher is filled
 *            (created here on first use)
 * signo      running signature counter; incremented by the number of entries
 *            added. May be NULL.
 * mode       MD5_MDB, MD5_HDB, MD5_IMP, anything else selects hm_fp
 * options    CL_DB_* flags
 * dbname     database name passed to the sigload callback (the part after
 *            the first '.' when there is one)
 *
 * Returns CL_SUCCESS, CL_EMEM on allocation failure, CL_EMALFDB for an empty
 * or malformed database, or the error reported by hm_addhash_str().
 */
static int cli_loadhash(FILE *fs, struct cl_engine *engine, unsigned int *signo, unsigned int mode, unsigned int options, struct cli_dbio *dbio, const char *dbname)
{
    const char *tokens[MD5_TOKENS + 1];
    char buffer[FILEBUFF], *buffer_cpy = NULL;
    const char *pt, *virname;
    char *endp;
    int ret = CL_SUCCESS;
    unsigned int size_field = 1, md5_field = 0, line = 0, sigs = 0, tokens_count;
    unsigned int req_fl = 0;
    /* Count before this file. Read once and NULL-safely: the progress
     * callback below used to dereference signo unconditionally. */
    const unsigned int base_signo = signo ? *signo : 0;
    struct cli_matcher *db;
    unsigned long size;

    /* Pick the matcher (and the field layout) for this mode. */
    if (mode == MD5_MDB) {
        size_field = 0;
        md5_field  = 1;
        db         = engine->hm_mdb;
    } else if (mode == MD5_HDB) {
        db = engine->hm_hdb;
    } else if (mode == MD5_IMP) {
        db = engine->hm_imp;
    } else {
        db = engine->hm_fp;
    }

    /* First database of this kind: create the matcher. */
    if (!db) {
        if (!(db = MPOOL_CALLOC(engine->mempool, 1, sizeof(*db))))
            return CL_EMEM;
#ifdef USE_MPOOL
        db->mempool = engine->mempool;
#endif
        if (mode == MD5_HDB)
            engine->hm_hdb = db;
        else if (mode == MD5_MDB)
            engine->hm_mdb = db;
        else if (mode == MD5_IMP)
            engine->hm_imp = db;
        else
            engine->hm_fp = db;
    }

    /* The ignore check needs the untokenized line, so keep a copy of it. */
    if (engine->ignored) {
        if (!(buffer_cpy = cli_malloc(FILEBUFF))) {
            cli_errmsg("cli_loadhash: Can't allocate memory for buffer_cpy\n");
            return CL_EMEM;
        }
    }

    while (cli_dbgets(buffer, FILEBUFF, fs, dbio)) {
        line++;
        if (buffer[0] == '#') /* comment line */
            continue;

        cli_chomp(buffer);
        if (engine->ignored)
            strcpy(buffer_cpy, buffer);

        tokens_count = cli_strtokenize(buffer, ':', MD5_TOKENS + 1, tokens);
        if (tokens_count < 3) {
            ret = CL_EMALFDB;
            break;
        }

        /* Optional functionality-level bounds. */
        if (tokens_count > MD5_TOKENS - 2) {
            req_fl = atoi(tokens[MD5_TOKENS - 2]);

            if (tokens_count > MD5_TOKENS) {
                ret = CL_EMALFDB;
                break;
            }

            if (cl_retflevel() < req_fl)
                continue;
            if (tokens_count == MD5_TOKENS) {
                int max_fl = atoi(tokens[MD5_TOKENS - 1]);
                if (cl_retflevel() > (unsigned int)max_fl)
                    continue;
            }
        }

        if (strcmp(tokens[size_field], "*")) {
            /* Explicit size: must be a non-zero decimal below 0xffffffff. */
            size = strtoul(tokens[size_field], &endp, 10);
            if (*endp || !size || size >= 0xffffffff) {
                cli_errmsg("cli_loadhash: Invalid value for the size field\n");
                ret = CL_EMALFDB;
                break;
            }
        } else {
            size = 0;
            // The wildcard feature was added in FLEVEL 73, so for backwards
            // compatibility with older clients, ensure that a minimum FLEVEL
            // is specified. This check doesn't apply to .imp rules, though,
            // since this rule category wasn't introduced until FLEVEL 90, and
            // has always supported wildcard usage in rules.
            if (mode != MD5_IMP && ((tokens_count < MD5_TOKENS - 1) || (req_fl < 73))) {
                cli_errmsg("cli_loadhash: Minimum FLEVEL field must be at least 73 for wildcard size hash signatures."
                           " For reference, running FLEVEL is %d\n",
                           cl_retflevel());
                ret = CL_EMALFDB;
                break;
            }
        }

        pt = tokens[2]; /* virname */

        /* Filters: PUA categories, ignore list, then the user callback. */
        if (engine->pua_cats && (options & CL_DB_PUA_MODE) && (options & (CL_DB_PUA_INCLUDE | CL_DB_PUA_EXCLUDE)))
            if (cli_chkpua(pt, engine->pua_cats, options))
                continue;

        if (engine->ignored && cli_chkign(engine->ignored, pt, buffer_cpy))
            continue;

        if (engine->cb_sigload) {
            const char *dot = strchr(dbname, '.');
            if (!dot)
                dot = dbname;
            else
                dot++;
            if (engine->cb_sigload(dot, pt, ~options & CL_DB_OFFICIAL, engine->cb_sigload_ctx)) {
                cli_dbgmsg("cli_loadhash: skipping %s (%s) due to callback\n", pt, dot);
                continue;
            }
        }

        virname = CLI_MPOOL_VIRNAME(engine->mempool, pt, options & CL_DB_OFFICIAL);
        if (!virname) {
            ret = CL_EMALFDB;
            break;
        }

        /* Convert the hex digest to binary and store it together with the
         * name, grouped by the size field (see hm_addhash_str()). */
        if (CL_SUCCESS != (ret = hm_addhash_str(db, tokens[md5_field], size, virname))) {
            cli_errmsg("cli_loadhash: Malformed hash string at line %u\n", line);
            MPOOL_FREE(engine->mempool, (void *)virname);
            break;
        }

        sigs++;

        if (engine->cb_sigload_progress && ((base_signo + sigs) % 10000 == 0)) {
            /* Let the progress callback function know how we're doing */
            (void)engine->cb_sigload_progress(engine->num_total_signatures, base_signo + sigs, engine->cb_sigload_progress_ctx);
        }
    }

    /* buffer_cpy is NULL when no ignore list is configured. */
    free(buffer_cpy);

    if (!line) {
        cli_errmsg("cli_loadhash: Empty database file\n");
        return CL_EMALFDB;
    }

    if (ret) {
        cli_errmsg("cli_loadhash: Problem parsing database at line %u\n", line);
        return ret;
    }

    if (signo)
        *signo += sigs;

    return CL_SUCCESS;
}
recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload=>cli_tgzload=>cli_load=>cli_loadhash=>hm_addhash_str
int hm_addhash_str(struct cli_matcher *root, const char *strhash, uint32_t size, const char *virusname)
{
enum CLI_HASH_TYPE type;
char binhash[CLI_HASHLEN_MAX];
int hlen;
if (!root || !strhash) {
cli_errmsg("hm_addhash_str: NULL root or hash\n");
return CL_ENULLARG;
}
/* size 0 here is now a wildcard size match */
if (size == (uint32_t)-1) {
cli_errmsg("hm_addhash_str: null or invalid size (%u)\n", size);
return CL_EARG;
}
hlen = strlen(strhash);
switch (hlen) {
case 32:
type = CLI_HASH_MD5;//此例为这个type
break;
case 40:
type = CLI_HASH_SHA1;
break;
case 64:
type = CLI_HASH_SHA256;
break;
default:
cli_errmsg("hm_addhash_str: invalid hash %s -- FIXME!\n", strhash);
return CL_EARG;
}
if (cli_hex2str_to(strhash, (char *)binhash, hlen)) {//hex转换成str
cli_errmsg("hm_addhash_str: invalid hash %s\n", strhash);
return CL_EARG;
}
return hm_addhash_bin(root, binhash, type, size, virusname);//存入哈希数组
}
recvloop=>reload_db=>reload_th=>cl_load=>cli_loaddbdir=>cli_load=>cli_cvdload=>cli_tgzload=>cli_load=>cli_loadhash=>hm_addhash_str=>hm_addhash_bin
/*
 * Append one binary hash and its virus name to a matcher.
 *
 * Hashes with a non-zero size are grouped per hash type in a table keyed by
 * that size (root->hm.sizehashes[type]); size 0 goes to the per-type wildcard
 * set (root->hwild.hashes[type]). Inside a group the hashes live in one flat
 * array of fixed-width entries with a parallel array of names. Entries are
 * appended unsorted.
 *
 * root       matcher to add to
 * binhash    raw hash bytes; hashlen[type] bytes are copied
 * type       hash type, used as index into hashlen[] and the per-type tables
 * size       size key, or 0 for the wildcard set
 * virusname  pointer stored as-is (not copied)
 *
 * Returns 0 on success, CL_EMEM on allocation failure, or the error returned
 * by cli_htu32_init() / cli_htu32_insert().
 */
int hm_addhash_bin(struct cli_matcher *root, const void *binhash, enum CLI_HASH_TYPE type, uint32_t size, const char *virusname)
{
    const unsigned int hlen = hashlen[type];
    const struct cli_htu32_element *item;
    struct cli_sz_hash *szh;
    struct cli_htu32 *ht;
    int rc;

    if (size) {
        /* size non-zero, find sz_hash element in size-driven hashtable */
        ht = &root->hm.sizehashes[type];
        if (!root->hm.sizehashes[type].capacity) {
            /* Table not set up yet for this hash type. */
            rc = cli_htu32_init(ht, 64, root->mempool);
            if (rc)
                return rc;
        }

        item = cli_htu32_find(ht, size);
        if (!item) {
            /* First hash for this size: create its group, keyed by size. */
            struct cli_htu32_element htitem;

            szh = MPOOL_CALLOC(root->mempool, 1, sizeof(*szh));
            if (!szh) {
                cli_errmsg("hm_addhash_bin: failed to allocate size hash\n");
                return CL_EMEM;
            }

            htitem.key         = size;
            htitem.data.as_ptr = szh;
            rc                 = cli_htu32_insert(ht, &htitem, root->mempool);
            if (rc) {
                /* Message now ends in '\n' like every other one here. */
                cli_errmsg("hm_addhash_bin: failed to add item to hashtab\n");
                MPOOL_FREE(root->mempool, szh);
                return rc;
            }
        } else {
            szh = (struct cli_sz_hash *)item->data.as_ptr;
        }
    } else {
        /* size 0 = wildcard */
        szh = &root->hwild.hashes[type];
    }

    /* Grow both parallel arrays by one entry. On failure the group is reset
     * to empty. NOTE(review): the old pointer is overwritten with the result,
     * so this presumes MPOOL_REALLOC2 releases the old buffer on failure —
     * confirm. */
    szh->items++;

    szh->hash_array = MPOOL_REALLOC2(root->mempool, szh->hash_array, hlen * szh->items);
    if (!szh->hash_array) {
        cli_errmsg("hm_addhash_bin: failed to grow hash array to %u entries\n", szh->items);
        szh->items = 0;
        MPOOL_FREE(root->mempool, szh->virusnames);
        szh->virusnames = NULL;
        return CL_EMEM;
    }

    szh->virusnames = MPOOL_REALLOC2(root->mempool, szh->virusnames, sizeof(*szh->virusnames) * szh->items);
    if (!szh->virusnames) {
        cli_errmsg("hm_addhash_bin: failed to grow virusname array to %u entries\n", szh->items);
        szh->items = 0;
        MPOOL_FREE(root->mempool, szh->hash_array);
        szh->hash_array = NULL;
        return CL_EMEM;
    }

    /* Store the new entry in the last slot of each array. */
    memcpy(&szh->hash_array[(szh->items - 1) * hlen], binhash, hlen);
    szh->virusnames[(szh->items - 1)] = virusname;

    return 0;
}
接着就是循环调用读取文件中的sig然后解析,添加到这个哈希数组中。
我们加载完病毒库后,需要对其进行编译。
recvloop=>reload_db=>reload_th=>cl_engine_compile
/*
 * Finalize a loaded engine (abridged excerpt; the dotted lines stand for code
 * omitted by the article).
 *
 * The part shown here runs hm_flush() on the hash-database matcher, if one
 * was created, and marks the engine as compiled by setting CL_DB_COMPILED in
 * engine->dboptions.
 */
cl_error_t cl_engine_compile(struct cl_engine *engine)
{
......
/* Sort the hash arrays collected while loading (see hm_flush()). */
if (engine->hm_hdb)
hm_flush(engine->hm_hdb);/* "compile" the hash arrays */
TASK_COMPLETE();
.....
engine->dboptions |= CL_DB_COMPILED;
return CL_SUCCESS;
}
recvloop=>reload_db=>reload_th=>cl_engine_compile=>hm_flush
/* flush both size-specific and agnostic hash sets */
void hm_flush(struct cli_matcher *root)
{
enum CLI_HASH_TYPE type;
unsigned int keylen;
struct cli_sz_hash *szh;
if (!root)
return;
for (type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
struct cli_htu32 *ht = &root->hm.sizehashes[type];
const struct cli_htu32_element *item = NULL;
szh = NULL;
if (!root->hm.sizehashes[type].capacity)
continue;
while ((item = cli_htu32_next(ht, item))) {
szh = (struct cli_sz_hash *)item->data.as_ptr;
keylen = hashlen[type];
if (szh->items > 1)
hm_sort(szh, 0, szh->items, keylen);//排序
}
}
for (type = CLI_HASH_MD5; type < CLI_HASH_AVAIL_TYPES; type++) {
szh = &root->hwild.hashes[type];
keylen = hashlen[type];
if (szh->items > 1)
hm_sort(szh, 0, szh->items, keylen);
}
}
recvloop=>reload_db=>reload_th=>cl_engine_compile=>hm_flush=>hm_sort
/*md5的长度都是固定的, 所以每个元素的长度也是固定,然后对其进行排序*/
/* Exchange entries a and b (hash bytes and virus name) of a hash set.
 * a and b must be different indices. */
static void hm_swap_entries(struct cli_sz_hash *szh, size_t a, size_t b, unsigned int keylen)
{
    uint8_t spare[CLI_HASHLEN_MAX];
    const char *spare_name;

    memcpy(spare, &szh->hash_array[keylen * a], keylen);
    spare_name = szh->virusnames[a];

    memcpy(&szh->hash_array[keylen * a], &szh->hash_array[keylen * b], keylen);
    szh->virusnames[a] = szh->virusnames[b];

    memcpy(&szh->hash_array[keylen * b], spare, keylen);
    szh->virusnames[b] = spare_name;
}

/*
 * Recursively sort entries [l, r) of a hash set in place, ordered by hm_cmp()
 * on the keylen-byte hashes. The virus-name array is permuted together with
 * the hash array so both stay aligned.
 *
 * The first entry of the range is used as pivot: entries comparing greater
 * than the pivot are moved to the tail, the pivot is then placed between the
 * two partitions, and each partition is sorted recursively.
 */
static void hm_sort(struct cli_sz_hash *szh, size_t l, size_t r, unsigned int keylen)
{
    uint8_t pivot[CLI_HASHLEN_MAX];
    size_t lo, hi;

    /* Ranges of zero or one entry are already sorted. */
    if (r <= l + 1)
        return;

    memcpy(pivot, &szh->hash_array[keylen * l], keylen);

    lo = l + 1;
    hi = r;
    while (lo < hi) {
        if (hm_cmp(&szh->hash_array[keylen * lo], pivot, keylen) <= 0) {
            /* Not greater than the pivot: belongs to the left partition. */
            lo++;
            continue;
        }

        /* Greater than the pivot: move it to the shrinking tail. */
        hi--;
        if (lo == hi)
            break;
        hm_swap_entries(szh, lo, hi, keylen);
    }

    /* Put the pivot between the two partitions. */
    lo--;
    if (lo != l)
        hm_swap_entries(szh, lo, l, keylen);

    hm_sort(szh, l, lo, keylen);
    hm_sort(szh, hi, r, keylen);
}
到此,整个病毒库的加载就算是结束了。