摸了两天鱼。。后面保证一天一到两更争取这周把AFLNET基本看完。
接下来看看函数。
- void setup_ipsm()
- void destroy_ipsm()
- u32 get_state_index(u32 state_id)
- void expand_was_fuzzed_map(u32 new_states, u32 new_qentries)
- u32 get_unique_state_count(unsigned int *state_sequence, unsigned int state_count)
- u8 is_state_sequence_interesting(unsigned int *state_sequence, unsigned int state_count)
- void update_region_annotations(struct queue_entry* q)
- u8* choose_source_region(u32 *out_len)
- void update_fuzzs()
- u32 update_scores_and_select_next_state(u8 mode)
- unsigned int choose_target_state(u8 mode)
- struct queue_entry *choose_seed(u32 target_state_id, u8 mode)
- void update_state_aware_variables(struct queue_entry *q, u8 dry_run)
- int send_over_network()
void setup_ipsm()
/* Create the (initially empty) implemented protocol state machine: a directed
   graphviz graph named "g", plus the two hash containers used alongside it. */
void setup_ipsm()
{
  /* Containers for seen state-sequence hashes and for per-state records */
  khs_ipsm_paths = kh_init(hs32);
  khms_states = kh_init(hms);

  ipsm = agopen("g", Agdirected, 0);

  /* Black is the default color for both nodes and edges */
  agattr(ipsm, AGNODE, "color", "black");
  agattr(ipsm, AGEDGE, "color", "black");
}
这里先对之前变量分析的时候这几个变量做一点解释
/* Hash table/map and list */
klist_t(lms) *kl_messages;
khash_t(hs32) *khs_ipsm_paths;
khash_t(hms) *khms_states;
由于C语言没有list、hash map的数据结构,所以导入了klist.h和khash.h来使用list和hash map,保存消息序列的变量kl_messages使用了list,记录状态序列路径的变量和状态集合用hash table。
这里就是个初始化过程,首先使用graphviz的agopen函数创建一个名为"g"的有向图,之后通过agattr把节点和边的默认颜色都设置为黑色,然后初始化khs_ipsm_paths和khms_states这两个变量。关于graphviz后面我去学习学习再总结一下。
void destroy_ipsm()
/* Tear down the state machine: close the graph, drop the path hash set, free
   every per-state record together with its seed list, then free state_ids. */
void destroy_ipsm()
{
  state_info_t *entry;

  agclose(ipsm);
  kh_destroy(hs32, khs_ipsm_paths);

  /* The map stores pointers, so each value is released before the map itself */
  kh_foreach_value(khms_states, entry, {
    ck_free(entry->seeds);
    ck_free(entry);
  });
  kh_destroy(hms, khms_states);

  ck_free(state_ids);
}
关闭ipsm,销毁ipsm_paths,遍历释放khms_states中每个state的空间,(释放空间均使用ck_free()),之后删除khms_states,最后释放state_ids。
u32 get_state_index(u32 state_id)
/* Return the position of state_id within the state_ids list.
   If the ID is not in the list, the returned value equals state_ids_count. */
u32 get_state_index(u32 state_id) {
  u32 pos = 0;

  while (pos < state_ids_count && state_ids[pos] != state_id) pos++;

  return pos;
}
这个比较简单没有什么好说的就是获取指定state_id在列表中的位置。
void expand_was_fuzzed_map(u32 new_states, u32 new_qentries)
/* Grow was_fuzzed_map by new_states rows and new_qentries columns.
   Called when a new state or a new seed has been discovered. Every cell that
   did not exist before is set to -1 (UNREACHABLE); existing cells are kept. */
void expand_was_fuzzed_map(u32 new_states, u32 new_qentries) {
  const u32 total_rows = fuzzed_map_states + new_states;
  const u32 total_cols = fuzzed_map_qentries + new_qentries;
  u32 row, col;

  //Resize the row table, then every row
  was_fuzzed_map = (char **)ck_realloc(was_fuzzed_map, total_rows * sizeof(char *));

  for (row = 0; row < total_rows; row++) {
    was_fuzzed_map[row] = (char *)ck_realloc(was_fuzzed_map[row], total_cols * sizeof(char));

    //A brand-new row is filled completely; an old row only gets its new columns filled
    col = (row >= fuzzed_map_states) ? 0 : fuzzed_map_qentries;
    for (; col < total_cols; col++) was_fuzzed_map[row][col] = -1;
  }

  //Record the new dimensions: rows are states, columns are queue entries
  fuzzed_map_states = total_rows;
  fuzzed_map_qentries = total_cols;
}
先看一下这几个变量的定义
char **was_fuzzed_map = NULL; /* A 2D array keeping state-specific was_fuzzed information */
u32 fuzzed_map_states = 0;
u32 fuzzed_map_qentries = 0;
这里好像没有深究细节的必要,只需要知道在发现了新种子或者新状态时使用该函数进行更新即可。
u32 get_unique_state_count(unsigned int *state_sequence, unsigned int state_count)
/* Get unique state count, given a state sequence */
u32 get_unique_state_count(unsigned int *state_sequence, unsigned int state_count) {
//A hash set is used so that no state is counted twice
khash_t(hs32) *khs_state_ids;
khs_state_ids = kh_init(hs32);
unsigned int discard, state_id, i;
u32 result = 0;
for (i = 0; i < state_count; i++) {
state_id = state_sequence[i];
if (kh_get(hs32, khs_state_ids, state_id) != kh_end(khs_state_ids)) {
continue;
} else {
kh_put(hs32, khs_state_ids, state_id, &discard);
result++;
}
}
kh_destroy(hs32, khs_state_ids);
return result;
}
输入一个状态序列,返回不同状态的总数。
如注释所说,使用了hash map来保证每一个状态不会被计算两次。
u8 is_state_sequence_interesting(unsigned int *state_sequence, unsigned int state_count)
/* Decide whether a state sequence has not been seen before.

   The sequence is first shortened so that loops are only counted once: an
   element is dropped when it equals both of its two predecessors in the
   original sequence. The shortened sequence is hashed with hash32() and looked
   up in khs_ipsm_paths.

   Returns 1 (and records the hash) when the hash is new, 0 otherwise. */
u8 is_state_sequence_interesting(unsigned int *state_sequence, unsigned int state_count) {
  u32 *trimmed_state_sequence = NULL;
  u32 i, count = 0;

  //The trimmed sequence can never be longer than the input, so one allocation
  //is enough. ck_alloc is the file's checked allocator; the previous
  //per-element realloc() was never checked for failure.
  if (state_count) {
    trimmed_state_sequence = (u32 *)ck_alloc(state_count * sizeof(u32));

    for (i = 0; i < state_count; i++) {
      //limit the loop count to only 1
      if ((i >= 2) && (state_sequence[i] == state_sequence[i - 1]) && (state_sequence[i] == state_sequence[i - 2])) continue;
      trimmed_state_sequence[count++] = state_sequence[i];
    }
  }

  //Calculate the hash based on the shortened state sequence
  u32 hashKey = hash32(trimmed_state_sequence, count * sizeof(u32), 0);
  if (trimmed_state_sequence) ck_free(trimmed_state_sequence);

  //Already recorded: nothing new
  if (kh_get(hs32, khs_ipsm_paths, hashKey) != kh_end(khs_ipsm_paths)) return 0;

  //First time this path is seen: remember it
  int dummy;
  kh_put(hs32, khs_ipsm_paths, hashKey, &dummy);
  return 1;
}
这里是判断状态序列是否有趣,判断标准是是否产生了新的状态、是否产生了新的执行路径。首先将状态序列修剪(同一状态连续出现三次及以上时,只保留前两个),再计算修剪后的状态序列的hash值,根据该hash值是否在khs_ipsm_paths中出现过来决定是否有趣。
void update_region_annotations(struct queue_entry* q)
/* Annotate each region of q with the state sequence extracted from the
   server responses accumulated up to the corresponding sent message. */
void update_region_annotations(struct queue_entry* q)
{
  u32 msg;

  for (msg = 0; msg < messages_sent; msg++) {
    //True when there are no response bytes yet, or this message added none
    int nothing_new = (response_bytes[msg] == 0) ||
                      (msg > 0 && (response_bytes[msg] - response_bytes[msg - 1] == 0));

    if (!nothing_new) {
      unsigned int n_states;
      q->regions[msg].state_sequence = (*extract_response_codes)(response_buf, response_bytes[msg], &n_states);
      q->regions[msg].state_count = n_states;
      continue;
    }

    q->regions[msg].state_sequence = NULL;
    q->regions[msg].state_count = 0;
  }
}
全局变量
u32 messages_sent = 0;
通过extract_response_codes获取每条消息发送完成后从第一条响应报文到这条消息的响应报文的响应码序列(这里就看作了状态序列),并设置regions[i]中的状态序列和状态数变量。
u8* choose_source_region(u32 *out_len)
/* Pick a random region of a random queue entry and return a copy of its bytes.

   On success the caller receives a buffer allocated with ck_alloc and
   *out_len is set to its length. NULL is returned (with *out_len == 0) when
   the queue is empty, the chosen seed has no regions, the region is larger
   than MAX_FILE, or the region cannot be read from the seed file. */
u8* choose_source_region(u32 *out_len) {
  u8 *out = NULL;
  *out_len = 0;

  //Nothing to choose from; this also avoids UR(0), which would be a modulo by zero
  if (!queue || !queued_paths) return NULL;

  //randomly select a seed
  struct queue_entry *q = queue;
  u32 index = UR(queued_paths);
  while (index != 0) {
    q = q->next;
    index--;
  }

  //randomly select a region in the selected seed
  if (!q->region_count) return NULL;

  u32 reg_index = UR(q->region_count);
  u32 len = q->regions[reg_index].end_byte - q->regions[reg_index].start_byte + 1;
  if (len > MAX_FILE) return NULL;

  out = (u8 *)ck_alloc(len);
  if (out == NULL) PFATAL("Unable allocate a memory region to store a region");

  //Read region data into memory
  FILE *fp = fopen(q->fname, "rb");
  if (!fp) {
    ck_free(out);
    return NULL;
  }

  //start_byte is an absolute offset into the seed file, hence SEEK_SET
  if (fseek(fp, q->regions[reg_index].start_byte, SEEK_SET) != 0 ||
      fread(out, 1, len, fp) != len) {
    fclose(fp);
    ck_free(out);
    return NULL;
  }

  fclose(fp);
  *out_len = len;
  return out;
}
选择一个部分的数据进行变异
void update_fuzzs()
/* Update #fuzzs visiting a specific state */
void update_fuzzs() {
unsigned int state_count, i, discard;
unsigned int *state_sequence = (*extract_response_codes)(response_buf, response_buf_size, &state_count);
//A hash set is used so that the #paths is not updated more than once for one specific state
khash_t(hs32) *khs_state_ids;
khint_t k;
khs_state_ids = kh_init(hs32);
for(i = 0; i < state_count; i++) {
unsigned int state_id = state_sequence[i];
if (kh_get(hs32, khs_state_ids, state_id) != kh_end(khs_state_ids)) {
continue;
} else {
kh_put(hs32, khs_state_ids, state_id, &discard);
k = kh_get(hms, khms_states, state_id);
if (k != kh_end(khms_states)) {
kh_val(khms_states, k)->fuzzs++;
}
}
}
ck_free(state_sequence);
kh_destroy(hs32, khs_state_ids);
}
这里先去重点看了下Klib中对Khash的用法,
kh_get(hs32, khs_state_ids, state_id) != kh_end(khs_state_ids)
这里应该是如果state_id对应的key已经存在则continue否则插入这个key并进行下面的操作。通过khs_state_ids这个哈希表来确保每个状态只被更新一次。
u32 update_scores_and_select_next_state(u8 mode)
/* Recompute the score of every known state and pick the next state at random,
   weighted by score.

   state_scores[i] holds the cumulative score of states 0..i; a random value
   below the grand total is drawn with UR() and mapped back to a state through
   index_search().

   Returns the selected state ID, or 0 when there are no states or the total
   score is 0. */
u32 update_scores_and_select_next_state(u8 mode) {
  u32 result = 0, i;

  if (state_ids_count == 0) return 0;

  u32 *state_scores = NULL;
  state_scores = (u32 *)ck_alloc(state_ids_count * sizeof(u32));
  if (!state_scores) PFATAL("Cannot allocate memory for state_scores");

  //Running sum of all scores seen so far
  u32 running_total = 0;

  //Update the states' score
  for (i = 0; i < state_ids_count; i++) {
    khint_t k = kh_get(hms, khms_states, state_ids[i]);

    if (k != kh_end(khms_states)) {
      state_info_t *state = kh_val(khms_states, k);

      switch (mode) {
        case FAVOR:
          state->score = ceil(1000 * pow(2, -log10(log10(state->fuzzs + 1) * state->selected_times + 1)) * pow(2, log(state->paths_discovered + 1)));
          break;
        default:
          //other cases are reserved; the stored score is used unchanged
          break;
      }

      running_total += state->score;
    }

    //A state missing from khms_states adds nothing, but the cumulative value
    //is still carried forward so the array never decreases
    state_scores[i] = running_total;
  }

  //UR(0) would be a modulo by zero, so only draw when there is weight to share
  if (running_total > 0) {
    u32 randV = UR(running_total);
    u32 idx = index_search(state_scores, state_ids_count, randV);
    result = state_ids[idx];
  }

  ck_free(state_scores);
  return result;
}
这是计算状态得分并且选取下一个状态的函数
u32 state_id = state_ids[i];
k = kh_get(hms, khms_states, state_id);
if (k != kh_end(khms_states)) {
state = kh_val(khms_states, k);
取出一个state
计算单个state的分数,之后按照代码描述装填state_scores数组
调用UR函数生成一个小于scores总和的随机数(范围是0到总和-1),并调用index_search返回第一个大于等于这个随机数的state_scores元素的index,将该index对应的状态作为下一个值得测试的状态返回
这里附上用到的UR函数和index_search函数
/* Find the first position in A (n entries) whose value is not smaller than
   val. Returns n when every entry is smaller than val. */
u32 index_search(u32 *A, u32 n, u32 val) {
  u32 pos = 0;

  while (pos < n && val > A[pos]) pos++;

  return pos;
}
/* Produce a pseudo-random number in the range 0 .. limit - 1. The modulo may
   introduce a slight bias. When rand_cnt runs out, the generator is re-seeded
   from /dev/urandom and a new countdown is chosen. */
static inline u32 UR(u32 limit) {

  /* rand_cnt is tested first and decremented afterwards */
  const int reseed_now = !rand_cnt--;

  if (unlikely(reseed_now)) {

    u32 entropy[2];

    ck_read(dev_urandom_fd, &entropy, sizeof(entropy), "/dev/urandom");

    srandom(entropy[0]);
    rand_cnt = (RESEED_RNG / 2) + (entropy[1] % RESEED_RNG);

  }

  return random() % limit;

}
这个选取策略感觉奇奇怪怪的,有空琢磨琢磨看看
unsigned int choose_target_state(u8 mode)
/* Pick the state to target for state-aware fuzzing.

   RANDOM_SELECTION: a uniformly random entry of state_ids.
   ROUND_ROBIN:      the entry at selected_state_index, which then advances
                     and wraps around.
   FAVOR:            round-robin while state_cycles < 5 (state_cycles grows on
                     every wrap-around), score-based selection afterwards.
   Any other mode yields 0. */
unsigned int choose_target_state(u8 mode) {

  if (mode == RANDOM_SELECTION) {
    selected_state_index = UR(state_ids_count);
    return state_ids[selected_state_index];
  }

  if (mode == ROUND_ROBIN) {
    u32 picked = state_ids[selected_state_index];
    if (++selected_state_index == state_ids_count) selected_state_index = 0;
    return picked;
  }

  if (mode == FAVOR) {
    /* Enough statistics gathered: switch to score-based selection */
    if (state_cycles >= 5) return update_scores_and_select_next_state(FAVOR);

    /* Otherwise keep doing round-robin and count completed cycles */
    u32 picked = state_ids[selected_state_index];
    if (++selected_state_index == state_ids_count) {
      selected_state_index = 0;
      state_cycles++;
    }
    return picked;
  }

  return 0;
}
这个应该是重点,上面那个还没有分析啥时候会被使用到,但是这个明确说是在state-aware 状态下选取目标状态的函数。
选取策略有三种,第一种是随机(RANDOM_SELECTION)随机选一个index返回;第二种是轮询(ROUND_ROBIN)一个一个挨着选直到全部选完从头开始继续选;最后一种是偏好模式(FAVOR)在5次循环之前都先执行轮询的方式以积攒数据,之后调用上面分析的update_scores_and_select_next_state()函数进行选取。
struct queue_entry *choose_seed(u32 target_state_id, u8 mode)
/* Select a seed to exercise the target state.

   target_state_id: ID of the state to reach; it must be present in
                    khms_states, otherwise PFATAL is called.
   mode:            RANDOM_SELECTION, ROUND_ROBIN or FAVOR. Any other value
                    leaves the result NULL.

   Returns one entry of the state's seeds list, or NULL when the state has no
   seeds. The state's selected_seed_index is updated as a side effect. */
struct queue_entry *choose_seed(u32 target_state_id, u8 mode)
{
khint_t k;
state_info_t *state;
struct queue_entry *result = NULL;
k = kh_get(hms, khms_states, target_state_id);
if (k != kh_end(khms_states)) {
state = kh_val(khms_states, k);
//Nothing to pick from
if (state->seeds_count == 0) return NULL;
switch (mode) {
case RANDOM_SELECTION: //Random seed selection
state->selected_seed_index = UR(state->seeds_count);
result = state->seeds[state->selected_seed_index];
break;
case ROUND_ROBIN: //Round-robin seed selection
//Return the current seed, then advance the cursor and wrap at the end
result = state->seeds[state->selected_seed_index];
state->selected_seed_index++;
if (state->selected_seed_index == state->seeds_count) state->selected_seed_index = 0;
break;
case FAVOR:
if (state->seeds_count > 10) {
//Do seed selection similar to AFL + take into account state-aware information
//e.g., was_fuzzed information becomes state-aware
//passed_cycles counts how many times the cursor wrapped around the seeds list;
//the search gives up after 5 wrap-arounds and returns the last seed examined
u32 passed_cycles = 0;
while (passed_cycles < 5) {
result = state->seeds[state->selected_seed_index];
//Advance the cursor now, so every "continue" below moves on to the next seed
if (state->selected_seed_index + 1 == state->seeds_count) {
state->selected_seed_index = 0;
passed_cycles++;
} else state->selected_seed_index++;
//Skip this seed with high probability if it is neither an initial seed nor a seed generated while the
//current target_state_id was targeted
if (result->generating_state_id != target_state_id && !result->is_initial_seed && UR(100) < 90) continue;
//Row of was_fuzzed_map belonging to the target state
u32 target_state_index = get_state_index(target_state_id);
if (pending_favored) {
/* If we have any favored, non-fuzzed new arrivals in the queue,
possibly skip to them at the expense of already-fuzzed or non-favored
cases. */
if (((was_fuzzed_map[target_state_index][result->index] == 1) || !result->favored) && UR(100) < SKIP_TO_NEW_PROB) continue;
/* Otherwise, this seed is selected */
break;
} else if (!result->favored && queued_paths > 10) {
/* Otherwise, still possibly skip non-favored cases, albeit less often.
The odds of skipping stuff are higher for already-fuzzed inputs and
lower for never-fuzzed entries. */
if (queue_cycle > 1 && (was_fuzzed_map[target_state_index][result->index] == 0)) {
if (UR(100) < SKIP_NFAV_NEW_PROB) continue;
} else {
if (UR(100) < SKIP_NFAV_OLD_PROB) continue;
}
/* Otherwise, this seed is selected */
break;
}
//NOTE(review): when pending_favored is 0 and the seed is favored (or queued_paths <= 10),
//neither branch above runs, so there is no break and the loop simply moves on to the
//next seed -- confirm that this is intended
}
} else {
//Do Round-robin if seeds_count of the selected state is small
result = state->seeds[state->selected_seed_index];
state->selected_seed_index++;
if (state->selected_seed_index == state->seeds_count) state->selected_seed_index = 0;
}
break;
default:
break;
}
} else {
PFATAL("AFLNet - the states hashtable has no entries for state %d", target_state_id);
}
return result;
}
这个函数也是重点,作用是在选出目标状态后挑选一个测试用的seed。
挑选策略同样有三种,随机、轮询和偏好。
重点看一下偏好模式。
case FAVOR:
if (state->seeds_count > 10) {
//Do seed selection similar to AFL + take into account state-aware information
//e.g., was_fuzzed information becomes state-aware
u32 passed_cycles = 0;
while (passed_cycles < 5) {
result = state->seeds[state->selected_seed_index];
if (state->selected_seed_index + 1 == state->seeds_count) {
state->selected_seed_index = 0;
passed_cycles++;
} else state->selected_seed_index++;
//Skip this seed with high probability if it is neither an initial seed nor a seed generated while the
//current target_state_id was targeted
if (result->generating_state_id != target_state_id && !result->is_initial_seed && UR(100) < 90) continue;
u32 target_state_index = get_state_index(target_state_id);
if (pending_favored) {
/* If we have any favored, non-fuzzed new arrivals in the queue,
possibly skip to them at the expense of already-fuzzed or non-favored
cases. */
if (((was_fuzzed_map[target_state_index][result->index] == 1) || !result->favored) && UR(100) < SKIP_TO_NEW_PROB) continue;
/* Otherwise, this seed is selected */
break;
} else if (!result->favored && queued_paths > 10) {
/* Otherwise, still possibly skip non-favored cases, albeit less often.
The odds of skipping stuff are higher for already-fuzzed inputs and
lower for never-fuzzed entries. */
if (queue_cycle > 1 && (was_fuzzed_map[target_state_index][result->index] == 0)) {
if (UR(100) < SKIP_NFAV_NEW_PROB) continue;
} else {
if (UR(100) < SKIP_NFAV_OLD_PROB) continue;
}
/* Otherwise, this seed is selected */
break;
}
}
} else {
//Do Round-robin if seeds_count of the selected state is small
result = state->seeds[state->selected_seed_index];
state->selected_seed_index++;
if (state->selected_seed_index == state->seeds_count) state->selected_seed_index = 0;
}
如果该状态的种子数较少(≤10)则按照轮询方式处理。
当种子数>10时,按照注释所说,在与AFL同样的种子挑选策略基础上考虑状态机指导。如果该种子既不是初始种子,也不是当前target_state_id作为目标时生成的种子,则以一个大的概率跳过该种子。如果存在被偏好的、没有被fuzz过的新到达的种子,则直接对它们进行测试。
void update_state_aware_variables(struct queue_entry *q, u8 dry_run)
/* Update state-aware variables */
void update_state_aware_variables(struct queue_entry *q, u8 dry_run)
{
khint_t k;
int discard, i;
state_info_t *state;
unsigned int state_count;
if (!response_buf_size || !response_bytes) return;
unsigned int *state_sequence = (*extract_response_codes)(response_buf, response_buf_size, &state_count);
q->unique_state_count = get_unique_state_count(state_sequence, state_count);
if (is_state_sequence_interesting(state_sequence, state_count)) {
//Save the current kl_messages to a file which can be used to replay the newly discovered paths on the ipsm
u8 *temp_str = state_sequence_to_string(state_sequence, state_count);
u8 *fname = alloc_printf("%s/replayable-new-ipsm-paths/id:%s:%s", out_dir, temp_str, dry_run ? basename(q->fname) : "new");
save_kl_messages_to_file(kl_messages, fname, 1, messages_sent);
ck_free(temp_str);
ck_free(fname);
//Update the IPSM graph
if (state_count > 1) {
unsigned int prevStateID = state_sequence[0];
for(i=1; i < state_count; i++) {
unsigned int curStateID = state_sequence[i];
char fromState[STATE_STR_LEN], toState[STATE_STR_LEN];
snprintf(fromState, STATE_STR_LEN, "%d", prevStateID);
snprintf(toState, STATE_STR_LEN, "%d", curStateID);
//Check if the prevStateID and curStateID have been added to the state machine as vertices
//Check also if the edge prevStateID->curStateID has been added
Agnode_t *from, *to;
Agedge_t *edge;
from = agnode(ipsm, fromState, FALSE);
if (!from) {
//Add a node to the graph
from = agnode(ipsm, fromState, TRUE);
if (dry_run) agset(from,"color","blue");
else agset(from,"color","red");
//Insert this newly discovered state into the states hashtable
state_info_t *newState_From = (state_info_t *) ck_alloc (sizeof(state_info_t));
newState_From->id = prevStateID;
newState_From->is_covered = 1;
newState_From->paths = 0;
newState_From->paths_discovered = 0;
newState_From->selected_times = 0;
newState_From->fuzzs = 0;
newState_From->score = 1;
newState_From->selected_seed_index = 0;
newState_From->seeds = NULL;
newState_From->seeds_count = 0;
k = kh_put(hms, khms_states, prevStateID, &discard);
kh_value(khms_states, k) = newState_From;
//Insert this into the state_ids array too
state_ids = (u32 *) ck_realloc(state_ids, (state_ids_count + 1) * sizeof(u32));
state_ids[state_ids_count++] = prevStateID;
if (prevStateID != 0) expand_was_fuzzed_map(1, 0);
}
to = agnode(ipsm, toState, FALSE);
if (!to) {
//Add a node to the graph
to = agnode(ipsm, toState, TRUE);
if (dry_run) agset(to,"color","blue");
else agset(to,"color","red");
//Insert this newly discovered state into the states hashtable
state_info_t *newState_To = (state_info_t *) ck_alloc (sizeof(state_info_t));
newState_To->id = curStateID;
newState_To->is_covered = 1;
newState_To->paths = 0;
newState_To->paths_discovered = 0;
newState_To->selected_times = 0;
newState_To->fuzzs = 0;
newState_To->score = 1;
newState_To->selected_seed_index = 0;
newState_To->seeds = NULL;
newState_To->seeds_count = 0;
k = kh_put(hms, khms_states, curStateID, &discard);
kh_value(khms_states, k) = newState_To;
//Insert this into the state_ids array too
state_ids = (u32 *) ck_realloc(state_ids, (state_ids_count + 1) * sizeof(u32));
state_ids[state_ids_count++] = curStateID;
if (curStateID != 0) expand_was_fuzzed_map(1, 0);
}
//Check if an edge from->to exists
edge = agedge(ipsm, from, to, NULL, FALSE);
if (!edge) {
//Add an edge to the graph
edge = agedge(ipsm, from, to, "new_edge", TRUE);
if (dry_run) agset(edge, "color", "blue");
else agset(edge, "color", "red");
}
//Update prevStateID
prevStateID = curStateID;
}
}
//Update the dot file
s32 fd;
u8* tmp;
tmp = alloc_printf("%s/ipsm.dot", out_dir);
fd = open(tmp, O_WRONLY | O_CREAT, 0600);
if (fd < 0) {
PFATAL("Unable to create %s", tmp);
} else {
ipsm_dot_file = fdopen(fd, "w");
agwrite(ipsm, ipsm_dot_file);
close(fileno(ipsm_dot_file));
ck_free(tmp);
}
}
//Update others no matter the new seed leads to interesting state sequence or not
//Annotate the regions
update_region_annotations(q);
//Update the states hashtable to keep the list of seeds which help us to reach a specific state
//Iterate over the regions & their annotated state (sub)sequences and update the hashtable accordingly
//All seed should "reach" state 0 (initial state) so we add this one to the map first
k = kh_get(hms, khms_states, 0);
if (k != kh_end(khms_states)) {
state = kh_val(khms_states, k);
state->seeds = (void **) ck_realloc (state->seeds, (state->seeds_count + 1) * sizeof(void *));
state->seeds[state->seeds_count] = (void *)q;
state->seeds_count++;
was_fuzzed_map[0][q->index] = 0; //Mark it as reachable but not fuzzed
} else {
PFATAL("AFLNet - the states hashtable should always contain an entry of the initial state");
}
//Now update other states
for(i = 0; i < q->region_count; i++) {
unsigned int regional_state_count = q->regions[i].state_count;
if (regional_state_count > 0) {
//reachable_state_id is the last ID in the state_sequence
unsigned int reachable_state_id = q->regions[i].state_sequence[regional_state_count - 1];
k = kh_get(hms, khms_states, reachable_state_id);
if (k != kh_end(khms_states)) {
state = kh_val(khms_states, k);
state->seeds = (void **) ck_realloc (state->seeds, (state->seeds_count + 1) * sizeof(void *));
state->seeds[state->seeds_count] = (void *)q;
state->seeds_count++;
} else {
//XXX. This branch is supposed to be not reachable
//However, due to some undeterminism, new state could be seen during regions' annotating process
//even though the state was not observed before
//To completely fix this, we should fix all causes leading to potential undeterminism
//For now, we just add the state into the hashtable
state_info_t *newState = (state_info_t *) ck_alloc (sizeof(state_info_t));
newState->id = reachable_state_id;
newState->is_covered = 1;
newState->paths = 0;
newState->paths_discovered = 0;
newState->selected_times = 0;
newState->fuzzs = 0;
newState->score = 1;
newState->selected_seed_index = 0;
newState->seeds = NULL;
newState->seeds = (void **) ck_realloc (newState->seeds, sizeof(void *));
newState->seeds[0] = (void *)q;
newState->seeds_count = 1;
k = kh_put(hms, khms_states, reachable_state_id, &discard);
kh_value(khms_states, k) = newState;
//Insert this into the state_ids array too
state_ids = (u32 *) ck_realloc(state_ids, (state_ids_count + 1) * sizeof(u32));
state_ids[state_ids_count++] = reachable_state_id;
if (reachable_state_id != 0) expand_was_fuzzed_map(1, 0);
}
was_fuzzed_map[get_state_index(reachable_state_id)][q->index] = 0; //Mark it as reachable but not fuzzed
}
}
//Update the number of paths which have traversed a specific state
//It can be used for calculating fuzzing energy
//A hash set is used so that the #paths is not updated more than once for one specific state
khash_t(hs32) *khs_state_ids;
khs_state_ids = kh_init(hs32);
for(i = 0; i < state_count; i++) {
unsigned int state_id = state_sequence[i];
if (kh_get(hs32, khs_state_ids, state_id) != kh_end(khs_state_ids)) {
continue;
} else {
kh_put(hs32, khs_state_ids, state_id, &discard);
k = kh_get(hms, khms_states, state_id);
if (k != kh_end(khms_states)) {
kh_val(khms_states, k)->paths++;
}
}
}
kh_destroy(hs32, khs_state_ids);
//Update paths_discovered
if (!dry_run) {
k = kh_get(hms, khms_states, target_state_id);
if (k != kh_end(khms_states)) {
kh_val(khms_states, k)->paths_discovered++;
}
}
//Free state sequence
if (state_sequence) ck_free(state_sequence);
}
在判断完接收响应包的buffer不为空之后,解析state_sequence然后判断是否是有趣的序列,如果是则先保存该消息序列,再更新IPSM图。这部分由于涉及到graphviz的使用,需要先了解一下graphviz提供的C语言接口。不过这里使用的并不是很复杂,就把整个过程简单写一下。
/* Create the (initially empty) implemented protocol state machine: a directed
   graphviz graph named "g", plus the two hash containers used alongside it. */
void setup_ipsm()
{
  /* Containers for seen state-sequence hashes and for per-state records */
  khs_ipsm_paths = kh_init(hs32);
  khms_states = kh_init(hms);

  ipsm = agopen("g", Agdirected, 0);

  /* Black is the default color for both nodes and edges */
  agattr(ipsm, AGNODE, "color", "black");
  agattr(ipsm, AGEDGE, "color", "black");
}
/* Tear down the state machine: close the graph, drop the path hash set, free
   every per-state record together with its seed list, then free state_ids. */
void destroy_ipsm()
{
  state_info_t *entry;

  agclose(ipsm);
  kh_destroy(hs32, khs_ipsm_paths);

  /* The map stores pointers, so each value is released before the map itself */
  kh_foreach_value(khms_states, entry, {
    ck_free(entry->seeds);
    ck_free(entry);
  });
  kh_destroy(hms, khms_states);

  ck_free(state_ids);
}
运行时,main函数通过调用setup_ipsm()函数创建一个名为ipsm的graph。
在更新的时候,先判断from节点是否存在,如果不存在则创建,之后创建新状态的状态信息并将其插入状态哈希表和状态id列表中。之后将该新状态加入graph中。之后插入新的边。
对于所有种子,即使是不被认为是有趣的种子也需要进行更新。
由于所有种子都是从初始状态开始进行测试,所以首先要更新初始状态的信息,再依次更新其他状态的信息。之后更新被穿过状态的路径数量信息和被发现的路径信息。
然后来看看AFLNET最后一个特殊函数
int send_over_network()
/* Send the (mutated) messages in kl_messages, in order, to the server under
   test and collect its responses.

   Responses are accumulated in response_buf; after each message,
   response_bytes[i] records the accumulated response size. messages_sent is
   reset and counts the messages handed to net_send in this run.

   Returns 1 when no connection to the server could be established (after
   retrying), 0 otherwise. */
int send_over_network()
{
  int n;
  u8 likely_buggy = 0;
  struct sockaddr_in serv_addr;
  struct sockaddr_in local_serv_addr;

  //Clean up the server if needed
  if (cleanup_script) system(cleanup_script);

  //Wait a bit for the server initialization
  usleep(server_wait_usecs);

  //Clear the response buffer and reset the response buffer size
  if (response_buf) {
    ck_free(response_buf);
    response_buf = NULL;
    response_buf_size = 0;
  }

  if (response_bytes) {
    ck_free(response_bytes);
    response_bytes = NULL;
  }

  //Create a TCP/UDP socket
  int sockfd = -1;
  if (net_protocol == PRO_TCP)
    sockfd = socket(AF_INET, SOCK_STREAM, 0);
  else if (net_protocol == PRO_UDP)
    sockfd = socket(AF_INET, SOCK_DGRAM, 0);

  if (sockfd < 0) {
    PFATAL("Cannot create a socket");
  }

  //Set timeout for socket data sending/receiving -- otherwise it causes a big delay
  //if the server is still alive after processing all the requests
  struct timeval timeout;
  timeout.tv_sec = 0;
  timeout.tv_usec = socket_timeout_usecs;
  setsockopt(sockfd, SOL_SOCKET, SO_SNDTIMEO, (char *)&timeout, sizeof(timeout));

  //Zero the address structure (the byte value 0, not the character '0')
  memset(&serv_addr, 0, sizeof(serv_addr));

  serv_addr.sin_family = AF_INET;
  serv_addr.sin_port = htons(net_port);
  serv_addr.sin_addr.s_addr = inet_addr(net_ip);

  //This piece of code is only used for targets that send responses to a specific port number
  //The Kamailio SIP server is an example. After running this code, the intialized sockfd
  //will be bound to the given local port
  if(local_port > 0) {
    memset(&local_serv_addr, 0, sizeof(local_serv_addr));
    local_serv_addr.sin_family = AF_INET;
    local_serv_addr.sin_port = htons(local_port);
    local_serv_addr.sin_addr.s_addr = inet_addr("127.0.0.1");
    if (bind(sockfd, (struct sockaddr*) &local_serv_addr, sizeof(struct sockaddr_in))) {
      FATAL("Unable to bind socket on local source port");
    }
  }

  if(connect(sockfd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) {
    //If it cannot connect to the server under test
    //try it again as the server initial startup time is varied
    for (n=0; n < 1000; n++) {
      if (connect(sockfd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) == 0) break;
      usleep(1000);
    }
    if (n== 1000) {
      close(sockfd);
      return 1;
    }
  }

  //Nothing has been sent in this run yet. Reset the counter before the first
  //receive so that an early jump to HANDLE_RESPONSES does not see a stale value
  messages_sent = 0;

  //retrieve early server response if needed
  if (net_recv(sockfd, timeout, poll_wait_msecs, &response_buf, &response_buf_size)) goto HANDLE_RESPONSES;

  //write the request messages
  kliter_t(lms) *it;

  for (it = kl_begin(kl_messages); it != kl_end(kl_messages); it = kl_next(it)) {
    n = net_send(sockfd, timeout, kl_val(it)->mdata, kl_val(it)->msize);
    messages_sent++;

    //Allocate memory to store new accumulated response buffer size
    response_bytes = (u32 *) ck_realloc(response_bytes, messages_sent * sizeof(u32));

    //Jump out if something wrong leading to incomplete message sent
    if (n != kl_val(it)->msize) {
      goto HANDLE_RESPONSES;
    }

    //retrieve server response
    u32 prev_buf_size = response_buf_size;
    if (net_recv(sockfd, timeout, poll_wait_msecs, &response_buf, &response_buf_size)) {
      goto HANDLE_RESPONSES;
    }

    //Update accumulated response buffer size
    response_bytes[messages_sent - 1] = response_buf_size;

    //set likely_buggy flag if AFLNet does not receive any feedback from the server
    //it could be a signal of a potentiall server crash, like the case of CVE-2019-7314
    if (prev_buf_size == response_buf_size) likely_buggy = 1;
    else likely_buggy = 0;
  }

HANDLE_RESPONSES:

  //Collect whatever is still pending and attribute it to the last message sent
  net_recv(sockfd, timeout, poll_wait_msecs, &response_buf, &response_buf_size);

  if (messages_sent > 0 && response_bytes != NULL) {
    response_bytes[messages_sent - 1] = response_buf_size;
  }

  //wait a bit letting the server to complete its remaining task(s)
  memset(session_virgin_bits, 255, MAP_SIZE);
  while(1) {
    if (has_new_bits(session_virgin_bits) != 2) break;
  }

  close(sockfd);

  if (likely_buggy && false_negative_reduction) return 0;

  if (terminate_child && (child_pid > 0)) kill(child_pid, SIGTERM);

  //give the server a bit more time to gracefully terminate
  //Only poll a real child: kill() with a pid <= 0 addresses process groups,
  //so the ESRCH exit condition below might never be met
  if (child_pid > 0) {
    while(1) {
      int status = kill(child_pid, 0);
      if ((status != 0) && (errno == ESRCH)) break;
    }
  }

  return 0;
}
过程不复杂,首先必要时清空服务器,然后等待一段时间待服务器初始化完毕。之后清空响应报文buffer并重置buffer的大小。然后重新创建一个TCP/UDP的socket,之后配置基本参数并设置数据发送接收的延迟时间。
之后进行数据发送,每发送一条消息申请内存用于存储返回的响应报文,如果出现不完整消息发送或者未接收到返回报文则跳到HANDLE_RESPONSES部分处理,之后更新buffer的大小。如果接收到了报文但是报文为空
//set likely_buggy flag if AFLNet does not receive any feedback from the server
//it could be a signal of a potentiall server crash, like the case of CVE-2019-7314
if (prev_buf_size == response_buf_size) likely_buggy = 1;
else likely_buggy = 0;
则认为可能服务器出现了crash,设置一个likely_buggy标志。
HANDLE_RESPONSES:
net_recv(sockfd, timeout, poll_wait_msecs, &response_buf, &response_buf_size);
if (messages_sent > 0 && response_bytes != NULL) {
response_bytes[messages_sent - 1] = response_buf_size;
}
//wait a bit letting the server to complete its remaining task(s)
memset(session_virgin_bits, 255, MAP_SIZE);
while(1) {
if (has_new_bits(session_virgin_bits) != 2) break;
}
close(sockfd);
if (likely_buggy && false_negative_reduction) return 0;
if (terminate_child && (child_pid > 0)) kill(child_pid, SIGTERM);
//give the server a bit more time to gracefully terminate
while(1) {
int status = kill(child_pid, 0);
if ((status != 0) && (errno == ESRCH)) break;
}
return 0;
HANDLE_RESPONSES部分是错误处理部分,首先将响应报文buffer的最后一条消息的字节数设置为最后一次更新时的响应报文buffer大小(即最后一条完整的响应报文接收到后buffer的大小),之后等待一小段时间等待服务完成剩余工作,之后对于子进程返回0,对于父进程杀死子进程,之后等待服务器优雅的退出后杀死服务器进程然后退出。