snort 之AC匹配算法

0x01 缘起

最近项目中用到了AC(Aho-Corasick)多模式匹配算法,此前已转载过几篇相关文章用于学习;由此想到项目中一直在使用、却尚未深入分析的开源IDS项目snort,于是研究了其中高效的AC匹配实现。下文主要结合源码进行讲解,并在源码中添加了注释。

写成这篇文章,也是做一个mark,阅读下源码

0x02源码

/* $Id$ */
/*
 ** Copyright (C) 2014-2016 Cisco and/or its affiliates. All rights reserved.
 ** Copyright (C) 2002-2013 Sourcefire, Inc.
 ** Copyright (C) 2002 Martin Roesch <roesch@sourcefire.com>
 **
 ** This program is free software; you can redistribute it and/or modify
 ** it under the terms of the GNU General Public License Version 2 as
 ** published by the Free Software Foundation.  You may not use, modify or
 ** distribute this program under any other version of the GNU General
 ** Public License.
 **
 ** This program is distributed in the hope that it will be useful,
 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ** GNU General Public License for more details.
 **
 ** You should have received a copy of the GNU General Public License
 ** along with this program; if not, write to the Free Software
 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

/*
 **   ACSMX.H
 **
 **
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sf_types.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifndef ACSMX_H
#define ACSMX_H

/*
 *   Constants
 */

/* Number of distinct input byte values; each state holds one transition per value. */
#define ALPHABET_SIZE    256

/*
 * Marker stored in a NextState slot that has no transition yet.
 * Parenthesized so the negative literal stays a single operand wherever
 * the macro is expanded.
 */
#define ACSM_FAIL_STATE   (-1)

/*
 * Reference-counted holder for the caller's opaque pattern id.
 * One record is allocated per added pattern and is shared by every
 * match-list entry copied from that pattern.
 */
typedef struct _acsm_userdata
{
		uint32_t ref_count;   /* number of match-list entries sharing this record */
		void *id;             /* opaque caller value passed back to the Match callback */

} ACSM_USERDATA;

/*
 * One search pattern. The same structure is used both for the master
 * pattern list and for the per-state match lists (which hold copies).
 */
typedef struct _acsm_pattern
{

		struct _acsm_pattern *next;  /* next node in the linked list */
		unsigned char *patrn;        /* pattern bytes converted to upper case */
		unsigned char *casepatrn;    /* pattern bytes exactly as supplied by the caller */
		int n;                       /* pattern length in bytes */
		int nocase;					 /* caller-supplied case flag; stored here, not read by the matcher itself */
		int offset;					 /* caller-supplied value; stored but not interpreted in this file */
		int depth;					 /* caller-supplied value; stored but not interpreted in this file */
		int negative;				 /* non-zero: compile step feeds this pattern to neg_list_func instead of build_tree */
		ACSM_USERDATA *udata;		 /* shared, reference-counted user data */
		int iid;					 /* caller-supplied pattern id */
		void * rule_option_tree;     /* opaque tree filled in by the build_tree callback, handed to Match */
		void * neg_list;             /* opaque list filled in by neg_list_func, handed to Match */

} ACSM_PATTERN;

/* One row of the state machine table: goto, failure and output data for a state. */
typedef struct
{

		/* goto function */
		/* Next state - based on input character */
		int NextState[ALPHABET_SIZE];

		/* failure function */
		/* Failure state - used while building NFA & DFA  */
		int FailState;

		/* output function */
		/* List of patterns that end here, if any */
		ACSM_PATTERN *MatchList;

} ACSM_STATETABLE;

/*
 * State machine Struct
 *
 * Holds the pattern list, the compiled state table and the callbacks used
 * to release caller-owned data when the machine is freed.
 */
typedef struct
{

		int acsmMaxStates; 	/* number of rows allocated in acsmStateTable */
		int acsmNumStates;	/* index of the most recently created state (state 0 is the root) */

		ACSM_PATTERN * acsmPatterns;  /* linked list of added patterns */
		ACSM_STATETABLE * acsmStateTable;  /* state table, allocated by the compile step */

		int bcSize;          /* not referenced by the functions in this file */
		short bcShift[256];  /* not referenced by the functions in this file */

		int numPatterns;      /* total number of added patterns */
		void (*userfree)(void *p);           /* releases a pattern's id; may be NULL */
		void (*optiontreefree)(void **p);    /* releases a rule_option_tree; may be NULL */
		void (*neg_list_free)(void **p);     /* releases a neg_list; may be NULL */

} ACSM_STRUCT;

/*
 *   Prototypes
 */

/* Allocate an empty state machine and remember the three release callbacks. */
ACSM_STRUCT * acsmNew(void(*userfree)(void *p), void(*optiontreefree)(void **p), void(*neg_list_free)(void **p));

/* Copy pattern `pat` (length `n`) into the machine's pattern list. Returns 0 on success. */
int acsmAddPattern(ACSM_STRUCT * p, unsigned char * pat, int n, int nocase, int offset, int depth, int negative, void * id, int iid);

/* Build the state table from the added patterns; the callbacks are used only when both are non-NULL. */
int acsmCompile(ACSM_STRUCT * acsm, int(*build_tree)(void * id, void **existing_tree), int(*neg_list_func)(void *id, void **list));
struct _SnortConfig;
/* Same as acsmCompile, but build_tree additionally receives the configuration pointer. */
int acsmCompileWithSnortConf(struct _SnortConfig *, ACSM_STRUCT * acsm, int(*build_tree)(struct _SnortConfig *, void * id, void **existing_tree), int(*neg_list_func)(void *id, void **list));

/* Scan `n` bytes of `T` starting from *current_state; Match is invoked for states that have a match list. */
int acsmSearch(ACSM_STRUCT * acsm, unsigned char * T, int n, int(*Match)(void * id, void *tree, int index, void *data, void *neg_list), void * data, int* current_state);

/* Release the machine together with its state table and pattern list. */
void acsmFree(ACSM_STRUCT * acsm);
/* Return the number of patterns added so far. */
int acsmPatternCount(ACSM_STRUCT * acsm);

/* Placeholder: prints nothing and returns 0. */
int acsmPrintDetailInfo(ACSM_STRUCT *);

/* Returns 0; its reporting code is compiled out. */
int acsmPrintSummaryInfo(void);

#endif

/*
 **
 ** $Id$
 **
 ** Multi-Pattern Search Engine
 **
 ** Aho-Corasick State Machine -  uses a Deterministic Finite Automata - DFA
 **
 ** Copyright (C) 2014-2016 Cisco and/or its affiliates. All rights reserved.
 ** Copyright (C) 2002-2013 Sourcefire, Inc.
 ** Marc Norton
 **
 **
 ** This program is free software; you can redistribute it and/or modify
 ** it under the terms of the GNU General Public License Version 2 as
 ** published by the Free Software Foundation.  You may not use, modify or
 ** distribute this program under any other version of the GNU General
 ** Public License.
 **
 ** This program is distributed in the hope that it will be useful,
 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ** GNU General Public License for more details.
 **
 ** You should have received a copy of the GNU General Public License
 ** along with this program; if not, write to the Free Software
 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 **
 **
 **   Reference - Efficient String matching: An Aid to Bibliographic Search
 **               Alfred V Aho and Margaret J Corasick
 **               Bell Laboratories
 **               Copyright(C) 1975 Association for Computing Machinery,Inc
 **
 **   Implemented from the 4 algorithms in the paper by Aho & Corasick
 **   and some implementation ideas from 'Practical Algorithms in C'
 **
 **   Notes:
 **     1) This version uses about 1024 bytes per pattern character - heavy  on the memory.
 **     2) This algorithm finds all occurrences of all patterns within a
 **        body of text.
 **     3) Support is included to handle upper and lower case matching.
 **     4) Some compilers optimize the search routine well, others don't, this makes all the difference.
 **     5) Aho inspects all bytes of the search text, but only once so it's very efficient,
 **        if the patterns are all large then the Modified Wu-Manber method is often faster.
 **     6) I don't subscribe to any one method is best for all searching needs,
 **        the data decides which method is best,
 **        and we don't know until after the search method has been tested on the specific data sets.
 **
 **
 **  May 2002  : Marc Norton 1st Version
 **  June 2002 : Modified interface for SNORT, added case support
 **  Aug 2002  : Cleaned up comments, and removed dead code.
 **  Nov 2,2002: Fixed queue_init() , added count=0
 **
 **
 */

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "acsmx.h"
#include "util.h"
#include "snort_debug.h"

#ifdef DYNAMIC_PREPROC_CONTEXT
#include "sf_dynamic_preprocessor.h"
#endif //DYNAMIC_PREPROC_CONTEXT
/*
 * MEMASSERT(p, s): terminate the process when allocation result `p` is
 * NULL, printing `s` (the name of the calling routine) to stderr.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe inside an unbraced if/else), with both arguments parenthesized.
 * The process exits with a failure status, since running out of memory
 * is not a successful termination.
 */
#define MEMASSERT(p,s) \
	do \
	{ \
		if (!(p)) \
		{ \
			fprintf(stderr, "ACSM-No Memory: %s!\n", (s)); \
			exit(EXIT_FAILURE); \
		} \
	} while (0)

#ifdef DEBUG_AC
/* Debug builds only: running total of the byte counts AC_MALLOC allocated successfully. */
static int max_memory = 0;
#endif

/*static void Print_DFA( ACSM_STRUCT * acsm );*/

/*
 * Allocate `n` zero-filled bytes.
 * Returns the new block, or NULL when calloc fails. In DEBUG_AC builds
 * every successful request is added to max_memory.
 */
static void *
AC_MALLOC(int n)
{
	void *block = calloc(1, n);

#ifdef DEBUG_AC
	if (block != NULL)
	{
		max_memory += n;
	}
#endif

	return block;
}

/*
 * Release a block obtained from AC_MALLOC. A NULL pointer is accepted
 * and ignored (free() itself is a no-op for NULL).
 */
static void AC_FREE(void *p)
{
	free(p);
}

/*
 *    Simple QUEUE NODE
 *    One singly linked element of the breadth-first work queue.
 */
typedef struct _qnode
{
	int state;               /* state number waiting to be processed */
	struct _qnode *next;     /* following node, or 0 for the last one */
} QNODE;

/*
 *    Simple QUEUE Structure
 *    FIFO of state numbers: items are appended at tail and removed from head.
 */
typedef struct _queue
{
	QNODE * head, *tail;     /* first and last node; both 0 when the queue is empty */
	int count;               /* number of nodes currently queued */
} QUEUE;

/*
 *  Put a queue into the empty state: no nodes, zero count.
 */
static void queue_init(QUEUE * s)
{
	s->count = 0;
	s->head = 0;
	s->tail = 0;
}

/*
 *  Append `state` at the tail of the queue.
 *  The process is terminated through MEMASSERT if the node cannot be allocated.
 */
static void queue_add(QUEUE * s, int state)
{
	QNODE *node = (QNODE *) AC_MALLOC(sizeof(QNODE));

	MEMASSERT (node, "queue_add");
	node->state = state;
	node->next = 0;

	if (s->head)
	{
		/* Non-empty queue: link behind the current tail. */
		s->tail->next = node;
	}
	else
	{
		/* Empty queue: the new node is also the head. */
		s->head = node;
	}
	s->tail = node;
	s->count += 1;
}

/*
 *  Detach the head node and return the state it carried.
 *  Returns 0 when the queue is empty.
 */
static int queue_remove(QUEUE * s)
{
	QNODE *front = s->head;
	int value;

	if (!front)
	{
		return 0;
	}

	value = front->state;
	s->head = front->next;

	if (s->head)
	{
		s->count -= 1;
	}
	else
	{
		/* Last node removed: reset the queue to its empty state. */
		s->tail = 0;
		s->count = 0;
	}

	AC_FREE(front);
	return value;
}

/*
 *  Report how many states are currently queued.
 */
static int queue_count(QUEUE * s)
{
	int queued = s->count;

	return queued;
}

/*
 *  Drain the queue, releasing every remaining node.
 */
static void queue_free(QUEUE * s)
{
	while (queue_count(s) != 0)
	{
		(void) queue_remove(s);
	}
}

/*
 ** Case Translation Table
 ** xlatcase[b] holds the upper-case form of byte value b.
 */
static unsigned char xlatcase[256];

/*
 * Fill the case translation table so that later case folding is a single
 * array lookup per byte instead of a toupper() call.
 *
 * Declared with a (void) parameter list so the compiler rejects calls
 * that pass arguments; an empty "()" leaves the function unprototyped.
 */
static void init_xlatcase(void)
{
	int i;
	for (i = 0; i < 256; i++)
	{
		xlatcase[i] = (unsigned char) toupper(i);
	}
}

/*
 *  Write the upper-case translation of the first `m` bytes of `s` into `d`.
 *  Nothing is written when `m` is zero or negative.
 */
static inline void ConvertCaseEx(unsigned char *d, unsigned char *s, int m)
{
	unsigned char *out = d;
	const unsigned char *in = s;
	int left;

	for (left = m; left > 0; left--)
	{
		*out++ = xlatcase[*in++];
	}
}

/*
 *  Duplicate a match-list entry.
 *  The duplicate shares the source entry's udata record, so that record's
 *  reference count is raised by one. The duplicate's `next` link is cleared.
 *  Terminates the process through MEMASSERT when allocation fails.
 */
static ACSM_PATTERN *
CopyMatchListEntry(ACSM_PATTERN * px)
{
	ACSM_PATTERN *dup = (ACSM_PATTERN *) AC_MALLOC(sizeof(ACSM_PATTERN));

	MEMASSERT (dup, "CopyMatchListEntry");

	memcpy(dup, px, sizeof(ACSM_PATTERN));
	dup->next = 0;
	px->udata->ref_count += 1;

	return dup;
}

/*
 *  Record that pattern `px` terminates at `state`.
 *  A copy of the pattern node is pushed onto the front of that state's
 *  match list (the output function of the automaton). The copy shares
 *  px's udata record; the reference count is not changed here.
 */
static void AddMatchListEntry(ACSM_STRUCT * acsm, int state, ACSM_PATTERN * px)
{
	ACSM_STATETABLE *row;
	ACSM_PATTERN *entry = (ACSM_PATTERN *) AC_MALLOC(sizeof(ACSM_PATTERN));

	MEMASSERT (entry, "AddMatchListEntry");
	memcpy(entry, px, sizeof(ACSM_PATTERN));

	row = &acsm->acsmStateTable[state];
	entry->next = row->MatchList;
	row->MatchList = entry;
}

/*
 *  Add Pattern States
 *
 *  Insert one pattern into the keyword trie, building the goto function
 *  and the output entry for the pattern's final state. The upper-cased
 *  pattern bytes (p->patrn) are used.
 *
 *  Example: inserting "HE", "SHE", "HIS", "HERS" in that order numbers the
 *  states like this:
 *
 *          H        E        R        S
 *      0 -----> 1 -----> 2 -----> 8 -----> 9
 *      |        |   I        S
 *      |        +-----> 6 -----> 7
 *      |   S        H        E
 *      +-----> 3 -----> 4 -----> 5
 */
static void AddPatternStates(ACSM_STRUCT * acsm, ACSM_PATTERN * p)
{
	unsigned char *cursor = p->patrn;
	int remaining = p->n;
	int current = 0;

	/*
	 *  Follow the transitions that already exist for the leading bytes of
	 *  the pattern; stop at the first byte that has no transition yet.
	 */
	while (remaining > 0)
	{
		int target = acsm->acsmStateTable[current].NextState[*cursor];

		if (target == ACSM_FAIL_STATE)
		{
			break;
		}

		current = target;
		cursor++;
		remaining--;
	}

	/*
	 *  Create one new state for every pattern byte that is left.
	 */
	while (remaining > 0)
	{
		acsm->acsmNumStates += 1;
		acsm->acsmStateTable[current].NextState[*cursor] = acsm->acsmNumStates;
		current = acsm->acsmNumStates;
		cursor++;
		remaining--;
	}

	/* The state reached last is where this pattern is reported. */
	AddMatchListEntry(acsm, current, p);
}

/*
 *   Build Non-Deterministic Finite Automata
 *
 *   Walks the trie breadth-first (using a queue) and, for every state,
 *   computes its failure state and prepends copies of the failure state's
 *   match list to the state's own match list.
 *
 *   Expects every missing transition of state 0 to already point back to
 *   state 0; _acsmCompile arranges this before calling. That is what lets
 *   the failure-chain walk below terminate.
 */
static void Build_NFA(ACSM_STRUCT * acsm)
{
	ACSM_STATETABLE *table = acsm->acsmStateTable;
	QUEUE pending;
	int ch;

	queue_init(&pending);

	/* Depth-1 states: queue them; their failure state is the root. */
	for (ch = 0; ch < ALPHABET_SIZE; ch++)
	{
		int child = table[0].NextState[ch];

		if (child == 0)
		{
			continue;
		}

		queue_add(&pending, child);
		table[child].FailState = 0;
	}

	/* Process the remaining states level by level. */
	while (queue_count(&pending) > 0)
	{
		int parent = queue_remove(&pending);

		for (ch = 0; ch < ALPHABET_SIZE; ch++)
		{
			int child = table[parent].NextState[ch];
			int fallback;
			int target;
			ACSM_PATTERN *entry;

			if (child == ACSM_FAIL_STATE)
			{
				continue;
			}

			/* A real transition leads one level deeper; visit it later. */
			queue_add(&pending, child);

			/*
			 *  Starting from the parent's failure state, follow failure
			 *  links until a state with a transition on `ch` is found.
			 */
			fallback = table[parent].FailState;
			target = table[fallback].NextState[ch];
			while (target == ACSM_FAIL_STATE)
			{
				fallback = table[fallback].FailState;
				target = table[fallback].NextState[ch];
			}

			/* That transition's destination is the child's failure state. */
			table[child].FailState = target;

			/*
			 *  Patterns ending at the failure state also end at the child.
			 *  The entries are copied (not shared) so each list can be
			 *  released independently later.
			 */
			for (entry = table[target].MatchList; entry != NULL; entry = entry->next)
			{
				ACSM_PATTERN *dup = CopyMatchListEntry(entry);

				if (!dup)
				{
					FatalError("*** Out of memory Initializing Aho Corasick in acsmx.c ****");
				}

				/* Insert at front of the child's MatchList */
				dup->next = table[child].MatchList;
				table[child].MatchList = dup;
			}
		}
	}

	/* Clean up the queue */
	queue_free(&pending);
}

/*
 *   Build Deterministic Finite Automata from NFA
 *
 *   Visits the states breadth-first and replaces every missing transition
 *   of a state with the transition its failure state takes on the same
 *   byte, so that searching never has to follow failure links.
 */
static void Convert_NFA_To_DFA(ACSM_STRUCT * acsm)
{
	ACSM_STATETABLE *table = acsm->acsmStateTable;
	QUEUE pending;
	int ch;

	queue_init(&pending);

	/* Seed the queue with the states reachable directly from state 0. */
	for (ch = 0; ch < ALPHABET_SIZE; ch++)
	{
		int child = table[0].NextState[ch];

		if (child != 0)
		{
			queue_add(&pending, child);
		}
	}

	/* Fill in the transitions one layer at a time. */
	while (queue_count(&pending) > 0)
	{
		int current = queue_remove(&pending);

		for (ch = 0; ch < ALPHABET_SIZE; ch++)
		{
			int target = table[current].NextState[ch];

			if (target == ACSM_FAIL_STATE)
			{
				/* No own transition: borrow the failure state's. */
				int fail = table[current].FailState;

				table[current].NextState[ch] = table[fail].NextState[ch];
			}
			else
			{
				queue_add(&pending, target);
			}
		}
	}

	/* Clean up the queue */
	queue_free(&pending);
}

/*
 *  Create an empty Aho-Corasick state machine.
 *
 *  Also (re)initializes the shared case translation table. The three
 *  callbacks are stored for use by acsmFree(). Terminates the process
 *  through MEMASSERT when the structure cannot be allocated.
 */
ACSM_STRUCT * acsmNew(void(*userfree)(void *p), void(*optiontreefree)(void **p), void(*neg_list_free)(void **p))
{
	ACSM_STRUCT *machine;

	/* Prepare the upper-case lookup table. */
	init_xlatcase();

	machine = (ACSM_STRUCT *) AC_MALLOC(sizeof(ACSM_STRUCT));
	MEMASSERT (machine, "acsmNew");
	if (!machine)
	{
		return machine;
	}

	memset(machine, 0, sizeof(ACSM_STRUCT));
	machine->neg_list_free = neg_list_free;
	machine->optiontreefree = optiontreefree;
	machine->userfree = userfree;

	return machine;
}

/*
 *   Add a pattern to the list of patterns for this state machine.
 *
 *   The pattern is only copied and queued here; the goto, failure and
 *   output tables are built later by the compile step.
 *
 *   p        - state machine to extend
 *   pat      - pattern bytes (may be NULL only when n is 0)
 *   n        - pattern length in bytes, must not be negative
 *   nocase, offset, depth, negative, iid - stored in the pattern node as given
 *   id       - opaque caller value, stored in a fresh reference-counted record
 *
 *   Returns 0 on success and -1 when the arguments are invalid.
 *   Terminates the process through MEMASSERT when memory runs out.
 */
int acsmAddPattern(ACSM_STRUCT * p, unsigned char *pat, int n, int nocase, int offset, int depth, int negative, void * id, int iid)
{
	ACSM_PATTERN * plist;
	int alloc_len;

	if (p == NULL || n < 0 || (pat == NULL && n > 0))
	{
		return -1;
	}

	/*
	 * A zero-byte request may legitimately yield NULL, which MEMASSERT
	 * would mistake for exhaustion, so always ask for at least one byte.
	 */
	alloc_len = (n > 0) ? n : 1;

	plist = (ACSM_PATTERN *) AC_MALLOC(sizeof(ACSM_PATTERN));
	MEMASSERT (plist, "acsmAddPattern");

	/* Upper-cased copy, used to build the automaton. */
	plist->patrn = (unsigned char *) AC_MALLOC(alloc_len);
	MEMASSERT (plist->patrn, "acsmAddPattern");
	ConvertCaseEx(plist->patrn, pat, n);

	/* Verbatim copy, preserving the caller's case. */
	plist->casepatrn = (unsigned char *) AC_MALLOC(alloc_len);
	MEMASSERT (plist->casepatrn, "acsmAddPattern");
	if (n > 0)
	{
		memcpy(plist->casepatrn, pat, n);
	}

	/* User data record, initially referenced once. */
	plist->udata = (ACSM_USERDATA *) AC_MALLOC(sizeof(ACSM_USERDATA));
	MEMASSERT (plist->udata, "acsmAddPattern");
	plist->udata->ref_count = 1;
	plist->udata->id = id;

	plist->n = n;
	plist->nocase = nocase;
	plist->negative = negative;
	plist->offset = offset;
	plist->depth = depth;
	plist->iid = iid;

	/* Push onto the front of the machine's pattern list. */
	plist->next = p->acsmPatterns;
	p->acsmPatterns = plist;
	p->numPatterns++;
	return 0;
}

/*
 *  Build the per-state match data.
 *
 *  For every state that has a match list, each entry with a non-NULL id
 *  is handed to neg_list_func (negative patterns) or build_tree (all
 *  others). The resulting list/tree is always stored in the FIRST entry
 *  of the state's match list. A final build_tree(NULL, ...) call
 *  finishes the tree of each such state.
 *
 *  Returns the number of match-list entries visited.
 */
static int acsmBuildMatchStateTrees(ACSM_STRUCT * acsm, int(*build_tree)(void * id, void **existing_tree), int(*neg_list_func)(void *id, void **list))
{
	int visited = 0;
	int state;

	for (state = 0; state < acsm->acsmMaxStates; state++)
	{
		ACSM_PATTERN *head = acsm->acsmStateTable[state].MatchList;
		ACSM_PATTERN *entry;

		/* Only states that have a MatchList are of interest. */
		if (head == NULL)
		{
			continue;
		}

		for (entry = head; entry != NULL; entry = entry->next)
		{
			void *id = entry->udata->id;

			visited++;

			if (!id)
			{
				continue;
			}

			if (entry->negative)
			{
				neg_list_func(id, &head->neg_list);
			}
			else
			{
				build_tree(id, &head->rule_option_tree);
			}
		}

		/* Last call to finalize the tree */
		build_tree(NULL, &head->rule_option_tree);
	}

	return visited;
}

/*
 *  Variant of acsmBuildMatchStateTrees whose build_tree callback also
 *  receives the configuration pointer `sc`.
 *
 *  For every state that has a match list, each entry with a non-NULL id
 *  is handed to neg_list_func (negative patterns) or build_tree (all
 *  others); results are stored in the first entry of the state's match
 *  list, and a final build_tree(sc, NULL, ...) call finishes the tree.
 *
 *  Returns the number of match-list entries visited.
 */
static int acsmBuildMatchStateTreesWithSnortConf(struct _SnortConfig *sc, ACSM_STRUCT * acsm, int(*build_tree)(struct _SnortConfig *, void * id, void **existing_tree), int(*neg_list_func)(void *id, void **list))
{
	int visited = 0;
	int state;

	for (state = 0; state < acsm->acsmMaxStates; state++)
	{
		ACSM_PATTERN *head = acsm->acsmStateTable[state].MatchList;
		ACSM_PATTERN *entry;

		/* Only states that have a MatchList are of interest. */
		if (head == NULL)
		{
			continue;
		}

		for (entry = head; entry != NULL; entry = entry->next)
		{
			void *id = entry->udata->id;

			visited++;

			if (!id)
			{
				continue;
			}

			if (entry->negative)
			{
				neg_list_func(id, &head->neg_list);
			}
			else
			{
				build_tree(sc, id, &head->rule_option_tree);
			}
		}

		/* Last call to finalize the tree */
		build_tree(sc, NULL, &head->rule_option_tree);
	}

	return visited;
}

/*
 *   Compile State Machine
 *
 *   Builds the state table for all patterns added so far:
 *     1. size the table (one root state plus one state per pattern byte),
 *     2. mark every transition as ACSM_FAIL_STATE,
 *     3. insert each pattern into the trie (goto + output),
 *     4. make state 0 loop to itself on bytes that start no pattern,
 *     5. compute failure links (NFA) and resolve them into direct
 *        transitions (DFA).
 *
 *   Returns 0 on success, or -1 when the pattern set is too large for the
 *   table size to be expressed in the `int` that AC_MALLOC accepts (or a
 *   pattern has a negative length); in that case the machine is left
 *   untouched. Terminates the process through MEMASSERT when the table
 *   cannot be allocated.
 */
static inline int _acsmCompile(ACSM_STRUCT * acsm)
{
	int i, k;
	int max_states = 1;
	ACSM_PATTERN * plist;
	ACSM_STATETABLE * table;

	/* Upper bound on the number of states: the root plus the total
	 * length of all patterns. Guard the sum against int overflow. */
	for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
	{
		if (plist->n < 0 || plist->n > INT_MAX - max_states)
		{
			return -1;
		}
		max_states += plist->n;
	}

	/* AC_MALLOC takes an int: refuse sizes that would be truncated and
	 * lead to an undersized table. */
	if ((size_t) max_states > (size_t) INT_MAX / sizeof(ACSM_STATETABLE))
	{
		return -1;
	}

	/* Allocate the state table: each row carries goto, failure and output data. */
	table = (ACSM_STATETABLE *) AC_MALLOC((int) (sizeof(ACSM_STATETABLE) * (size_t) max_states));
	MEMASSERT (table, "acsmCompile");
	memset(table, 0, sizeof(ACSM_STATETABLE) * (size_t) max_states);

	/* Initialize all States NextStates to FAILED */
	for (k = 0; k < max_states; k++)
	{
		for (i = 0; i < ALPHABET_SIZE; i++)
		{
			table[k].NextState[i] = ACSM_FAIL_STATE;
		}
	}

	/* Nothing can fail from here on, so publish the new table. */
	acsm->acsmMaxStates = max_states;
	acsm->acsmStateTable = table;

	/* Initialize state zero as a branch */
	acsm->acsmNumStates = 0;

	/* Add each Pattern to the State Table (goto and output) */
	for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
	{
		AddPatternStates(acsm, plist);
	}

	/* Set all failed state transitions of state 0 to return to state 0 */
	for (i = 0; i < ALPHABET_SIZE; i++)
	{
		if (acsm->acsmStateTable[0].NextState[i] == ACSM_FAIL_STATE)
		{
			acsm->acsmStateTable[0].NextState[i] = 0;
		}
	}

	/* Build the NFA (failure links and merged match lists) */
	Build_NFA(acsm);

	/* Convert the NFA to a DFA (every state gets a transition for every byte) */
	Convert_NFA_To_DFA(acsm);

	return 0;
}

/*
 *  Compile the state machine and, when both callbacks are supplied,
 *  build the per-state match data with them.
 *  Returns the non-zero result of the table build if it fails, else 0.
 */
int acsmCompile(ACSM_STRUCT * acsm, int(*build_tree)(void * id, void **existing_tree), int(*neg_list_func)(void *id, void **list))
{
	int rval = _acsmCompile(acsm);

	if (rval != 0)
	{
		return rval;
	}

	if (!build_tree || !neg_list_func)
	{
		return 0;
	}

	acsmBuildMatchStateTrees(acsm, build_tree, neg_list_func);
	return 0;
}

/*
 *  Same as acsmCompile(), except that the build_tree callback is also
 *  given the configuration pointer `sc`.
 *  Returns the non-zero result of the table build if it fails, else 0.
 */
int acsmCompileWithSnortConf(struct _SnortConfig *sc, ACSM_STRUCT * acsm, int(*build_tree)(struct _SnortConfig *, void * id, void **existing_tree), int(*neg_list_func)(void *id, void **list))
{
	int rval = _acsmCompile(acsm);

	if (rval != 0)
	{
		return rval;
	}

	if (!build_tree || !neg_list_func)
	{
		return 0;
	}

	acsmBuildMatchStateTreesWithSnortConf(sc, acsm, build_tree, neg_list_func);
	return 0;
}

/*
 *   Search Text or Binary Data for Pattern matches
 *
 *   Feeds the `n` bytes at `Tx` through the compiled automaton, starting
 *   in *current_state, and stores the final state back there so a later
 *   call can continue where this one stopped.
 *
 *   Each input byte is upper-cased through the translation table as it is
 *   consumed. Earlier revisions first copied the whole text into a fixed
 *   64 KiB static buffer, which overflowed for larger inputs and made the
 *   function non-reentrant; translating on the fly has neither limit.
 *   Matching is therefore always done on upper-cased data; the per-pattern
 *   nocase flag is not consulted here.
 *
 *   Whenever the automaton enters a state with a match list, Match is
 *   called once with the FIRST entry of that list: its id, its rule
 *   option tree, the text index where that pattern starts, the caller's
 *   `data`, and its negative list. A return value > 0 from Match stops
 *   the search immediately.
 *
 *   Returns the number of Match invocations made, or 0 when current_state
 *   is NULL, the machine has not been compiled, or there is no input.
 */
int acsmSearch(ACSM_STRUCT * acsm, unsigned char *Tx, int n, int(*Match)(void * id, void *tree, int index, void *data, void *neg_list), void *data, int* current_state)
{
	ACSM_STATETABLE * StateTable;
	ACSM_PATTERN * mlist;
	const unsigned char *T;
	const unsigned char *Tend;
	int state;
	int nfound = 0;
	int index;

	if (!current_state)
	{
		return 0;
	}

	if (acsm == NULL || acsm->acsmStateTable == NULL || Tx == NULL || n <= 0)
	{
		return 0;
	}

	StateTable = acsm->acsmStateTable;
	state = *current_state;
	Tend = Tx + n;

	for (T = Tx; T < Tend; T++)
	{
		/* Case conversion happens per byte, at the point of lookup. */
		state = StateTable[state].NextState[xlatcase[*T]];

		mlist = StateTable[state].MatchList;
		if (mlist != NULL)
		{
			/* T is the last byte of the match; step back to its first byte. */
			index = (int) (T - Tx) - mlist->n + 1;
			nfound++;
			if (Match(mlist->udata->id, mlist->rule_option_tree, index, data, mlist->neg_list) > 0)
			{
				*current_state = state;
				return nfound;
			}
		}
	}

	*current_state = state;
	return nfound;
}

/*
 *   Free all memory
 *
 *   Releases every match-list entry (dropping one reference on its shared
 *   udata record and freeing the record, plus the caller's id through
 *   userfree, when the count reaches zero), the option trees and negative
 *   lists through their callbacks, the state table, the pattern list and
 *   finally the machine itself.
 *
 *   A NULL `acsm` is ignored. If the machine was never compiled, no match
 *   list holds the patterns' udata records, so they are released together
 *   with the patterns instead of being leaked.
 */
void acsmFree(ACSM_STRUCT * acsm)
{
	int i;
	int compiled;
	ACSM_PATTERN * mlist, *ilist;

	if (acsm == NULL)
	{
		return;
	}

	compiled = (acsm->acsmStateTable != NULL);

	if (compiled)
	{
		for (i = 0; i < acsm->acsmMaxStates; i++)
		{
			mlist = acsm->acsmStateTable[i].MatchList;
			while (mlist)
			{
				ilist = mlist;
				mlist = mlist->next;

				/* The udata record is shared between copies of an entry. */
				ilist->udata->ref_count--;
				if (ilist->udata->ref_count == 0)
				{
					if (acsm->userfree && ilist->udata->id) acsm->userfree(ilist->udata->id);

					AC_FREE(ilist->udata);
				}

				if (ilist->rule_option_tree && acsm->optiontreefree)
				{
					acsm->optiontreefree(&(ilist->rule_option_tree));
				}

				if (ilist->neg_list && acsm->neg_list_free)
				{
					acsm->neg_list_free(&(ilist->neg_list));
				}

				AC_FREE(ilist);
			}
		}
		AC_FREE(acsm->acsmStateTable);
	}

	mlist = acsm->acsmPatterns;
	while (mlist)
	{
		ilist = mlist;
		mlist = mlist->next;

		/* Without a state table nothing else references this record. */
		if (!compiled && ilist->udata != NULL)
		{
			if (acsm->userfree && ilist->udata->id) acsm->userfree(ilist->udata->id);

			AC_FREE(ilist->udata);
		}

		AC_FREE(ilist->patrn);
		AC_FREE(ilist->casepatrn);
		AC_FREE(ilist);
	}
	AC_FREE(acsm);
}

/*
 *  Return how many patterns have been added to the state machine.
 */
int acsmPatternCount(ACSM_STRUCT * acsm)
{
	int total = acsm->numPatterns;

	return total;
}

/*
 *
 */
/*
 static void Print_DFA( ACSM_STRUCT * acsm )
 {
 int k;
 int i;
 int next;

 for (k = 0; k < acsm->acsmMaxStates; k++)
 {
 for (i = 0; i < ALPHABET_SIZE; i++)
 {
 next = acsm->acsmStateTable[k].NextState[i];

 if( next == 0 || next ==  ACSM_FAIL_STATE )
 {
 if( isprint(i) )
 printf("%3c->%-5d\t",i,next);
 else
 printf("%3d->%-5d\t",i,next);
 }
 }
 printf("\n");
 }

 }
 */

/*
 *  Placeholder kept for interface compatibility: prints nothing and
 *  always returns 0. The argument is intentionally unused.
 */
int acsmPrintDetailInfo(ACSM_STRUCT * p)
{
	(void) p;

	return 0;
}

/*
 *  Summary report hook. In a normal build this only returns 0.
 */
int acsmPrintSummaryInfo(void)
{
	/*
	 * NOTE(review): the section below is compiled only when XXXXX is
	 * defined. It refers to names that are not declared anywhere in this
	 * file (ACSM_STRUCT2, summary, acstate_t, sf) and contains a bare
	 * "return;" in a function returning int, so it looks like leftover
	 * code from another implementation and cannot be enabled as is.
	 */
#ifdef XXXXX
	char * fsa[]=
	{
		"TRIE",
		"NFA",
		"DFA",
	};

	ACSM_STRUCT2 * p = &summary.acsm;

	if( !summary.num_states )
	return;

	LogMessage("+--[Pattern Matcher:Aho-Corasick Summary]----------------------\n");
	LogMessage("| Alphabet Size    : %d Chars\n",p->acsmAlphabetSize);
	LogMessage("| Sizeof State     : %d bytes\n",sizeof(acstate_t));
	LogMessage("| Storage Format   : %s \n",sf[ p->acsmFormat ]);
	LogMessage("| Num States       : %d\n",summary.num_states);
	LogMessage("| Num Transitions  : %d\n",summary.num_transitions);
	LogMessage("| State Density    : %.1f%%\n",100.0*(double)summary.num_transitions/(summary.num_states*p->acsmAlphabetSize));
	LogMessage("| Finite Automatum : %s\n", fsa[p->acsmFSA]);
	if( max_memory < 1024*1024 )
	LogMessage("| Memory           : %.2fKbytes\n", (float)max_memory/1024 );
	else
	LogMessage("| Memory           : %.2fMbytes\n", (float)max_memory/(1024*1024) );
	LogMessage("+-------------------------------------------------------------\n");

#endif
	return 0;
}

#ifdef ACSMX_MAIN

/*
 *  Stand-alone test driver, built only when ACSMX_MAIN is defined.
 *
 *  The calls below follow the current prototypes of acsmNew,
 *  acsmAddPattern, acsmCompile and acsmSearch; the previous driver used
 *  an older interface with fewer arguments and no longer compiled.
 */

/*
 *  Text Data Buffer
 */
unsigned char text[512];

/*
 *    A Match is found: print the matched pattern's id, which the driver
 *    sets to the pattern string itself. Returning 0 lets the search
 *    continue.
 */
int
MatchFound (void *id, void *tree, int index, void *data, void *neg_list)
{
	(void) tree;
	(void) index;
	(void) data;
	(void) neg_list;

	fprintf (stdout, "%s\n", (char *) id);
	return 0;
}

/*
 *  argv[1] is the text to search; every following argument that does not
 *  start with '-' is a pattern. "-nocase" anywhere sets the nocase flag
 *  stored with each pattern.
 */
int
main (int argc, char **argv)
{
	int i, nocase = 0;
	int state = 0;
	size_t text_len;
	ACSM_STRUCT * acsm;

	if (argc < 3)
	{
		fprintf (stderr,
				"Usage: acsmx pattern word-1 word-2 ... word-n  -nocase\n");
		exit (EXIT_FAILURE);
	}

	/* Refuse text that does not fit instead of overrunning the buffer. */
	text_len = strlen (argv[1]);
	if (text_len >= sizeof (text))
	{
		fprintf (stderr, "acsmx: text is limited to %lu bytes\n",
				(unsigned long) (sizeof (text) - 1));
		exit (EXIT_FAILURE);
	}
	memcpy (text, argv[1], text_len + 1);

	/* The ids are argv strings, so no release callbacks are needed. */
	acsm = acsmNew (NULL, NULL, NULL);

	for (i = 1; i < argc; i++)
	{
		if (strcmp (argv[i], "-nocase") == 0)
		{
			nocase = 1;
		}
	}

	for (i = 2; i < argc; i++)
	{
		if (argv[i][0] == '-')
		{
			continue;
		}
		acsmAddPattern (acsm, (unsigned char *) argv[i], (int) strlen (argv[i]),
				nocase, 0, 0, 0, argv[i], i - 2);
	}

	acsmCompile (acsm, NULL, NULL);
	acsmSearch (acsm, text, (int) text_len, MatchFound, (void *) 0, &state);
	acsmFree (acsm);
	printf ("normal pgm end\n");
	return (0);
}
#endif /*  */


0x03使用场景

主要用于文本的多模式匹配。目前可选的正则库也较多,如pcre、boost::regex等。

  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值