如何写一个SQL解析器呢?这里先抛出第一步:单词切分(也就是词法分析)。下面用 lex 举个简单例子:
select.l 文件:
%{
/*
 * Scanner states for recognizing "SELECT <column> FROM <table> ;".
 * Each enumerator is the state reached right after the corresponding token
 * has been accepted; the rules advance with state++ and compare against
 * (NEXT - 1), so the numeric order below is significant.
 */
enum {
    BEGIN_OF_INPUT = 0, /* nothing accepted on the current line yet */
    SELECT         = 1, /* SELECT keyword accepted */
    COLUMN         = 2, /* column name accepted */
    FROM           = 3, /* FROM keyword accepted */
    TABLE          = 4, /* table name accepted */
    SEP            = 5, /* terminating ';' accepted: statement is complete */
    END_OF_INPUT   = 6  /* not referenced by the rules in this file */
};

/* Current position in the statement; reset at every end of line. */
int state = BEGIN_OF_INPUT;
%}
%%
\n {
    /*
     * End of line: report whether the tokens seen on this line formed a
     * complete statement, then reset the state machine and prompt again.
     * A line on which no token advanced the state prints no verdict.
     */
    const int statement_complete = (state == SEP);
    const int statement_started = (state > BEGIN_OF_INPUT);

    if (statement_complete)
    {
        printf("========= Cong!!! found a valid sql ==========\n");
    }
    else if (statement_started)
    {
        printf("========= :-( not a valid sql ==========\n");
    }

    state = BEGIN_OF_INPUT;
    printf("please input a sql and press Enter\n");
}
SELECT {
    /* The SELECT keyword only advances the machine from the state just before SELECT. */
    if (state + 1 == SELECT)
    {
        state = SELECT;
    }
    /* The trace line is printed whether or not the state changed. */
    printf("select state=%d\n", state);
}
FROM {
    /* The FROM keyword only advances the machine from the state just before FROM. */
    if (state + 1 == FROM)
    {
        state = FROM;
    }
    /* The trace line is printed whether or not the state changed. */
    printf("from state=%d\n", state);
}
[a-zA-Z_]+ {
    /*
     * A generic word is accepted as a column name or as a table name,
     * depending on which of the two the machine is currently waiting for.
     * In any other state the word leaves the state untouched.
     */
    switch (state)
    {
    case COLUMN - 1:
    case TABLE - 1:
        state++;
        break;
    default:
        break;
    }
    printf("any word state=%d\n", state);
}
; {
    /* The ';' completes the statement only when it comes from the state just before SEP. */
    if (state + 1 == SEP)
    {
        state = SEP;
    }
    /* The trace line is printed whether or not the state changed. */
    printf("seperator. state=%d\n", state);
}
. { /* any other single character is discarded without changing the state */ }
%%
/*
 * Program entry point: hand control to the generated scanner, which runs
 * the rules above until yylex() returns, then exit successfully.
 *
 * The return type is spelled out because implicit int was removed in C99
 * and is rejected by default by current compilers.
 */
int main(void)
{
    yylex();
    return 0;
}
编译:
lex select.l
gcc lex.yy.c -ll
运行:
./a.out
效果截图:
注意:
写lex文件规则的时候,严格(更具体)的规则必须写在松散(更通用)的规则前面。原因是:lex 总是优先选择匹配最长的规则,当多条规则匹配的长度相同时,选择写在最前面的那一条。下面是一个错误的例子:
[a-zA-Z_]+ {
/* INTENTIONALLY WRONG ORDER (counter-example): this generic word rule is
   listed before the FROM keyword rule. For the input "FROM" both patterns
   match the same number of characters, and lex then picks the rule that
   appears first, so this action runs for "FROM" as well. */
if (state == (COLUMN - 1) || state == (TABLE - 1))
state++;
printf("any word state=%d\n", state);
}
FROM {
/* Never reached in this ordering: the word rule above always wins the tie. */
if (state == (FROM - 1))
state++;
printf("from state=%d\n", state);
}
因为对输入 FROM 来说,两条规则匹配到的长度相同,而 [a-zA-Z_]+ 写在前面,所以 FROM 这条规则永远都走不到,全部都被 [a-zA-Z_]+ 拦住了。
================================================================
5年前写过的关于lex yacc的文章: