一个简单C语言的词法分析器
语言的词法构成:
标识符
id 同C语言标识符
常量
num 数字
ch 字符
str 字符串
关键字
kw_int int
kw_char char
kw_void void
kw_if if
kw_else else
kw_switch switch
kw_case case
kw_default default
kw_while while
kw_do do
kw_for for
kw_break break
kw_continue continue
kw_return return
运算符
add +
sub -
mul *
div /
mod %
inc ++
dec --
not !
and &&
or ||
assign =
gt >
ge >=
lt <
le <=
equ ==
nequ !=
分界符
comma ,
colon :
simcon ;
lparen (
rparen )
lbrac {
rbrac }
代码:
#include<limits.h>
#include<stdio.h>
#include<string.h>
/*
 * Scanner state shared between main() and scaner().
 *
 * token[] is sized like input[]: a single identifier or string literal can be
 * as long as the whole input, so a smaller buffer can be overrun by ordinary
 * source text.
 */
char input[200];   /* source text typed by the user, ended by '#' */
char token[200];   /* spelling of the token currently being reported */
char toke;         /* value of the current character constant */
char ch;           /* character currently being examined */
int fg;            /* token code produced by scaner(); main() dispatches on it */
int num;           /* value of the current integer constant */
int p=0;           /* read position inside input[] */
/*
 * Reserved words of the language. Row n corresponds to token code n + 3,
 * i.e. the rows follow the order of the kw_* names in index[].
 *
 * The row count is left to the compiler so that it always equals the number
 * of initialisers (14); a hand-written count that is too small silently drops
 * the last keyword and makes the keyword lookup read past the array.
 */
char character[][10]={"int","char","void","if","else","switch","case","default","while","do","for","break","continue","return"};
/*
 * Printable name of every token code; main() indexes this table with fg.
 * The entries are grouped by category, one group per line.
 *
 * NOTE(review): the identifier "index" is also the name of a legacy POSIX
 * function declared in <strings.h>; on toolchains whose <string.h> exposes it
 * this table would have to be renamed together with its uses - confirm for
 * the target compiler.
 */
char index[42][15]={
    /*  0- 2: identifier and constants */
    "id","num","str",
    /*  3-16: keywords */
    "kw_int","kw_char","kw_void","kw_if","kw_else","kw_switch","kw_case",
    "kw_default","kw_while","kw_do","kw_for","kw_break","kw_continue","kw_return",
    /* 17-33: operators */
    "add","sub","mul","div","mod","inc","dec","not","and","or",
    "assign","gt","ge","lt","le","equ","nequ",
    /* 34-40: delimiters */
    "comma","colon","simcon","lparen","rparen","lbrac","rbrac",
    /* 41: character constant */
    "ch"
};
int scaner(void);   /* the scanner is defined after main() */

/*
 * Entry point: reads source text from stdin up to and including the first
 * '#', then calls scaner() repeatedly and prints one line per token until the
 * end-of-input code (42) is returned in fg.
 *
 * Output formats:
 *   fg == 1   integer constant, printed from num with %d
 *   fg == 41  character constant, printed from toke with %c
 *   fg == -1  unrecognised lexeme, fixed error message
 *   fg 0..40  everything else, printed from token with %s
 */
int main(void)
{
    int c = 0;   /* int rather than char so that EOF stays distinguishable */

    printf("请输入代码(结尾以'#'作为结束):\n");

    /* Read at most sizeof(input) - 1 characters so that one slot is always
     * left for the '#' terminator the scanner relies on. */
    p = 0;
    while (p < (int)sizeof(input) - 1) {
        c = getchar();
        if (c == EOF)
            break;
        input[p++] = (char)c;
        if (c == '#')
            break;
    }
    /* Input that ended early (EOF) or filled the buffer still gets a '#'. */
    if (p == 0 || input[p - 1] != '#')
        input[p++] = '#';

    p = 0;
    do {
        scaner();
        switch (fg) {
        case 1:     /* integer constant */
            printf("** %d-->%s **\n", num, index[fg]);
            break;
        case 41:    /* character constant */
            printf("** %c-->%s **\n", toke, index[fg]);
            break;
        case -1:    /* unrecognised lexeme */
            printf("分析器不存在该词法\n");
            break;
        case 42:    /* end of input: nothing to print */
            break;
        default:
            /* Identifier, string, keyword, operator or delimiter. The range
             * test covers every code 0..40, including 38 (rparen). */
            if (fg >= 0 && fg <= 40)
                printf("** %s-->%s **\n", token, index[fg]);
            break;
        }
    } while (fg != 42);

    /* Keep the console window open until Enter is pressed (portable stand-in
     * for the non-standard getch()): discard the rest of the line that held
     * the '#', then wait for one more character. */
    if (c != EOF) {
        while ((c = getchar()) != '\n' && c != EOF)
            ;
        if (c != EOF)
            getchar();
    }
    return 0;
}
/* Token codes stored in fg; codes 0..41 are also row numbers of index[]. */
enum {
    FG_ERROR = -1, FG_ID = 0, FG_NUM = 1, FG_STR = 2, FG_KEYWORD_BASE = 3,
    FG_ADD = 17, FG_SUB = 18, FG_MUL = 19, FG_DIV = 20, FG_MOD = 21,
    FG_INC = 22, FG_DEC = 23, FG_NOT = 24, FG_AND = 25, FG_OR = 26,
    FG_ASSIGN = 27, FG_GT = 28, FG_GE = 29, FG_LT = 30, FG_LE = 31,
    FG_EQU = 32, FG_NEQU = 33, FG_COMMA = 34, FG_COLON = 35, FG_SEMI = 36,
    FG_LPAREN = 37, FG_RPAREN = 38, FG_LBRAC = 39, FG_RBRAC = 40,
    FG_CH = 41, FG_END = 42
};

/* Returns the character at input[p] without consuming it; reports the
 * end-of-input marker '#' when p is outside the buffer or on a NUL byte, so
 * the scanner can never read past input[]. */
static char scaner_peek(void)
{
    if (p < 0 || p >= (int)sizeof input || input[p] == '\0')
        return '#';
    return input[p];
}

/* Appends c to token[] and keeps it NUL-terminated; characters that do not
 * fit are dropped instead of overflowing the buffer. */
static void scaner_store(int *len, char c)
{
    if (*len < (int)sizeof token - 1)
        token[(*len)++] = c;
    token[*len] = '\0';
}

/* Consumes the operator character at input[p]. When `second` is non-zero and
 * is the next character, it is consumed as well and pair_code is returned;
 * otherwise single_code is returned. The spelling is left in token[]. */
static int scaner_operator(char second, int pair_code, int single_code)
{
    int len = 0;

    scaner_store(&len, input[p++]);
    if (second != '\0' && scaner_peek() == second) {
        scaner_store(&len, second);
        p++;
        return pair_code;
    }
    return single_code;
}

/* True for '0'..'9'. */
static int scaner_is_digit(char c)
{
    return c >= '0' && c <= '9';
}

/* True for a character that may start an identifier: a letter or '_'. */
static int scaner_is_word_start(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
}

/*
 * Lexical scanner: recognises the next token of input[] starting at p.
 *
 * Results are delivered through the file-level variables:
 *   fg    - token code (row of index[]), -1 for an unrecognised lexeme,
 *           42 at the '#' end marker
 *   token - spelling of identifiers, keywords, operators, delimiters and the
 *           contents of string literals (without the quotes)
 *   num   - value of an integer constant (fg == 1)
 *   toke  - value of a character constant (fg == 41)
 *   p     - advanced past the token; the end marker itself is not consumed,
 *           so further calls keep reporting 42
 *
 * Returns the same code that is stored in fg.
 */
int scaner(void)
{
    int len = 0;        /* number of characters stored in token[] */
    size_t k;

    toke = '\0';
    for (k = 0; k < sizeof token; k++)   /* clear the whole buffer, not a prefix */
        token[k] = '\0';

    /* Skip blanks, tabs and line breaks in any order. */
    for (;;) {
        ch = scaner_peek();
        if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r')
            break;
        p++;
    }

    /* Integer constant. */
    if (scaner_is_digit(ch)) {
        int overflow = 0;

        num = 0;
        while (scaner_is_digit(ch)) {
            int digit = ch - '0';

            /* Test before multiplying: signed overflow is undefined. */
            if (overflow || num > (INT_MAX - digit) / 10)
                overflow = 1;
            else
                num = num * 10 + digit;
            p++;
            ch = scaner_peek();
        }
        /* A constant that does not fit in int is reported as an error. */
        fg = overflow ? FG_ERROR : FG_NUM;
        return fg;
    }

    /* Identifier or keyword. */
    if (scaner_is_word_start(ch)) {
        while (scaner_is_word_start(ch) || scaner_is_digit(ch)) {
            scaner_store(&len, ch);
            p++;
            ch = scaner_peek();
        }
        fg = FG_ID;
        /* The bound comes from the table itself, so the search can never
         * run past the last keyword row. */
        for (k = 0; k < sizeof character / sizeof character[0]; k++) {
            if (strcmp(token, character[k]) == 0) {
                fg = FG_KEYWORD_BASE + (int)k;
                break;
            }
        }
        return fg;
    }

    /* Operators, delimiters, literals and the end marker. */
    switch (ch) {
    case '+': fg = scaner_operator('+', FG_INC, FG_ADD);    break;
    case '-': fg = scaner_operator('-', FG_DEC, FG_SUB);    break;
    case '>': fg = scaner_operator('=', FG_GE, FG_GT);      break;
    case '<': fg = scaner_operator('=', FG_LE, FG_LT);      break;
    case '=': fg = scaner_operator('=', FG_EQU, FG_ASSIGN); break;
    case '!': fg = scaner_operator('=', FG_NEQU, FG_NOT);   break;
    /* The language has no single '&' or '|' operator: report an error
     * instead of mislabelling it as a character constant. */
    case '&': fg = scaner_operator('&', FG_AND, FG_ERROR);  break;
    case '|': fg = scaner_operator('|', FG_OR, FG_ERROR);   break;
    case '*': fg = scaner_operator('\0', 0, FG_MUL);        break;
    case '/': fg = scaner_operator('\0', 0, FG_DIV);        break;
    case '%': fg = scaner_operator('\0', 0, FG_MOD);        break;
    case ',': fg = scaner_operator('\0', 0, FG_COMMA);      break;
    case ':': fg = scaner_operator('\0', 0, FG_COLON);      break;
    case ';': fg = scaner_operator('\0', 0, FG_SEMI);       break;
    case '(': fg = scaner_operator('\0', 0, FG_LPAREN);     break;
    case ')': fg = scaner_operator('\0', 0, FG_RPAREN);     break;
    case '{': fg = scaner_operator('\0', 0, FG_LBRAC);      break;
    case '}': fg = scaner_operator('\0', 0, FG_RBRAC);      break;

    case '"':
        /* String literal: token[] receives the text between the quotes.
         * An empty string "" is valid; a missing closing quote is an error
         * and leaves the end marker unread. */
        p++;
        for (;;) {
            ch = scaner_peek();
            if (ch == '#') {
                fg = FG_ERROR;
                break;
            }
            p++;
            if (ch == '"') {
                fg = FG_STR;
                break;
            }
            scaner_store(&len, ch);
        }
        break;

    case '\'':
        /* Character constant: exactly one character between the quotes. */
        p++;
        ch = scaner_peek();
        if (ch == '#') {            /* nothing after the opening quote */
            fg = FG_ERROR;
            break;
        }
        toke = ch;
        p++;
        if (scaner_peek() == '\'') {
            p++;
            fg = FG_CH;
            break;
        }
        fg = FG_ERROR;
        if (toke == '\'') {         /* '' - empty constant, both quotes consumed */
            toke = '\0';
            break;
        }
        toke = '\0';
        /* Too many characters: discard up to and including the closing
         * quote, but never beyond the end marker. */
        for (;;) {
            ch = scaner_peek();
            if (ch == '#')
                break;
            p++;
            if (ch == '\'')
                break;
        }
        break;

    case '#':
        fg = FG_END;
        break;

    default:
        p++;                        /* consume the unknown character */
        fg = FG_ERROR;
        break;
    }
    return fg;
}
完成!