利用状态图实现词法分析

最新推荐文章于 2024-04-15 10:43:09 发布

cn854

最新推荐文章于 2024-04-15 10:43:09 发布

阅读量6.2k

点赞数 1

分类专栏：编译文章标签： token c insert 数据结构存储测试

本文链接：https://blog.csdn.net/cn854/article/details/697242

版权

编译专栏收录该内容

1 篇文章 0 订阅

订阅专栏

实验一：词法分析程序

03070020 曹宁

一. 实验目的

基本掌握计算机语言的词法分析程序的开发方法。

二. 实验内容

编制一个能够分析三种整数、标识符、主要运算符和主要关键字的词法分析程序。

三. 实验环境

PC微机

DOS操作系统或 Windows 操作系统

Turbo C 程序集成环境或 Visual C++ 程序集成环境

四. 实验内容

1. 根据以下的正规式，编制正规文法，画出状态图；

标识符	letter(letter\|digit)* letter->A\|B\|…\|Z\|a\|b\|…\|z digit->0\|1\|2\|3\|4\|…\|9
十进制整数	0 \| (1\|2\|3\|4\|5\|6\|7\|8\|9)(0\|1\|2\|3\|4\|5\|6\|7\|8\|9)^*
八进制整数	0(1\|2\|3\|4\|5\|6\|7)(0\|1\|2\|3\|4\|5\|6\|7)^*
十六进制整数	0x(0\|1\|2\|3\|4\|5\|6\|7\|8\|9\|a\|b\|c\|d\|e\|f)(0\|1\|2\|3\|4\|5\|6\|7\|8\|9\|a\|b\|c\|d\|e\|f)^*
运算符和分隔符	+ - * / > < = ( ) ;
关键字	if then else while do

2. 根据状态图，设计词法分析函数int nextToken()，完成以下功能：

1）从文件读入数据，分析出一个单词。

2）返回单词种别（用整数表示），

3）返回单词属性（不同的属性可以放在不同的全局变量中）。

3. 编写测试程序，反复调用函数int nextToken()，输出单词种别和属性。

五. 实验步骤

1. 根据状态图，设计词法分析算法

标识符
正规式	id->letter(letter\|digit)* letter->A\|B\|…\|Z\|a\|b\|…\|z digit->0\|1\|2\|3\|4\|…\|9
正规文法	S->aB’\|bB’\|…\|zB’\|AB’\|BB’\|…\|ZB’ B’->aB’\|…\|zB’\|AB’\|…\|ZB’\|0B’\|1B’\|…\|9B’\|ε
状态图

八进制整数
正规式	0(1\|2\|3\|4\|5\|6\|7)(0\|1\|2\|3\|4\|5\|6\|7)^*
正规文法	S->01B\|02B\|…\|07B B->0B\|1B\|…\|7B\|ε
十进制整数
正规式	0 \| (1\|2\|3\|4\|5\|6\|7\|8\|9)(0\|1\|2\|3\|4\|5\|6\|7\|8\|9)^*
正规文法	S->0\|1B\|2B\|…\|9B B->0B\|1B\|…\|9B\|ε
十六进制整数
正规式	0x(0\|1\|2\|3\|4\|5\|6\|7\|8\|9\|a\|b\|c\|d\|e\|f)(0\|1\|2\|3\|4\|5\|6\|7\|8\|9\|a\|b\|c\|d\|e\|f)^*
正规文法	S->0(x\|X)(1B’\|2B’\|…\|9B’\|aB’\|bB’\|…\|fB’\|AB’\|…\|FB’) B’->0B’\|1B’\|…\|9B’\|aB’\|…\|fB’\|AB’\|…\|FB’\|ε
识别这三种数字的状态图

运算符和分隔符
状态图

2. 采用C语言，设计函数nextToken( )，实现该算法

程序中的变量和函数声明

/* ---- Public functions ---- */

extern void initLexer(void);  /* open the input file and initialise the lexer */
extern int nextToken(void);   /* fetch one token and return its kind */

/* ---- Public variables ----
 * These are definitions (they carry initialisers), so they are not marked
 * `extern`; other translation units refer to them with `extern` declarations.
 */

/* Attribute of the last token: the numeric value for a number, or the
 * position in the name table for an identifier. */
int attr=-1;

/* Current line number, used when reporting tokens. */
int lineNo=1;

/* Keyword spellings. nextToken() returns IF + (index into this array), so
 * the order here has to agree with the order of the keyword token codes,
 * which are defined outside this listing. */
char *keyWord[]={
    "if",
    "else",
    "then",
    "while",
    "do"
};

/* ---- Internal functions ---- */

static int fail(void);                 /* switch to another state diagram */
static char getAnChar(void);           /* read one character from the file and advance by one */
static void ungetAnChar(void);         /* move the file position back by one */
static void getNum(int type);          /* according to type, compute the decimal value and store it in attr */
static int lookup(const char *s);      /* look an identifier up in the symbol table */
static int insert(const char *s);      /* return the identifier's index in the symbol table */
static int isKeyWord(char word[]);     /* decide whether word is a keyword */

/* ---- Internal variables ---- */

static char lexBuf[100];       /* character buffer holding the lexeme being analysed */
static int state=1, start=1;   /* current state, and start state of the current state diagram */
static int currentPos=0;       /* current position in the file */
static int tokenBeginning=0;   /* position on entering a state diagram, i.e. where to
                                  rewind to when switching diagrams */
static FILE *fp;               /* the file being compiled */
static int smptableLength=0;   /* current length of the symbol table */

/* Append c to lexBuf unless the buffer is full. One byte is always kept free
 * for the terminating '\0', so an over-long lexeme is truncated rather than
 * written past the end of the array. */
static void lexBufAppend(int *length, char c){
    if(*length < (int)sizeof lexBuf - 1){
        lexBuf[(*length)++]=c;
    }
}

/*
 * nextToken - recognise the next token by walking the state diagrams.
 *
 * Returns the token kind: FILEEND, ID, IF + keyword index, INT8, INT10,
 * INT16, or one of ADD SUB MUL DIV GT LT EQ LBR RBR SEM.
 * Side effects: `attr` receives insert()'s result for an identifier and is
 * set by getNum() for a number; `lineNo` is incremented on every '\n'.
 *
 * States 1-3 recognise identifiers and keywords, 4-13 the three integer
 * forms, 14-24 operators and separators. When a diagram cannot accept the
 * current character, fail() (defined elsewhere) supplies the next state;
 * per its declaration comment it switches to another state diagram.
 *
 * NOTE(review): getAnChar() is declared as returning char, so the c==EOF
 * test relies on plain char being signed - confirm on the target compiler.
 * NOTE(review): state 6 rejects '0' as the first hex digit and state 5 only
 * accepts a lower-case 'x'; left as written - confirm against the intended
 * grammar.
 */
int nextToken(){
    int length=0;       /* number of characters collected in lexBuf */
    char c;
    int keyWordPos=0;   /* index in the keyword array */

    state=1;start=1;

    /* remember where this token starts */
    tokenBeginning=currentPos;

    /* the state diagram */
    while(1){
        switch(state){
        case 1:
            c=getAnChar();
            if(c==' '||c=='\t'||c=='\r'){
                /* skip blanks; '\r' appears in files with CR LF line ends */
                tokenBeginning++;
            }
            else if(c=='\n'){
                lineNo++;
                tokenBeginning++;
            }
            /* test EOF before the <ctype.h> call so the cast below never
               sees the end-of-file marker */
            else if(c==EOF) return FILEEND;
            else if(isalpha((unsigned char)c)){
                state=2;
                lexBufAppend(&length,c);
            }
            else state=fail();
            break;

        case 2:
            c=getAnChar();
            if(c!=EOF && (isdigit((unsigned char)c)||isalpha((unsigned char)c))){
                state=2;
                lexBufAppend(&length,c);
            }
            else state=3;
            break;

        case 3:
            /* the character that ended the word is not part of it */
            ungetAnChar();
            lexBuf[length]='\0';
            keyWordPos=isKeyWord(lexBuf);
            if(keyWordPos!=-1){
                /* keyword */
                return IF+keyWordPos;
            }
            /* not a keyword: attr gets the index in the name table */
            attr=insert(lexBuf);
            return ID;

        case 4:
            c=getAnChar();
            if(c=='0') state=5;
            else if(c>='1' && c<='9'){
                state=12;
                lexBufAppend(&length,c);
            }
            else state=fail();
            break;

        case 5:
            /* after a leading '0': hex prefix, octal digit, or plain zero */
            c=getAnChar();
            if(c=='x') state=6;
            else if(c>='1' && c<='7'){
                state=10;
                lexBufAppend(&length,c);
            }
            else state=13;
            break;

        case 6:
            /* first hex digit after "0x" */
            c=getAnChar();
            if((c>='1' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F')){
                state=7;
                lexBufAppend(&length,c);
            }
            else state=fail();
            break;

        case 7:
            /* remaining hex digits */
            c=getAnChar();
            if((c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F')){
                state=7;
                lexBufAppend(&length,c);
            }
            else state=8;
            break;

        case 8:
            ungetAnChar();
            lexBuf[length]='\0';
            getNum(INT16);   /* store the value in attr */
            return INT16;

        case 10:
            /* remaining octal digits */
            c=getAnChar();
            if(c>='0' && c<='7'){
                state=10;
                lexBufAppend(&length,c);
            }
            else state=11;
            break;

        case 11:
            ungetAnChar();
            lexBuf[length]='\0';
            getNum(INT8);    /* store the value in attr */
            return INT8;

        case 12:
            /* remaining decimal digits */
            c=getAnChar();
            if(c>='0' && c<='9'){
                state=12;
                lexBufAppend(&length,c);
            }
            else state=13;
            break;

        case 13:
            ungetAnChar();
            lexBuf[length]='\0';
            getNum(INT10);   /* store the value in attr */
            return INT10;

        case 14:
            c=getAnChar();
            if(c=='+') state=15;
            else if(c=='-') state=16;
            else if(c=='*') state=17;
            else if(c=='/') state=18;
            else if(c=='>') state=19;
            else if(c=='<') state=20;
            else if(c=='=') state=21;
            else if(c=='(') state=22;
            else if(c==')') state=23;
            else if(c==';') state=24;
            else state=fail();
            break;

        case 15:
            return ADD;
        case 16:
            return SUB;
        case 17:
            return MUL;
        case 18:
            return DIV;
        case 19:
            return GT;
        case 20:
            return LT;
        case 21:
            return EQ;
        case 22:
            return LBR;
        case 23:
            return RBR;
        case 24:
            return SEM;
        }
    }
}

3. 编制测试程序（主函数main）。

/*
 * Test driver: calls nextToken() repeatedly until it returns FILEEND and
 * prints every token's kind together with its attribute.
 *
 * A "Line N" banner is printed whenever lineNo differs from the line of the
 * previously printed token. Identifiers are printed with their name-table
 * position, numbers with their value, everything else as the bare kind.
 */
int main(void){
    int token;
    int oldLine=-1;   /* line of the last banner; -1 forces the first one */

    initLexer();

    while(1){
        /* exactly one token is fetched per iteration, so none is skipped */
        token=nextToken();
        if(token==FILEEND) break;

        if(oldLine!=lineNo){
            printf("___________Line %d____________\n",lineNo);
            oldLine=lineNo;
        }

        if(token==ID){
            /* identifier */
            printf("ID:%d,Pos:%d\n",token,attr);
        }
        else{
            switch(token){
            case INT16:case INT8:case INT10:
                /* number */
                printf("%d\t\t%d\n",token,attr);
                break;
            default:
                /* keyword, operator or separator */
                printf("%d\n",token);
                break;
            }
        }
    }
    return 0;
}

4. 调试程序：输入一组单词，检查输出结果

1 92+data> 0x 3f 04 while

1x3 x3 x3 x44 00

以上都是单行输入，没有出现什么问题；但是对多行输入进行词法分析时总是出错。通过打印读到的字符，发现其中有字符13（ASCII），查资料才知道它是'\r'（回车符），作用是把光标移到当前行的起始位置。我当时不知道为什么会有这个字符（Windows 文本文件的换行由"\r\n"两个字符组成，读取时若没有做文本模式转换，就会读到'\r'），添加了滤掉这个字符的逻辑之后，词法分析才成功。

while (1) do num=10;

num=11;

if (1) then caoning=0x11;

if (1) than caoning=878;

5. 词法分析程序的数据结构与算法

符号表的数据结构：采用的是结构数组

/* One entry of the symbol table. */
struct ENTYE{

char word[100]; /* the name, kept as a NUL-terminated string (it is compared with strcmp and filled with strcpy) */

float value; /* NOTE(review): not read or written by the code shown here - purpose to be confirmed */

};

int smptableLength=0; /* number of entries of smptable currently in use; the next free index */

struct ENTYE smptable[200]; /* the symbol table itself: a fixed array of 200 entries */

再配以int lookup(const char *s)和int insert(const char *s)对这个符号表进行操作。

/*
 * lookup - find the name s in the symbol table.
 *
 * Scans the used part of smptable from the front and returns the index of
 * the first entry whose word equals s, or -1 when no entry matches.
 */
int lookup(const char *s){
    int idx=0;

    while(idx<smptableLength){
        if(!strcmp(smptable[idx].word,s)){
            return idx;
        }
        idx++;
    }
    return -1;
}

/*
 * insert - return the position of s in the symbol table, adding it if needed.
 *
 * If s is already present its existing index is returned and the table is
 * left unchanged. Otherwise s is copied into the next free entry, the table
 * length grows by one, and the new entry's index is returned.
 *
 * Returns -1 when s cannot be stored: either the table is full or s (with
 * its terminating '\0') does not fit in an entry's word field.
 */
int insert(const char *s){
    int i=lookup(s);

    if(i!=-1){
        return i;
    }

    /* refuse to write past the end of the table */
    if((size_t)smptableLength >= sizeof smptable/sizeof smptable[0]){
        return -1;
    }

    /* refuse names that would overflow the word field */
    if(strlen(s) >= sizeof smptable[0].word){
        return -1;
    }

    strcpy(smptable[smptableLength].word,s);
    return smptableLength++;
}

六. 心得体会

这次词法分析的实验本身没有什么难度，但是在做这实验之前感觉没谱，所以踏下心仔细的阅读Aho的《编译原理》中前三章，受到很大启发，尤其是利用switch语句把状态图实现的技术，可谓一绝，这也是我学习词法分析的最大的收获。

在做语法分析的时候，对词法分析进行了一些修改，学习了一下不同文件内的函数和变量的使用，在做词法分析的时候架构没有做好，比如哪些函数和变量作为内部的，哪些是提供外部使用的接口，比较混乱，也没有进行大量测试，导致语法分析时受阻重重，最后停下语法分析的编程，对此词法分析修改，使程序清晰易读，并进行了大量的测试，再作语法分析时就容易多了。这也是个教训，以后编程首先得考虑好，再一步一步来。就会省很多麻烦。