编译原理实验:词法分析

1、编写生成词法分析程序的文件lex.l。由平台自动使用Flex对lex.l进行编译,生成词法分析程序的源程序文件lex.yy.c,并使用GCC进行编译,得到词法分析程序的可执行文件。该程序以二元组的方式显示源程序中的单词;遇到错误字符时,显示错误信息。

编程要求

根据提示,在右侧编辑器补充代码完成lex.l文件。 待分析的单词及其种别码约定如下: 1 int 2 while 3 if 4 else 5 return 50 标识符 51 整常数 60 + 61 - 62 * 63 / 64 = 65 < 66 <= 67 > 68 >= 69 == 70 != 71 { 72 } 73 ( 74 ) 75 , 76 ; 

%option  yylineno
/**************以下为定义段******************/
%{
#include "stdio.h"
#include "string.h"
%}

id   [A-Za-z][A-Za-z0-9]*  
intconst    [0-9]+

/**************下面为需要填写的规则段******************/
%%

"int"  { printf("(1,-) ");}
"while" { printf("(2,-) ",yytext);}
"if" { printf("(3,-) ",yytext);}
"else" { printf("(4,-) ",yytext);}
"return" { printf("(5,-) ",yytext);}
[a-zA-Z]+ { printf("(50,\"%s\") ",yytext);}
[0-9]+ { printf("(51,%s) ",yytext);}
"+" { printf("(60,-) ",yytext);}
"-" { printf("(61,-) ",yytext);}
"*" { printf("(62,-) ",yytext);}
"/" { printf("(63,-) ",yytext);}
"=" { printf("(64,-) ",yytext);}
"<" { printf("(65,-) ",yytext);}
"<=" { printf("(66,-) ",yytext);}
">" { printf("(67,-) ",yytext);}
">=" { printf("(68,-) ",yytext);}
"==" { printf("(69,-) ",yytext);}
"!=" { printf("(70,-) ",yytext);}
"{" {{ printf("(71,-) ",yytext);}}
"}" {{ printf("(72,-) ",yytext);}}
"(" {{ printf("(73,-) ",yytext);}}
")" {{ printf("(74,-) ",yytext);}}
"," {{ printf("(75,-) ",yytext);}}
";" {{ printf("(76,-) ",yytext);}}
[ \r\t\n]     {} 
.		      {printf("\n<error character \'%s\' in line %d> ",yytext,yylineno);}
%%

/**************以下为用户子程序段******************/
int main(int argc,char *argv[])
{
yylineno=1;      //行序号变量,每次遇到\n后会自动加1
yyin=fopen(argv[1],"r");
if (!yyin) return 0;
while (yylex());
return 0;
}
int yywrap() { return 1;}

参考《lex与yacc》第二版

简单理解:遇到相应的词汇做相应动作

2、用C/C++编写生成词法分析程序。 以二元组的方式显示源程序中的单词,遇到错误字符,显示错误信息。
 

#include "stdio.h"  
#include "stdlib.h"  
#include "ctype.h"  
#include "string.h" 
#define MAXWORD 50            //能够分析出的最长单词
#define MAXFILEWORD 500        //用于指定存放文件内容数组的最大容量
char Keyword[][7] = { "int","while","if","else","return"}; 
int reserve(char str[])
{
	for (int i = 0; i<15; i++)
	{
		if (strcmp(str, Keyword[i]) == 0)
		{
			return (i + 1);	//关键字的种别码为i+1 的值 
		}
	}
	return -1;                //不是关键字
}

void getsym(FILE *fp)
{
	char ch[MAXFILEWORD];
	int n = 0;
	while ((ch[n++] = fgetc(fp)) != EOF)
		;
	ch[--n] = '\0';    //如果是n的话会把EOF读进来
	int line = 1;       //记录错误行号
	int p = 0;
	while (ch[p] != '\0')
	{
		if (ch[p] == '\n')
			line++;
		while ((ch[p] == ' ') || (ch[p] == '\n') || (ch[p] == '\t'))
			p++;
		int i = 0;
		char str[MAXWORD] = {};
		int addr = 0;

		if (isalpha(ch[p]))
		{
			do {
				str[i++] = ch[p++];

			} while (isalpha(ch[p]) || isdigit(ch[p]));
			str[i] = '\0';
			addr = reserve(str);
			if (addr == -1){
				//非标识符
				printf("(50,\"%s\") ",str); 
			}else{
				printf("(%d,-) ", addr); 
			}
		}
		else if (isdigit(ch[p]))
		{
			do {
				str[i++] = ch[p++];
			} while (isdigit(ch[p]));   //出循环时,ch[p]已不是数字
			str[i] = '\0';
			addr = 51 ;
			printf("(%d,%s) ", addr ,str); //纯数字
		}
		else if (ch[p] == '<')
		{
			if (ch[p + 1] == '=') {
				str[0] = ch[p];
				str[1] = ch[p + 1];
				str[2] = '\0';
				addr = 66;
				printf("(%d,-) ", addr);
				p = p + 2;
			}
			else if (ch[p + 1] == '>')
			{
				str[0] = ch[p];
				str[1] = ch[p + 1];
				str[2] = '\0';
				addr = 6;
				printf("(%d,-) ", addr);
				p = p + 2;
			}
			else
			{
				str[0] = ch[p];
				str[1] = '\0';
				addr = 65;
				printf("(%d,-) ", addr);
				p++;
			}
		}
		else if (ch[p] == '>')
		{
			if (ch[p + 1] == '=') {
				str[0] = ch[p];
				str[1] = ch[p + 1];
				str[2] = '\0';
				addr = 68;
				printf("(%d,-) ", addr);
				p = p + 2;
			}
			else
			{
				str[0] = ch[p];
				str[1] = '\0';
				addr = 67;
				printf("(%d,-) ", addr);
				p++;
			}
		}
		else if (ch[p] == '+')
		{
			addr = 60; str[0] = ch[p]; str[1] = '\0';
			printf("(%d,-) ", addr);
			p++;
		}
		else if (ch[p] == '-')
		{
			addr = 61; str[0] = ch[p]; str[1] = '\0';
			printf("(%d,-) ", addr);
			p++;
		}
		else if (ch[p] == '*')
		{
			addr = 62; str[0] = ch[p]; str[1] = '\0';
			printf("(%d,-) ", addr);
			p++;
		}
		else if (ch[p] == '/')
		{
			addr = 63; str[0] = ch[p]; str[1] = '\0';
			printf("(%d,-) ", addr);
			p++;
		}
		else if (ch[p] == '=')
		{
			if (ch[p + 1] == '=') {
				str[0] = ch[p];
				str[1] = ch[p + 1];
				str[2] = '\0';
				addr = 69;
				printf("(%d,-) ", addr);
				p = p + 2;
			}
			else
			{
				addr = 64; str[0] = ch[p]; str[1] = '\0';
				printf("(%d,-) ", addr);
				p++;
			}

		}
		else if (ch[p] == '(')
		{
			addr = 73; str[0] = ch[p]; str[1] = '\0';
			printf("(%d,-) ", addr);
			p++;
		}
		else if (ch[p] == ')')
		{
			addr = 74; str[0] = ch[p]; str[1] = '\0';
			printf("(%d,-) ", addr);
			p++;
		}
        else if (ch[p] == '{')
		{
			addr = 71; str[0] = ch[p]; str[1] = '\0';
			printf("(%d,-) ", addr);
			p++;
		}
		else if (ch[p] == '}')
		{
			addr = 72; str[0] = ch[p]; str[1] = '\0';
			printf("(%d,-) ", addr);
			p++;
		}
         else if (ch[p] == ',')
		{
			addr = 75; str[0] = ch[p]; str[1] = '\0';
			printf("(%d,-) ", addr);
			p++;
		}
        else if (ch[p] == ';')
		{
			addr = 76; str[0] = ch[p]; str[1] = '\0';
			printf("(%d,-) ", addr);
			p++;
		}
		else if (ch[p] == '!')
		{
			if (ch[p + 1] == '=') {
				str[0] = ch[p];
				str[1] = ch[p + 1];
				str[2] = '\0';
				addr = 70;
				printf("(%d,-) ", addr);
				p = p + 2;
			}
		}
		
		
	}
}


int main(int argc,char *argv[])    
{    
    FILE *fp;    
    fp=fopen(argv[1],"r");         //如果用C++实现,自行修改文件打开方式
    if (!fp) {printf("Fail Open\n");return 0;}    
 /***********在下面添加程序,根据文件指针读取测试文件中进行词法分析*********/  
    //循环读取文件的每一行数据
    getsym(fp);
    //操作结束后关闭文件
    fclose(fp);
    return 0;

    /*********************************************************************/  
    return 1;  
} 

说明:按照一个个读的顺序,每次遇到空格或者换行就跳过,用一个str暂存字符串,按照规则匹配。if else 用有穷自动机画个图逻辑就非常清楚了。以上代码其实比较冗余。

还有一种简化思路:将代码解耦,把扫描和操作两部分拆成两个函数。扫描函数把全文切分成一个个单词;操作函数用if-else对每个单词进行匹配,并给出相应的种别码(即代码中的addr),以确定它属于哪一类单词。查错机制不变,利用行号和字符位置来定位错误。

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
设计思想 (1)程序主体结构部分: 说明部分 %% 规则部分 %% 辅助程序部分 (2)主体结构的说明 在这里说明部分告诉我们使用的LETTER,DIGIT, IDENT(标识符,通常定义为字母开头的字母数字串)和STR(字符串常量,通常定义为双引号括起来的一串字符)是什么意思.这部分也可以包含一些初始化代码.例如用#include来使用标准的头文件和前向说明(forward ,references).这些代码应该再标记"%{"和"%}"之间;规则部分>可以包括任何你想用来分析的代码;我们这里包括了忽略所有注释中字符的功能,传送ID名称和字符串常量内容到主调函数和main函数的功能. (3)实现原理 程序中先判断这个句语句中每个单元为关键字、常数、运算符、界符,对与不同的单词符号给出不同编码形式的编码,用以区分之。 PL/0语言的EBNF表示 <常量定义>::=<标识符>=<无符号整数>; <标识符>::=<字母>={<字母>|<数字>}; <加法运算符>::=+|- <乘法运算符>::=*|/ <关系运算符>::==|#|<|<=|>|>= <字母>::=a|b|…|X|Y|Z <数字>::=0|1|2|…|8|9 三:设计过程 1. 关键字:void,main,if,then,break,int,Char,float,include,for,while,printfscanf 并为小。 2."+”;”-”;”*”;”/”;”:=“;”:”;”<“;”<=“;”>“;”>=“;”<>“;”=“;”(“;”)”;”;”;”#”为运算符。 3. 其他标记 如字符串,表示以字母开头的标识符。 4. 空格符跳过。 5. 各符号对应种别码 关键字分别对应1-13 运算符分别对应401-418,501-513。 字符串对应100 常量对应200 结束符# 四:举例说明 目标:实现对常量的判别 代码: digit [0-9] letter [A-Za-z] other_char [!-@\[-~] id ({letter}|[_])({letter}|{digit}|[_])* string {({letter}|{digit}|{other_char})+} int_num {digit}+ %% [ |\t|\n]+ "auto"|"double"|"int"|"struct"|"break"|"else"|"long"|"switch"|"case"|"enum"|"register"|"typedef"|"char"|"extern"|"return"|"union"|"const"|"float"|"short"|"unsigned"|"continue"|"for"|"signed"|"void"|"default"|"goto"|"sizeof"|"do"|"if"|"static"|"while"|"main" {Upper(yytext,yyleng);printf("%s,NULL\n",yytext);} \"([!-~])*\" {printf("CONST_string,%s\n",yytext);} -?{int_num}[.]{int_num}?([E][+|-]?{int_num})? 
{printf("CONST_real,%s\n",yytext);} "0x"?{int_num} {printf("CONST_int,%s\n",yytext);} ","|";"|"("|")"|"{"|"}"|"["|"]"|"->"|"."|"!"|"~"|"++"|"--"|"*"|"&"|"sizeof"|"/"|"%"|"+"|"-"|">"|"<"|">="|"<="|"=="|"!="|"&"|"^"|"|"|"&"|"||"|"+="|"-="|"*="|"/="|"%="|">>="|"<<="|"&="|"^="|"|="|"=" {printf("%s,NULL\n",yytext);} {id} {printf("ID,%s\n",yytext);} {digit}({letter})+ {printf("error1:%s\n",yytext);} %% #include <ctype.h> Upper(char *s,int l) { int i; for(i=0;i<l;i++) { s[i]=toupper(s[i]); } } yywrap() { return 1; } 五:DFA 六:数据测试 七:心得体会 其实匹配并不困难,主要是C++知识要求相对较高,只要把握住指针就好了。 附源程序: #include<iostream.h> #include<stdio.h> #include<stdlib.h> #include<string.h> int i,j,k,flag,number,status; /*status which is use to judge the string is keywords or not!*/ char ch; char words[10] = {" "}; char program[500]; int Scan(char program[]) { char *keywords[13] = {"void","main","if","then","break","int", "char","float","include","for","while","printf", "scanf"}; number = 0; status = 0; j = 0; ch = program[i++]; /* To handle the lettle space ands tab*/ /*handle letters*/ if ((ch >= 'a') && (ch <= 'z' )) { while ((ch >= 'a') && (ch <= 'z' )) { words[j++]=ch; ch=program[i++]; } i--; words[j++] = '\0'; for (k = 0; k < 13; k++) if (strcmp (words,keywords[k]) == 0) switch(k) { case 0:{ flag = 1; status = 1; break; } case 1:{ flag = 2; status = 1; break; } case 2:{ flag = 3; status = 1; break; } case 3:{ flag = 4; status = 1; break; } case 4:{ flag = 5; status = 1; break; } case 5:{ flag = 6; status = 1; break; } case 6:{ flag = 7; status = 1; break; } case 7:{ flag = 8; status = 1; break; } case 8:{ flag = 9; status = 1; break; } case 9:{ flag = 10; status = 1; break; } case 10:{ flag = 11; status = 1; break; } case 11:{ flag = 12; status = 1; break; } case 12:{ flag = 13; status = 1; break; } } if (status == 0) { flag = 100; } } /*handle digits*/ else if ((ch >= '0') && (ch <= '9')) { number = 0; while ((ch >= '0' ) && (ch <= '9' )) { number = number*10+(ch-'0'); ch = program[i++]; } flag = 200; 
i--; } /*opereation and edge handle*/ else switch (ch) { case '=':{ if (ch == '=') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 401; } else { i--; flag = 402; } break; } case'>':{ if (ch == '>') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 403; } else { i--; flag = 404; } break; } case'<':{ if (ch == '<') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 405; } else { i--; flag = 406; } break; } case'!':{ if (ch == '!') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 407; } else { i--; flag = 408; } break; } case'+':{ if (ch == '+') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 409; } else if (ch == '+') { words[j++] = ch; words[j] = '\0'; flag = 410; } else { i--; flag = 411; } break; } case'-':{ if (ch == '-') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 412; } else if( ch == '-') { words[j++] = ch; words[j] = '\0'; flag = 413; } else { i--; flag = 414; } break; } case'*':{ if (ch == '*') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 415; } else { i--; flag = 416; } break; } case'/':{ if (ch == '/') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 417; } else { i--; flag = 418; } break; } case';':{ words[j] = ch; words[j+1] = '\0'; flag = 501; break; } case'(':{ words[j] = ch; words[j+1] = '\0'; flag = 502; break; } case')':{ words[j] = ch; words[j+1] = '\0'; flag = 503; break; } case'[':{ words[j] = ch; words[j+1] = '\0'; flag = 504; break; } case']':{ words[j] = ch; words[j+1] = '\0'; flag = 505; break; } case'{':{ words[j] = ch; words[j+1] = '\0'; flag = 
506; break; } case'}':{ words[j] = ch; words[j+1] = '\0'; flag = 507; break; } case':':{ words[j] = ch; words[j+1] = '\0'; flag = 508; break; } case'"':{ words[j] = ch; words[j+1] = '\0'; flag = 509; break; } case'%':{ if (ch == '%') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 510; } else { i--; flag = 511; } break; } case',':{ words[j] = ch; words[j+1] = '\0'; flag = 512; break; } case'#':{ words[j] = ch; words[j+1] = '\0'; flag = 513; break; } case'@':{ words[j] = '#'; flag = 0; break; } default:{ flag = -1; break; } } return flag; } main() { i=0; printf("please input a program end with @"); do { ch = getchar(); program[i++] = ch; }while(ch != '@'); i = 0; do{ flag = Scan(program); if (flag == 200) { printf("(%2d,%4d)",flag,number); } else if (flag == -1) { printf("(%d,error)",flag); } else { printf("(%2d,%4s)",flag,words); } }while (flag != 0); system("pause"); }
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值