1、编写生成词法分析程序的文件lex.l。由平台自动使用Flex对lex.l进行编译,生成词法分析程序的源程序文件lex.yy.c,并使用GCC进行编译,得到词法分析程序的可执行文件。该程序以二元组的方式显示源程序中的单词;遇到错误字符时,显示错误信息。
编程要求
根据提示,在右侧编辑器补充代码完成lex.l文件。待分析的单词及其种别码约定如下:
1 int
2 while
3 if
4 else
5 return
50 标识符
51 整常数
60 +
61 -
62 *
63 /
64 =
65 <
66 <=
67 >
68 >=
69 ==
70 !=
71 {
72 }
73 (
74 )
75 ,
76 ;
%option yylineno
/************** Definitions section ******************/
%{
#include "stdio.h"
#include "string.h"
%}
/* Identifier: a letter followed by any number of letters or digits. */
id [A-Za-z][A-Za-z0-9]*
/* Integer constant: one or more decimal digits. */
intconst [0-9]+
/************** Rules section (the part to be filled in) follows ******************/
%%
    /*
     * Each rule prints one two-tuple "(code,attribute) ".
     * Keywords are listed before the identifier rule: when two rules match
     * the same length of input, flex chooses the one listed first, so "int"
     * is a keyword while a longer word such as "intx" is an identifier.
     * Two-character operators win over their one-character prefixes because
     * flex always prefers the longest match.
     */
"int"       { printf("(1,-) "); }
"while"     { printf("(2,-) "); }
"if"        { printf("(3,-) "); }
"else"      { printf("(4,-) "); }
"return"    { printf("(5,-) "); }
{id}        { /* letter followed by letters or digits */ printf("(50,\"%s\") ", yytext); }
{intconst}  { printf("(51,%s) ", yytext); }
"+"         { printf("(60,-) "); }
"-"         { printf("(61,-) "); }
"*"         { printf("(62,-) "); }
"/"         { printf("(63,-) "); }
"="         { printf("(64,-) "); }
"<"         { printf("(65,-) "); }
"<="        { printf("(66,-) "); }
">"         { printf("(67,-) "); }
">="        { printf("(68,-) "); }
"=="        { printf("(69,-) "); }
"!="        { printf("(70,-) "); }
"{"         { printf("(71,-) "); }
"}"         { printf("(72,-) "); }
"("         { printf("(73,-) "); }
")"         { printf("(74,-) "); }
","         { printf("(75,-) "); }
";"         { printf("(76,-) "); }
[ \r\t\n]   { /* whitespace separates tokens and produces no output */ }
.           { /* any other character is an error */ printf("\n<error character \'%s\' in line %d> ", yytext, yylineno); }
%%
/**************以下为用户子程序段******************/
/*
 * Entry point of the flex-generated scanner.
 *
 * argv[1] names the source file to analyse. The scanner is run over the
 * whole file; every rule action prints its own output, so the value
 * returned by yylex() is only used to detect the end of input.
 *
 * Returns 1 when no file name was given, 0 otherwise (including when the
 * file cannot be opened, as before).
 */
int main(int argc, char *argv[])
{
    /* Without this guard argv[1] would be NULL and fopen() would be
       called with a null path. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s <source-file>\n",
                argc > 0 ? argv[0] : "lex");
        return 1;
    }

    yylineno = 1; /* line counter; %option yylineno increments it on every '\n' */
    yyin = fopen(argv[1], "r");
    if (!yyin) {
        return 0;
    }

    /* No rule action returns a value, so yylex() returns 0 only at end of input. */
    while (yylex() != 0) {
    }

    fclose(yyin);
    return 0;
}
/*
 * Called by the generated scanner when it reaches the end of its input.
 * Always returns 1, i.e. there is no further input to switch to.
 */
int yywrap()
{
    return 1;
}
参考《lex与yacc》第二版
简单理解:遇到相应的词汇做相应动作
2、用C/C++编写词法分析程序。该程序以二元组的方式显示源程序中的单词;遇到错误字符时,显示错误信息。
#include "stdio.h"
#include "stdlib.h"
#include "ctype.h"
#include "string.h"
#define MAXWORD 50 //能够分析出的最长单词
#define MAXFILEWORD 500 //用于指定存放文件内容数组的最大容量
/* Keyword spellings. The token code of Keyword[i] is i + 1. */
char Keyword[][7] = { "int", "while", "if", "else", "return" };

/*
 * Look up a word in the keyword table.
 *
 * str: NUL-terminated word to classify.
 *
 * Returns the keyword's token code (index in Keyword plus one), or -1 when
 * the word is not a keyword.
 */
int reserve(const char str[])
{
    /* Derive the bound from the table itself so the loop can never run
       past its last entry. */
    const size_t count = sizeof Keyword / sizeof Keyword[0];

    for (size_t i = 0; i < count; i++) {
        if (strcmp(str, Keyword[i]) == 0) {
            return (int)i + 1;
        }
    }
    return -1; /* not a keyword */
}
/* Print the two-tuple of a token that carries no attribute value. */
static void emit_code(int code)
{
    printf("(%d,-) ", code);
}

/*
 * Consume the next character of fp when it equals expected.
 * Returns 1 if it was consumed; otherwise pushes it back and returns 0.
 */
static int accept_char(FILE *fp, int expected)
{
    int next = fgetc(fp);

    if (next == expected) {
        return 1;
    }
    if (next != EOF) {
        ungetc(next, fp);
    }
    return 0;
}

/*
 * Scan a word that starts with the letter `first` (already read from fp)
 * and print it as a keyword "(code,-) " or an identifier "(50,"name") ".
 */
static void scan_word(FILE *fp, int first)
{
    char str[MAXWORD];
    size_t len = 0;
    int overflowed = 0;
    int c = first;

    do {
        if (len < sizeof str - 1) {
            str[len++] = (char)c;
        } else {
            /* A word that does not fit the buffer is longer than every
               keyword, so it is an identifier: print what was collected
               and stream the remaining characters straight through. */
            if (!overflowed) {
                str[len] = '\0';
                printf("(50,\"%s", str);
                overflowed = 1;
            }
            putchar(c);
        }
        c = fgetc(fp);
    } while (c != EOF && isalnum(c));

    if (c != EOF) {
        ungetc(c, fp); /* first character after the word belongs to the next token */
    }

    if (overflowed) {
        printf("\") ");
        return;
    }

    str[len] = '\0';
    int code = reserve(str);
    if (code == -1) {
        printf("(50,\"%s\") ", str); /* not a keyword: identifier */
    } else {
        emit_code(code);
    }
}

/*
 * Scan an integer constant that starts with the digit `first` (already
 * read from fp) and print it as "(51,digits) ". The digits are written as
 * they are read, so the constant may have any length.
 */
static void scan_number(FILE *fp, int first)
{
    int c = first;

    printf("(51,");
    do {
        putchar(c);
        c = fgetc(fp);
    } while (c != EOF && isdigit(c));
    printf(") ");

    if (c != EOF) {
        ungetc(c, fp);
    }
}

/*
 * Lexical analysis of the text readable from fp.
 *
 * Reads fp up to end of file and prints every token to stdout as a
 * two-tuple: "(code,-) " for keywords, operators and delimiters,
 * "(50,"name") " for identifiers and "(51,digits) " for integer constants.
 * Blanks, tabs, carriage returns and newlines only separate tokens. Any
 * other character (including '!' that is not followed by '=') is reported
 * as "\n<error character 'c' in line N> " and skipped.
 *
 * fp: stream opened for reading; it is not closed here.
 */
void getsym(FILE *fp)
{
    int line = 1; /* current line number, used in error messages */
    int c;        /* int, not char, so that EOF stays distinguishable from data */

    while ((c = fgetc(fp)) != EOF) {
        if (c == '\n') {
            line++;
            continue;
        }
        if (c == ' ' || c == '\t' || c == '\r') {
            continue;
        }
        if (isalpha(c)) {
            scan_word(fp, c);
            continue;
        }
        if (isdigit(c)) {
            scan_number(fp, c);
            continue;
        }

        switch (c) {
        case '+': emit_code(60); break;
        case '-': emit_code(61); break;
        case '*': emit_code(62); break;
        case '/': emit_code(63); break;
        case '=': emit_code(accept_char(fp, '=') ? 69 : 64); break;
        case '<': emit_code(accept_char(fp, '=') ? 66 : 65); break;
        case '>': emit_code(accept_char(fp, '=') ? 68 : 67); break;
        case '{': emit_code(71); break;
        case '}': emit_code(72); break;
        case '(': emit_code(73); break;
        case ')': emit_code(74); break;
        case ',': emit_code(75); break;
        case ';': emit_code(76); break;
        case '!':
            if (accept_char(fp, '=')) {
                emit_code(70);
                break;
            }
            /* a lone '!' is not a token: report it like any other bad character */
            /* fallthrough */
        default:
            printf("\n<error character '%c' in line %d> ", c, line);
            break;
        }
    }
}
/*
 * Entry point of the hand-written lexer.
 *
 * argv[1] names the source file to analyse; its tokens are printed by
 * getsym().
 *
 * Returns 1 when no file name was given, 0 otherwise (including when the
 * file cannot be opened, in which case "Fail Open" is printed).
 */
int main(int argc, char *argv[])
{
    /* Without this guard argv[1] would be NULL and fopen() would be
       called with a null path. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s <source-file>\n",
                argc > 0 ? argv[0] : "lexer");
        return 1;
    }

    /* If this is built as C++, adapt the way the file is opened. */
    FILE *fp = fopen(argv[1], "r");
    if (!fp) {
        printf("Fail Open\n");
        return 0;
    }

    /* Tokenize the whole file. */
    getsym(fp);

    /* Close the file once the analysis is finished. */
    fclose(fp);
    return 0;
}
说明:按顺序逐个读取字符,遇到空格或换行就跳过,用str暂存当前单词的字符串,再按照规则匹配。其中的if else分支,用有穷自动机画个状态图,逻辑就非常清楚了。以上代码其实比较冗余。
还有一种简化思路:代码解耦,将扫描和操作部分分开成两个函数。扫描函数将把全文分成一个个单词,对每个单词进行匹配。 操作部分会用if else 匹配并给出相应的address(地址码)来确定是哪种类型的单词。筛错机制不变,利用行号和第几个char来定位。