贵州大学-编译原理实验1-词法分析器_贵州大学编译原理-CSDN博客

本文链接：https://blog.csdn.net/m0_54202648/article/details/130972617

贵州大学-编译原理实验1-词法分析器

考虑如下C语言子集

单词	类别编码	助记符	值
break	1	BREAK	_
char	2	CHAR	_
do	3	DO	_
double	4	DOUBLE	_
else	5	ELSE	_
if	6	IF	_
int	7	INT	_
return	8	RETURN	_
void	9	VOID	_
while	10	WHILE	_
标识符	11	ID	构成标识符的字符串
常数	12	NUM	数值
字符串	13	STRING	字符串
+	14	ADD	_
-	15	SUB	_
*	16	MUL	_
/	17	DIV	_
>	18	GT	_
>=	19	GE	_
<	20	LT	_
<=	21	LE	_
==	22	EQ	_
!=	23	NE	_
=	24	ASSIGN	_
{	25	LB	_
}	26	RB	_
(	27	LR	_
)	28	RR	_
,	29	COMMA	_
;	30	SEMI	_

单词的正则定义如下：

D = [0-9]

L = [a-zA-Z_]

H = [a-fA-F0-9]

E = [Ee][+-]?{D}+

FS = (f|F|l|L)

IS = (u|U|l|L)*

(1) 标识符

id = {L}({L}|{D})*

(2) 常数

num =

0[xX]{H}+{IS}? |

0{D}+{IS}? |

{D}+{IS}? |

L?'(\\.|[^\\'])+' |

{D}+{E}{FS}? |

{D}*"."{D}+({E})?{FS}? |

{D}+"."{D}*({E})?{FS}?

(3) 字符串

string = L?\"(\\.|[^\\"])*\"

对给定的源程序进行词法分析，每个单词一行，以二元组的形式输出结果。

例如，下面的源程序代码

void main()

{

/* compute 1 + 2 + … + 100 */

double sum = 0.0;

double x = 1.0;

while (x <= 100) sum = sum + x;

printf("sum = %f\n", sum);

}

词法分析的结果为

(VOID, _)

(ID, "main")

(LR, _)

(RR, _)

(LB, _)

(DOUBLE, _)

(ID, "sum")

(ASSIGN, _)

(NUM, 0.0)

(SEMI, _)

(DOUBLE, _)

(ID, "x")

(ASSIGN, _)

(NUM, 1.0)

(SEMI, _)

(WHILE, _)

(LR, _)

(ID, "x")

(LE, _)

(NUM, 100)

(RR, _)

(ID, "sum")

(ASSIGN, _)

(ID, "sum")

(ADD, _)

(ID, "x")

(SEMI, _)

(ID, "printf")

(LR, _)

(STRING, "sum = %f\n")

(COMMA, _)

(ID, "sum")

(RR, _)

(SEMI, _)

(RB, _)

词法分析一般使用 lex 实现，手写代码会比较复杂；

Windows 平台一般使用 flex；lex 和 flex 基本上差不多。

【Lex制作词法分析器——实验】 https://www.bilibili.com/video/BV1K84y1Y7G9/?share_source=copy_web&vd_source=a80491be26f09f5f59bd172bab9bc237

参考这个视频，挺简单的

可以自己学一下 lex 简单入门，花个10分钟应该差不多了，了解一下基本的结构

视频里的编译命令

lex C.lex
cc lex.yy.c -ll
./a.out < testin.c

%{
	/* C headers made available to the actions and to the user code at the end of the file. */
	#include <stdio.h>
	#include <stdlib.h>
	/* Running token number: every token-printing action increments it, then prints it as the line prefix. */
	int count = 0;
%}
/* Character classes: decimal digit, identifier letter, hexadecimal digit. */
D		[0-9]
L		[a-zA-Z_]
H		[a-fA-F0-9]
/* Exponent part, floating suffix and integer suffix of numeric constants. */
E 		[Ee][+-]?{D}+
FS 		(f|F|l|L)
IS 		(u|U|l|L)*
id 		{L}({L}|{D})*
delim		[ \t\n\r]
whitespace	{delim}+
/* num alternatives, in order: hex, leading-zero, decimal, character constant, then three floating forms. */
num  		0[xX]{H}+{IS}?|0{D}+{IS}?|{D}+{IS}?|L?'(\\.|[^\\'])+'|{D}+{E}{FS}?|{D}*"."{D}+({E})?{FS}?|{D}+"."{D}*({E})?{FS}?
string 		L?\"(\\.|[^\\"])*\"
/*
 * Block comment (zhushi). The body is a sequence of either a non-star
 * character, or a run of stars followed by a character that is neither a
 * star nor a slash; the comment then ends at a run of stars followed by a
 * slash. Because the body can never run past a closing delimiter, the
 * longest match stops at the first closer instead of merging two comments
 * and swallowing the code between them.
 */
zhushi 		"/*"([^*]|\*+[^*/])*\*+"/"
other .
%%
	/* Reserved words and two-character operators: each action numbers the token and prints its mnemonic. */
	/* These literal rules come before the identifier rule, so on a tie in length the reserved word wins. */
"break"		{ count++; printf("%d\t(BREAK,_)\n", count); }
"char"		{ count++; printf("%d\t(CHAR,_)\n", count); }
"do"		{ count++; printf("%d\t(DO,_)\n", count); }
"double"	{ count++; printf("%d\t(DOUBLE,_)\n", count); }
"else"		{ count++; printf("%d\t(ELSE,_)\n", count); }
"if"		{ count++; printf("%d\t(IF,_)\n", count); }
"int"		{ count++; printf("%d\t(INT,_)\n", count); }
"return"	{ count++; printf("%d\t(RETURN,_)\n", count); }
"void"		{ count++; printf("%d\t(VOID,_)\n", count); }
"while"		{ count++; printf("%d\t(WHILE,_)\n", count); }
">="		{ count++; printf("%d\t(GE,_)\n", count); }
"<="		{ count++; printf("%d\t(LE,_)\n", count); }
"=="		{ count++; printf("%d\t(EQ,_)\n", count); }
"!="		{ count++; printf("%d\t(NE,_)\n", count); }


{id}		{ /* identifier: the lexeme is printed inside double quotes */
		  count++; printf("%d\t(ID,\"%s\")\n", count, yytext); }
{num}		{ /* numeric or character constant: the lexeme is printed as written, without added quotes */
		  count++; printf("%d\t(NUM,%s)\n", count, yytext); }
{whitespace}	{ /* blanks, tabs and line breaks produce no output */ }
{string}	{ /* string literal: the lexeme already carries its own double quotes */
		  count++; printf("%d\t(STRING,%s)\n", count, yytext); }
{zhushi}	{ /* block comments produce no output */ }
"("		{ count++; printf("%d\t(LR,_)\n", count); }
")"		{ count++; printf("%d\t(RR,_)\n", count); }
"{"		{ count++; printf("%d\t(LB,_)\n", count); }
"}"		{ count++; printf("%d\t(RB,_)\n", count); }
","		{ count++; printf("%d\t(COMMA,_)\n", count); }
";"		{ count++; printf("%d\t(SEMI,_)\n", count); }
"="		{ count++; printf("%d\t(ASSIGN,_)\n", count); }
"+"		{ count++; printf("%d\t(ADD,_)\n", count); }
"<"		{ count++; printf("%d\t(LT,_)\n", count); }
">"		{ count++; printf("%d\t(GT,_)\n", count); }
"-"		{ count++; printf("%d\t(SUB,_)\n", count); }
"/"		{ count++; printf("%d\t(DIV,_)\n", count); }
"*"		{ count++; printf("%d\t(MUL,_)\n", count); }
{other}		{ /* any single character that no other rule accepts */
		  count++; printf("%d\t(wrong)\n", count); }
%%
/*
 * Entry point: scans one source file and prints its tokens.
 *
 * The file to scan is taken from the first command-line argument; when no
 * argument is given, the original default path F:/UnxUtils/test.c is used.
 *
 * Returns 0 after scanning, or EXIT_FAILURE if the file cannot be opened.
 */
int main(int argc, char *argv[]){
	const char *path = (argc > 1) ? argv[1] : "F:/UnxUtils/test.c";

	yyin = fopen(path, "r");
	if (yyin == NULL) {
		/* Report the failure instead of starting the scanner without the requested file. */
		perror(path);
		return EXIT_FAILURE;
	}

	yylex();
	fclose(yyin);
	return 0;
}
 /*
  * End-of-input hook for the generated scanner.
  * Always returns 1, i.e. there is no further input file to continue with.
  */
 int yywrap()
 {
 	enum { NO_MORE_INPUT = 1 };

 	return NO_MORE_INPUT;
 }