贵州大学-编译原理实验2-句法分析器
考虑下面的C语言子集的文法,其中<>括起来的为非终结符,粗体为终结符。
<program> → <statement_list>
<statement_list> → <statement>
| <statement_list> <statement>
<statement> → <compound_statement>
| <expression_statement>
| <selection_statement>
| <iteration_statement>
<compound_statement> → { }
| { <statement_list> }
<expression_statement> → ;
| <expression> ;
<expression> → <assignment_expression>
| <expression> , <assignment_expression>
<assignment_expression> → <equality_expression>
| ID = <assignment_expression>
<equality_expression> → <relational_expression>
| <equality_expression> == <relational_expression>
| <equality_expression> != <relational_expression>
<relational_expression> → <additive_expression>
| <relational_expression> < <additive_expression>
| <relational_expression> > <additive_expression>
| <relational_expression> <= <additive_expression>
| <relational_expression> >= <additive_expression>
<additive_expression> → <multiplicative_expression>
| <additive_expression> + <multiplicative_expression>
| <additive_expression> - <multiplicative_expression>
<multiplicative_expression> → <primary_expression>
| <multiplicative_expression> * <primary_expression>
| <multiplicative_expression> / <primary_expression>
<primary_expression> → ID | NUM | STRING | ( <expression> )
<selection_statement> → IF ( <expression> ) <statement>
<iteration_statement> → WHILE ( <expression> ) <statement>
| DO <statement> WHILE ( <expression> ) ;
要求:对给定的C语言程序进行句法分析,输出得到的分析树。
例如,下面的源程序代码
sum = 0.0;
x = 1.0;
while (x <= 100) sum = sum + x;
句法分析结果为
<program>
   |-- <statement_list>
      |-- <statement_list>
      |   |-- <statement_list>
      |   |   |-- <statement>
      |   |      |-- <expression_statement>
      |   |         |-- <expression>
      |   |         |   |-- <assignment_expression>
      |   |         |      |-- ID
      |   |         |      |-- =
      |   |         |      |-- <assignment_expression>
      |   |         |         |-- <equality_expression>
      |   |         |            |-- <relational_expression>
      |   |         |               |-- <additive_expression>
      |   |         |                  |-- <multiplicative_expression>
      |   |         |                     |-- <primary_expression>
      |   |         |                        |-- NUM
      |   |         |-- ;
      |   |-- <statement>
      |      |-- <expression_statement>
      |         |-- <expression>
      |         |   |-- <assignment_expression>
      |         |      |-- ID
      |         |      |-- =
      |         |      |-- <assignment_expression>
      |         |         |-- <equality_expression>
      |         |            |-- <relational_expression>
      |         |               |-- <additive_expression>
      |         |                  |-- <multiplicative_expression>
      |         |                     |-- <primary_expression>
      |         |                        |-- NUM
      |         |-- ;
      |-- <statement>
         |-- <iteration_statement>
            |-- WHILE
            |-- (
            |-- <expression>
            |   |-- <assignment_expression>
            |      |-- <equality_expression>
            |         |-- <relational_expression>
            |            |-- <relational_expression>
            |            |   |-- <additive_expression>
            |            |      |-- <multiplicative_expression>
            |            |         |-- <primary_expression>
            |            |            |-- ID
            |            |-- <=
            |            |-- <additive_expression>
            |               |-- <multiplicative_expression>
            |                  |-- <primary_expression>
            |                     |-- NUM
            |-- )
            |-- <statement>
               |-- <expression_statement>
                  |-- <expression>
                  |   |-- <assignment_expression>
                  |      |-- ID
                  |      |-- =
                  |      |-- <assignment_expression>
                  |         |-- <equality_expression>
                  |            |-- <relational_expression>
                  |               |-- <additive_expression>
                  |                  |-- <additive_expression>
                  |                  |   |-- <multiplicative_expression>
                  |                  |      |-- <primary_expression>
                  |                  |         |-- ID
                  |                  |-- +
                  |                  |-- <multiplicative_expression>
                  |                     |-- <primary_expression>
                  |                        |-- ID
                  |-- ;
讲道理,编译原理以后应该变成选修课了,不会还有人选这个课吧
qj 检查实验问的是真细 啊。(我错了,我们秦老师还是挺听劝的,爱了爱了)行,废话不多说,进入正题。
这是编译器编译的基本步骤,本次实验的目标是要构建出一个语法分析树,
所以也需要用到词法分析的内容。
词法分析产生的记号流 ----> 语法分析
当然语法分析可使用的工具挺多,比如最早出现的yacc ,现在很流行的antlr 等。
不像词法分析器,用了lex 之后几乎都不怎么需要写代码,尽管我们使用了自动生成语法的工具,仍然需要写一定代码。 所以现在很多语言其实都是自己写代码来实现语法分析,我也尝试过,写LL(1) ,恶心死我了,还是用工具吧。
首先要安装我们需要的工具
链接:https://pan.baidu.com/s/1-M-Mqgg9HezGnhHRZzu6pQ?pwd=cpa1
提取码:cpa1
下载后解压
大家应该知道啥是环境变量吧,要在命令行 cmd 中运行这些程序,我们需要将其路径添加到Path 中。
配置好后
flex -V //注意是大写的V
bison -V //同大写的V
gcc -v //注意是小写的v
看这几个是否有正确输出
接着大家自学一下基本的yacc 和flex 语法吧
https://blog.csdn.net/weixin_44007632/article/details/108666375
可以参考这个文档,讲的非常详细。
我来稍微说一下思路吧
其中的yylex 函数就是词法分析器产生的,下面有个 pro.l 文件,编译的时候会联合编译。
如果没有用 lex,自己手写 yylex 函数也是可以的,网上可以参考的资料挺多。
之后一部分就是文法定义了,
yacc 是自底向上的,所以当匹配到某条产生式的右部并进行归约时,它对应的动作代码才会执行:此时右部各个非终结符的子树已经建好并压在栈里,动作里把它们依次弹出、挂到新建的父结点下面,再把父结点压回栈中。
命名为 pro.y
%{
#include <stdio.h>
#include <stdlib.h> //malloc函数
#include <string.h>
#include <stdbool.h> // 引入 布尔类型
int yylex(void);
void yyerror(char *);
extern FILE * yyin; //声明yyin,yyout 变量,定义在后面
extern FILE * yyout;
/* N-ary tree node stored in first-child / next-sibling form. */
typedef struct TNode {
    char *label;        /* text printed for this node */
    struct TNode *bro;  /* next sibling, or NULL for the last child */
    struct TNode *son;  /* first child, or NULL for a leaf */
} *Tree;
Tree CreateTree(char *label) //创建一个label为传入参数的树节点
{
Tree T = (Tree)malloc(sizeof(struct TNode));
if (T != NULL) {
T->bro = NULL;
T->son = NULL;
T->label = malloc(strlen(label) + 1); // +1 用于存储字符串结尾的'\0'
if (T->label != NULL) {
strcpy(T->label, label);
}
}
return T;
}
/*
 * Insert `son` at the head of `father`'s child list.
 * Because insertion is at the front, children must be added from
 * right to left to end up in left-to-right order.
 */
void addSon(Tree father, Tree son)
{
    Tree previous_head = father->son;

    father->son = son;
    son->bro = previous_head;
}
/* Fixed-capacity array stack of tree nodes. */
typedef struct SNode {
    Tree *Data;     /* storage for the stacked nodes */
    int Top;        /* index of the top element, -1 when empty */
    int MaxSize;    /* number of slots in Data */
} *Stack;
Stack CreateStack( int MaxSize ) //创建一个长度为MaxSize的栈
{
Stack S = (Stack)malloc(sizeof(struct SNode));
S->Data = (Tree *)malloc(MaxSize * sizeof(Tree));
S->Top = -1;
S->MaxSize = MaxSize;
return S;
}
/* True when the top index has reached the last slot of the stack. */
bool IsFull( Stack S )
{
    const int last_slot = S->MaxSize - 1;

    return S->Top == last_slot;
}
/*
 * Push X onto S.
 * Returns true on success; when the stack is full it prints a message
 * to stdout, leaves the stack unchanged and returns false.
 */
bool push( Stack S, Tree X )
{
    if (!IsFull(S)) {
        S->Top += 1;
        S->Data[S->Top] = X;
        return true;
    }

    printf("堆栈满");
    return false;
}
/* True when the stack holds no elements (top index is -1). */
bool IsEmpty( Stack S )
{
    const int top = S->Top;

    return top == -1;
}
/*
 * Remove and return the top node of S.
 * On an empty stack it prints a message to stdout and returns NULL.
 */
Tree pop( Stack S )
{
    if (!IsEmpty(S)) {
        Tree top = S->Data[S->Top];
        S->Top -= 1;
        return top;
    }

    printf("堆栈空");
    return NULL;
}
/*
 * Prefix printed by disp() in front of every tree line. It grows by one
 * segment per nesting level; nesting depth grows with the number of
 * statements (statement_list is left-recursive), hence the generous size.
 */
char indent[4096];

/*
 * Append one indentation segment for a deeper tree level.
 *
 * m == 3: the parent has no following sibling -> three spaces.
 * otherwise: the parent has a following sibling -> a '|' guide plus
 *            three spaces (four characters).
 * The widths match what subIndent() removes for the same m. If the
 * segment does not fit, an error is reported and the program exits
 * instead of writing past the end of the buffer.
 */
void addIndent(int m)
{
    const char *segment = (m == 3) ? "   " : "|   ";
    size_t used = strlen(indent);
    size_t extra = strlen(segment);

    if (used + extra >= sizeof indent) {
        fprintf(stderr, "addIndent: tree is nested too deeply to print\n");
        exit(EXIT_FAILURE);
    }
    memcpy(indent + used, segment, extra + 1);   /* +1 copies the '\0' */
}
/*
 * Remove the last indentation segment from `indent`.
 *
 * m == 3 removes three characters, any other value removes four.
 * If the buffer holds fewer characters than that, it is simply emptied,
 * so the terminator is never written before the start of the buffer.
 */
void subIndent(int m)
{
    size_t width = (m == 3) ? 3 : 4;
    size_t len = strlen(indent);

    indent[len > width ? len - width : 0] = '\0';
}
/*
 * Print the tree rooted at T to stdout, one node per line.
 *
 * A node labelled "<program>" is printed bare and its children are
 * printed once beneath it. Any other node is printed as
 * "<indent>|-- <label>", followed by its subtree, and then each of its
 * following siblings is printed the same way. A NULL tree prints nothing.
 */
void disp(Tree T)
{
    if (T == NULL) {
        return;
    }

    if (T->label != NULL && strcmp(T->label, "<program>") == 0) {
        printf("%s\n", T->label);
        if (T->son != NULL) {
            addIndent(3);
            disp(T->son);
            subIndent(3);
        }
        /* Stop here: falling into the sibling loop would print the
         * whole tree a second time. */
        return;
    }

    for (; T != NULL; T = T->bro) {
        /* A node with a following sibling keeps a '|' guide running
         * down past its subtree (4 columns); the last child only needs
         * blank padding (3 columns). */
        int width = (T->bro != NULL) ? 4 : 3;

        printf("%s|-- %s\n", indent, T->label != NULL ? T->label : "");
        if (T->son != NULL) {
            addIndent(width);
            disp(T->son);
            subIndent(width);
        }
    }
}
// Stack of subtrees shared by the grammar actions: each action pops the
// subtrees of its right-hand side and pushes the node it builds. It is
// created in main() before parsing starts.
Stack S = NULL;
// Node most recently created by a grammar action; main() also stores the
// tree popped after parsing here before printing it.
Tree current = NULL;
%}
/* Token kinds shared with the scanner. BREAK, CHAR, DOUBLE, ELSE, INT,
   RETURN, VOID and ERROR are declared here but are not used by any
   grammar rule in this file, so input containing them is a syntax error. */
%token BREAK CHAR DO DOUBLE ELSE IF INT RETURN VOID WHILE ID NUM STRING ADD SUB MUL DIV GT GE LT LE EQ NE ASSIGN LB RB LR RR COMMA SEMI ERROR
%%
program : statement_list {
        /* Wrap the finished statement list in the root node. */
        Tree root = CreateTree("<program>");
        addSon(root, pop(S));
        push(S, root);
        current = root;
    }
    ;
statement_list : statement {
        /* One-statement list: adopt the statement on top of the stack. */
        Tree node = CreateTree("<statement_list>");
        addSon(node, pop(S));
        push(S, node);
        current = node;
    }
    | statement_list statement {
        /* The stack holds the right-hand side in source order, so the
           trailing statement comes off first. addSon() inserts at the
           front, hence children are attached right to left. */
        Tree last_stmt = pop(S);
        Tree earlier = pop(S);
        Tree node = CreateTree("<statement_list>");
        addSon(node, last_stmt);
        addSon(node, earlier);
        push(S, node);
        current = node;
    }
    ;
statement : compound_statement {
        /* <statement> is a plain wrapper around whichever kind of
           statement was just reduced; all four alternatives do the same. */
        Tree node = CreateTree("<statement>");
        addSon(node, pop(S));
        push(S, node);
        current = node;
    }
    | expression_statement {
        Tree node = CreateTree("<statement>");
        addSon(node, pop(S));
        push(S, node);
        current = node;
    }
    | selection_statement {
        Tree node = CreateTree("<statement>");
        addSon(node, pop(S));
        push(S, node);
        current = node;
    }
    | iteration_statement {
        Tree node = CreateTree("<statement>");
        addSon(node, pop(S));
        push(S, node);
        current = node;
    }
    ;
compound_statement : LB RB {
        /* Empty block. LB and RB are the curly-brace tokens, so the
           leaves are labelled with braces (not parentheses). Children
           are attached right to left because addSon() inserts at the
           front of the child list. */
        current = CreateTree("<compound_statement>");
        addSon(current,CreateTree("}"));
        addSon(current,CreateTree("{"));
        push(S,current);
    }
    | LB statement_list RB {
        /* Block with a body: the statement list is on top of the stack. */
        current = CreateTree("<compound_statement>");
        addSon(current,CreateTree("}"));
        addSon(current,pop(S));
        addSon(current,CreateTree("{"));
        push(S,current);
    }
    ;
expression_statement : SEMI {
        /* Empty statement: the only child is the ';' terminal. */
        Tree node = CreateTree("<expression_statement>");
        addSon(node, CreateTree(";"));
        push(S, node);
        current = node;
    }
    | expression SEMI {
        /* Children are attached right to left: ';' first, then the
           expression subtree taken from the stack. */
        Tree expr = pop(S);
        Tree node = CreateTree("<expression_statement>");
        addSon(node, CreateTree(";"));
        addSon(node, expr);
        push(S, node);
        current = node;
    }
    ;
expression : assignment_expression {
        Tree node = CreateTree("<expression>");
        addSon(node, pop(S));
        push(S, node);
        current = node;
    }
    | expression COMMA assignment_expression {
        /* Right operand was reduced last, so it is popped first. */
        Tree right = pop(S);
        Tree left = pop(S);
        Tree node = CreateTree("<expression>");
        addSon(node, right);
        addSon(node, CreateTree(","));
        addSon(node, left);
        push(S, node);
        current = node;
    }
    ;
assignment_expression : equality_expression {
        Tree node = CreateTree("<assignment_expression>");
        addSon(node, pop(S));
        push(S, node);
        current = node;
    }
    | ID ASSIGN assignment_expression {
        /* Only the right-hand side has a subtree on the stack; ID and
           '=' are terminals and become fresh leaves. */
        Tree value = pop(S);
        Tree node = CreateTree("<assignment_expression>");
        addSon(node, value);
        addSon(node, CreateTree("="));
        addSon(node, CreateTree("ID"));
        push(S, node);
        current = node;
    }
    ;
equality_expression : relational_expression {
        Tree node = CreateTree("<equality_expression>");
        addSon(node, pop(S));
        push(S, node);
        current = node;
    }
    | equality_expression EQ relational_expression {
        /* Binary operator: pop right operand, then left; attach
           right-to-left so the children read left, operator, right. */
        Tree right = pop(S);
        Tree left = pop(S);
        Tree node = CreateTree("<equality_expression>");
        addSon(node, right);
        addSon(node, CreateTree("=="));
        addSon(node, left);
        push(S, node);
        current = node;
    }
    | equality_expression NE relational_expression {
        Tree right = pop(S);
        Tree left = pop(S);
        Tree node = CreateTree("<equality_expression>");
        addSon(node, right);
        addSon(node, CreateTree("!="));
        addSon(node, left);
        push(S, node);
        current = node;
    }
    ;
relational_expression : additive_expression {
        Tree node = CreateTree("<relational_expression>");
        addSon(node, pop(S));
        push(S, node);
        current = node;
    }
    | relational_expression LT additive_expression {
        /* Binary operator: pop right operand, then left; attach
           right-to-left so the children read left, operator, right.
           The three alternatives below differ only in the operator. */
        Tree right = pop(S);
        Tree left = pop(S);
        Tree node = CreateTree("<relational_expression>");
        addSon(node, right);
        addSon(node, CreateTree("<"));
        addSon(node, left);
        push(S, node);
        current = node;
    }
    | relational_expression GT additive_expression {
        Tree right = pop(S);
        Tree left = pop(S);
        Tree node = CreateTree("<relational_expression>");
        addSon(node, right);
        addSon(node, CreateTree(">"));
        addSon(node, left);
        push(S, node);
        current = node;
    }
    | relational_expression LE additive_expression {
        Tree right = pop(S);
        Tree left = pop(S);
        Tree node = CreateTree("<relational_expression>");
        addSon(node, right);
        addSon(node, CreateTree("<="));
        addSon(node, left);
        push(S, node);
        current = node;
    }
    | relational_expression GE additive_expression {
        Tree right = pop(S);
        Tree left = pop(S);
        Tree node = CreateTree("<relational_expression>");
        addSon(node, right);
        addSon(node, CreateTree(">="));
        addSon(node, left);
        push(S, node);
        current = node;
    }
    ;
additive_expression : multiplicative_expression {
        Tree node = CreateTree("<additive_expression>");
        addSon(node, pop(S));
        push(S, node);
        current = node;
    }
    | additive_expression ADD multiplicative_expression {
        /* Binary operator: pop right operand, then left; attach
           right-to-left so the children read left, operator, right. */
        Tree right = pop(S);
        Tree left = pop(S);
        Tree node = CreateTree("<additive_expression>");
        addSon(node, right);
        addSon(node, CreateTree("+"));
        addSon(node, left);
        push(S, node);
        current = node;
    }
    | additive_expression SUB multiplicative_expression {
        Tree right = pop(S);
        Tree left = pop(S);
        Tree node = CreateTree("<additive_expression>");
        addSon(node, right);
        addSon(node, CreateTree("-"));
        addSon(node, left);
        push(S, node);
        current = node;
    }
    ;
multiplicative_expression : primary_expression {
        Tree node = CreateTree("<multiplicative_expression>");
        addSon(node, pop(S));
        push(S, node);
        current = node;
    }
    | multiplicative_expression MUL primary_expression {
        /* Binary operator: pop right operand, then left; attach
           right-to-left so the children read left, operator, right. */
        Tree right = pop(S);
        Tree left = pop(S);
        Tree node = CreateTree("<multiplicative_expression>");
        addSon(node, right);
        addSon(node, CreateTree("*"));
        addSon(node, left);
        push(S, node);
        current = node;
    }
    | multiplicative_expression DIV primary_expression {
        Tree right = pop(S);
        Tree left = pop(S);
        Tree node = CreateTree("<multiplicative_expression>");
        addSon(node, right);
        addSon(node, CreateTree("/"));
        addSon(node, left);
        push(S, node);
        current = node;
    }
    ;
primary_expression : ID {
        /* Terminal operands have nothing on the stack; each becomes a
           leaf named after its token kind. */
        Tree node = CreateTree("<primary_expression>");
        addSon(node, CreateTree("ID"));
        push(S, node);
        current = node;
    }
    | NUM {
        Tree node = CreateTree("<primary_expression>");
        addSon(node, CreateTree("NUM"));
        push(S, node);
        current = node;
    }
    | STRING {
        Tree node = CreateTree("<primary_expression>");
        addSon(node, CreateTree("STRING"));
        push(S, node);
        current = node;
    }
    | LR expression RR {
        /* Parenthesised expression: ')' , inner subtree, '(' attached
           right to left. */
        Tree inner = pop(S);
        Tree node = CreateTree("<primary_expression>");
        addSon(node, CreateTree(")"));
        addSon(node, inner);
        addSon(node, CreateTree("("));
        push(S, node);
        current = node;
    }
    ;
selection_statement : IF LR expression RR statement {
        /* The body statement was reduced last and is popped first; the
           condition lies beneath it. Children are attached right to
           left: body, ')', condition, '(', IF. */
        Tree body = pop(S);
        Tree condition = pop(S);
        Tree node = CreateTree("<selection_statement>");
        addSon(node, body);
        addSon(node, CreateTree(")"));
        addSon(node, condition);
        addSon(node, CreateTree("("));
        addSon(node, CreateTree("IF"));
        push(S, node);
        current = node;
    }
    ;
iteration_statement : WHILE LR expression RR statement {
        /* while loop: the body statement is on top of the stack with
           the condition beneath it. Children are attached right to
           left because addSon() inserts at the front. */
        current = CreateTree("<iteration_statement>");
        addSon(current,pop(S));
        addSon(current,CreateTree(")"));
        addSon(current,pop(S));
        addSon(current,CreateTree("("));
        addSon(current,CreateTree("WHILE"));
        push(S,current);
    }
    | DO statement WHILE LR expression RR SEMI {
        /* do-while loop: a fresh node must be created first; otherwise
           the children would be hung on whatever node the previous
           action left in `current`. Here the condition is on top of
           the stack and the body statement beneath it. */
        current = CreateTree("<iteration_statement>");
        addSon(current,CreateTree(";"));
        addSon(current,CreateTree(")"));
        addSon(current,pop(S));
        addSon(current,CreateTree("("));
        addSon(current,CreateTree("WHILE"));
        addSon(current,pop(S));
        addSon(current,CreateTree("DO"));
        push(S,current);
    }
    ;
%%
/*
 * Report a parser error: writes "error:" followed by the message and a
 * newline to stderr.
 */
void yyerror(char *str)
{
    FILE *sink = stderr;

    fprintf(sink, "error:%s\n", str);
}
/*
 * End-of-input hook for the scanner. Always returns 1; flex documents a
 * non-zero result as "there is no further input to scan".
 */
int yywrap()
{
    const int no_more_input = 1;

    return no_more_input;
}
/*
 * Entry point: parse the input and print the resulting tree to stdout.
 *
 * argv[1] (optional): file to parse; without it the scanner keeps its
 *                     default input stream.
 * argv[2] (optional): file installed as the scanner's yyout stream.
 *
 * Returns 0 when parsing succeeded, EXIT_FAILURE when the input file
 * cannot be opened, the stack cannot be created, or the parser reports
 * an error. After a syntax error, whatever subtree is on top of the
 * stack is still printed.
 */
int main(int argc, char **argv)
{
    FILE *input = NULL;
    FILE *output = NULL;

    if (argc > 1) {
        input = fopen(argv[1], "r");
        if (input == NULL) {
            /* Parsing some other stream when a file was explicitly
             * named would be misleading, so stop here. */
            perror(argv[1]);
            return EXIT_FAILURE;
        }
        yyin = input;
    }
    if (argc > 2) {
        output = fopen(argv[2], "w");
        if (output != NULL) {
            yyout = output;
        } else {
            /* The tree itself goes to stdout, so this is only a warning. */
            perror(argv[2]);
        }
    }

    S = CreateStack(65535);
    if (S == NULL) {
        fprintf(stderr, "error:cannot allocate the parse stack\n");
        return EXIT_FAILURE;
    }

    int status = yyparse();

    /* The stack is empty if the parser failed before the first
     * reduction; there is nothing to print in that case. */
    current = IsEmpty(S) ? NULL : pop(S);
    if (current != NULL) {
        disp(current);
    }

    if (input != NULL) {
        fclose(input);
    }
    if (output != NULL) {
        fclose(output);
    }
    return status == 0 ? 0 : EXIT_FAILURE;
}
命名为 pro.l
%{
/* Scanner for the parser in pro.y: it returns the token kinds declared
   in y.tab.h and carries no semantic values. */
#include<stdio.h>
#include "y.tab.h"
void yyerror(char *);
/* Must agree with the definition in pro.y and with the declaration in
   the generated scanner, both of which return int. */
int yywrap(void);
%}
/* Character classes and literal fragments used by the patterns below. */
DIGIT [0-9]
LETTER [a-zA-Z_]
HEX [a-fA-F0-9]
/* Exponent part of a floating constant, e.g. e+10. */
EX [Ee][+-]?{DIGIT}+
/* Floating and integer suffixes. */
FS (f|F|l|L)
IS (u|U|l|L)*
/* Identifier: a letter or underscore followed by letters, digits or underscores. */
XID {LETTER}({LETTER}|{DIGIT})*
/* Numeric constant: hex, leading-zero or decimal integers, character
   constants, and floating constants with exponent and/or decimal point. */
XNUM 0[xX]{HEX}+{IS}?|0{DIGIT}+{IS}?|{DIGIT}+{IS}?|L?'(\\.|[^\\'])+'|{DIGIT}+{EX}{FS}?|{DIGIT}*"."{DIGIT}+({EX})?{FS}?|{DIGIT}+"."{DIGIT}*({EX})?{FS}?
/* Double-quoted string literal with backslash escapes. */
XSTRING L?\"(\\.|[^\\"])*\"
/* Block comment delimited by slash-star and star-slash. */
COMMENT "/*"([^\*]|(\*)*[^\*/])*(\*)*"*/"
%%
{COMMENT}   { /* block comments produce no token */ }
break       {return BREAK;}
char        {return CHAR;}
do          {return DO;}
double      {return DOUBLE;}
else        {return ELSE;}
if          {return IF;}
int         {return INT;}
return      {return RETURN;}
void        {return VOID;}
while       {return WHILE;}
"+"         {return ADD;}
"-"         { /* ASCII hyphen-minus, the character source code uses for subtraction (not a typographic dash) */ return SUB; }
"*"         {return MUL;}
"/"         {return DIV;}
">"         {return GT;}
">="        {return GE;}
"<"         {return LT;}
"<="        {return LE;}
"=="        {return EQ;}
"!="        {return NE;}
"="         {return ASSIGN;}
"{"         {return LB;}
"}"         {return RB;}
"("         {return LR;}
")"         {return RR;}
","         {return COMMA;}
";"         {return SEMI;}
{XID}       { /* listed after the keywords so that keywords win ties of equal length */ return ID; }
{XNUM}      {return NUM;}
{XSTRING}   {return STRING;}
\n          {}
[ \t\r]+    { /* blanks, tabs and carriage returns (CRLF input) are skipped */ }
.           { /* any other character is reported to the parser as ERROR */ return ERROR; }
%%
每次编译这两玩意,非常麻烦
可以简单写个脚本
命名为 test.bat
@echo off
rem Build a parser from NAME.y and NAME.l:  test.bat NAME
rem Stops at the first tool that fails, so a stale y.tab.c or lex.yy.c
rem is never compiled after a bison or flex error.
if "%~1"=="" (
    echo Usage: test.bat NAME
    exit /b 1
)
set str=%~1
@echo on
bison --yacc -dv "%str%.y" || exit /b 1
flex "%str%.l" || exit /b 1
gcc -o "%str%" y.tab.c lex.yy.c || exit /b 1
在cmd 中打开,
输入test.bat pro 就会自动帮你编译
最后我们执行 .exe 文件就行了,就得到了分析树
开头第一句报错是因为老师给的文法没有 else 。没得事的