基于前面的实验,编写一个程序,对使用 C-- 语言书写的源代码进行语法分析并打印语法树。实验使用的主要分析工具是 Bison,程序使用 C 语言完成。
- 基本要求
a. 对程序进行语法分析,输出语法分析结果;
b. 能够识别多个位置的语法错误。
- 附加要求
a. 按照正确的缩进格式输出带有行号信息的语法树;
b. 能够处理简单的语法错误;
#ifndef TREE_H
#define TREE_H
#include <string.h>
(1)为了输出语法分析树,我们需要识别每个结点的类型,因此定义了结点类型为 int,并且规定了4种节点类型:
typedef int NODE_TYPE; // type tag stored in every syntax-tree node
// Non-terminal type
// NOTE(review): no non-terminal constant is defined in the lines visible here, yet the
// grammar actions pass NONTERMINAL to insertNode() — confirm its #define was not lost.
// Terminal types
#define INT_TYPE 1 // int literal
#define FLOAT_TYPE 2 // float literal
#define STRING_TYPE 3 // printable (text-valued) token
// One node of the syntax tree. A parent points at its first child; the remaining
// children are reached by following the `brother` links.
struct Node {
struct Node *child; // first child node
struct Node *brother; // next sibling node
int lineNum; // source line number
char *name; // node name
NODE_TYPE type; // node type tag, selects which union member is meaningful
union {
char *id_name; // token text (identifier name, operator, keyword, ...)
int intValue; // int value
float floatValue; // float value
struct Node *head; // root of the syntax tree; insertNode() overwrites it on every call
// NOTE(review): this is a variable definition inside a header. Every translation unit that
// includes it defines `head`; toolchains that default to -fno-common reject that at link
// time. Consider `extern` here plus one definition in a .c file.
// NOTE(review): the prototypes below use FILE, but only <string.h> is included above, so
// includers must include <stdio.h> first.
// Function declarations
struct Node *createNode(char *name, int line, NODE_TYPE type);
struct Node *insertNode(struct Node *node, char *name, int line, NODE_TYPE type);
void printNode(struct Node *node, FILE *f);
void printTree(struct Node* head, int depth, FILE *f);
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"
* 创建一个新的结点并返回
* @param name 结点的名字
* @param line 结点所在的行数
* @param type 结点的类型
* @return 新结点的指针
struct Node *createNode(char *name, int line, NODE_TYPE type) {
struct Node *pNode = (struct Node *) malloc(sizeof(struct Node));
pNode->brother = NULL; // 新结点的兄弟为空
pNode->child = NULL; // 新结点的子女为空
pNode->lineNum = line; // 记录行号,之后输出有用
pNode->type = type; // 记录结点类型,根据结点类型来输出
pNode->name = strdup(name); // 使用字符串拷贝赋予新结点的结点名
pNode->intValue = 1; // 将 int 值默认设为 1
return pNode; // 返回 pNode
* 向语法分析树中插入
* @param node 底层结点(孩子结点)
* @param name 父结点的名字
* @param line 父结点所在的行数
* @param type 父结点的类型
* @return 父结点的指针
struct Node *insertNode(struct Node *node, char *name, int line, NODE_TYPE type) {
struct Node *father = (struct Node *) malloc(sizeof(struct Node));
father->child = node; // 给输入结点一个爹
father->brother = NULL; // 父亲结点的兄弟为空
father->lineNum = line; // 记录行号,之后输出有用
father->type = type; // 记录结点类型,根据结点类型来输出
father->name = strdup(name); // 使用字符串拷贝赋予新结点的结点名
father->intValue = 1; // 将 int 值默认设为 1
head = father; // 将 head 置为 father
//printf("%s %d\n",name,line);
// if (node)
// fprintf(stdout, "%s -> %s line : %d\n", father -> name, node -> name, line);
return father; // 返回 father
* 根据结点的类型进行打印
* @param node 结点指针
* @param f 输出位置
void printNode(struct Node *node, FILE *f) {
if (node->type == STRING_TYPE)
fprintf(f, "%s : %s\n", node->name, node->id_name); // string 类型的结点输出结点名和结点内容
else if (node->type == INT_TYPE)
fprintf(f, "INT : %d\n", node->intValue); // int 类型的结点输出 INT 和结点值
else if (node->type == FLOAT_TYPE)
fprintf(f, "FLOAT : %f\n", node->floatValue); // float 类型的结点输出 FLOAT 和结点值
fprintf(f, "%s (%d)\n", node->name, node->lineNum); // 非终结符输出结点名字和行号
* 递归法遍历打印语法树
* @param head 语法树的根结点
* @param depth 语法树的深度
* @param f 输出位置
void printTree(struct Node *head, int depth, FILE *f) {
if (head == NULL) return; // 遇到空结点,函数结束
for (int i = 0; i < depth; ++i)
fprintf(f, "\t"); // 打印语法树所需的空白(制表符)
printNode(head, f);
printTree(head->child, depth + 1, f); // 考虑该结点的孩子,深度加一,进入下一层递归
printTree((head->brother), depth, f); // 考虑该结点的兄弟,深度不变,进入下一层递归
extern int errors;
/* ... */
. {
printf("Error type A at Line %d: Mysterious charaters \'%s\'.\n", yylineno, yytext);
// 默认制表符的长度
#define TABLEN 4
/* ... */
{TAB} { yycolumn += TABLEN; } /* 默认制表符长度为 4 */
(3)为了更好地记录编译器读取的位置信息,重定义了宏 YY_USER_ACTION。其中的行号信息已被语法分析部分通过 @1.first_line 用来标注语法树结点的行号;列号信息在此次实验中并没有用到,但是相信以后的实验或许会使用。
// Column counter for the current input line; starts at 1 and is set back to 1 by the
// scanner's newline rule.
int yycolumn = 1;
// Flex expands YY_USER_ACTION ahead of each matched rule's action. Here it stores the
// token's line and column span in yylloc and then advances the column by the token length.
// The trailing ';' is part of the macro because flex does not add one after it.
#define YY_USER_ACTION yylloc.first_line = yylloc.last_line = yylineno; \
yylloc.first_column = yycolumn; \
yylloc.last_column = yycolumn + yyleng - 1; \
yycolumn += yyleng;
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"
extern int yylineno;
void yyerror(char*);
void myerror(char *msg);
int yylex();
int errors = 0; // 记录找出的语法错误次数
#define YYSTYPE struct Node* // 将所有的 token 类型设置为 Node*
// 定义结合性和优先级次序
%left OR
%left AND
%nonassoc RELOP
%left STAR DIV
%right DOT LP LB RP RB
%nonassoc ELSE
#include "lex.yy.c"
int main(int argc, char **argv) {
if (argc <= 1) return 1;
FILE *f = fopen(argv[1], "r");
if (!f) {
return 1;
yylineno = 1;
// 输出语法树
f = fopen("output.txt", "w");
if (!f) {
return 1;
if (errors == 0) {
f = fopen("output.txt", "w");
printTree(head, 0, f);
return 0;
>>> flex lizi.l
>>> bison -d syntax.y
>>> gcc syntax.tab.c -lfl -ly -o parser tree.c
得到可执行程序 parser,使用 parser 读取文件进行编译:
>>> ./parser text.cmm
%{
/*
 * Scanner for the C-- language. Every token is delivered to the parser as a
 * leaf node of the syntax tree (yylval is a struct Node *).
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "syntax.tab.h"
#include "tree.h"

extern int errors;
YYLTYPE yylloc;

/* Column of the next character to be scanned (1-based). */
int yycolumn = 1;

/* Number of columns a tab character is assumed to occupy. */
#define TABLEN 4

/*
 * Flex expands this before every rule action: store the token's position in
 * yylloc and advance the column by the token length. The trailing ';' belongs
 * to the macro because flex does not add one.
 */
#define YY_USER_ACTION yylloc.first_line = yylloc.last_line = yylineno; \
    yylloc.first_column = yycolumn; \
    yylloc.last_column = yycolumn + yyleng - 1; \
    yycolumn += yyleng;

static int stringToken(char *name, int token);
static void advanceColumn(int c);
static int atEnd(int c);
%}

%option yylineno

digit       [0-9]
letter      [a-zA-Z]
unsignedint [1-9]{digit}*

/*
 * Integer literals. They carry no sign: a leading '-' is the MINUS token and
 * is handled by the grammar, otherwise "a-1" would be scanned as ID INT.
 */
INT10   (0|{unsignedint})
INT8    0(0|([1-7][0-7]*))
INT16   0[xX](0|([1-9A-Fa-f][0-9A-Fa-f]*))
INT     {INT10}|{INT8}|{INT16}

/*
 * Float literals, also unsigned. At least one digit is required next to the
 * decimal point, so "s.e5" is a field access and not the float ".e5".
 */
FRAC    ({digit}+\.{digit}*)|(\.{digit}+)
FLOAT1  {FRAC}
FLOAT2  {digit}+[eE][+-]?{digit}+
FLOAT3  {FRAC}[eE][+-]?{digit}+
FLOAT   {FLOAT1}|{FLOAT2}|{FLOAT3}

/* Remaining terminals */
ID          ({letter}|_)({letter}|_|{digit})*
SEMI        ;
COMMA       ,
ASSIGNOP    =
RELOP       (>|<|>=|<=|==|!=)
PLUS        \+
MINUS       \-
STAR        \*
DIV         \/
AND         &&
OR          \|\|
DOT         \.
NOT         !
TYPE        (int|float)
LP          \(
RP          \)
LB          \[
RB          \]
LC          \{
RC          \}
STRUCT      struct
IF          if
ELSE        else
RETURN      return
WHILE       while
LF          \n
OTHER       [\r]
TAB         [\t]
SPACE       [ ]

%%

    /* Skip a single-line comment; stops at the newline or at end of input. */
"//" {
    int c;
    while (!atEnd(c = input())) {
        advanceColumn(c);
        if (c == '\n') {
            break;
        }
    }
}

    /* Skip a block comment; the previous character is tracked so that a
       run of stars before the slash still closes the comment. */
"/*" {
    int prev = 0;
    int closed = 0;
    int c;
    while (!atEnd(c = input())) {
        advanceColumn(c);
        if (prev == '*' && c == '/') {
            closed = 1;
            break;
        }
        prev = c;
    }
    if (!closed) {
        errors++;
        printf("Error type B at Line %d: LACK OF */.\n", yylineno);
    }
}

    /* Terminals: build a leaf node and return the token code. */
{DOT}       { return stringToken("DOT", DOT); }
{TYPE}      { return stringToken("TYPE", TYPE); }
{INT} {
    struct Node *pNode = createNode("INT", yylineno, INT_TYPE);
    /* base 0 lets strtol decode decimal, octal (0...) and hex (0x...) */
    pNode->intValue = (int) strtol(yytext, NULL, 0);
    yylval = pNode;
    return INT;
}
{FLOAT} {
    struct Node *pNode = createNode("FLOAT", yylineno, FLOAT_TYPE);
    pNode->floatValue = (float) strtod(yytext, NULL);
    yylval = pNode;
    return FLOAT;
}
{SEMI}      { return stringToken("SEMI", SEMI); }
{COMMA}     { return stringToken("COMMA", COMMA); }
{ASSIGNOP}  { return stringToken("ASSIGNOP", ASSIGNOP); }
{RELOP}     { return stringToken("RELOP", RELOP); }
{PLUS}      { return stringToken("PLUS", PLUS); }
{MINUS}     { return stringToken("MINUS", MINUS); }
{STAR}      { return stringToken("STAR", STAR); }
{DIV}       { return stringToken("DIV", DIV); }
{AND}       { return stringToken("AND", AND); }
{OR}        { return stringToken("OR", OR); }
{NOT}       { return stringToken("NOT", NOT); }
{LP}        { return stringToken("LP", LP); }
{RP}        { return stringToken("RP", RP); }
{LB}        { return stringToken("LB", LB); }
{RB}        { return stringToken("RB", RB); }
{LC}        { return stringToken("LC", LC); }
{RC}        { return stringToken("RC", RC); }
{STRUCT}    { return stringToken("STRUCT", STRUCT); }
{RETURN}    { return stringToken("RETURN", RETURN); }
{IF}        { return stringToken("IF", IF); }
{ELSE}      { return stringToken("ELSE", ELSE); }
{WHILE}     { return stringToken("WHILE", WHILE); }
{ID}        { return stringToken("ID", ID); }

{LF}        { yycolumn = 1; /* a new line starts at column 1 */ }
{OTHER}     { /* carriage return: ignored */ }
{TAB}       { yycolumn += TABLEN - 1; /* YY_USER_ACTION already counted one column */ }
{SPACE}     { /* YY_USER_ACTION already counted the single column */ }

. {
    /* Any other character is a lexical error. */
    errors++;
    printf("Error type A at Line %d: Mysterious charaters \'%s\'.\n", yylineno, yytext);
}

%%

/* Build a STRING_TYPE leaf holding the current lexeme, publish it through
   yylval and return the token code to the parser. */
static int stringToken(char *name, int token) {
    struct Node *pNode = createNode(name, yylineno, STRING_TYPE);
    pNode->id_name = strdup(yytext);
    yylval = pNode;
    return token;
}

/* Column bookkeeping for characters read with input(), which bypass
   YY_USER_ACTION. */
static void advanceColumn(int c) {
    if (c == '\n') {
        yycolumn = 1;
    } else if (c == '\t') {
        yycolumn += TABLEN;
    } else {
        yycolumn += 1;
    }
}

/* input() signals end of input with EOF in older flex releases and with 0 in
   newer ones; accept both. */
static int atEnd(int c) {
    return c == EOF || c == 0;
}
%{
/*
 * Grammar for the C-- language. Every symbol's semantic value is a
 * struct Node *; each reduction builds the parent node with insertNode() and
 * chains the right-hand-side symbols through their `brother` links.
 */
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"

extern int yylineno;
void yyerror(char*);
void myerror(char *msg);
int yylex();
int errors = 0;                 /* number of errors reported so far */
#define YYSTYPE struct Node*    /* every semantic value is a tree node */
%}

%token INT                      /* int literal */
%token FLOAT                    /* float literal */
%token TYPE                     /* int | float */
%token LF                       /* newline \n */
%token ID                       /* identifier */
%token SEMI COMMA DOT           /* ; , . */
%token ASSIGNOP RELOP           /* = > < >= <= == != */
%token PLUS MINUS STAR DIV      /* + - * / */
%token AND OR NOT               /* && || ! */
%token LP RP LB RB LC RC        /* ( ) [ ] { } */
%token STRUCT                   /* struct */
%token RETURN                   /* return */
%token IF                       /* if */
%token ELSE                     /* else */
%token WHILE                    /* while */

/* Associativity and precedence, lowest first. NEGATE is a pseudo-token that
   gives unary minus a higher precedence than the binary operators. */
%right ASSIGNOP
%left OR
%left AND
%nonassoc RELOP
%left PLUS MINUS
%left STAR DIV
%right NEGATE NOT
%right DOT LP LB RP RB
%nonassoc LOWER_THAN_ELSE
%nonassoc ELSE

%%

/*-----------------------------------------|
|          High-level Definitions          |
|-----------------------------------------*/
Program : ExtDefList {
        $$ = insertNode($1, "Program", $1->lineNum, NONTERMINAL);
    }
    ;

ExtDefList : ExtDef ExtDefList {
        $$ = insertNode($1, "ExtDefList", @1.first_line, NONTERMINAL);
        $1->brother = $2;
    }
    | /* empty */ {
        $$ = insertNode(NULL, "ExtDefList", yylineno, NONTERMINAL);
    }
    ;

ExtDef : Specifier ExtDecList SEMI {
        $$ = insertNode($1, "ExtDef", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    | Specifier SEMI {
        $$ = insertNode($1, "ExtDef", @1.first_line, NONTERMINAL);
        $1->brother = $2;
    }
    | Specifier FunDec CompSt {
        $$ = insertNode($1, "ExtDef", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    ;

ExtDecList : VarDec {
        $$ = insertNode($1, "ExtDecList", @1.first_line, NONTERMINAL);
    }
    | VarDec COMMA ExtDecList {
        $$ = insertNode($1, "ExtDecList", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    ;

/*-----------------------------------------|
|                Specifiers                |
|-----------------------------------------*/
Specifier : TYPE {
        $$ = insertNode($1, "Specifier", @1.first_line, NONTERMINAL);
    }
    | StructSpecifier {
        $$ = insertNode($1, "Specifier", @1.first_line, NONTERMINAL);
    }
    ;

StructSpecifier : STRUCT OptTag LC DefList RC {
        $$ = insertNode($1, "StructSpecifier", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
        $3->brother = $4;
        $4->brother = $5;
    }
    | STRUCT Tag {
        $$ = insertNode($1, "StructSpecifier", @1.first_line, NONTERMINAL);
        $1->brother = $2;
    }
    ;

OptTag : ID {
        $$ = insertNode($1, "OptTag", @1.first_line, NONTERMINAL);
    }
    | /* empty */ {
        $$ = insertNode(NULL, "OptTag", yylineno, NONTERMINAL);
    }
    ;

Tag : ID {
        $$ = insertNode($1, "Tag", @1.first_line, NONTERMINAL);
    }
    ;

/*-----------------------------------------|
|               Declarators                |
|-----------------------------------------*/
VarDec : ID {
        $$ = insertNode($1, "VarDec", @1.first_line, NONTERMINAL);
    }
    | VarDec LB INT RB {
        $$ = insertNode($1, "VarDec", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
        $3->brother = $4;
    }
    ;

FunDec : ID LP VarList RP {
        $$ = insertNode($1, "FunDec", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
        $3->brother = $4;
    }
    | ID LP RP {
        $$ = insertNode($1, "FunDec", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    ;

VarList : ParamDec COMMA VarList {
        $$ = insertNode($1, "VarList", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    | ParamDec {
        $$ = insertNode($1, "VarList", @1.first_line, NONTERMINAL);
    }
    ;

ParamDec : Specifier VarDec {
        $$ = insertNode($1, "ParamDec", @1.first_line, NONTERMINAL);
        $1->brother = $2;
    }
    ;

/*-----------------------------------------|
|                Statements                |
|-----------------------------------------*/
CompSt : LC DefList StmtList RC {
        $$ = insertNode($1, "CompSt", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
        $3->brother = $4;
    }
    ;

StmtList : Stmt StmtList {
        $$ = insertNode($1, "StmtList", @1.first_line, NONTERMINAL);
        $1->brother = $2;
    }
    | /* empty */ {
        $$ = insertNode(NULL, "StmtList", yylineno, NONTERMINAL);
    }
    ;

Stmt : Exp SEMI {
        $$ = insertNode($1, "Stmt", @1.first_line, NONTERMINAL);
        $1->brother = $2;
    }
    | CompSt {
        $$ = insertNode($1, "Stmt", @1.first_line, NONTERMINAL);
    }
    | RETURN Exp SEMI {
        $$ = insertNode($1, "Stmt", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    | IF LP Exp RP Stmt %prec LOWER_THAN_ELSE {
        $$ = insertNode($1, "Stmt", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
        $3->brother = $4;
        $4->brother = $5;
    }
    | IF LP Exp RP Stmt ELSE Stmt {
        $$ = insertNode($1, "Stmt", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
        $3->brother = $4;
        $4->brother = $5;
        $5->brother = $6;
        $6->brother = $7;
    }
    | WHILE LP Exp RP Stmt {
        $$ = insertNode($1, "Stmt", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
        $3->brother = $4;
        $4->brother = $5;
    }
    | error RC {
        /* Parent rules link through $$, so give them a real node; the default
           $$ = $1 would pass on whatever stale value sits in the error slot. */
        $$ = insertNode(NULL, "Stmt", yylineno, NONTERMINAL);
        myerror("error RC:Missing \";\"");
    }
    ;

/*-----------------------------------------|
|            Local Definitions             |
|-----------------------------------------*/
DefList : Def DefList {
        $$ = insertNode($1, "DefList", @1.first_line, NONTERMINAL);
        $1->brother = $2;
    }
    | /* empty */ {
        $$ = insertNode(NULL, "DefList", yylineno, NONTERMINAL);
    }
    ;

Def : Specifier DecList SEMI {
        $$ = insertNode($1, "Def", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    ;

DecList : Dec {
        $$ = insertNode($1, "DecList", @1.first_line, NONTERMINAL);
    }
    | Dec COMMA DecList {
        $$ = insertNode($1, "DecList", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    ;

Dec : VarDec {
        $$ = insertNode($1, "Dec", @1.first_line, NONTERMINAL);
    }
    | VarDec ASSIGNOP Exp {
        $$ = insertNode($1, "Dec", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    ;

/*-----------------------------------------|
|               Expressions                |
|-----------------------------------------*/
Exp : Exp ASSIGNOP Exp {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    | Exp AND Exp {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    | Exp OR Exp {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    | Exp RELOP Exp {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    | Exp PLUS Exp {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    | Exp MINUS Exp {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    | Exp STAR Exp {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    | Exp DIV Exp {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    | LP Exp RP {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    | MINUS Exp %prec NEGATE {
        /* unary minus binds tighter than the binary operators */
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        $1->brother = $2;
    }
    | NOT Exp {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        $1->brother = $2;
    }
    | ID LP Args RP {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
        $3->brother = $4;
    }
    | ID LP RP {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    | Exp LB Exp RB {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
        $3->brother = $4;
    }
    | Exp DOT ID {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    | ID {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
    }
    | INT {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
    }
    | FLOAT {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
    }
    | error RB {
        /* placeholder node so that parent rules can keep linking */
        $$ = insertNode(NULL, "Exp", yylineno, NONTERMINAL);
        myerror("Missing \"]\"");
    }
    | error INT {
        $$ = insertNode(NULL, "Exp", yylineno, NONTERMINAL);
        myerror("error INT:Missing \"]\"");
    }
    | FLOAT error ID {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        myerror("Syntax error.");
    }
    | INT error ID {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        myerror("INT error ID:Missing \";\"");
    }
    | INT error INT {
        $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        myerror("INT error INT:Missing \";\"");
    }
    ;

Args : Exp COMMA Args {
        $$ = insertNode($1, "Args", @1.first_line, NONTERMINAL);
        $1->brother = $2;
        $2->brother = $3;
    }
    | Exp {
        $$ = insertNode($1, "Args", @1.first_line, NONTERMINAL);
    }
    ;

%%

#include "lex.yy.c"

/*
 * Parse the file named by argv[1] and, when no error was counted, write the
 * syntax tree to output.txt.
 *
 * output.txt is always (re)created, so a run that found errors leaves it empty.
 * Returns 1 on a usage or file error, 0 otherwise (also when errors were
 * reported).
 */
int main(int argc, char **argv) {
    if (argc <= 1) return 1;

    FILE *in = fopen(argv[1], "r");
    if (!in) {
        perror(argv[1]);
        return 1;
    }
    yylineno = 1;
    yyrestart(in);
    int rc = yyparse();
    fclose(in);

    /* yyerror() is silent, so a parse that failed without matching any error
       production has not been reported yet; without this the partial tree
       would be printed as if the input were valid. */
    if (rc != 0 && errors == 0) {
        myerror("Syntax error.");
    }

    /* Write the syntax tree (opened exactly once). */
    FILE *out = fopen("output.txt", "w");
    if (!out) {
        perror("output.txt");
        return 1;
    }
    if (errors == 0) {
        printTree(head, 0, out);
    }
    if (fclose(out) != 0) {
        perror("output.txt");
        return 1;
    }
    return 0;
}

/* Deliberately silent: diagnostics are produced by myerror() from the error
   productions (and by main() for unrecovered errors), which avoids printing
   bison's generic message in addition. */
void yyerror(char *msg) {
    (void) msg;
}

/* Report a syntax error at the current line and count it. */
void myerror(char *msg) {
    fprintf(stderr, "Error type B at Line %d: %s \n", yylineno, msg);
    errors++;
}