2021-10-29 编译原理实验2——语法分析

一、实验要求

基于前面的实验,编写一个程序对使用 C-- 语言书写的源代码进行语法分析,并打印语法树。实验使用的主要分析工具是 Bison,程序使用 C 语言完成。

  • 基本要求
    a. 对程序进行语法分析,输出语法分析结果;
    b. 能够识别多个位置的语法错误。
  • 附加要求
    a. 按照正确的缩进格式输出带有行号信息的语法树;
    b. 能够处理简单的语法错误;

二、tree.h

#ifndef TREE_H
#define TREE_H

#include <string.h>

(1)为了输出语法分析树,我们需要识别每个结点的类型,因此定义了结点类型为 int,并且规定了4种节点类型:

typedef int NODE_TYPE;        // tag that tells printNode how to print a node
// Non-terminal node
#define NONTERMINAL 0         
// Terminal node kinds
#define INT_TYPE 1            // integer literal (value kept in intValue)
#define FLOAT_TYPE 2          // float literal (value kept in floatValue)
#define STRING_TYPE 3         // printable token (text kept in id_name)

(2)规定结点的类型,这里采用“孩子—兄弟”结点:

/* Syntax-tree node in child/brother (first-child, next-sibling) form. */
struct Node {
    struct Node *child;       // first child, or NULL
    struct Node *brother;     // next sibling, or NULL
    int lineNum;              // source line number (printed for non-terminals)
    char *name;               // node name, e.g. "Exp" or "ID"
    NODE_TYPE type;           // one of NONTERMINAL / INT_TYPE / FLOAT_TYPE / STRING_TYPE
    union {                   // value slot; which member is meaningful depends on type
        char *id_name;        // token text, printed for STRING_TYPE nodes
        int intValue;         // value printed for INT_TYPE nodes
        float floatValue;     // value printed for FLOAT_TYPE nodes
    };
};

(3)定义指向语法树根结点的全局指针 head:每次调用 insertNode 新建一个父结点时,都会让 head 指向这个新的父结点,因此分析结束时 head 指向整棵语法树的根结点:

// NOTE(review): this is a definition placed in a header that is included by both
// tree.c and the bison-generated parser; toolchains that default to -fno-common
// reject the duplicate symbol at link time. Consider `extern struct Node *head;`
// here plus a single definition in tree.c.
struct Node *head;              // root of the syntax tree; insertNode() re-points it at each new parent

(4)有关语法树的函数声明:

// Function declarations for the syntax-tree helpers implemented in tree.c.
// FILE comes from <stdio.h>, which this header does not include itself, so
// every includer has to include <stdio.h> before tree.h.
struct Node *createNode(char *name, int line, NODE_TYPE type);
struct Node *insertNode(struct Node *node, char *name, int line, NODE_TYPE type);
void printNode(struct Node *node, FILE *f);
void printTree(struct Node* head, int depth, FILE *f);
#endif

三、tree.c

#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"

(1)创建新结点。这里创建的结点相当于叶子结点,是终结符对应创建的结点,因此主要应用于lizi.l中,即flex中终结符匹配时对应的操作。

/**
 * Create a new leaf node and return it.
 *
 * The node has no child and no brother. Its value union is zeroed and then
 * intValue is set to 1; callers overwrite the member that matches `type`.
 * If memory cannot be allocated the process prints a diagnostic and exits,
 * so the returned pointer is never NULL.
 *
 * @param name node name; copied with strdup, the caller keeps its own string
 * @param line line number stored in the node
 * @param type node type (NONTERMINAL, INT_TYPE, FLOAT_TYPE or STRING_TYPE)
 * @return pointer to the newly allocated node
 */
struct Node *createNode(char *name, int line, NODE_TYPE type) {
    /* calloc zeroes the whole value union, so id_name never holds stray bytes */
    struct Node *pNode = calloc(1, sizeof *pNode);
    if (pNode == NULL) {
        perror("createNode");
        exit(EXIT_FAILURE);
    }
    pNode->brother = NULL;         // a fresh node has no sibling yet
    pNode->child = NULL;           // ... and no children
    pNode->lineNum = line;         // kept for printing
    pNode->type = type;            // selects the print format in printNode
    pNode->intValue = 1;           // default int value
    pNode->name = strdup(name);    // private copy of the node name
    if (pNode->name == NULL) {
        perror("createNode");
        exit(EXIT_FAILURE);
    }
    return pNode;
}

(2)插入新结点。因为算法的思想是自底向上,因此这里插入的是双亲结点。新创建一个结点,赋予其初值,然后设置其子女为我们给定的结点,最后将head指向这个双亲结点,使得每次创建新结点完成后head都指向语法树的根结点。

/**
 * Create a parent node above `node` and make it the current tree root.
 *
 * The new node takes `node` as its first child, has no brother, and is
 * stored in the global `head`. Its value union is zeroed and then intValue
 * is set to 1. If memory cannot be allocated the process prints a
 * diagnostic and exits, so the returned pointer is never NULL.
 *
 * @param node first child of the new parent (may be NULL for an empty production)
 * @param name name of the parent node; copied with strdup
 * @param line line number stored in the parent node
 * @param type type of the parent node
 * @return pointer to the newly allocated parent node
 */
struct Node *insertNode(struct Node *node, char *name, int line, NODE_TYPE type) {
    /* calloc zeroes the whole value union, so id_name never holds stray bytes */
    struct Node *father = calloc(1, sizeof *father);
    if (father == NULL) {
        perror("insertNode");
        exit(EXIT_FAILURE);
    }
    father->child = node;           // adopt the given node as first child
    father->brother = NULL;         // the parent has no sibling yet
    father->lineNum = line;         // kept for printing
    father->type = type;            // selects the print format in printNode
    father->intValue = 1;           // default int value
    father->name = strdup(name);    // private copy of the node name
    if (father->name == NULL) {
        perror("insertNode");
        exit(EXIT_FAILURE);
    }
    head = father;                  // the newest parent is the current root
    return father;
}

(3)根据结点的类型打印输出,就相当于打印语法树的一行信息。

/**
 * Print one line of the syntax tree for a single node.
 *
 * The format depends on the node type: printable tokens show their name and
 * text, INT/FLOAT tokens show their value, and every other node shows its
 * name followed by its line number.
 *
 * @param node node to print
 * @param f    stream to write to
 */
void printNode(struct Node *node, FILE *f) {
    switch (node->type) {
    case STRING_TYPE:
        fprintf(f, "%s : %s\n", node->name, node->id_name);
        break;
    case INT_TYPE:
        fprintf(f, "INT : %d\n", node->intValue);
        break;
    case FLOAT_TYPE:
        fprintf(f, "FLOAT : %f\n", node->floatValue);
        break;
    default:
        /* non-terminal: name and line number */
        fprintf(f, "%s (%d)\n", node->name, node->lineNum);
        break;
    }
}

(4)采用深度优先(DFS)的方式遍历语法树,通过函数递归实现。这里使用 fprintf 函数,既可以指定输出文件,也可以直接输出到控制台。这里选择输出到 output.txt 文件中,方便查看、编辑和调试。

/**
 * Print the syntax tree depth-first.
 *
 * Each node is printed on its own line, indented by one tab per level,
 * followed by its whole subtree and then by its brothers at the same depth.
 *
 * @param head   first node to print (NULL prints nothing)
 * @param depth  indentation level of `head`
 * @param f      stream to write to
 */
void printTree(struct Node *head, int depth, FILE *f) {
    /* walk the sibling chain iteratively; recurse only into children */
    for (struct Node *cur = head; cur != NULL; cur = cur->brother) {
        for (int level = depth; level > 0; --level)
            fputc('\t', f);
        printNode(cur, f);
        printTree(cur->child, depth + 1, f);
    }
}

四、lizi.l的修改

(1)errors表示编译器识别出的语法错误数量,方便判断程序是否有语法错误从而输出语法树。

extern int errors;

/* ... */

. {
    /* any character no other rule accepts is a lexical error */
    errors++;
    printf("Error type A at Line %d: Mysterious characters \'%s\'.\n", yylineno, yytext);
}

(2)吸取上次实验的教训,这里设置了tab的长度,记录编译程序读取到的位置列信息:

// 默认制表符的长度
#define TABLEN 4

/* ... */

{TAB} { yycolumn += TABLEN - yyleng; }    /* YY_USER_ACTION already added yyleng; a tab counts as TABLEN columns in total */

(3)为了更好的记录编译器读取的位置信息,重定义了宏 YY_USER_ACTION。虽然此次实验并没有用到,但是相信以后的实验或许会使用。

// 初始化列数
int yycolumn = 1;

// 每次定义用户操作之前执行的代码
#define YY_USER_ACTION yylloc.first_line = yylloc.last_line = yylineno; \
    yylloc.first_column = yycolumn; \
    yylloc.last_column = yycolumn + yyleng - 1; \
    yycolumn += yyleng;

五、syntax.y的修改与main函数

(1)syntax.y新添加与修改的部分

%{
    #include <stdio.h>
    #include <stdlib.h>
    #include "tree.h"

    extern int yylineno;
    void yyerror(char*);
    void myerror(char *msg);
    int yylex();
    int errors = 0;                 // 记录找出的语法错误次数
    #define YYSTYPE struct Node*    // 将所有的 token 类型设置为 Node*
%}

// 定义结合性和优先级次序
%right ASSIGNOP
%left OR
%left AND
%nonassoc RELOP
%left PLUS MINUS
%left STAR DIV
%right NAGATE NOT
%right DOT LP LB RP RB
%nonassoc LOWER_THAN_ELSE
%nonassoc ELSE

(2)main函数的变动

#include "lex.yy.c"

int main(int argc, char **argv) {
    if (argc <= 1) return 1;
    FILE *f = fopen(argv[1], "r");
    if (!f) {
        perror(argv[1]);
        return 1;
    }
    yylineno = 1;
    yyrestart(f);
    yyparse();

如果语法错误为0个,打印语法树到output.txt中

    // 输出语法树
    f = fopen("output.txt", "w");
    if (!f) {   
        perror(argv[1]);
        return 1;
    }
    if (errors == 0) {
        f = fopen("output.txt", "w");
        printTree(head, 0, f);
    }
    return 0;
}

六、编译代码

输入指令:

>>> flex lizi.l
>>> bison -d syntax.y
>>> gcc syntax.tab.c -lfl -ly -o parser tree.c

得到可执行程序 parser,使用 parser 读取文件进行编译:

>>> ./parser text.cmm

七、附完整代码

  1. lizi.l

    %{
        #include "syntax.tab.h"
        #include "tree.h"
    
        extern int errors;
        YYLTYPE yylloc;
    
        // 初始化列数
    	int yycolumn = 1;
    
        // 默认制表符的长度
        #define TABLEN 4
    
        // 每次定义用户操作之前执行的代码
        #define YY_USER_ACTION yylloc.first_line = yylloc.last_line = yylineno; \
            yylloc.first_column = yycolumn; \
            yylloc.last_column = yycolumn + yyleng - 1; \
            yycolumn += yyleng;
    %}
    
    %option yylineno
    
    digit [0-9]
    letter [a-zA-Z]
    unsignedint [1-9]{digit}*
    
    /* Integer literals. They carry no sign: a leading + or - is lexed as PLUS or MINUS, so "a-1" is ID MINUS INT. */
    /* INT16 is defined but not referenced by INT, so hexadecimal literals are not recognised. */
    INT10 0|{unsignedint}
    INT8 0(0|([1-7][0-7]*))
    INT16 (0(x|X))(0|([1-9A-Fa-f][0-9A-Fa-f]*))
    INT {INT10}|{INT8}
    
    /* Floating-point literals, unsigned for the same reason. */
    FLOAT1 ({digit}+)?\.{digit}+?
    FLOAT2 ({digit}+)[eE][+-]?({digit})+
    FLOAT3 ({digit}+)?\.({digit}+)?([eE][+-]?{digit}+)
    FLOAT {FLOAT1}|{FLOAT2}|{FLOAT3}
    
    /* 其余终结符的匹配 */
    ID ({letter}|_)({letter}|_|{digit})*
    SEMI ;
    COMMA ,
    ASSIGNOP =
    RELOP (>|<|>=|<=|==|!=)
    PLUS \+
    MINUS \-
    STAR \*
    DIV \/
    AND &&
    OR \|\|
    DOT \.
    NOT !
    TYPE (int|float)
    LP \(
    RP \)
    LB \[
    RB \]
    LC \{
    RC \}
    STRUCT struct
    IF if
    ELSE else
    RETURN return
    WHILE while
    LF \n
    OTHER [\r]
    TAB [\t]
    SPACE [ ]
    
    %%
    
     /*-----------------------------------------|
     |        Skip single-line comments         |
     |-----------------------------------------*/
    "//"[^\n]* {
        /* The pattern swallows everything up to, but not including, the line
           break. The {LF} rule then sees the newline and resets yycolumn, and
           a comment on the last line of a file without '\n' simply ends at
           end of input. */
    }
    
     /*-----------------------------------------|
     |         Skip multi-line comments         |
     |-----------------------------------------*/
    "/*" {
        int prev = 0;       /* character consumed just before cur */
        int cur;            /* int, not char: must be able to hold the end-of-input value */
        int closed = 0;
        /* input() reports end of input as 0 (EOF on older flex releases) */
        while ((cur = input()) > 0) {
            if (cur == '\n')
                yycolumn = 1;
            else if (cur == '\t')
                yycolumn += TABLEN;
            else
                yycolumn++;
            /* remembering the previous character lets "**" followed by "/" close the comment */
            if (prev == '*' && cur == '/') {
                closed = 1;
                break;
            }
            prev = cur;
        }
        if (!closed) {
            errors++;       /* an unterminated comment must suppress the tree output */
            printf("Error type B at Line %d: LACK OF */.\n", yylineno);
        }
    }
    
    
     /*-----------------------------------------|
     |          终结符的匹配及对应的其操作          |
     |-----------------------------------------*/
    {DOT} { 
        struct Node *pNode = createNode("DOT", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return DOT; 
    }
    {TYPE} { 
        struct Node *pNode = createNode("TYPE", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return TYPE; 
    }
    {INT} {
        /* Base 0 makes strtol honour the leading-0 octal form that INT8
           accepts; atoi would read "017" as decimal 17. */
        struct Node *pNode = createNode("INT", 0, INT_TYPE);
        yylval = pNode;
        pNode->intValue = (int) strtol(yytext, NULL, 0);
        return INT;
    }
    {FLOAT} { 
        struct Node *pNode = createNode("FLOAT", 0, FLOAT_TYPE);
        yylval = pNode;
        pNode->floatValue = atof(yytext);
        return FLOAT; 
    }
    {SEMI} { 
        struct Node *pNode = createNode("SEMI", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return SEMI; 
    }
    {COMMA} { 
        struct Node *pNode = createNode("COMMA", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return COMMA; 
    }
    {ASSIGNOP} { 
        struct Node *pNode = createNode("ASSIGNOP", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return ASSIGNOP; 
    }
    {RELOP}  { 
        struct Node *pNode = createNode("RELOP", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return RELOP; 
    }
    {PLUS} { 
        struct Node *pNode = createNode("PLUS", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return PLUS; 
    }
    {MINUS} { 
        struct Node *pNode = createNode("MINUS", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return MINUS; 
    }
    {STAR} { 
        struct Node *pNode = createNode("STAR", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return STAR; 
    }
    {DIV} { 
        struct Node *pNode = createNode("DIV", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return DIV; 
    }
    {AND} { 
        struct Node *pNode = createNode("AND", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return AND; 
    }
    {OR} { 
        struct Node *pNode = createNode("OR", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return OR; 
    }
    {NOT} { 
        struct Node *pNode = createNode("NOT", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return NOT; 
    }
    {LP} { 
        struct Node *pNode = createNode("LP", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return LP; 
    }
    {RP} { 
        struct Node *pNode = createNode("RP", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return RP; 
    }
    {LB} { 
        struct Node *pNode = createNode("LB", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return LB; 
    }
    {RB} { 
        struct Node *pNode = createNode("RB", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return RB; 
    }
    {LC} { 
        struct Node *pNode = createNode("LC", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return LC; 
    }
    {RC}  { 
        struct Node *pNode = createNode("RC", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return RC; 
    }
    {STRUCT} { 
        struct Node *pNode = createNode("STRUCT", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return STRUCT; 
    }
    {RETURN} { 
        struct Node *pNode = createNode("RETURN", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return RETURN; 
    }
    {IF} { 
        struct Node *pNode = createNode("IF", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return IF; 
    }
    {ELSE} { 
        struct Node *pNode = createNode("ELSE", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return ELSE; 
    }
    {WHILE} { 
        struct Node *pNode = createNode("WHILE", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return WHILE; 
    }
    
    {ID} { 
        struct Node *pNode = createNode("ID", 0, STRING_TYPE);
        yylval = pNode;
        pNode->id_name = strdup(yytext);
        return ID; 
    }
    {LF} { yycolumn = 1; }                    /* newline: restart column counting */
    {OTHER} { }                               /* carriage return: nothing to do */
    {TAB} { yycolumn += TABLEN - yyleng; }    /* YY_USER_ACTION already added yyleng; a tab counts as TABLEN columns in total */
    {SPACE} { }                               /* YY_USER_ACTION already advanced the column by one */
    . {
        errors++;
        printf("Error type A at Line %d: Mysterious characters \'%s\'.\n", yylineno, yytext);
    }                                /* any character no other rule accepts is a lexical error */
    %% 
    
    
  2. syntax.y

    %{
        #include <stdio.h>
        #include <stdlib.h>
        #include "tree.h"
    
        extern int yylineno;
        void yyerror(char*);
        void myerror(char *msg);
        int yylex();
        int errors = 0;                 // 记录找出的语法错误次数
        #define YYSTYPE struct Node*    // 将所有的 token 类型设置为 Node*
    %}
    
    
    %token INT                         /* int 类型 */
    %token FLOAT                       /* float 类型 */
    %token TYPE                        /* TYPE 终结符 */
    %token LF                          /* 换行符 \n */
    %token ID                          /* 标识符 */ 
    %token SEMI COMMA DOT              /* 结束符号 ; , */
    %token ASSIGNOP RELOP              /* 比较赋值符号 = > < >= <= == != */
    %token PLUS MINUS STAR DIV         /* 运算符 + - * / */
    %token AND OR NOT                  /* 判断符号 && || ! */
    %token LP RP LB RB LC RC           /* 括号 ( ) [ ] { } */
    %token STRUCT                      /* struct */
    %token RETURN                      /* return */
    %token IF                          /* if */
    %token ELSE                        /* else */
    %token WHILE                       /* while */
    
    // 定义结合性和优先级次序
    %right ASSIGNOP
    %left OR
    %left AND
    %nonassoc RELOP
    %left PLUS MINUS
    %left STAR DIV
    %right NAGATE NOT
    %right DOT LP LB RP RB
    %nonassoc LOWER_THAN_ELSE
    %nonassoc ELSE
    
    %%
    
     /*-----------------------------------------|
     |          High-level Definitions          |
     |-----------------------------------------*/
    Program : ExtDefList {
            $$ = insertNode($1, "Program", $1->lineNum, NONTERMINAL);
        }
        ;
    
    ExtDefList : ExtDef ExtDefList {
            $$ = insertNode($1, "ExtDefList", @1.first_line, NONTERMINAL);
            $1->brother = $2;
        }
        | {
            $$ = insertNode(NULL, "ExtDefList", yylineno, NONTERMINAL);
        }
    
        ;
    
    ExtDef : Specifier ExtDecList SEMI {
            $$ = insertNode($1, "ExtDef", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
        | Specifier SEMI {
            $$ = insertNode($1, "ExtDef", @1.first_line, NONTERMINAL);
            $1->brother = $2;
        }
        | Specifier FunDec CompSt {
            $$ = insertNode($1, "ExtDef", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
    
    
        ;
    
    ExtDecList : VarDec {
            $$ = insertNode($1, "ExtDecList", @1.first_line, NONTERMINAL);
        }
        | VarDec COMMA ExtDecList {
            $$ = insertNode($1, "ExtDecList", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
    
    
        ;
    
     /*-----------------------------------------|
     |                Specifiers                |
     |-----------------------------------------*/
    Specifier : TYPE {
            $$ = insertNode($1, "Specifier", @1.first_line, NONTERMINAL);
        }
        | StructSpecifier {
            $$ = insertNode($1, "Specifier", @1.first_line, NONTERMINAL);
        }
        ;
    
    StructSpecifier : STRUCT OptTag LC DefList RC {
            $$ = insertNode($1, "StructSpecifier", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
            $3->brother = $4;
            $4->brother = $5;
        }
        | STRUCT Tag {
            $$ = insertNode($1, "StructSpecifier", @1.first_line, NONTERMINAL);
            $1->brother = $2;
        }
    
        ;
    
    OptTag : ID {
            $$ = insertNode($1, "OptTag", @1.first_line, NONTERMINAL);
        }
        | {
            $$ = insertNode(NULL, "OptTag", yylineno, NONTERMINAL);
        }
        ;
    
    Tag : ID {
            $$ = insertNode($1, "Tag", @1.first_line, NONTERMINAL);
        }
        ;
    
     /*-----------------------------------------|
     |               Declarators                |
     |-----------------------------------------*/
    VarDec : ID {
            $$ = insertNode($1, "VarDec", @1.first_line, NONTERMINAL);
        }
        | VarDec LB INT RB {
            $$ = insertNode($1, "VarDec", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
            $3->brother = $4;
        }
        ;
    
    FunDec : ID LP VarList RP {
            $$ = insertNode($1, "FunDec", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
            $3->brother = $4;
        }
        | ID LP RP {
            $$ = insertNode($1, "FunDec", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
        ;
    
    VarList : ParamDec COMMA VarList {
            $$ = insertNode($1, "VarList", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
        | ParamDec {
            $$ = insertNode($1, "VarList", @1.first_line, NONTERMINAL);
        }
        ;
    
    ParamDec : Specifier VarDec {
            $$ = insertNode($1, "ParamDec", @1.first_line, NONTERMINAL);
            $1->brother = $2;
        }
        ;
    
     /*-----------------------------------------|
     |                Statements                |
     |-----------------------------------------*/
    CompSt : LC DefList StmtList RC {
            $$ = insertNode($1, "CompSt", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
            $3->brother = $4;
        }
        ;
    
    StmtList : Stmt StmtList {
            $$ = insertNode($1, "StmtList", @1.first_line, NONTERMINAL);
            $1->brother = $2;
        }
        | {
            /* empty list: the node must still be labelled StmtList */
            $$ = insertNode(NULL, "StmtList", yylineno, NONTERMINAL);
        }
    
        ;
        
    Stmt : Exp SEMI {
            $$ = insertNode($1, "Stmt", @1.first_line, NONTERMINAL);
            $1->brother = $2;
        }
        | CompSt {
            $$ = insertNode($1, "Stmt", @1.first_line, NONTERMINAL);
        }
        | RETURN Exp SEMI {
            $$ = insertNode($1, "Stmt", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
        | IF LP Exp RP Stmt %prec LOWER_THAN_ELSE {
            /* %prec spells out the dangling-else choice: ELSE binds tighter, so it is shifted */
            $$ = insertNode($1, "Stmt", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
            $3->brother = $4;
            $4->brother = $5;
        }
        | IF LP Exp RP Stmt ELSE Stmt {
            $$ = insertNode($1, "Stmt", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
            $3->brother = $4;
            $4->brother = $5;
            $5->brother = $6;
            $6->brother = $7;
        }
        | WHILE LP Exp RP Stmt {
            $$ = insertNode($1, "Stmt", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
            $3->brother = $4;
            $4->brother = $5;
        }
        | error RC {
            /* Give the recovered statement a node of its own. Without an explicit
               assignment the parent would receive whatever value the error token
               carried and overwrite that node's brother link. */
            $$ = insertNode(NULL, "Stmt", yylineno, NONTERMINAL);
            myerror("error RC:Missing \";\"");
        }
    
        ;
    
     /*-----------------------------------------|
     |             Local Definitions            |
     |-----------------------------------------*/
    DefList : Def DefList {
            $$ = insertNode($1, "DefList", @1.first_line, NONTERMINAL);
            $1->brother = $2;
        }
        | {
            $$ = insertNode(NULL, "DefList", yylineno, NONTERMINAL);
        }
        ;
    
    Def : Specifier DecList SEMI {
            $$ = insertNode($1, "Def", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
    
        ;
    
    DecList : Dec {
            $$ = insertNode($1, "DecList", @1.first_line, NONTERMINAL);
        }
        | Dec COMMA DecList {
            $$ = insertNode($1, "DecList", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
    
        ;
    
    Dec : VarDec {
            $$ = insertNode($1, "Dec", @1.first_line, NONTERMINAL);
        }
        | VarDec ASSIGNOP Exp {
            $$ = insertNode($1, "Dec", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
        ;
    
     /*-----------------------------------------|
     |               Expressions                |
     |-----------------------------------------*/
    Exp : Exp ASSIGNOP Exp {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
        | Exp AND Exp {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
        | Exp OR Exp {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
        | Exp RELOP Exp {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
        | Exp PLUS Exp {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
        | Exp MINUS Exp {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
        | Exp STAR Exp {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
        | Exp DIV Exp {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
        | LP Exp RP {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
        | MINUS Exp %prec NAGATE {
            /* Unary minus takes the NAGATE level declared above STAR/DIV;
               without %prec it inherits binary MINUS and "-a*b" groups as -(a*b). */
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            $1->brother = $2;
        }
        | NOT Exp {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            $1->brother = $2;
        }
        | ID LP Args RP {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
            $3->brother = $4;
        }
        | ID LP RP {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
        | Exp LB Exp RB {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
            $3->brother = $4;
        }
        | Exp DOT ID {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
        | ID {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        }
        | INT {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        }
        | FLOAT {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
        }
        /* Error productions: each one still yields a proper Exp node, so the
           enclosing rule links a node of its own rather than a token's node. */
        | error RB {
            $$ = insertNode(NULL, "Exp", yylineno, NONTERMINAL);
            myerror("Missing \"]\"");
        }
        | error INT {
            $$ = insertNode(NULL, "Exp", yylineno, NONTERMINAL);
            myerror("error INT:Missing \"]\"");
        }
        | FLOAT error ID {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            myerror("Syntax error.");
        }
        | INT error ID {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            myerror("INT error ID:Missing \";\"");
        }
        | INT error INT {
            $$ = insertNode($1, "Exp", @1.first_line, NONTERMINAL);
            myerror("INT error INT:Missing \";\"");
        }
    
        
        ;
    
    Args : Exp COMMA Args {
            /* argument list nodes are labelled Args, not CompSt */
            $$ = insertNode($1, "Args", @1.first_line, NONTERMINAL);
            $1->brother = $2;
            $2->brother = $3;
        }
        | Exp {
            $$ = insertNode($1, "Args", @1.first_line, NONTERMINAL);
        }
        ;
    
    %%
    
    #include "lex.yy.c"
    
    int main(int argc, char **argv) {
        if (argc <= 1) return 1;
        FILE *f = fopen(argv[1], "r");
        if (!f) {
            perror(argv[1]);
            return 1;
        }
        yylineno = 1;
        yyrestart(f);
        yyparse();
    
        // 输出语法树
        f = fopen("output.txt", "w");
        if (!f) {   
            perror(argv[1]);
            return 1;
        }
        if (errors == 0) {
            f = fopen("output.txt", "w");
            printTree(head, 0, f);
        }
    
        return 0;
    }
    
    // Error hook called by the generated parser. The body is intentionally
    // empty: the message is ignored and nothing is counted here. Reporting and
    // counting are done by myerror(), which the grammar's error productions call.
    void yyerror(char *msg)
    {
        // fprintf(stderr, "Error type B at Line %d: %s\n", yylineno,  msg);
        //  printf( "%d: %s\n", yylineno,  msg);
        // errors++;
    }
    
    // Custom error reporter used by the grammar's error productions:
    // counts the error and prints the message with the current line to stderr.
    void myerror(char *msg)
    {
        errors += 1;
        fprintf(stderr, "Error type B at Line %d: %s \n", yylineno,  msg);
    }
    
设计思想 (1)程序主体结构部分: 说明部分 %% 规则部分 %% 辅助程序部分 (2)主体结构的说明 在这里说明部分告诉我们使用的LETTER,DIGIT, IDENT(标识符,通常定义为字母开头的字母数字串)和STR(字符串常量,通常定义为双引号括起来的一串字符)是什么意思.这部分也可以包含一些初始化代码.例如用#include来使用标准的头文件和前向说明(forward ,references).这些代码应该再标记"%{"和"%}"之间;规则部分>可以包括任何你想用来分析的代码;我们这里包括了忽略所有注释中字符的功能,传送ID名称和字符串常量内容到主调函数和main函数的功能. (3)实现原理 程序中先判断这个句语句中每个单元为关键字、常数、运算符、界符,对与不同的单词符号给出不同编码形式的编码,用以区分之。 PL/0语言的EBNF表示 <常量定义>::=<标识符>=<无符号整数>; <标识符>::=<字母>={<字母>|<数字>}; <加法运算符>::=+|- <乘法运算符>::=*|/ <关系运算符>::==|#|<|<=|>|>= <字母>::=a|b|…|X|Y|Z <数字>::=0|1|2|…|8|9 三:设计过程 1. 关键字:void,main,if,then,break,int,Char,float,include,for,while,printfscanf 并为小写。 2."+”;”-”;”*”;”/”;”:=“;”:”;”<“;”<=“;”>“;”>=“;”<>“;”=“;”(“;”)”;”;”;”#”为运算符。 3. 其他标记 如字符串,表示以字母开头的标识符。 4. 空格符跳过。 5. 各符号对应种别码 关键字分别对应1-13 运算符分别对应401-418,501-513。 字符串对应100 常量对应200 结束符# 四:举例说明 目标:实现对常量的判别 代码: digit [0-9] letter [A-Za-z] other_char [!-@\[-~] id ({letter}|[_])({letter}|{digit}|[_])* string {({letter}|{digit}|{other_char})+} int_num {digit}+ %% [ |\t|\n]+ "auto"|"double"|"int"|"struct"|"break"|"else"|"long"|"switch"|"case"|"enum"|"register"|"typedef"|"char"|"extern"|"return"|"union"|"const"|"float"|"short"|"unsigned"|"continue"|"for"|"signed"|"void"|"default"|"goto"|"sizeof"|"do"|"if"|"static"|"while"|"main" {Upper(yytext,yyleng);printf("%s,NULL\n",yytext);} \"([!-~])*\" {printf("CONST_string,%s\n",yytext);} -?{int_num}[.]{int_num}?([E][+|-]?{int_num})? 
{printf("CONST_real,%s\n",yytext);} "0x"?{int_num} {printf("CONST_int,%s\n",yytext);} ","|";"|"("|")"|"{"|"}"|"["|"]"|"->"|"."|"!"|"~"|"++"|"--"|"*"|"&"|"sizeof"|"/"|"%"|"+"|"-"|">"|"<"|">="|"<="|"=="|"!="|"&"|"^"|"|"|"&"|"||"|"+="|"-="|"*="|"/="|"%="|">>="|"<<="|"&="|"^="|"|="|"=" {printf("%s,NULL\n",yytext);} {id} {printf("ID,%s\n",yytext);} {digit}({letter})+ {printf("error1:%s\n",yytext);} %% #include <ctype.h> Upper(char *s,int l) { int i; for(i=0;i<l;i++) { s[i]=toupper(s[i]); } } yywrap() { return 1; } 五:DFA 六:数据测试 七:心得体会 其实匹配并不困难,主要是C++知识要求相对较高,只要把握住指针就好了。 附源程序: #include<iostream.h> #include<stdio.h> #include<stdlib.h> #include<string.h> int i,j,k,flag,number,status; /*status which is use to judge the string is keywords or not!*/ char ch; char words[10] = {" "}; char program[500]; int Scan(char program[]) { char *keywords[13] = {"void","main","if","then","break","int", "char","float","include","for","while","printf", "scanf"}; number = 0; status = 0; j = 0; ch = program[i++]; /* To handle the lettle space ands tab*/ /*handle letters*/ if ((ch >= 'a') && (ch <= 'z' )) { while ((ch >= 'a') && (ch <= 'z' )) { words[j++]=ch; ch=program[i++]; } i--; words[j++] = '\0'; for (k = 0; k < 13; k++) if (strcmp (words,keywords[k]) == 0) switch(k) { case 0:{ flag = 1; status = 1; break; } case 1:{ flag = 2; status = 1; break; } case 2:{ flag = 3; status = 1; break; } case 3:{ flag = 4; status = 1; break; } case 4:{ flag = 5; status = 1; break; } case 5:{ flag = 6; status = 1; break; } case 6:{ flag = 7; status = 1; break; } case 7:{ flag = 8; status = 1; break; } case 8:{ flag = 9; status = 1; break; } case 9:{ flag = 10; status = 1; break; } case 10:{ flag = 11; status = 1; break; } case 11:{ flag = 12; status = 1; break; } case 12:{ flag = 13; status = 1; break; } } if (status == 0) { flag = 100; } } /*handle digits*/ else if ((ch >= '0') && (ch <= '9')) { number = 0; while ((ch >= '0' ) && (ch <= '9' )) { number = number*10+(ch-'0'); ch = program[i++]; } flag = 200; 
i--; } /*opereation and edge handle*/ else switch (ch) { case '=':{ if (ch == '=') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 401; } else { i--; flag = 402; } break; } case'>':{ if (ch == '>') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 403; } else { i--; flag = 404; } break; } case'<':{ if (ch == '<') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 405; } else { i--; flag = 406; } break; } case'!':{ if (ch == '!') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 407; } else { i--; flag = 408; } break; } case'+':{ if (ch == '+') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 409; } else if (ch == '+') { words[j++] = ch; words[j] = '\0'; flag = 410; } else { i--; flag = 411; } break; } case'-':{ if (ch == '-') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 412; } else if( ch == '-') { words[j++] = ch; words[j] = '\0'; flag = 413; } else { i--; flag = 414; } break; } case'*':{ if (ch == '*') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 415; } else { i--; flag = 416; } break; } case'/':{ if (ch == '/') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 417; } else { i--; flag = 418; } break; } case';':{ words[j] = ch; words[j+1] = '\0'; flag = 501; break; } case'(':{ words[j] = ch; words[j+1] = '\0'; flag = 502; break; } case')':{ words[j] = ch; words[j+1] = '\0'; flag = 503; break; } case'[':{ words[j] = ch; words[j+1] = '\0'; flag = 504; break; } case']':{ words[j] = ch; words[j+1] = '\0'; flag = 505; break; } case'{':{ words[j] = ch; words[j+1] = '\0'; flag = 
506; break; } case'}':{ words[j] = ch; words[j+1] = '\0'; flag = 507; break; } case':':{ words[j] = ch; words[j+1] = '\0'; flag = 508; break; } case'"':{ words[j] = ch; words[j+1] = '\0'; flag = 509; break; } case'%':{ if (ch == '%') words[j++] = ch; words[j] = '\0'; ch = program[i++]; if (ch == '=') { words[j++] = ch; words[j] = '\0'; flag = 510; } else { i--; flag = 511; } break; } case',':{ words[j] = ch; words[j+1] = '\0'; flag = 512; break; } case'#':{ words[j] = ch; words[j+1] = '\0'; flag = 513; break; } case'@':{ words[j] = '#'; flag = 0; break; } default:{ flag = -1; break; } } return flag; } main() { i=0; printf("please input a program end with @"); do { ch = getchar(); program[i++] = ch; }while(ch != '@'); i = 0; do{ flag = Scan(program); if (flag == 200) { printf("(%2d,%4d)",flag,number); } else if (flag == -1) { printf("(%d,error)",flag); } else { printf("(%2d,%4s)",flag,words); } }while (flag != 0); system("pause"); }
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

蔗理苦

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值